├── app ├── agent │ ├── __init__.py │ ├── react.py │ ├── swe.py │ ├── toolcall.py │ ├── base.py │ └── manus.py ├── flow │ ├── __init__.py │ ├── flow_factory.py │ ├── base.py │ └── planning.py ├── prompt │ ├── __init__.py │ ├── toolcall.py │ ├── planning.py │ ├── swe.py │ └── manus.py ├── __init__.py ├── tool │ ├── __init__.py │ ├── tool_collection.py │ ├── run.py │ ├── file_saver.py │ ├── google_search.py │ ├── terminate.py │ ├── bash.py │ ├── python_execute.py │ ├── planning.py │ ├── create_chat_completion.py │ ├── str_replace_editor.py │ ├── browser_use_tool.py │ ├── base.py │ ├── markdown_generator.py │ └── firecrawl_research.py ├── exceptions.py ├── logger.py ├── llm.py ├── config.py └── schema.py ├── .DS_Store ├── main.py ├── requirements.txt ├── .gitignore ├── config └── config.example.toml ├── setup.py ├── run_flow.py ├── README.md └── requirement.md /app/agent/__init__.py: -------------------------------------------------------------------------------- 1 | -------------------------------------------------------------------------------- /app/flow/__init__.py: -------------------------------------------------------------------------------- 1 | -------------------------------------------------------------------------------- /app/prompt/__init__.py: -------------------------------------------------------------------------------- 1 | -------------------------------------------------------------------------------- /.DS_Store: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/manus-pro/open-manus/HEAD/.DS_Store -------------------------------------------------------------------------------- /app/__init__.py: -------------------------------------------------------------------------------- 1 | """ 2 | OpenManus Python Agent Package 3 | """ 4 | import os 5 | import sys 6 | from app.logger import get_logger 7 | 8 | # Set up logger 9 | logger = get_logger("open-manus") 10 | 11 | # Import and initialize 
tool registry 12 | from app.tool.tool_collection import registry 13 | 14 | # Log that tools are registered 15 | logger.info(f"Registered {len(registry.tools)} tools in the tool registry") 16 | -------------------------------------------------------------------------------- /app/prompt/toolcall.py: -------------------------------------------------------------------------------- 1 | TOOLCALL_PROMPT = """ 2 | You are a helpful assistant that can use tools to complete tasks. 3 | You have access to the following tools: 4 | 5 | {tools} 6 | 7 | Given the task below, describe how you would approach it using the available tools. 8 | For each step, specify which tool you would use and why. 9 | 10 | Task: {task} 11 | 12 | Please provide a detailed response with your approach to completing this task. 13 | """ 14 | -------------------------------------------------------------------------------- /main.py: -------------------------------------------------------------------------------- 1 | import asyncio 2 | 3 | from app.agent.manus import Manus 4 | from app.logger import get_logger 5 | 6 | logger = get_logger("open-manus") 7 | 8 | 9 | async def main(): 10 | agent = Manus() 11 | while True: 12 | try: 13 | prompt = input("Enter your prompt (or 'quit' to quit): ") 14 | if prompt.lower() == "quit": 15 | logger.info("Goodbye!") 16 | break 17 | logger.warning("Processing your request...") 18 | await agent.run(prompt) 19 | except KeyboardInterrupt: 20 | logger.warning("Goodbye!") 21 | break 22 | 23 | 24 | if __name__ == "__main__": 25 | asyncio.run(main()) -------------------------------------------------------------------------------- /requirements.txt: -------------------------------------------------------------------------------- 1 | # Core dependencies 2 | langchain>=0.1.0 3 | pydantic>=1.10.8 4 | reportlab>=4.0.0 5 | wheel>=0.45.1 6 | packaging>=24.2 7 | 8 | # LLM providers 9 | openai>=1.6.0 10 | anthropic>=0.6.0 # Optional: for Claude support 11 | 12 | # PDF generation 13 | 
reportlab>=4.0.0 14 | 15 | # Visualization (optional) 16 | matplotlib>=3.7.0 # Optional: for generating visualizations 17 | numpy>=1.24.0 # Optional: for data manipulation 18 | 19 | # Utilities 20 | python-dotenv>=1.0.0 21 | requests>=2.31.0 22 | 23 | # Web Research and Browser Automation 24 | firecrawl-py>=0.1.0 # For web crawling and research 25 | selenium>=4.10.0 # For browser automation 26 | webdriver-manager>=3.8.5 # For managing webdriver installations 27 | -------------------------------------------------------------------------------- /.gitignore: -------------------------------------------------------------------------------- 1 | # Byte-compiled / optimized / DLL files 2 | __pycache__/ 3 | *.py[cod] 4 | *$py.class 5 | 6 | artifacts/ 7 | # C extensions 8 | *.so 9 | 10 | # Distribution / packaging 11 | .Python 12 | build/ 13 | develop-eggs/ 14 | dist/ 15 | downloads/ 16 | eggs/ 17 | .eggs/ 18 | lib/ 19 | lib64/ 20 | parts/ 21 | sdist/ 22 | var/ 23 | wheels/ 24 | *.egg-info/ 25 | .installed.cfg 26 | *.egg 27 | 28 | # Virtual environments 29 | venv/ 30 | env/ 31 | ENV/ 32 | .venv/ 33 | .env/ 34 | .python-version 35 | 36 | # IDE files 37 | .idea/ 38 | .vscode/ 39 | *.swp 40 | *.swo 41 | 42 | # Configuration 43 | config/config.toml 44 | 45 | # Environment variables 46 | .env 47 | 48 | # Logs 49 | logs/ 50 | *.log 51 | 52 | # Generated files 53 | output/ 54 | temp/ -------------------------------------------------------------------------------- /app/prompt/planning.py: -------------------------------------------------------------------------------- 1 | PLANNING_PROMPT = """ 2 | You are a planning assistant that helps break down tasks into clear, actionable steps. 3 | Given the following task, create a step-by-step plan to accomplish it. 4 | 5 | Task: {task} 6 | 7 | Please provide a detailed plan with specific steps. Each step should be clear and actionable. 8 | Focus on what needs to be done, not how to do it. 
9 | """ 10 | 11 | EXECUTION_PROMPT = """ 12 | You are an execution assistant that helps carry out plans. 13 | You have the following plan to execute: 14 | 15 | {plan} 16 | 17 | Your task is to execute each step of the plan in order. 18 | For each step, describe what you're doing and provide the result. 19 | If a step cannot be completed, explain why and suggest an alternative approach. 20 | """ 21 | -------------------------------------------------------------------------------- /config/config.example.toml: -------------------------------------------------------------------------------- 1 | [api] 2 | openai_api_key = "your-openai-api-key" 3 | firecrawl_api_key = "your-firecrawl-api-key" 4 | 5 | [llm] 6 | model = "gpt-4" 7 | temperature = 0.7 8 | max_tokens = 4000 9 | timeout = 120 10 | 11 | [agent] 12 | default_agent = "manus" # Options: "manus", "react", "planning", "swe" 13 | 14 | [browser] 15 | headless = true 16 | timeout = 30 17 | webdriver = "chrome" # Options: "chrome", "firefox", "edge", "safari" 18 | 19 | [document] 20 | pdf_output_dir = "./output/pdf" 21 | markdown_output_dir = "./output/markdown" 22 | code_output_dir = "./output/code" 23 | 24 | [logging] 25 | level = "INFO" # Options: "DEBUG", "INFO", "WARNING", "ERROR", "CRITICAL" 26 | file = "./logs/app.log" 27 | rotation = "1 day" 28 | format = "%(asctime)s - %(name)s - %(levelname)s - %(message)s" 29 | -------------------------------------------------------------------------------- /setup.py: -------------------------------------------------------------------------------- 1 | from setuptools import setup, find_packages 2 | 3 | with open("README.md", "r", encoding="utf-8") as fh: 4 | long_description = fh.read() 5 | 6 | with open("requirements.txt", "r", encoding="utf-8") as fh: 7 | requirements = fh.read().splitlines() 8 | 9 | setup( 10 | name="open-manus", 11 | version="0.1.0", 12 | author="OpenManus Team", 13 | author_email="example@example.com", 14 | description="A modular and extensible Python 
agent system", 15 | long_description=long_description, 16 | long_description_content_type="text/markdown", 17 | url="https://github.com/mannaandpoem/OpenManus", 18 | packages=find_packages(), 19 | classifiers=[ 20 | "Programming Language :: Python :: 3", 21 | "License :: OSI Approved :: MIT License", 22 | "Operating System :: OS Independent", 23 | ], 24 | python_requires=">=3.8", 25 | install_requires=requirements, 26 | entry_points={ 27 | "console_scripts": [ 28 | "open-manus=main:main", 29 | "open-manus-flow=run_flow:main", 30 | ], 31 | }, 32 | ) 33 | -------------------------------------------------------------------------------- /app/flow/flow_factory.py: -------------------------------------------------------------------------------- 1 | from typing import Dict, Type, Any 2 | 3 | from app.config import Config 4 | from app.flow.base import BaseFlow 5 | from app.flow.planning import PlanningFlow 6 | from app.logger import get_logger 7 | 8 | # Registry of available flows 9 | FLOW_REGISTRY: Dict[str, Type[BaseFlow]] = { 10 | "planning": PlanningFlow, 11 | # Add more flows here as they are implemented 12 | } 13 | 14 | def create_flow(flow_name: str, config: Config) -> BaseFlow: 15 | """ 16 | Create a flow instance by name. 
17 | 18 | Args: 19 | flow_name (str): Name of the flow to create 20 | config (Config): Configuration object 21 | 22 | Returns: 23 | BaseFlow: Flow instance 24 | 25 | Raises: 26 | ValueError: If flow_name is not found in the registry 27 | """ 28 | logger = get_logger("flow_factory") 29 | 30 | # Check if flow exists in registry 31 | flow_class = FLOW_REGISTRY.get(flow_name) 32 | if not flow_class: 33 | logger.error(f"Flow '{flow_name}' not found in registry") 34 | raise ValueError(f"Unknown flow: {flow_name}") 35 | 36 | # Create flow instance 37 | logger.debug(f"Creating flow '{flow_name}'") 38 | flow = flow_class() 39 | 40 | return flow 41 | -------------------------------------------------------------------------------- /app/flow/base.py: -------------------------------------------------------------------------------- 1 | from abc import ABC, abstractmethod 2 | from typing import Any, Dict, Optional 3 | 4 | from app.logger import get_logger 5 | from app.schema import TaskOutput 6 | 7 | 8 | class BaseFlow(ABC): 9 | """Base class for all flows in the system.""" 10 | 11 | def __init__(self, name: str): 12 | """ 13 | Initialize a flow. 14 | 15 | Args: 16 | name (str): Flow name 17 | """ 18 | self.name = name 19 | self.logger = get_logger(f"flow.{name}") 20 | 21 | @abstractmethod 22 | def _run(self, **kwargs) -> TaskOutput: 23 | """ 24 | Execute the flow with the given arguments. 25 | 26 | Args: 27 | **kwargs: Flow-specific arguments 28 | 29 | Returns: 30 | TaskOutput: Result of the flow execution 31 | """ 32 | pass 33 | 34 | def run(self, **kwargs) -> TaskOutput: 35 | """ 36 | Run the flow with error handling. 
37 | 38 | Args: 39 | **kwargs: Flow-specific arguments 40 | 41 | Returns: 42 | TaskOutput: Result of the flow execution 43 | """ 44 | try: 45 | self.logger.info(f"Running flow '{self.name}'") 46 | result = self._run(**kwargs) 47 | self.logger.info(f"Flow '{self.name}' completed successfully") 48 | return result 49 | except Exception as e: 50 | error_msg = f"Error running flow '{self.name}': {str(e)}" 51 | self.logger.error(error_msg) 52 | 53 | # Create error output 54 | return TaskOutput( 55 | success=False, 56 | error=error_msg 57 | ) 58 | -------------------------------------------------------------------------------- /app/prompt/swe.py: -------------------------------------------------------------------------------- 1 | SWE_PROMPT = """ 2 | You are an expert software engineering assistant specializing in writing high-quality, production-ready code. 3 | Your expertise spans multiple programming languages, frameworks, and best practices in software development. 4 | 5 | WHEN GENERATING CODE: 6 | - Write clean, well-structured, and maintainable code following language-specific conventions 7 | - Include comprehensive error handling and edge case management 8 | - Add clear, detailed comments and documentation (including function/method docstrings) 9 | - Follow industry best practices and design patterns appropriate for the task 10 | - Use proper naming conventions for variables, functions, and classes 11 | - Implement robust validation for inputs and proper error messaging 12 | - Include unit tests when appropriate 13 | - Optimize for both readability and performance 14 | 15 | WHEN DOCUMENTING CODE: 16 | - Explain the purpose and functionality of the code clearly 17 | - Document parameters, return values, and exceptions/errors 18 | - Include usage examples where appropriate 19 | - Provide context about design decisions and alternatives considered 20 | 21 | WHEN DEBUGGING: 22 | - Analyze problems systematically and methodically 23 | - Consider common failure patterns 
and edge cases 24 | - Provide clear explanations of the issues and their solutions 25 | - Suggest improvements beyond just fixing the immediate problem 26 | 27 | You have access to the following tools: 28 | 1. Code Generator - Generate high-quality code based on detailed requirements 29 | 2. Markdown Generator - Create well-structured documentation 30 | 3. Bash - Execute shell commands in a controlled environment 31 | 4. Python Execute - Run and test Python code 32 | 5. File Saver - Persist code and documentation to the filesystem 33 | 34 | Task: {task} 35 | 36 | Respond with a comprehensive solution that demonstrates software engineering excellence. 37 | """ 38 | -------------------------------------------------------------------------------- /app/prompt/manus.py: -------------------------------------------------------------------------------- 1 | SYSTEM_PROMPT = """ 2 | You are OpenManus, an advanced AI agent designed to help users with various tasks. 3 | You can generate high-quality documents, automate browser tasks, conduct web research, and generate well-structured code. 4 | 5 | You have access to the following tools: 6 | 1. PDF Generator - Generate well-formatted PDF documents from Markdown content 7 | 2. Markdown Generator - Generate structured Markdown documents with proper formatting 8 | 3. Browser - Automate browser tasks using Selenium 9 | 4. Web Research - Conduct comprehensive web research using the Firecrawl API 10 | 5. 
Code Generator - Generate clean, well-documented code following best practices 11 | 12 | When a user asks you to perform a task, follow these guidelines: 13 | 14 | FOR DOCUMENT GENERATION: 15 | - Always use proper Markdown formatting for document content 16 | - Include clear headings, subheadings, and structured sections 17 | - Format code examples using triple backticks with language specification 18 | - Use bullet points and numbered lists appropriately 19 | - Include a clear title and table of contents when appropriate 20 | 21 | FOR CODE GENERATION: 22 | - Write clean, well-documented code following language-specific conventions 23 | - Include comprehensive error handling 24 | - Add detailed comments and docstrings 25 | - Follow best practices for the specific programming language 26 | - Ensure code is properly indented and formatted 27 | - Include sample usage examples where appropriate 28 | 29 | FOR RESEARCH TASKS: 30 | - Provide comprehensive information from multiple sources 31 | - Structure research findings with clear organization 32 | - Include citations or references to sources 33 | - Summarize findings in a coherent manner 34 | 35 | Always be precise, accurate, and thorough in your responses. 36 | Focus on delivering high-quality outputs that meet the user's needs. 37 | If you're unsure about any aspect of the task, ask clarifying questions. 38 | """ 39 | -------------------------------------------------------------------------------- /app/agent/react.py: -------------------------------------------------------------------------------- 1 | from typing import Any, Dict, List, Optional 2 | 3 | from app.agent.base import BaseAgent 4 | from app.schema import AgentType, TaskInput, TaskOutput 5 | from app.llm import llm_manager 6 | 7 | 8 | class ReactAgent(BaseAgent): 9 | """ 10 | React agent that uses the ReAct (Reasoning and Acting) approach. 11 | """ 12 | 13 | def __init__(self, tools: Optional[List[str]] = None): 14 | """ 15 | Initialize the React agent. 
16 | 17 | Args: 18 | tools (List[str], optional): List of tool names to use 19 | """ 20 | super().__init__(name=AgentType.REACT.value, tools=tools) 21 | 22 | # Define default tools if none provided 23 | if not tools: 24 | default_tools = [ 25 | "pdf_generator", 26 | "markdown_generator", 27 | "browser", 28 | "firecrawl_research", 29 | "code_generator" 30 | ] 31 | for tool_name in default_tools: 32 | self.add_tool(tool_name) 33 | 34 | def _run(self, task_input: TaskInput) -> TaskOutput: 35 | """ 36 | Execute the React agent with the given task input. 37 | 38 | Args: 39 | task_input (TaskInput): Task input 40 | 41 | Returns: 42 | TaskOutput: Task output 43 | """ 44 | # This is a placeholder implementation 45 | # In a real implementation, this would use the ReAct approach 46 | # with thought-action-observation cycles 47 | 48 | self.logger.info(f"React agent received task: {task_input.task_description}") 49 | 50 | # For now, just return a simple response 51 | response = f"I would process the task: {task_input.task_description} using the ReAct approach, but this is a placeholder implementation." 52 | 53 | # Create output 54 | output = TaskOutput( 55 | success=True, 56 | result=response, 57 | conversation=task_input.conversation, 58 | metadata={"tools_available": [t for t in self.tools.keys()]} 59 | ) 60 | 61 | return output 62 | -------------------------------------------------------------------------------- /app/tool/__init__.py: -------------------------------------------------------------------------------- 1 | """ 2 | Tool module for OpenManus Python Agent. 
3 | """ 4 | from app.tool.base import BaseTool, ToolRegistry 5 | from app.tool.file_saver import FileSaverTool 6 | from app.tool.browser_use_tool import BrowserTool 7 | 8 | # Try/except imports for tools that have external dependencies 9 | registry = ToolRegistry() 10 | 11 | # Register core tools 12 | registry.register(FileSaverTool()) 13 | registry.register(BrowserTool()) 14 | 15 | # Try to import and register other tools with external dependencies 16 | try: 17 | from app.tool.pdf_generator import PDFGeneratorTool, create_pdf_generator_from_input 18 | registry.register(PDFGeneratorTool()) 19 | except ImportError as e: 20 | print(f"Warning: Could not import PDFGeneratorTool: {e}") 21 | 22 | try: 23 | from app.tool.markdown_generator import MarkdownGeneratorTool, create_markdown_from_input 24 | registry.register(MarkdownGeneratorTool()) 25 | except ImportError as e: 26 | print(f"Warning: Could not import MarkdownGeneratorTool: {e}") 27 | 28 | try: 29 | from app.tool.code_generator import CodeGeneratorTool, generate_code_from_input 30 | registry.register(CodeGeneratorTool()) 31 | except ImportError as e: 32 | print(f"Warning: Could not import CodeGeneratorTool: {e}") 33 | 34 | try: 35 | from app.tool.firecrawl_research import FirecrawlResearchTool, conduct_web_research 36 | registry.register(FirecrawlResearchTool()) 37 | except ImportError as e: 38 | print(f"Warning: Could not import FirecrawlResearchTool: {e}") 39 | 40 | try: 41 | from app.tool.google_search import GoogleSearchTool 42 | registry.register(GoogleSearchTool()) 43 | except ImportError as e: 44 | print(f"Warning: Could not import GoogleSearchTool: {e}") 45 | 46 | __all__ = [ 47 | 'BaseTool', 'ToolRegistry', 'registry', 48 | 'FileSaverTool', 'BrowserTool', 49 | # These may or may not be available depending on imports 50 | 'MarkdownGeneratorTool', 'PDFGeneratorTool', 'CodeGeneratorTool', 51 | 'FirecrawlResearchTool', 'GoogleSearchTool', 'CreateChatCompletionTool', 52 | # Functions for direct usage 53 | 
'create_markdown_from_input', 'create_pdf_generator_from_input', 54 | 'generate_code_from_input', 'conduct_web_research' 55 | ] 56 | -------------------------------------------------------------------------------- /app/agent/swe.py: -------------------------------------------------------------------------------- 1 | from typing import Any, Dict, List, Optional 2 | 3 | from app.agent.base import BaseAgent 4 | from app.schema import AgentType, TaskInput, TaskOutput 5 | from app.llm import llm_manager 6 | 7 | 8 | class SWEAgent(BaseAgent): 9 | """ 10 | Software Engineering agent specialized in code generation and software development tasks. 11 | """ 12 | 13 | def __init__(self, tools: Optional[List[str]] = None): 14 | """ 15 | Initialize the SWE agent. 16 | 17 | Args: 18 | tools (List[str], optional): List of tool names to use 19 | """ 20 | super().__init__(name=AgentType.SWE.value, tools=tools) 21 | 22 | # Define default tools if none provided 23 | if not tools: 24 | default_tools = [ 25 | "code_generator", 26 | "markdown_generator", 27 | "bash", 28 | "python_execute", 29 | "file_saver" 30 | ] 31 | for tool_name in default_tools: 32 | self.add_tool(tool_name) 33 | 34 | def _run(self, task_input: TaskInput) -> TaskOutput: 35 | """ 36 | Execute the SWE agent with the given task input. 37 | 38 | Args: 39 | task_input (TaskInput): Task input 40 | 41 | Returns: 42 | TaskOutput: Task output 43 | """ 44 | # This is a placeholder implementation 45 | # In a real implementation, this would use specialized software engineering 46 | # capabilities to generate and execute code 47 | 48 | self.logger.info(f"SWE agent received task: {task_input.task_description}") 49 | 50 | # For now, just return a simple response 51 | response = f"I would process the software engineering task: {task_input.task_description}, but this is a placeholder implementation." 
52 | 53 | # Create output 54 | output = TaskOutput( 55 | success=True, 56 | result=response, 57 | conversation=task_input.conversation, 58 | metadata={"tools_available": [t for t in self.tools.keys()]} 59 | ) 60 | 61 | return output 62 | -------------------------------------------------------------------------------- /app/tool/tool_collection.py: -------------------------------------------------------------------------------- 1 | """ 2 | Tool collection module for registering all available tools in the system. 3 | """ 4 | from app.tool.base import ToolRegistry 5 | from app.logger import get_logger 6 | 7 | logger = get_logger("tool_collection") 8 | 9 | # Import core tools that should always be available 10 | from app.tool.file_saver import FileSaverTool 11 | try: 12 | from app.tool.browser_use_tool import BrowserTool 13 | except ImportError: 14 | logger.warning("Unable to import BrowserTool") 15 | BrowserTool = None 16 | 17 | # Registry singleton 18 | registry = ToolRegistry() 19 | 20 | def register_all_tools(): 21 | """Register all available tools in the tool registry.""" 22 | # Clear existing tools to avoid duplicates 23 | registry.clear() 24 | 25 | # Register core tools 26 | registry.register(FileSaverTool()) 27 | if BrowserTool: 28 | registry.register(BrowserTool()) 29 | 30 | # Try to register tools with external dependencies 31 | try_register_tool("app.tool.pdf_generator", "PDFGeneratorTool") 32 | try_register_tool("app.tool.markdown_generator", "MarkdownGeneratorTool") 33 | try_register_tool("app.tool.code_generator", "CodeGeneratorTool") 34 | try_register_tool("app.tool.firecrawl_research", "FirecrawlResearchTool") 35 | try_register_tool("app.tool.google_search", "GoogleSearchTool") 36 | try_register_tool("app.tool.planning", "PlanningTool") 37 | 38 | # Log registered tools 39 | logger.info(f"Registered {len(registry.tools)} tools") 40 | 41 | return registry 42 | 43 | def try_register_tool(module_path, class_name): 44 | """Try to import and register a tool, 
handling any import errors gracefully.""" 45 | try: 46 | module = __import__(module_path, fromlist=[class_name]) 47 | tool_class = getattr(module, class_name) 48 | registry.register(tool_class()) 49 | logger.debug(f"Registered tool: {class_name}") 50 | except (ImportError, AttributeError) as e: 51 | logger.warning(f"Failed to register {class_name}: {str(e)}") 52 | 53 | # Log successful registrations 54 | if class_name == "CodeGeneratorTool" and class_name in registry.tools: 55 | logger.info("Successfully registered CodeGeneratorTool") 56 | 57 | def get_tool_registry(): 58 | """Get the tool registry with all tools registered.""" 59 | return registry 60 | 61 | # Initialize the tool registry when the module is imported 62 | register_all_tools() 63 | -------------------------------------------------------------------------------- /app/exceptions.py: -------------------------------------------------------------------------------- 1 | class OpenManusError(Exception): 2 | """Base exception for OpenManus errors.""" 3 | 4 | def __init__(self, message="An error occurred in OpenManus"): 5 | self.message = message 6 | super().__init__(self.message) 7 | 8 | 9 | class ConfigError(OpenManusError): 10 | """Exception raised for configuration errors.""" 11 | 12 | def __init__(self, message="Configuration error"): 13 | super().__init__(message) 14 | 15 | 16 | class LLMError(OpenManusError): 17 | """Exception raised for LLM-related errors.""" 18 | 19 | def __init__(self, message="LLM error"): 20 | super().__init__(message) 21 | 22 | 23 | class ToolError(OpenManusError): 24 | """Exception raised for tool-related errors.""" 25 | 26 | def __init__(self, message="Tool error"): 27 | super().__init__(message) 28 | 29 | 30 | class AgentError(OpenManusError): 31 | """Exception raised for agent-related errors.""" 32 | 33 | def __init__(self, message="Agent error"): 34 | super().__init__(message) 35 | 36 | 37 | class BrowserError(OpenManusError): 38 | """Exception raised for browser automation 
errors.""" 39 | 40 | def __init__(self, message="Browser automation error"): 41 | super().__init__(message) 42 | 43 | 44 | class DocumentGenerationError(OpenManusError): 45 | """Exception raised for document generation errors.""" 46 | 47 | def __init__(self, message="Document generation error"): 48 | super().__init__(message) 49 | 50 | 51 | class CodeGenerationError(OpenManusError): 52 | """Exception raised for code generation errors.""" 53 | 54 | def __init__(self, message="Code generation error"): 55 | super().__init__(message) 56 | 57 | 58 | class WebResearchError(OpenManusError): 59 | """Exception raised for web research errors.""" 60 | 61 | def __init__(self, message="Web research error"): 62 | super().__init__(message) 63 | 64 | 65 | class APIKeyError(OpenManusError): 66 | """Exception raised for API key-related errors.""" 67 | 68 | def __init__(self, message="API key error"): 69 | super().__init__(message) 70 | 71 | 72 | class ValidationError(OpenManusError): 73 | """Exception raised for validation errors.""" 74 | 75 | def __init__(self, message="Validation error"): 76 | super().__init__(message) 77 | 78 | 79 | class FileOperationError(OpenManusError): 80 | """Exception raised for file operation errors.""" 81 | 82 | def __init__(self, message="File operation error"): 83 | super().__init__(message) 84 | -------------------------------------------------------------------------------- /app/logger.py: -------------------------------------------------------------------------------- 1 | import logging 2 | import os 3 | import sys 4 | from logging.handlers import TimedRotatingFileHandler 5 | from pathlib import Path 6 | 7 | def setup_logger(name, level=logging.INFO, log_file=None, rotation='midnight', format_str=None): 8 | """ 9 | Set up a logger with configurable settings. 
10 | 11 | Args: 12 | name (str): Logger name 13 | level (int): Logging level 14 | log_file (str, optional): Path to log file 15 | rotation (str, optional): Log rotation interval 16 | format_str (str, optional): Log format string 17 | 18 | Returns: 19 | logging.Logger: Configured logger 20 | """ 21 | logger = logging.getLogger(name) 22 | logger.setLevel(level) 23 | 24 | # Use default format if none provided 25 | if format_str is None: 26 | format_str = "%(asctime)s - %(name)s - %(levelname)s - %(message)s" 27 | 28 | formatter = logging.Formatter(format_str) 29 | 30 | # Console handler 31 | console_handler = logging.StreamHandler(sys.stdout) 32 | console_handler.setFormatter(formatter) 33 | logger.addHandler(console_handler) 34 | 35 | # File handler (if log file is specified) 36 | if log_file: 37 | # Create directory if it doesn't exist 38 | log_dir = os.path.dirname(log_file) 39 | if log_dir: 40 | Path(log_dir).mkdir(parents=True, exist_ok=True) 41 | 42 | file_handler = TimedRotatingFileHandler( 43 | log_file, 44 | when=rotation, 45 | backupCount=7 46 | ) 47 | file_handler.setFormatter(formatter) 48 | logger.addHandler(file_handler) 49 | 50 | return logger 51 | 52 | def get_logger(name="open-manus", config=None): 53 | """ 54 | Get a logger with configuration from the config file. 
55 | 56 | Args: 57 | name (str, optional): Logger name 58 | config (dict, optional): Configuration dictionary 59 | 60 | Returns: 61 | logging.Logger: Configured logger 62 | """ 63 | if config is None: 64 | return setup_logger(name) 65 | 66 | log_config = config.get("logging", {}) 67 | 68 | # Parse log level 69 | level_str = log_config.get("level", "INFO").upper() 70 | level = getattr(logging, level_str, logging.INFO) 71 | 72 | # Get log file path 73 | log_file = log_config.get("file") 74 | 75 | # Get rotation 76 | rotation = log_config.get("rotation", "midnight") 77 | 78 | # Get format 79 | format_str = log_config.get("format") 80 | 81 | return setup_logger(name, level, log_file, rotation, format_str) 82 | -------------------------------------------------------------------------------- /app/agent/toolcall.py: -------------------------------------------------------------------------------- 1 | from typing import Any, Dict, List, Optional 2 | 3 | from app.agent.base import BaseAgent 4 | from app.schema import AgentType, TaskInput, TaskOutput 5 | from app.llm import llm_manager 6 | from app.prompt.toolcall import TOOLCALL_PROMPT 7 | 8 | 9 | class ToolCallAgent(BaseAgent): 10 | """ 11 | ToolCall agent that specializes in using tools via function calling. 12 | """ 13 | 14 | def __init__(self, tools: Optional[List[str]] = None): 15 | """ 16 | Initialize the ToolCall agent. 17 | 18 | Args: 19 | tools (List[str], optional): List of tool names to use 20 | """ 21 | super().__init__(name=AgentType.TOOLCALL.value, tools=tools) 22 | 23 | # Define default tools if none provided 24 | if not tools: 25 | default_tools = [ 26 | "pdf_generator", 27 | "markdown_generator", 28 | "browser", 29 | "firecrawl_research", 30 | "code_generator" 31 | ] 32 | for tool_name in default_tools: 33 | self.add_tool(tool_name) 34 | 35 | def _run(self, task_input: TaskInput) -> TaskOutput: 36 | """ 37 | Execute the ToolCall agent with the given task input. 
38 | 39 | Args: 40 | task_input (TaskInput): Task input 41 | 42 | Returns: 43 | TaskOutput: Task output 44 | """ 45 | # This is a placeholder implementation 46 | # In a real implementation, this would use OpenAI function calling 47 | # to select and execute tools 48 | 49 | self.logger.info(f"ToolCall agent received task: {task_input.task_description}") 50 | 51 | # Get available tools 52 | available_tools = [ 53 | {"name": tool.name, "description": tool.description} 54 | for tool in self.tools.values() 55 | ] 56 | 57 | # Format prompt with available tools 58 | tools_text = "\n".join([f"- {t['name']}: {t['description']}" for t in available_tools]) 59 | prompt = TOOLCALL_PROMPT.format( 60 | task=task_input.task_description, 61 | tools=tools_text 62 | ) 63 | 64 | # Generate response 65 | response = llm_manager.generate_text(prompt) 66 | 67 | # Create output 68 | output = TaskOutput( 69 | success=True, 70 | result=response, 71 | conversation=task_input.conversation, 72 | metadata={"tools_available": [t["name"] for t in available_tools]} 73 | ) 74 | 75 | return output 76 | -------------------------------------------------------------------------------- /run_flow.py: -------------------------------------------------------------------------------- 1 | import argparse 2 | import os 3 | import sys 4 | from typing import Dict, Any, Optional 5 | 6 | from app.config import Config 7 | from app.logger import get_logger 8 | from app.flow.flow_factory import create_flow 9 | 10 | 11 | def parse_args(): 12 | """Parse command line arguments.""" 13 | parser = argparse.ArgumentParser(description="Run an OpenManus flow") 14 | parser.add_argument("--config", type=str, help="Path to the config file") 15 | parser.add_argument("--flow", type=str, required=True, help="Name of the flow to run") 16 | parser.add_argument("--input", type=str, help="Path to the input file") 17 | parser.add_argument("--output", type=str, help="Path for the output file") 18 | parser.add_argument("--verbose", 
action="store_true", help="Enable verbose logging") 19 | 20 | return parser.parse_args() 21 | 22 | 23 | def load_config(config_path: Optional[str] = None) -> Config: 24 | """ 25 | Load configuration from the given path. 26 | 27 | Args: 28 | config_path (str, optional): Path to the config file 29 | 30 | Returns: 31 | Config: Configuration object 32 | """ 33 | return Config(config_path) 34 | 35 | 36 | def main(): 37 | """Main entry point for running flows.""" 38 | args = parse_args() 39 | 40 | # Set up logging 41 | log_level = "DEBUG" if args.verbose else "INFO" 42 | logger = get_logger("run_flow", {"logging": {"level": log_level}}) 43 | 44 | try: 45 | # Load config 46 | config = load_config(args.config) 47 | 48 | # Get flow name from args 49 | flow_name = args.flow 50 | 51 | # Create flow 52 | flow = create_flow(flow_name, config) 53 | 54 | # Get input parameters 55 | input_params = {} 56 | if args.input and os.path.exists(args.input): 57 | with open(args.input, 'r') as f: 58 | content = f.read() 59 | input_params["content"] = content 60 | 61 | # Add output path if specified 62 | if args.output: 63 | input_params["output_path"] = args.output 64 | 65 | # Run the flow 66 | logger.info(f"Running flow: {flow_name}") 67 | result = flow.run(**input_params) 68 | 69 | # Display result 70 | if result.success: 71 | logger.info("Flow completed successfully") 72 | print(f"Result: {result.result}") 73 | else: 74 | logger.error(f"Flow failed: {result.error}") 75 | print(f"Error: {result.error}") 76 | 77 | return 0 78 | 79 | except Exception as e: 80 | logger.error(f"Error: {str(e)}") 81 | return 1 82 | 83 | 84 | if __name__ == "__main__": 85 | sys.exit(main()) 86 | -------------------------------------------------------------------------------- /app/tool/run.py: -------------------------------------------------------------------------------- 1 | """ 2 | Run tool for executing various operations conveniently. 
3 | """ 4 | from typing import Any, Dict, Optional, Union, List 5 | 6 | from app.tool.base import BaseTool 7 | from app.exceptions import ToolError 8 | from app.tool.bash import BashTool 9 | from app.tool.python_execute import PythonExecuteTool 10 | from app.tool.file_saver import FileSaverTool 11 | 12 | 13 | class RunTool(BaseTool): 14 | """General purpose tool for executing various operations.""" 15 | 16 | def __init__(self): 17 | """Initialize the run tool.""" 18 | super().__init__( 19 | name="run", 20 | description="Execute various operations such as commands, code, and file operations" 21 | ) 22 | # Initialize sub-tools 23 | self.bash_tool = BashTool() 24 | self.python_tool = PythonExecuteTool() 25 | self.file_tool = FileSaverTool() 26 | 27 | def _run(self, 28 | operation: str, 29 | **kwargs) -> Dict[str, Any]: 30 | """ 31 | Run a specified operation. 32 | 33 | Args: 34 | operation (str): Type of operation to run 35 | **kwargs: Operation-specific arguments 36 | 37 | Returns: 38 | Dict[str, Any]: Result of the operation 39 | """ 40 | try: 41 | # Delegate to appropriate tool based on operation 42 | if operation == "command" or operation == "bash": 43 | return self._run_command(**kwargs) 44 | elif operation == "python" or operation == "code": 45 | return self._run_python(**kwargs) 46 | elif operation == "file" or operation == "save": 47 | return self._run_file_operation(**kwargs) 48 | else: 49 | raise ToolError(f"Unknown operation: {operation}") 50 | 51 | except Exception as e: 52 | error_msg = f"Failed to run operation {operation}: {str(e)}" 53 | self.logger.error(error_msg) 54 | raise ToolError(error_msg) 55 | 56 | def _run_command(self, command: str, **kwargs) -> Dict[str, Any]: 57 | """ 58 | Run a shell command. 
59 | 60 | Args: 61 | command (str): Command to execute 62 | **kwargs: Additional arguments for BashTool 63 | 64 | Returns: 65 | Dict[str, Any]: Command execution results 66 | """ 67 | return self.bash_tool.run(command=command, **kwargs) 68 | 69 | def _run_python(self, code: str, **kwargs) -> Dict[str, Any]: 70 | """ 71 | Run Python code. 72 | 73 | Args: 74 | code (str): Python code to execute 75 | **kwargs: Additional arguments for PythonExecuteTool 76 | 77 | Returns: 78 | Dict[str, Any]: Python code execution results 79 | """ 80 | return self.python_tool.run(code=code, **kwargs) 81 | 82 | def _run_file_operation(self, content: str, file_path: str, **kwargs) -> Dict[str, Any]: 83 | """ 84 | Run a file operation (save content to file). 85 | 86 | Args: 87 | content (str): Content to save 88 | file_path (str): Path to save the file 89 | **kwargs: Additional arguments for FileSaverTool 90 | 91 | Returns: 92 | Dict[str, Any]: File operation results 93 | """ 94 | return self.file_tool.run(content=content, file_path=file_path, **kwargs) 95 | 96 | 97 | def run_operation(operation: str, **kwargs) -> Dict[str, Any]: 98 | """ 99 | Run a specified operation using the RunTool. 
100 | 101 | Args: 102 | operation (str): Type of operation to run (command, python, file) 103 | **kwargs: Operation-specific arguments 104 | 105 | Returns: 106 | Dict[str, Any]: Result of the operation 107 | """ 108 | tool = RunTool() 109 | return tool.run(operation=operation, **kwargs) 110 | -------------------------------------------------------------------------------- /app/tool/file_saver.py: -------------------------------------------------------------------------------- 1 | import os 2 | from pathlib import Path 3 | from typing import Any, Dict, Optional 4 | 5 | from app.tool.base import BaseTool 6 | from app.exceptions import FileOperationError 7 | from app.config import config 8 | 9 | 10 | class FileSaverTool(BaseTool): 11 | """Tool for saving content to files.""" 12 | 13 | def __init__(self): 14 | """Initialize the file saver tool.""" 15 | super().__init__( 16 | name="file_saver", 17 | description="Save content to files on the filesystem" 18 | ) 19 | 20 | def _run(self, 21 | content: str, 22 | file_path: str, 23 | mode: str = "w", 24 | encoding: str = "utf-8", 25 | create_dirs: bool = True) -> str: 26 | """ 27 | Save content to a file. 
28 | 29 | Args: 30 | content (str): Content to save 31 | file_path (str): Path to save the file 32 | mode (str, optional): File mode ('w' for write, 'a' for append) 33 | encoding (str, optional): File encoding 34 | create_dirs (bool, optional): Create directories if they don't exist 35 | 36 | Returns: 37 | str: Path to the saved file 38 | """ 39 | try: 40 | # Make file_path absolute if it's not already 41 | if not os.path.isabs(file_path): 42 | file_path = os.path.abspath(file_path) 43 | 44 | # Create directories if they don't exist 45 | if create_dirs: 46 | os.makedirs(os.path.dirname(file_path), exist_ok=True) 47 | 48 | # Write content to file 49 | with open(file_path, mode=mode, encoding=encoding) as f: 50 | f.write(content) 51 | 52 | self.logger.info(f"Content successfully saved to {file_path}") 53 | return file_path 54 | 55 | except Exception as e: 56 | error_msg = f"Failed to save content to file: {str(e)}" 57 | self.logger.error(error_msg) 58 | raise FileOperationError(error_msg) 59 | 60 | def save_binary(self, content: bytes, file_path: str, create_dirs: bool = True) -> str: 61 | """ 62 | Save binary content to a file. 
63 | 64 | Args: 65 | content (bytes): Binary content to save 66 | file_path (str): Path to save the file 67 | create_dirs (bool, optional): Create directories if they don't exist 68 | 69 | Returns: 70 | str: Path to the saved file 71 | """ 72 | try: 73 | # Make file_path absolute if it's not already 74 | if not os.path.isabs(file_path): 75 | file_path = os.path.abspath(file_path) 76 | 77 | # Create directories if they don't exist 78 | if create_dirs: 79 | os.makedirs(os.path.dirname(file_path), exist_ok=True) 80 | 81 | # Write binary content to file 82 | with open(file_path, mode="wb") as f: 83 | f.write(content) 84 | 85 | self.logger.info(f"Binary content successfully saved to {file_path}") 86 | return file_path 87 | 88 | except Exception as e: 89 | error_msg = f"Failed to save binary content to file: {str(e)}" 90 | self.logger.error(error_msg) 91 | raise FileOperationError(error_msg) 92 | 93 | 94 | def save_file_content(content: str, file_path: str, mode: str = "w", encoding: str = "utf-8", create_dirs: bool = True) -> str: 95 | """ 96 | Save content to a file using the FileSaverTool. 
97 | 98 | Args: 99 | content (str): Content to save 100 | file_path (str): Path to save the file 101 | mode (str, optional): File mode ('w' for write, 'a' for append) 102 | encoding (str, optional): File encoding 103 | create_dirs (bool, optional): Create directories if they don't exist 104 | 105 | Returns: 106 | str: Path to the saved file 107 | """ 108 | tool = FileSaverTool() 109 | return tool.run( 110 | content=content, 111 | file_path=file_path, 112 | mode=mode, 113 | encoding=encoding, 114 | create_dirs=create_dirs 115 | ) 116 | -------------------------------------------------------------------------------- /app/llm.py: -------------------------------------------------------------------------------- 1 | import os 2 | from typing import Dict, List, Any, Optional 3 | from langchain_core.language_models import BaseChatModel 4 | from langchain_openai import ChatOpenAI 5 | from langchain_core.messages import HumanMessage, SystemMessage, AIMessage 6 | 7 | from app.config import config 8 | from app.logger import get_logger 9 | 10 | logger = get_logger("llm") 11 | 12 | def get_llm_from_config(config_data: Dict[str, Any] = None) -> BaseChatModel: 13 | """ 14 | Create a language model instance from configuration. 
15 | 16 | Args: 17 | config_data (Dict[str, Any], optional): Configuration dictionary 18 | 19 | Returns: 20 | BaseChatModel: Language model instance 21 | """ 22 | if config_data is None: 23 | config_data = config.config_data 24 | 25 | # Extract LLM configuration 26 | api_key = config.get_nested_value(["api", "openai_api_key"]) 27 | model = config.get_nested_value(["llm", "model"], "gpt-4") 28 | temperature = config.get_nested_value(["llm", "temperature"], 0.7) 29 | max_tokens = config.get_nested_value(["llm", "max_tokens"], 4000) 30 | timeout = config.get_nested_value(["llm", "timeout"], 120) 31 | 32 | # Check if API key is available 33 | if not api_key: 34 | api_key = os.environ.get("OPENAI_API_KEY") 35 | if not api_key: 36 | logger.warning("OpenAI API key not found in config or environment variables.") 37 | 38 | # Initialize OpenAI chat model 39 | try: 40 | llm = ChatOpenAI( 41 | model=model, 42 | temperature=temperature, 43 | max_tokens=max_tokens, 44 | openai_api_key=api_key, 45 | request_timeout=timeout, 46 | ) 47 | return llm 48 | except Exception as e: 49 | logger.error(f"Error initializing ChatOpenAI: {e}") 50 | raise 51 | 52 | 53 | class LLMManager: 54 | """Manager for LLM interactions.""" 55 | 56 | def __init__(self, llm: Optional[BaseChatModel] = None, config_data: Dict[str, Any] = None): 57 | """ 58 | Initialize LLM manager. 59 | 60 | Args: 61 | llm (BaseChatModel, optional): Language model instance 62 | config_data (Dict[str, Any], optional): Configuration dictionary 63 | """ 64 | self.llm = llm or get_llm_from_config(config_data) 65 | self.logger = get_logger("llm_manager") 66 | 67 | def generate_text(self, prompt: str, system_prompt: Optional[str] = None) -> str: 68 | """ 69 | Generate text using the LLM. 
70 | 71 | Args: 72 | prompt (str): User prompt 73 | system_prompt (str, optional): System prompt 74 | 75 | Returns: 76 | str: Generated text 77 | """ 78 | messages = [] 79 | 80 | if system_prompt: 81 | messages.append(SystemMessage(content=system_prompt)) 82 | 83 | messages.append(HumanMessage(content=prompt)) 84 | 85 | try: 86 | self.logger.debug(f"Sending prompt to LLM: {prompt[:100]}...") 87 | result = self.llm.invoke(messages) 88 | return result.content 89 | except Exception as e: 90 | self.logger.error(f"Error generating text: {e}") 91 | return f"Error: {str(e)}" 92 | 93 | def generate_from_messages(self, messages: List[Dict[str, str]]) -> str: 94 | """ 95 | Generate text from a list of messages. 96 | 97 | Args: 98 | messages (List[Dict[str, str]]): List of message dictionaries 99 | 100 | Returns: 101 | str: Generated text 102 | """ 103 | langchain_messages = [] 104 | 105 | for message in messages: 106 | role = message.get("role", "user") 107 | content = message.get("content", "") 108 | 109 | if role == "system": 110 | langchain_messages.append(SystemMessage(content=content)) 111 | elif role == "user": 112 | langchain_messages.append(HumanMessage(content=content)) 113 | elif role == "assistant": 114 | langchain_messages.append(AIMessage(content=content)) 115 | 116 | try: 117 | self.logger.debug(f"Sending {len(messages)} messages to LLM") 118 | result = self.llm.invoke(langchain_messages) 119 | return result.content 120 | except Exception as e: 121 | self.logger.error(f"Error generating from messages: {e}") 122 | return f"Error: {str(e)}" 123 | 124 | 125 | # Global LLM manager instance 126 | llm_manager = LLMManager() 127 | -------------------------------------------------------------------------------- /app/agent/base.py: -------------------------------------------------------------------------------- 1 | from abc import ABC, abstractmethod 2 | from typing import Any, Dict, List, Optional 3 | 4 | from app.schema import Conversation, TaskInput, TaskOutput 5 | 
from app.logger import get_logger 6 | from app.exceptions import AgentError 7 | from app.tool.base import ToolRegistry 8 | 9 | 10 | class BaseAgent(ABC): 11 | """Base class for all agents in the system.""" 12 | 13 | def __init__(self, name: str, tools: Optional[List[str]] = None): 14 | """ 15 | Initialize an agent. 16 | 17 | Args: 18 | name (str): Agent name 19 | tools (List[str], optional): List of tool names to use 20 | """ 21 | self.name = name 22 | self.logger = get_logger(f"agent.{name}") 23 | self.tool_registry = ToolRegistry() 24 | 25 | # Initialize tools 26 | self.tools = {} 27 | if tools: 28 | for tool_name in tools: 29 | tool = self.tool_registry.get(tool_name) 30 | if tool: 31 | self.tools[tool_name] = tool 32 | else: 33 | self.logger.warning(f"Tool '{tool_name}' not found in registry") 34 | 35 | @abstractmethod 36 | def _run(self, task_input: TaskInput) -> TaskOutput: 37 | """ 38 | Execute the agent with the given task input. 39 | 40 | Args: 41 | task_input (TaskInput): Task input 42 | 43 | Returns: 44 | TaskOutput: Task output 45 | """ 46 | pass 47 | 48 | def run(self, task_input: TaskInput) -> TaskOutput: 49 | """ 50 | Run the agent with error handling. 
51 | 52 | Args: 53 | task_input (TaskInput): Task input 54 | 55 | Returns: 56 | TaskOutput: Task output 57 | """ 58 | try: 59 | self.logger.info(f"Running agent '{self.name}' with task: {task_input.task_description}") 60 | 61 | # Add specific tools if specified in the task input 62 | if task_input.tools: 63 | for tool_name in task_input.tools: 64 | if tool_name not in self.tools: 65 | tool = self.tool_registry.get(tool_name) 66 | if tool: 67 | self.tools[tool_name] = tool 68 | self.logger.debug(f"Added tool '{tool_name}' to agent '{self.name}'") 69 | else: 70 | self.logger.warning(f"Tool '{tool_name}' not found in registry") 71 | 72 | # Execute the agent 73 | result = self._run(task_input) 74 | 75 | self.logger.info(f"Agent '{self.name}' completed task successfully") 76 | return result 77 | 78 | except Exception as e: 79 | error_msg = f"Error running agent '{self.name}': {str(e)}" 80 | self.logger.error(error_msg) 81 | 82 | # Create error output 83 | return TaskOutput( 84 | success=False, 85 | error=error_msg, 86 | conversation=task_input.conversation 87 | ) 88 | 89 | def get_tool(self, name: str) -> Any: 90 | """ 91 | Get a tool by name. 92 | 93 | Args: 94 | name (str): Tool name 95 | 96 | Returns: 97 | Any: The tool if found 98 | 99 | Raises: 100 | AgentError: If tool not found 101 | """ 102 | tool = self.tools.get(name) 103 | if not tool: 104 | tool = self.tool_registry.get(name) 105 | if tool: 106 | self.tools[name] = tool 107 | else: 108 | raise AgentError(f"Tool '{name}' not found") 109 | return tool 110 | 111 | def add_tool(self, tool_name: str) -> bool: 112 | """ 113 | Add a tool to the agent. 
114 | 115 | Args: 116 | tool_name (str): Tool name 117 | 118 | Returns: 119 | bool: True if tool was added, False otherwise 120 | """ 121 | tool = self.tool_registry.get(tool_name) 122 | if tool: 123 | self.tools[tool_name] = tool 124 | return True 125 | return False 126 | 127 | def list_tools(self) -> List[Dict[str, Any]]: 128 | """ 129 | List all tools available to the agent. 130 | 131 | Returns: 132 | List[Dict[str, Any]]: List of tool dictionaries 133 | """ 134 | return [tool.to_dict() for tool in self.tools.values()] 135 | -------------------------------------------------------------------------------- /README.md: -------------------------------------------------------------------------------- 1 | # OpenManus - Python Agent System 2 | [![Discord](https://img.shields.io/badge/Join-Discord-5865F2?logo=discord&logoColor=white)](https://discord.gg/jkT5udP9bw) 3 | [![Twitter](https://img.shields.io/badge/Follow-@xinyzng-1DA1F2?logo=twitter&logoColor=white)](https://x.com/xinyzng) 4 | 5 | ManusPro is an agent-based system that can execute tasks, generate documents, conduct research, and more. 6 | 7 | ## Demo video 8 | https://github.com/user-attachments/assets/9af20224-d496-4b54-9634-d72c7c8139b7 9 | 10 | Checkout the frontend branch: 11 | image 12 | 13 | ## Installation 14 | 15 | 1. Clone the repository: 16 | ```bash 17 | git clone https://github.com/yourusername/open-manus.git 18 | cd open-manus/python 19 | ``` 20 | 21 | 2. Install the required dependencies: 22 | ```bash 23 | pip install -r requirements.txt 24 | ``` 25 | 26 | 3. 
Optional dependencies: 27 | - For generating visualizations in PDFs: `pip install matplotlib numpy` 28 | 29 | ## Features 30 | 31 | - Task planning and execution 32 | - Document generation 33 | - PDF generation with data tables and visualizations (auto-opens generated PDFs) 34 | - Markdown generation with auto-open capability 35 | - Web research using Firecrawl API 36 | - Code generation and automatic execution 37 | - Terminal command integration 38 | - Artifact management and tracking 39 | - Conversation handling 40 | 41 | ## Configuration 42 | 43 | Create a `.env` file in the project root with your API keys: 44 | 45 | ``` 46 | OPENAI_API_KEY=your_openai_key 47 | FIRECRAWL_API_KEY=your_firecrawl_key # For web research capabilities 48 | # Optional: If using Claude 49 | ANTHROPIC_API_KEY=your_anthropic_key 50 | ``` 51 | 52 | ## Usage 53 | 54 | ```python 55 | from app.agent.manus import Manus 56 | import asyncio 57 | 58 | # Create Manus agent 59 | agent = Manus() 60 | 61 | # Run a task 62 | async def main(): 63 | await agent.run("Generate a report about renewable energy technologies") 64 | 65 | if __name__ == "__main__": 66 | asyncio.run(main()) 67 | ``` 68 | 69 | ## Document Generation 70 | 71 | OpenManus can generate different types of documents: 72 | 73 | ### PDF Generation 74 | 75 | ```python 76 | from app.tool.pdf_generator import PDFGeneratorTool 77 | 78 | pdf_tool = PDFGeneratorTool() 79 | result = pdf_tool.run( 80 | content="# This is a PDF report\n\nContent goes here...", 81 | title="Sample Report", 82 | options={"auto_open": True} # Automatically open the PDF after generation 83 | ) 84 | print(f"PDF created at: {result['artifact_path']}") 85 | ``` 86 | 87 | ### Markdown Generation 88 | 89 | ```python 90 | from app.tool.markdown_generator import MarkdownGeneratorTool 91 | 92 | md_tool = MarkdownGeneratorTool() 93 | result = md_tool.run( 94 | content="## This is a Markdown document\n\nContent goes here...", 95 | title="Sample Document", 96 | 
options={"auto_open": True} # Automatically open the markdown file 97 | ) 98 | print(f"Markdown created at: {result['artifact_path']}") 99 | ``` 100 | 101 | ## Code Generation and Execution 102 | 103 | Generate and automatically execute code: 104 | 105 | ```python 106 | from app.tool.code_generator import CodeGeneratorTool 107 | 108 | code_tool = CodeGeneratorTool() 109 | result = code_tool.run( 110 | description="Create a function that calculates the factorial of a number", 111 | language="python" # Code will be auto-executed if it's safe 112 | ) 113 | print(f"Code created at: {result['artifact_path']}") 114 | if result.get("executed"): 115 | print(f"Execution result: {result['execution_result']['output']}") 116 | ``` 117 | 118 | ## Web Research 119 | 120 | Perform web research using the Firecrawl API: 121 | 122 | ```python 123 | from app.tool.firecrawl_research import FirecrawlResearchTool 124 | 125 | research_tool = FirecrawlResearchTool() 126 | result = research_tool.run( 127 | query="Latest advancements in quantum computing", 128 | output_format="markdown", 129 | include_visualizations=True 130 | ) 131 | print(f"Research data saved to: {result['artifact_path']}") 132 | ``` 133 | 134 | ## Optional Modules 135 | 136 | ### Visualization Support 137 | 138 | The PDF generator can include data visualizations if matplotlib is installed. To enable this feature: 139 | 140 | ```bash 141 | pip install matplotlib numpy 142 | ``` 143 | 144 | Without matplotlib, the system will still work but will display a message in the PDF when visualizations are requested. 
145 | 146 | ### Browser Automation 147 | 148 | For automating browser tasks, Selenium is used: 149 | 150 | ```bash 151 | pip install selenium webdriver-manager 152 | ``` 153 | 154 | ### Web Research 155 | 156 | For web research capabilities, the firecrawl-py package is required: 157 | 158 | ```bash 159 | pip install firecrawl-py 160 | ``` 161 | 162 | ## License 163 | 164 | [MIT License](LICENSE) 165 | -------------------------------------------------------------------------------- /app/tool/google_search.py: -------------------------------------------------------------------------------- 1 | import os 2 | import json 3 | import requests 4 | from typing import Dict, List, Optional, Any 5 | 6 | from app.tool.base import BaseTool 7 | from app.exceptions import WebResearchError 8 | from app.config import config 9 | from app.llm import llm_manager 10 | 11 | # Note: This is a placeholder implementation since actual Google Search API usage would require API keys 12 | # and subscription. In a real implementation, you would use the Google Custom Search API or another 13 | # search API provider. 14 | 15 | 16 | class GoogleSearchTool(BaseTool): 17 | """Tool for performing Google searches.""" 18 | 19 | def __init__(self): 20 | """Initialize the Google search tool.""" 21 | super().__init__( 22 | name="google_search", 23 | description="Perform Google searches and retrieve results" 24 | ) 25 | 26 | def _run(self, 27 | query: str, 28 | num_results: int = 5, 29 | search_type: str = "web") -> Dict[str, Any]: 30 | """ 31 | Perform a Google search. 
32 | 33 | Args: 34 | query (str): Search query 35 | num_results (int, optional): Number of results to return 36 | search_type (str, optional): Type of search (web, images, news) 37 | 38 | Returns: 39 | Dict[str, Any]: Search results 40 | """ 41 | try: 42 | # In a real implementation, you would use the Google Search API here 43 | # For now, we'll simulate results using LLM 44 | 45 | self.logger.info(f"Performing simulated Google search for: {query}") 46 | 47 | # Simulate search results using LLM 48 | prompt = f""" 49 | You are simulating a Google search tool. Generate realistic search results for the following query: 50 | 51 | "{query}" 52 | 53 | Please provide {num_results} search results in a structured format including: 54 | - Title of the page 55 | - URL of the page 56 | - A brief snippet/description 57 | 58 | Make the results look like realistic Google search results for this query. 59 | Format the results as a JSON array with title, url, and snippet fields. 60 | """ 61 | 62 | response = llm_manager.generate_text(prompt) 63 | 64 | # Try to parse as JSON 65 | try: 66 | # Extract JSON array from response if needed 67 | json_start = response.find('[') 68 | json_end = response.rfind(']') + 1 69 | if json_start >= 0 and json_end > json_start: 70 | json_string = response[json_start:json_end] 71 | results = json.loads(json_string) 72 | else: 73 | # Fallback: generate a structured response 74 | results = self._generate_fallback_results(query, num_results) 75 | except json.JSONDecodeError: 76 | # Fallback: generate a structured response 77 | results = self._generate_fallback_results(query, num_results) 78 | 79 | return { 80 | "query": query, 81 | "results": results, 82 | "search_type": search_type, 83 | "simulated": True # Flag to indicate these are simulated results 84 | } 85 | 86 | except Exception as e: 87 | error_msg = f"Failed to perform Google search: {str(e)}" 88 | self.logger.error(error_msg) 89 | raise WebResearchError(error_msg) 90 | 91 | def 
_generate_fallback_results(self, query: str, num_results: int) -> List[Dict[str, str]]: 92 | """ 93 | Generate fallback search results if JSON parsing fails. 94 | 95 | Args: 96 | query (str): Search query 97 | num_results (int): Number of results to generate 98 | 99 | Returns: 100 | List[Dict[str, str]]: Generated search results 101 | """ 102 | results = [] 103 | base_terms = query.split() 104 | 105 | for i in range(min(num_results, 5)): 106 | results.append({ 107 | "title": f"Result {i+1} for {query}", 108 | "url": f"https://example.com/result-{i+1}-{'-'.join(base_terms)}", 109 | "snippet": f"This is a simulated search result for the query '{query}'. It contains relevant information about {' and '.join(base_terms)}." 110 | }) 111 | 112 | return results 113 | 114 | 115 | def perform_google_search(query: str, num_results: int = 5, search_type: str = "web") -> Dict[str, Any]: 116 | """ 117 | Perform a Google search using the GoogleSearchTool. 118 | 119 | Args: 120 | query (str): Search query 121 | num_results (int, optional): Number of results to return 122 | search_type (str, optional): Type of search (web, images, news) 123 | 124 | Returns: 125 | Dict[str, Any]: Search results 126 | """ 127 | tool = GoogleSearchTool() 128 | return tool.run( 129 | query=query, 130 | num_results=num_results, 131 | search_type=search_type 132 | ) 133 | -------------------------------------------------------------------------------- /app/config.py: -------------------------------------------------------------------------------- 1 | import os 2 | import toml 3 | from dotenv import load_dotenv 4 | from pathlib import Path 5 | from typing import Dict, Any, Optional 6 | 7 | # Load environment variables from .env file 8 | load_dotenv() 9 | 10 | class Config: 11 | """Configuration manager for OpenManus.""" 12 | 13 | def __init__(self, config_path: Optional[str] = None): 14 | """ 15 | Initialize the configuration manager. 
16 | 17 | Args: 18 | config_path (str, optional): Path to the config file. If None, default paths will be checked. 19 | """ 20 | self.config_data: Dict[str, Any] = {} 21 | 22 | # Default config paths to check 23 | default_paths = [ 24 | os.path.join(os.getcwd(), "config", "config.toml"), 25 | os.path.join(os.path.dirname(os.path.dirname(__file__)), "config", "config.toml"), 26 | os.path.expanduser("~/.config/open-manus/config.toml"), 27 | ] 28 | 29 | # Load config file 30 | if config_path: 31 | self.load_config(config_path) 32 | else: 33 | # Try default paths 34 | for path in default_paths: 35 | if os.path.exists(path): 36 | self.load_config(path) 37 | break 38 | 39 | # Apply environment variable overrides 40 | self._apply_env_overrides() 41 | 42 | # Create required directories 43 | self._create_required_directories() 44 | 45 | def load_config(self, config_path: str) -> None: 46 | """ 47 | Load configuration from a TOML file. 48 | 49 | Args: 50 | config_path (str): Path to the config file 51 | """ 52 | try: 53 | self.config_data = toml.load(config_path) 54 | except Exception as e: 55 | print(f"Error loading config file {config_path}: {e}") 56 | 57 | def _apply_env_overrides(self) -> None: 58 | """Apply environment variable overrides to the config.""" 59 | # API keys 60 | if os.environ.get("OPENAI_API_KEY"): 61 | self.set_nested_value(["api", "openai_api_key"], os.environ["OPENAI_API_KEY"]) 62 | 63 | if os.environ.get("FIRECRAWL_API_KEY"): 64 | self.set_nested_value(["api", "firecrawl_api_key"], os.environ["FIRECRAWL_API_KEY"]) 65 | 66 | # LLM model 67 | if os.environ.get("OPENAI_MODEL"): 68 | self.set_nested_value(["llm", "model"], os.environ["OPENAI_MODEL"]) 69 | 70 | def _create_required_directories(self) -> None: 71 | """Create required directories specified in the config.""" 72 | # Document output directories 73 | for dir_key in ["pdf_output_dir", "markdown_output_dir", "code_output_dir"]: 74 | dir_path = self.get_nested_value(["document", dir_key]) 75 | if 
dir_path: 76 | Path(dir_path).mkdir(parents=True, exist_ok=True) 77 | 78 | # Log directory 79 | log_file = self.get_nested_value(["logging", "file"]) 80 | if log_file: 81 | log_dir = os.path.dirname(log_file) 82 | if log_dir: 83 | Path(log_dir).mkdir(parents=True, exist_ok=True) 84 | 85 | def get_nested_value(self, keys: list, default: Any = None) -> Any: 86 | """ 87 | Get a nested value from the config data using a list of keys. 88 | 89 | Args: 90 | keys (list): List of keys to traverse 91 | default (Any, optional): Default value if key is not found 92 | 93 | Returns: 94 | Any: The value at the specified key path or default if not found 95 | """ 96 | value = self.config_data 97 | for key in keys: 98 | if not isinstance(value, dict) or key not in value: 99 | return default 100 | value = value[key] 101 | return value 102 | 103 | def set_nested_value(self, keys: list, value: Any) -> None: 104 | """ 105 | Set a nested value in the config data using a list of keys. 106 | 107 | Args: 108 | keys (list): List of keys to traverse 109 | value (Any): Value to set 110 | """ 111 | if not keys: 112 | return 113 | 114 | # Navigate to the parent dictionary 115 | current = self.config_data 116 | for key in keys[:-1]: 117 | if key not in current: 118 | current[key] = {} 119 | current = current[key] 120 | 121 | # Set the value 122 | current[keys[-1]] = value 123 | 124 | def get(self, key: str, default: Any = None) -> Any: 125 | """ 126 | Get a top-level value from the config. 127 | 128 | Args: 129 | key (str): Config key 130 | default (Any, optional): Default value if key not found 131 | 132 | Returns: 133 | Any: The value or default if not found 134 | """ 135 | return self.config_data.get(key, default) 136 | 137 | def set(self, key: str, value: Any) -> None: 138 | """ 139 | Set a top-level value in the config. 
140 | 141 | Args: 142 | key (str): Config key 143 | value (Any): Value to set 144 | """ 145 | self.config_data[key] = value 146 | 147 | def __getitem__(self, key: str) -> Any: 148 | """Dictionary-like access to config values.""" 149 | return self.config_data[key] 150 | 151 | def get_timestamp(self) -> str: 152 | """ 153 | Get a formatted timestamp string for use in artifacts and logs. 154 | 155 | Returns: 156 | str: ISO format timestamp string 157 | """ 158 | import datetime 159 | return datetime.datetime.now().isoformat() 160 | 161 | 162 | # Global config instance 163 | config = Config() 164 | -------------------------------------------------------------------------------- /app/tool/terminate.py: -------------------------------------------------------------------------------- 1 | import os 2 | import signal 3 | import sys 4 | import threading 5 | import time 6 | from typing import Dict, Optional, Any 7 | 8 | from app.tool.base import BaseTool 9 | from app.exceptions import ToolError 10 | 11 | 12 | class TerminateTool(BaseTool): 13 | """Tool for terminating processes or the current execution.""" 14 | 15 | def __init__(self): 16 | """Initialize the terminate tool.""" 17 | super().__init__( 18 | name="terminate", 19 | description="Gracefully terminate processes or the current execution" 20 | ) 21 | 22 | def _run(self, 23 | message: str = "Task completed successfully", 24 | exit_code: int = 0, 25 | delay: int = 0, 26 | terminate_type: str = "soft") -> Dict[str, Any]: 27 | """ 28 | Terminate the current process. 
class TerminateTool(BaseTool):
    """Tool for terminating processes or the current execution."""

    def __init__(self):
        """Initialize the terminate tool."""
        super().__init__(
            name="terminate",
            description="Gracefully terminate processes or the current execution"
        )

    def _run(self,
             message: str = "Task completed successfully",
             exit_code: int = 0,
             delay: int = 0,
             terminate_type: str = "soft") -> Dict[str, Any]:
        """
        Terminate the current process.

        Args:
            message (str, optional): Message to display before termination
            exit_code (int, optional): Exit code to return
            delay (int, optional): Delay in seconds before termination
            terminate_type (str, optional): Type of termination (soft, hard)

        Returns:
            Dict[str, Any]: Result of termination (only returned if
                terminate_type is "soft", or if a delayed hard exit was
                scheduled)

        Raises:
            ToolError: If an unexpected error occurs while handling the
                termination request.
        """
        try:
            # Log termination request
            self.logger.info(f"Termination requested with message: {message}")

            # "soft" termination never exits the interpreter; it only reports.
            if terminate_type.lower() == "soft":
                return {
                    "status": "terminated",
                    "message": message,
                    "exit_code": exit_code,
                    "terminate_type": terminate_type
                }

            # Schedule hard termination after a delay.
            if delay > 0:
                def delayed_exit():
                    time.sleep(delay)
                    print(f"Terminating: {message}")
                    sys.exit(exit_code)

                # Daemon thread so the pending exit never blocks shutdown.
                thread = threading.Thread(target=delayed_exit)
                thread.daemon = True
                thread.start()

                return {
                    "status": "terminating",
                    "message": message,
                    "exit_code": exit_code,
                    "delay": delay,
                    "terminate_type": terminate_type
                }

            # Immediate hard termination.  SystemExit derives from
            # BaseException, so the `except Exception` below does not
            # accidentally swallow it.
            print(f"Terminating: {message}")
            sys.exit(exit_code)

        except Exception as e:
            error_msg = f"Failed to terminate process: {str(e)}"
            self.logger.error(error_msg)
            # Chain the original exception for easier debugging.
            raise ToolError(error_msg) from e

    def terminate_process(self, pid: int, signal_type: int = signal.SIGTERM) -> Dict[str, Any]:
        """
        Terminate a specific process by PID.

        Args:
            pid (int): Process ID to terminate
            signal_type (int, optional): Signal type to send

        Returns:
            Dict[str, Any]: Result of termination
        """
        try:
            # Check if process exists
            if not self._check_process_exists(pid):
                return {
                    "status": "error",
                    "message": f"Process with PID {pid} does not exist",
                    "success": False
                }

            # Send signal to process
            os.kill(pid, signal_type)

            # Short delay to give the process a chance to terminate, then
            # re-check whether it is still alive.
            time.sleep(0.5)
            process_exists = self._check_process_exists(pid)

            return {
                "status": "terminated" if not process_exists else "signal_sent",
                "message": f"Process with PID {pid} {'terminated' if not process_exists else 'received signal'}",
                "pid": pid,
                "signal": signal_type,
                "success": True
            }

        except Exception as e:
            error_msg = f"Failed to terminate process {pid}: {str(e)}"
            self.logger.error(error_msg)
            return {
                "status": "error",
                "message": error_msg,
                "pid": pid,
                "signal": signal_type,
                "success": False
            }

    def _check_process_exists(self, pid: int) -> bool:
        """
        Check if a process with the given PID exists.

        Args:
            pid (int): Process ID to check

        Returns:
            bool: True if process exists, False otherwise
        """
        try:
            # Sending signal 0 performs error checking only; no signal is
            # actually delivered to the process.
            os.kill(pid, 0)
            return True
        except PermissionError:
            # BUGFIX: EPERM means the process exists but we lack permission
            # to signal it; the previous code reported it as not existing.
            return True
        except OSError:
            return False


def terminate_execution(message: str = "Task completed successfully", exit_code: int = 0,
                        delay: int = 0, terminate_type: str = "soft") -> Dict[str, Any]:
    """
    Terminate the current execution using the TerminateTool.

    Args:
        message (str, optional): Message to display before termination
        exit_code (int, optional): Exit code to return
        delay (int, optional): Delay in seconds before termination
        terminate_type (str, optional): Type of termination (soft, hard)

    Returns:
        Dict[str, Any]: Result of termination (only returned if terminate_type is "soft")
    """
    tool = TerminateTool()
    return tool.run(
        message=message,
        exit_code=exit_code,
        delay=delay,
        terminate_type=terminate_type
    )
class BashTool(BaseTool):
    """Tool for executing bash/shell commands."""

    def __init__(self):
        """Initialize the bash tool."""
        super().__init__(
            name="bash",
            description="Execute bash/shell commands in the system"
        )

    def _run(self,
             command: str,
             cwd: Optional[str] = None,
             env: Optional[Dict[str, str]] = None,
             timeout: Optional[int] = 60,
             capture_stderr: bool = True,
             text: bool = True) -> Dict[str, Union[str, int, List[str]]]:
        """
        Execute a bash/shell command.

        Args:
            command (str): Command to execute
            cwd (str, optional): Working directory
            env (Dict[str, str], optional): Environment variables (merged
                over the current process environment)
            timeout (int, optional): Timeout in seconds
            capture_stderr (bool, optional): Capture stderr output
            text (bool, optional): Return string output (vs bytes)

        Returns:
            Dict[str, Union[str, int, List[str]]]: Command execution results
                with "returncode", "stdout", "command" and, when captured,
                "stderr" keys.

        Raises:
            ToolError: If the command fails to launch (not for non-zero
                exit codes, which are reported in the result).
        """
        try:
            self.logger.debug(f"Executing command: {command}")

            # Set up process arguments
            kwargs = {
                "shell": True,
                "stdout": subprocess.PIPE,
                "stderr": subprocess.PIPE if capture_stderr else subprocess.DEVNULL,
                "text": text
            }

            if cwd:
                kwargs["cwd"] = cwd
            if env:
                # Merge with current environment so PATH etc. are preserved.
                full_env = os.environ.copy()
                full_env.update(env)
                kwargs["env"] = full_env

            # Execute command
            process = subprocess.run(command, timeout=timeout, **kwargs)

            # Prepare result
            result = {
                "returncode": process.returncode,
                "stdout": process.stdout,
                "command": command
            }

            if capture_stderr:
                result["stderr"] = process.stderr

            # Log success or failure
            if process.returncode == 0:
                self.logger.debug(f"Command executed successfully: {command}")
            else:
                self.logger.warning(f"Command returned non-zero exit code {process.returncode}: {command}")
                if capture_stderr:
                    self.logger.warning(f"stderr: {process.stderr}")

            return result

        except subprocess.TimeoutExpired:
            error_msg = f"Command timed out after {timeout} seconds: {command}"
            self.logger.error(error_msg)
            result = {
                "returncode": 124,  # Standard timeout exit code
                "stdout": "",
                "command": command,
                "error": "timeout"
            }
            # CONSISTENCY FIX: only report stderr when it was requested,
            # matching the shape of the success-path result.
            if capture_stderr:
                result["stderr"] = "Command timed out"
            return result
        except Exception as e:
            error_msg = f"Failed to execute command: {str(e)}"
            self.logger.error(error_msg)
            # Chain the original exception for easier debugging.
            raise ToolError(error_msg) from e

    def execute_script(self, script_content: str, script_type: str = "bash", **kwargs) -> Dict[str, Union[str, int, List[str]]]:
        """
        Execute a script by saving it to a temporary file and running it.

        Args:
            script_content (str): Content of the script
            script_type (str, optional): Type of script (bash, python, etc.)
            **kwargs: Additional arguments for _run method

        Returns:
            Dict[str, Union[str, int, List[str]]]: Script execution results
        """
        # Create temporary script file with an extension matching its type.
        script_extension_map = {
            "bash": ".sh",
            "python": ".py",
            "perl": ".pl",
            "ruby": ".rb",
            "node": ".js"
        }
        extension = script_extension_map.get(script_type, ".sh")

        with tempfile.NamedTemporaryFile(suffix=extension, mode='w', delete=False) as script_file:
            script_path = script_file.name
            script_file.write(script_content)

        try:
            # Make script executable
            os.chmod(script_path, 0o755)

            # Prepare the interpreter command based on script type.
            command_prefix_map = {
                "bash": "bash",
                "python": "python",
                "perl": "perl",
                "ruby": "ruby",
                "node": "node"
            }
            prefix = command_prefix_map.get(script_type, "bash")
            command = f"{prefix} {script_path}"

            # Execute the script
            result = self._run(command, **kwargs)

            return result
        finally:
            # Clean up the temporary file even if execution raised.
            try:
                os.unlink(script_path)
            except Exception as e:
                self.logger.warning(f"Failed to remove temporary script file: {str(e)}")


def execute_bash_command(command: str, cwd: Optional[str] = None, env: Optional[Dict[str, str]] = None,
                         timeout: Optional[int] = 60, capture_stderr: bool = True) -> Dict[str, Union[str, int, List[str]]]:
    """
    Execute a bash command using the BashTool.

    Args:
        command (str): Command to execute
        cwd (str, optional): Working directory
        env (Dict[str, str], optional): Environment variables
        timeout (int, optional): Timeout in seconds
        capture_stderr (bool, optional): Capture stderr output

    Returns:
        Dict[str, Union[str, int, List[str]]]: Command execution results
    """
    tool = BashTool()
    return tool.run(
        command=command,
        cwd=cwd,
        env=env,
        timeout=timeout,
        capture_stderr=capture_stderr
    )
class PlanningFlow(BaseFlow):
    """
    Flow for planning and executing tasks using the planning agent.
    """

    def __init__(self):
        """Initialize the planning flow."""
        super().__init__(name="planning")
        self.agent = PlanningAgent()

    def _generate_plan(self, task_description: str) -> List[str]:
        """
        Generate a detailed, step-by-step plan for the given task.

        Args:
            task_description (str): Description of the task

        Returns:
            List[str]: List of detailed steps in the plan
        """
        # Generate plan using LLM with improved prompt for more detailed steps
        prompt = f"""
You are a task planning expert. Create a comprehensive, structured plan to accomplish the following task:

TASK: {task_description}

Your plan should:
1. Break down the task into logical stages
2. Include specific, actionable steps for each stage
3. Specify required dependencies, tools, or resources for each step
4. Consider potential challenges and alternative approaches
5. Define clear success criteria for each step

FORMAT GUIDELINES:
- Use clear, numbered steps (e.g., "1. Research X", "2. Implement Y")
- Group related steps under descriptive headings
- Keep each step focused on a single, well-defined action
- Include estimated complexity/effort for each step (Low/Medium/High)
- For code-related tasks, specify programming language and key components
- For document tasks, outline document structure and key sections

Avoid vague steps like "research" without specifying what to research.
Ensure each step has a clear, measurable outcome.
"""

        response = llm_manager.generate_text(prompt)

        # Parse steps from response with improved parsing logic
        steps = []
        current_step = ""
        in_step = False
        # Matches "1. ...", "- ..." or "* ..." step markers.
        step_pattern = re.compile(r'^(?:\d+\.|\-|\*)\s+(.+)$')

        for line in response.split("\n"):
            line = line.strip()
            if not line:
                continue

            # Check for step markers with more robust pattern matching
            step_match = step_pattern.match(line)

            # BUGFIX: "Step"/"Stage" detection was inconsistently
            # case-sensitive ("Step " missed "step 1" while the stage check
            # was uppercased); compare both case-insensitively.
            lowered = line.lower()
            if step_match or lowered.startswith("step ") or lowered.startswith("stage "):
                # If we were already processing a step, save it
                if in_step and current_step:
                    steps.append(current_step.strip())

                # Start a new step
                current_step = line
                in_step = True
            elif in_step:
                # Continuation line belongs to the current step.
                current_step += "\n" + line

        # Add the last step if there is one
        if in_step and current_step:
            steps.append(current_step.strip())

        # If no steps were found with the pattern matching, fall back to
        # splitting by newlines so we never return an empty plan for a
        # non-empty response.
        if not steps and response.strip():
            steps = [line.strip() for line in response.split("\n") if line.strip()]

        # Sanitize steps - remove any markdown formatting characters
        sanitized_steps = []
        for step in steps:
            # Remove markdown headers
            step = re.sub(r'^#+\s+', '', step)
            # Remove bullet points if they weren't caught by the main parsing
            step = re.sub(r'^[-*+]\s+', '', step)
            sanitized_steps.append(step)

        return sanitized_steps

    def _run(self, content: str, output_path: Optional[str] = None, **kwargs) -> TaskOutput:
        """
        Run the planning flow.

        Args:
            content (str): Task description
            output_path (Optional[str]): Path to save the output (unused)
            **kwargs: Additional arguments

        Returns:
            TaskOutput: Task output containing the plan
        """
        # Generate a plan
        plan = self._generate_plan(content)

        # Parse key files needed from the plan (if any)
        files = []
        for step in plan:
            # Extract filenames using regex patterns
            file_patterns = [
                r'Create (?:a|the) file[:\s]+[\'"]?([^\s\'"]+)[\'"]?',
                r'Save (?:to|as) [\'"]?([^\s\'"]+)[\'"]?',
                r'Generate [\'"]?([^\s\'"\.]+\.(?:py|js|html|css|md|json|yaml|txt))[\'"]?',
                r'(?:Edit|Modify|Update) [\'"]?([^\s\'"]+)[\'"]?',
                r'(?:Named|called)[:\s]+[\'"]?([^\s\'"]+\.[a-zA-Z0-9]+)[\'"]?'
            ]

            for pattern in file_patterns:
                matches = re.findall(pattern, step)
                files.extend(matches)

        # Remove duplicates
        files = list(set(files))

        # Create summary
        summary = f"## Task Planning Summary\n\n"
        summary += f"**Task:** {content}\n\n"
        summary += f"### Plan\n\n"
        summary += self._format_plan_for_summary(plan)

        if files:
            summary += f"\n### Key Files\n\n"
            summary += self._format_files_for_summary(files)

        # Create output with the plan
        output = TaskOutput(
            content=summary,
            result={
                "plan": plan,
                "files": files,
                "summary": summary
            },
            status="success"
        )

        self.logger.info(f"Generated plan with {len(plan)} steps")
        return output

    def _format_plan_for_summary(self, plan: List[str]) -> str:
        """Format the plan for inclusion in the summary."""
        return "\n".join([f"- {step}" for step in plan])

    def _format_files_for_summary(self, files: List[str]) -> str:
        """Format the files list for inclusion in the summary."""
        return "\n".join([f"- `{file}`" for file in files])
class PythonExecuteTool(BaseTool):
    """Tool for executing Python code."""

    def __init__(self):
        """Initialize the Python execution tool."""
        super().__init__(
            name="python_execute",
            description="Execute Python code and return the results"
        )

    def _run(self,
             code: str,
             use_subprocess: bool = False,
             capture_locals: bool = False,
             input_vars: Optional[Dict[str, Any]] = None,
             timeout: Optional[int] = 30) -> Dict[str, Any]:
        """
        Execute Python code and return the results.

        Args:
            code (str): Python code to execute
            use_subprocess (bool, optional): Run in a subprocess for isolation
            capture_locals (bool, optional): Capture local variables after execution
            input_vars (Dict[str, Any], optional): Variables to inject into the context
            timeout (int, optional): Timeout in seconds (only for subprocess)

        Returns:
            Dict[str, Any]: Execution results
        """
        if use_subprocess:
            return self._execute_in_subprocess(code, timeout)
        else:
            return self._execute_in_current_process(code, capture_locals, input_vars)

    def _execute_in_current_process(self, code: str, capture_locals: bool, input_vars: Optional[Dict[str, Any]] = None) -> Dict[str, Any]:
        """
        Execute Python code in the current process.

        Args:
            code (str): Python code to execute
            capture_locals (bool): Capture local variables after execution
            input_vars (Dict[str, Any], optional): Variables to inject into the context

        Returns:
            Dict[str, Any]: Execution results with "success", "stdout",
                "stderr", "locals" and "exception" keys.
        """
        # BUGFIX: previously this ran `exec(code, globals(), local_vars)`,
        # which (a) exposed and allowed mutation of this module's real
        # globals, and (b) hit the classic exec pitfall where functions
        # defined by the executed code cannot see other top-level names it
        # defined (they resolve in globals, but defs landed in locals).
        # A single dedicated namespace fixes both.
        exec_scope: Dict[str, Any] = {}
        if input_vars:
            exec_scope.update(input_vars)

        # Capture stdout and stderr
        stdout_buffer = StringIO()
        stderr_buffer = StringIO()

        result = {
            "success": False,
            "stdout": "",
            "stderr": "",
            "locals": {},
            "exception": None
        }

        try:
            with redirect_stdout(stdout_buffer), redirect_stderr(stderr_buffer):
                exec(code, exec_scope)

            result["success"] = True

            # Capture stdout and stderr
            result["stdout"] = stdout_buffer.getvalue()
            result["stderr"] = stderr_buffer.getvalue()

            # Capture the executed code's top-level names if requested
            if capture_locals:
                # Filter out internal/dunder names (also drops the
                # auto-injected '__builtins__') and the injected inputs key.
                result["locals"] = {k: v for k, v in exec_scope.items()
                                    if not k.startswith('_') and k != 'input_vars'}

            self.logger.debug("Python code executed successfully in current process")

        except Exception as e:
            result["success"] = False
            result["exception"] = {
                "type": type(e).__name__,
                "message": str(e),
                "traceback": traceback.format_exc()
            }
            # Capture stdout and stderr (what was captured before the exception)
            result["stdout"] = stdout_buffer.getvalue()
            result["stderr"] = stderr_buffer.getvalue() + "\n" + traceback.format_exc()

            self.logger.warning(f"Error executing Python code: {type(e).__name__}: {str(e)}")

        return result

    def _execute_in_subprocess(self, code: str, timeout: Optional[int] = 30) -> Dict[str, Any]:
        """
        Execute Python code in a subprocess for isolation.

        Args:
            code (str): Python code to execute
            timeout (int, optional): Timeout in seconds

        Returns:
            Dict[str, Any]: Execution results
        """
        # Create a temporary Python file
        with tempfile.NamedTemporaryFile(suffix=".py", mode='w', delete=False) as code_file:
            code_path = code_file.name
            code_file.write(code)

        try:
            # Use BashTool to execute the script with the current interpreter.
            bash_tool = BashTool()
            cmd_result = bash_tool.run(
                command=f"{sys.executable} {code_path}",
                timeout=timeout,
                capture_stderr=True
            )

            result = {
                "success": cmd_result["returncode"] == 0,
                "stdout": cmd_result["stdout"],
                "stderr": cmd_result["stderr"] if "stderr" in cmd_result else "",
                "returncode": cmd_result["returncode"]
            }

            if result["success"]:
                self.logger.debug("Python code executed successfully in subprocess")
            else:
                self.logger.warning(f"Python subprocess execution failed with code {cmd_result['returncode']}")

            return result

        finally:
            # Clean up the temporary file even if execution raised.
            try:
                os.unlink(code_path)
            except Exception as e:
                self.logger.warning(f"Failed to remove temporary Python file: {str(e)}")


def execute_python_code(code: str, use_subprocess: bool = False, capture_locals: bool = False,
                        input_vars: Optional[Dict[str, Any]] = None, timeout: Optional[int] = 30) -> Dict[str, Any]:
    """
    Execute Python code using the PythonExecuteTool.

    Args:
        code (str): Python code to execute
        use_subprocess (bool, optional): Run in a subprocess for isolation
        capture_locals (bool, optional): Capture local variables after execution
        input_vars (Dict[str, Any], optional): Variables to inject into the context
        timeout (int, optional): Timeout in seconds (only for subprocess)

    Returns:
        Dict[str, Any]: Execution results
    """
    tool = PythonExecuteTool()
    return tool.run(
        code=code,
        use_subprocess=use_subprocess,
        capture_locals=capture_locals,
        input_vars=input_vars,
        timeout=timeout
    )
class PlanningTool(BaseTool):
    """Tool for generating and executing task plans."""

    def __init__(self):
        """Initialize the planning tool."""
        super().__init__(
            name="planning",
            description="Generate and execute task plans"
        )

    @staticmethod
    def _parse_steps(response: str) -> List[str]:
        """
        Parse plan steps out of an LLM response.

        A line is treated as a step when it starts with "- ", "* " or a
        digit followed by ".", ")" or ":".

        Args:
            response (str): Raw LLM response text

        Returns:
            List[str]: Extracted step texts with markers stripped
        """
        steps = []
        for line in response.split("\n"):
            line = line.strip()
            # BUGFIX: guard len(line) > 1 before indexing line[1]; the old
            # check raised IndexError on a single-character line like "1".
            if line and (line.startswith("- ") or line.startswith("* ") or
                         (len(line) > 1 and line[0].isdigit() and line[1] in [".", ")", ":"])):
                steps.append(line.lstrip("- *0123456789.):").strip())
        return steps

    def _run(self,
             task: str,
             max_steps: int = 10,
             generate_only: bool = True) -> Dict[str, Any]:
        """
        Generate a plan for the given task.

        Args:
            task (str): Task to plan for
            max_steps (int, optional): Maximum number of steps in the plan
            generate_only (bool, optional): Only generate the plan without executing it

        Returns:
            Dict[str, Any]: Generated plan (and execution results when
                generate_only is False)

        Raises:
            ToolError: If plan generation or execution fails.
        """
        try:
            # Generate plan using LLM
            prompt = PLANNING_PROMPT.format(task=task)
            response = llm_manager.generate_text(prompt)

            # Parse steps from response, limited to max_steps.
            steps = self._parse_steps(response)[:max_steps]

            # If generate_only, return the plan without executing
            if generate_only:
                return {
                    "task": task,
                    "plan": steps,
                    "generate_only": generate_only,
                    "num_steps": len(steps)
                }

            # Otherwise, execute the plan.  Imported here to avoid a
            # circular import at module load time.
            from app.agent.planning import PlanningAgent
            from app.schema import TaskInput, AgentType

            # Create planning agent
            planning_agent = PlanningAgent()

            # Create task input
            task_input = TaskInput(
                task_description=task,
                agent_type=AgentType.PLANNING,
                parameters={"plan": steps}
            )

            # Execute plan
            self.logger.info(f"Executing plan with {len(steps)} steps")
            result = planning_agent.run(task_input)

            # Process results
            if result.success:
                # Extract artifact information if available
                artifact_info = {}
                if result.metadata and "artifact_counts" in result.metadata:
                    artifact_info = result.metadata["artifact_counts"]

                # Extract generated files if available
                generated_files = []
                if result.metadata and "results" in result.metadata:
                    for step_result in result.metadata["results"]:
                        if isinstance(step_result.get("result"), dict) and "filepath" in step_result["result"]:
                            generated_files.append(step_result["result"]["filepath"])

                # Return execution results
                return {
                    "task": task,
                    "plan": steps,
                    "execution_result": result.result,
                    "num_steps": len(steps),
                    "tool_calls": artifact_info.get("tool_calls", 0),
                    "web_sources": artifact_info.get("web_sources", 0),
                    "generated_files": artifact_info.get("generated_files", 0),
                    "file_paths": generated_files
                }
            else:
                raise ToolError(f"Plan execution failed: {result.error}")

        except ToolError:
            # CONSISTENCY FIX: don't relabel an execution failure as a
            # generation failure; propagate it unchanged.
            raise
        except Exception as e:
            error_msg = f"Failed to generate plan: {str(e)}"
            self.logger.error(error_msg)
            raise ToolError(error_msg) from e

    def refine_plan(self, task: str, initial_plan: List[str], feedback: str) -> Dict[str, Any]:
        """
        Refine an existing plan based on feedback.

        Args:
            task (str): Original task
            initial_plan (List[str]): Initial plan steps
            feedback (str): Feedback for refinement

        Returns:
            Dict[str, Any]: Refined plan

        Raises:
            ToolError: If the refinement request fails.
        """
        try:
            # Format initial plan for prompt
            plan_text = "\n".join([f"{i+1}. {step}" for i, step in enumerate(initial_plan)])

            # Create refinement prompt
            prompt = f"""
You are a planning assistant that helps refine task plans.

Original Task: {task}

Initial Plan:
{plan_text}

Feedback: {feedback}

Please provide a revised plan based on the feedback.
Each step should be clear and actionable.
"""

            # Generate refined plan
            response = llm_manager.generate_text(prompt)

            # Parse steps from response (shared, index-safe parser)
            steps = self._parse_steps(response)

            # Return refined plan
            return {
                "task": task,
                "original_plan": initial_plan,
                "refined_plan": steps,
                "feedback": feedback,
                "num_steps": len(steps)
            }

        except Exception as e:
            error_msg = f"Failed to refine plan: {str(e)}"
            self.logger.error(error_msg)
            raise ToolError(error_msg) from e


def generate_plan(task: str, max_steps: int = 10, generate_only: bool = True) -> Dict[str, Any]:
    """
    Generate a plan for the given task using the PlanningTool.

    Args:
        task (str): Task to plan for
        max_steps (int, optional): Maximum number of steps in the plan
        generate_only (bool, optional): Only generate the plan without executing it

    Returns:
        Dict[str, Any]: Generated plan
    """
    tool = PlanningTool()
    return tool.run(
        task=task,
        max_steps=max_steps,
        generate_only=generate_only
    )
The agent leverages an LLM (e.g., OpenAI) and tools orchestrated via the LangChain framework to: 6 | - Generate PDFs 7 | - Generate Markdown files 8 | - Create code base artifacts 9 | - Execute browser tasks using Selenium 10 | - Conduct web research using the Firecrawl API 11 | 12 | The design draws inspiration from the modularity and multi-modal task execution of the [Owl Project](https://github.com/camel-ai/owl.git) and [OpenManus](https://github.com/mannaandpoem/OpenManus). 13 | 14 | --- 15 | 16 | ## Functional Requirements 17 | 18 | 1. **Task Interpretation** 19 | - Interpret natural language descriptions provided by users. 20 | - Utilize an LLM (e.g., OpenAI API) to parse tasks and determine the required actions. 21 | 22 | 2. **Document Generation** 23 | - **PDF Generation:** Convert text or data into a PDF file with basic formatting using libraries like `reportlab`. 24 | - **Markdown Generation:** Create Markdown files from text/data and save them to specified paths. 25 | 26 | 3. **Code Base Artifact Generation** 27 | - Generate Python scripts or modules based on task descriptions or research findings. 28 | - Ensure the generated code is functional and stored at a designated location. 29 | 30 | 4. **Browser Task Execution** 31 | - Automate browser tasks such as navigating web pages, clicking elements, or filling forms using Selenium. 32 | - Optionally, use the LLM to generate specific Selenium actions for complex workflows. 33 | 34 | 5. **Web Research with Firecrawl API** 35 | - Conduct research by crawling websites with the Firecrawl API. 36 | - Retrieve clean Markdown or structured data from the crawled content for further processing. 37 | 38 | --- 39 | 40 | ## Non-Functional Requirements 41 | 42 | - **Modularity and Extensibility** 43 | - Structure the agent with separate components for each task (e.g., document generation, code generation, browser automation, research). 44 | - Ensure new tools and functionalities can be easily integrated. 
45 | 46 | - **Security** 47 | - Handle API keys and sensitive data securely via environment variables. 48 | - Avoid executing arbitrary code in production environments; use sandboxing or predefined actions. 49 | 50 | - **Performance and Robustness** 51 | - Ensure efficient processing, even when handling large datasets. 52 | - Implement robust error handling for API failures and invalid inputs. 53 | 54 | --- 55 | 56 | ## File Structure Considerations 57 | 58 | The following directory structure is suggested to maintain a clear separation of concerns, inspired by [OpenManus](https://github.com/mannaandpoem/OpenManus): 59 | 60 | ``` 61 | └── open-manus/ 62 | ├── README.md 63 | ├── LICENSE 64 | ├── README_zh.md 65 | ├── main.py 66 | ├── requirements.txt 67 | ├── run_flow.py 68 | ├── setup.py 69 | ├── .pre-commit-config.yaml 70 | ├── app/ 71 | │ ├── __init__.py 72 | │ ├── config.py 73 | │ ├── exceptions.py 74 | │ ├── llm.py 75 | │ ├── logger.py 76 | │ ├── schema.py 77 | │ ├── agent/ 78 | │ │ ├── __init__.py 79 | │ │ ├── base.py 80 | │ │ ├── manus.py 81 | │ │ ├── planning.py 82 | │ │ ├── react.py 83 | │ │ ├── swe.py 84 | │ │ └── toolcall.py 85 | │ ├── flow/ 86 | │ │ ├── __init__.py 87 | │ │ ├── base.py 88 | │ │ ├── flow_factory.py 89 | │ │ └── planning.py 90 | │ ├── prompt/ 91 | │ │ ├── __init__.py 92 | │ │ ├── manus.py 93 | │ │ ├── planning.py 94 | │ │ ├── swe.py 95 | │ │ └── toolcall.py 96 | │ └── tool/ 97 | │ ├── __init__.py 98 | │ ├── base.py 99 | │ ├── bash.py 100 | │ ├── browser_use_tool.py 101 | │ ├── create_chat_completion.py 102 | │ ├── file_saver.py 103 | │ ├── google_search.py 104 | │ ├── planning.py 105 | │ ├── python_execute.py 106 | │ ├── run.py 107 | │ ├── str_replace_editor.py 108 | │ ├── terminate.py 109 | │ └── tool_collection.py 110 | ├── assets/ 111 | └── config/ 112 | └── config.example.toml 113 | ``` 114 | 115 | This structure ensures that: 116 | - **Agent logic** is organized under `app/agent`. 
117 | - **Flow and orchestration** logic resides in `app/flow`. 118 | - **Prompt templates** and configurations are under `app/prompt`. 119 | - **Tools and integrations** are maintained in `app/tool`. 120 | - **Configuration files and assets** are isolated for easy management. 121 | 122 | --- 123 | 124 | ## Step-by-Step Implementation Plan 125 | 126 | ### Step 1: Environment Setup 127 | - **Install Required Libraries** 128 | ```bash 129 | pip install langchain openai firecrawl selenium reportlab 130 | ``` 131 | - **Set Up API Keys** 132 | ```bash 133 | export OPENAI_API_KEY="your_openai_key" 134 | export FIRECRAWL_API_KEY="your_firecrawl_key" 135 | ``` 136 | - **Install Web Driver** 137 | - Download and configure a compatible web driver (e.g., ChromeDriver) for Selenium. 138 | 139 | ### Step 2: Define Tools 140 | - **Firecrawl Research Tool** 141 | - Crawl a website using the Firecrawl API and return Markdown content. 142 | - **PDF Generation Tool** 143 | - Use `reportlab` to convert text input into a PDF file. 144 | - **Markdown Generation Tool** 145 | - Create and save Markdown files from given text. 146 | - **Code Generator Tool** 147 | - Generate Python scripts or modules using an LLM based on task descriptions. 148 | - **Browser Task Tool** 149 | - Automate browser actions using Selenium for tasks like navigation and form submissions. 150 | 151 | ### Step 3: Integrate Tools with the LangChain Agent 152 | - **LLM Initialization** 153 | - Use the OpenAI API via LangChain to initialize the LLM. 154 | - **Tool Registration** 155 | - Register all the tools (research, PDF, Markdown, code, browser) with the LangChain agent. 156 | - **Agent Setup** 157 | - Configure the agent (e.g., using `AgentType.ZERO_SHOT_REACT_DESCRIPTION`) for task interpretation and execution. 158 | 159 | ### Step 4: Task Execution Flow 160 | - **User Input** 161 | - Accept natural language task descriptions. 
162 | - **Task Parsing** 163 | - Utilize the LLM to interpret and break down the task into actionable steps. 164 | - **Sequential Processing** 165 | - Allow for multi-step tasks, e.g., research a URL, generate Markdown, then produce a PDF report. 166 | - **Logging and Debugging** 167 | - Provide detailed logging for debugging and task tracking. 168 | 169 | ### Step 5: Testing and Validation 170 | - **Unit Testing** 171 | - Validate each tool individually (e.g., test PDF generation with sample text). 172 | - **Integration Testing** 173 | - Execute complete workflows that span multiple tools. 174 | - **Error Handling** 175 | - Incorporate robust error handling to manage API failures and unexpected inputs. 176 | 177 | ### Step 6: Documentation and Maintenance 178 | - **Comprehensive README** 179 | - Include setup instructions, usage examples, and directory structure details. 180 | - **Inline Documentation** 181 | - Comment code thoroughly to assist future development and troubleshooting. 182 | - **Security Reviews** 183 | - Regularly update security practices, especially concerning API key management and dynamic code execution. 184 | 185 | --- 186 | 187 | ## References 188 | 189 | - [Owl Project on GitHub](https://github.com/camel-ai/owl.git) 190 | - [OpenManus Project on GitHub](https://github.com/mannaandpoem/OpenManus) 191 | 192 | --- 193 | 194 | The Python environment is created with uv; install dependencies using: 195 | ``` 196 | uv pip install ...
197 | ``` -------------------------------------------------------------------------------- /app/tool/create_chat_completion.py: -------------------------------------------------------------------------------- 1 | from typing import Dict, List, Optional, Any, Union 2 | 3 | from app.tool.base import BaseTool 4 | from app.exceptions import LLMError 5 | from app.llm import llm_manager 6 | from app.config import config 7 | 8 | 9 | class CreateChatCompletionTool(BaseTool): 10 | """Tool for creating chat completions using LLM.""" 11 | 12 | def __init__(self): 13 | """Initialize the chat completion tool.""" 14 | super().__init__( 15 | name="create_chat_completion", 16 | description="Generate text completions using the LLM" 17 | ) 18 | 19 | def _run(self, 20 | prompt: Optional[str] = None, 21 | system_prompt: Optional[str] = None, 22 | messages: Optional[List[Dict[str, str]]] = None, 23 | temperature: Optional[float] = None, 24 | max_tokens: Optional[int] = None) -> Dict[str, Any]: 25 | """ 26 | Create a chat completion using the LLM. 
27 | 28 | Args: 29 | prompt (str, optional): User prompt (used if messages not provided) 30 | system_prompt (str, optional): System prompt (used if messages not provided) 31 | messages (List[Dict[str, str]], optional): List of message dictionaries 32 | temperature (float, optional): Temperature for generation 33 | max_tokens (int, optional): Maximum tokens for generation 34 | 35 | Returns: 36 | Dict[str, Any]: Generated completion 37 | """ 38 | try: 39 | # Use either messages or prompt 40 | if messages: 41 | # Use provided messages 42 | result = llm_manager.generate_from_messages(messages) 43 | else: 44 | # Use prompt and system_prompt 45 | if not prompt: 46 | raise LLMError("Either 'messages' or 'prompt' must be provided") 47 | 48 | result = llm_manager.generate_text(prompt, system_prompt) 49 | 50 | # Return formatted result 51 | return { 52 | "content": result, 53 | "success": True 54 | } 55 | 56 | except Exception as e: 57 | error_msg = f"Failed to create chat completion: {str(e)}" 58 | self.logger.error(error_msg) 59 | 60 | return { 61 | "content": "", 62 | "success": False, 63 | "error": str(e) 64 | } 65 | 66 | def create_chat_with_functions(self, 67 | messages: List[Dict[str, str]], 68 | functions: List[Dict[str, Any]], 69 | function_call: Optional[Union[str, Dict[str, str]]] = None) -> Dict[str, Any]: 70 | """ 71 | Create a chat completion with function calling. 72 | 73 | Args: 74 | messages (List[Dict[str, str]]): List of message dictionaries 75 | functions (List[Dict[str, Any]]): List of function definitions 76 | function_call (Union[str, Dict[str, str]], optional): Function call instruction 77 | 78 | Returns: 79 | Dict[str, Any]: Generated completion with possible function call 80 | """ 81 | try: 82 | # Note: This is a placeholder - in a real implementation, you would use 83 | # the OpenAI API's function calling feature. Since we're using a generic 84 | # LLM wrapper, we'll simulate the function calling behavior. 
85 | 86 | # Construct a prompt that includes function info 87 | functions_str = "\n".join([ 88 | f"Function {i+1}: {fn.get('name', 'unknown')}\n" 89 | f"Description: {fn.get('description', 'No description')}\n" 90 | f"Parameters: {fn.get('parameters', {})}\n" 91 | for i, fn in enumerate(functions) 92 | ]) 93 | 94 | messages_with_functions = messages.copy() 95 | if messages[-1]["role"] == "user": 96 | # Append function information to the user's message 97 | messages_with_functions[-1]["content"] += ( 98 | f"\n\nYou have access to the following functions:\n{functions_str}\n" 99 | f"To call a function, respond with the function name and parameters in JSON format." 100 | ) 101 | 102 | # Generate response 103 | result = llm_manager.generate_from_messages(messages_with_functions) 104 | 105 | # Simple heuristic to detect if the response is a function call 106 | is_function_call = ( 107 | result.strip().startswith("{") and 108 | result.strip().endswith("}") and 109 | "function" in result.lower() 110 | ) 111 | 112 | if is_function_call: 113 | # Try to parse as a function call 114 | try: 115 | # Extract JSON content if needed 116 | json_start = result.find("{") 117 | json_end = result.rfind("}") + 1 118 | 119 | if json_start >= 0 and json_end > json_start: 120 | import json 121 | function_data = json.loads(result[json_start:json_end]) 122 | 123 | return { 124 | "content": None, 125 | "function_call": { 126 | "name": function_data.get("function", function_data.get("name", "")), 127 | "arguments": json.dumps(function_data.get("parameters", function_data.get("arguments", {}))) 128 | }, 129 | "success": True 130 | } 131 | except Exception: 132 | # If parsing fails, return as regular text 133 | pass 134 | 135 | # Regular text response 136 | return { 137 | "content": result, 138 | "success": True 139 | } 140 | 141 | except Exception as e: 142 | error_msg = f"Failed to create chat completion with functions: {str(e)}" 143 | self.logger.error(error_msg) 144 | 145 | return { 146 | 
"content": "", 147 | "success": False, 148 | "error": str(e) 149 | } 150 | 151 | 152 | def create_chat_completion(prompt: Optional[str] = None, system_prompt: Optional[str] = None, 153 | messages: Optional[List[Dict[str, str]]] = None, 154 | temperature: Optional[float] = None, 155 | max_tokens: Optional[int] = None) -> Dict[str, Any]: 156 | """ 157 | Create a chat completion using the CreateChatCompletionTool. 158 | 159 | Args: 160 | prompt (str, optional): User prompt (used if messages not provided) 161 | system_prompt (str, optional): System prompt (used if messages not provided) 162 | messages (List[Dict[str, str]], optional): List of message dictionaries 163 | temperature (float, optional): Temperature for generation 164 | max_tokens (int, optional): Maximum tokens for generation 165 | 166 | Returns: 167 | Dict[str, Any]: Generated completion 168 | """ 169 | tool = CreateChatCompletionTool() 170 | return tool.run( 171 | prompt=prompt, 172 | system_prompt=system_prompt, 173 | messages=messages, 174 | temperature=temperature, 175 | max_tokens=max_tokens 176 | ) 177 | -------------------------------------------------------------------------------- /app/schema.py: -------------------------------------------------------------------------------- 1 | from enum import Enum 2 | from typing import Dict, List, Optional, Union, Any 3 | from pydantic import BaseModel, Field 4 | 5 | 6 | class AgentType(str, Enum): 7 | """Types of agents available in the system.""" 8 | MANUS = "manus" 9 | REACT = "react" 10 | PLANNING = "planning" 11 | SWE = "swe" 12 | TOOLCALL = "toolcall" 13 | 14 | 15 | class ToolType(str, Enum): 16 | """Types of tools available in the system.""" 17 | PDF_GENERATOR = "pdf_generator" 18 | MARKDOWN_GENERATOR = "markdown_generator" 19 | CODE_GENERATOR = "code_generator" 20 | BROWSER = "browser" 21 | FIRECRAWL = "firecrawl" 22 | BASH = "bash" 23 | PYTHON_EXECUTE = "python_execute" 24 | FILE_SAVER = "file_saver" 25 | GOOGLE_SEARCH = "google_search" 26 | 
CREATE_CHAT_COMPLETION = "create_chat_completion" 27 | STR_REPLACE_EDITOR = "str_replace_editor" 28 | TERMINATE = "terminate" 29 | 30 | 31 | class DocumentFormat(str, Enum): 32 | """Document formats supported by generators.""" 33 | PDF = "pdf" 34 | MARKDOWN = "markdown" 35 | HTML = "html" 36 | TEXT = "text" 37 | 38 | 39 | class WebDriverType(str, Enum): 40 | """Web browsers supported by Selenium.""" 41 | CHROME = "chrome" 42 | FIREFOX = "firefox" 43 | EDGE = "edge" 44 | SAFARI = "safari" 45 | 46 | 47 | class Message(BaseModel): 48 | """A message in a conversation.""" 49 | role: str = Field(..., description="Role of the message sender (system, user, assistant)") 50 | content: str = Field(..., description="Content of the message") 51 | 52 | 53 | class Conversation(BaseModel): 54 | """A conversation consisting of messages.""" 55 | messages: List[Message] = Field(default_factory=list, description="List of messages in the conversation") 56 | 57 | 58 | class TaskInput(BaseModel): 59 | """Input for a task to be executed by the agent.""" 60 | task_description: str = Field(..., description="Natural language description of the task") 61 | conversation: Optional[Conversation] = Field(None, description="Optional conversation context") 62 | tools: Optional[List[str]] = Field(None, description="Optional list of tools to use") 63 | agent_type: Optional[AgentType] = Field(None, description="Type of agent to use") 64 | parameters: Optional[Dict[str, Any]] = Field(None, description="Optional parameters for the task") 65 | 66 | 67 | class WebResearchInput(BaseModel): 68 | """Input for web research tasks.""" 69 | query: str = Field(..., description="Research query or URL to crawl") 70 | output_format: Optional[DocumentFormat] = Field(DocumentFormat.MARKDOWN, description="Format for the research output") 71 | max_depth: Optional[int] = Field(1, description="Maximum depth for web crawling") 72 | max_pages: Optional[int] = Field(10, description="Maximum number of pages to crawl") 73 | 
include_visualizations: Optional[bool] = Field(True, description="Whether to include visualization suggestions") 74 | 75 | 76 | class BrowserTaskInput(BaseModel): 77 | """Input for browser automation tasks.""" 78 | url: str = Field(..., description="URL to navigate to") 79 | actions: List[Dict[str, Any]] = Field(..., description="List of actions to perform") 80 | webdriver: Optional[WebDriverType] = Field(WebDriverType.CHROME, description="Web driver to use") 81 | headless: Optional[bool] = Field(True, description="Whether to run the browser in headless mode") 82 | timeout: Optional[int] = Field(30, description="Timeout for browser actions in seconds") 83 | query: Optional[str] = Field(None, description="Search query parameter (will navigate to search engine with this query)") 84 | 85 | 86 | class CodeGenerationInput(BaseModel): 87 | """Input for code generation tasks.""" 88 | description: str = Field(..., description="Description of the code to generate") 89 | language: str = Field(..., description="Programming language for the code") 90 | output_path: Optional[str] = Field(None, description="Optional output path for the code") 91 | dependencies: Optional[List[str]] = Field(None, description="Optional list of dependencies") 92 | template: Optional[str] = Field(None, description="Optional template for the code") 93 | save_as_artifact: Optional[bool] = Field(True, description="Whether to save the code as an artifact") 94 | execute_code: Optional[bool] = Field(True, description="Whether to execute the generated code") 95 | 96 | 97 | class TaskOutput(BaseModel): 98 | """Output from a task execution.""" 99 | success: bool = Field(..., description="Whether the task was successful") 100 | result: Optional[Any] = Field(None, description="Result of the task") 101 | error: Optional[str] = Field(None, description="Error message if task failed") 102 | conversation: Optional[Conversation] = Field(None, description="Updated conversation after task execution") 103 | metadata: 
Optional[Dict[str, Any]] = Field(None, description="Optional metadata about the task execution") 104 | 105 | 106 | class VisualizationData(BaseModel): 107 | """Data for a visualization.""" 108 | title: str = Field(..., description="Title of the visualization") 109 | visualization_type: str = Field(..., description="Type of visualization (e.g., bar_chart, line_chart)") 110 | description: Optional[str] = Field(None, description="Description of what this visualization shows") 111 | data: Dict[str, Any] = Field(..., description="Data for the visualization") 112 | x_axis: Optional[str] = Field(None, description="Label for the x-axis") 113 | y_axis: Optional[str] = Field(None, description="Label for the y-axis") 114 | options: Optional[Dict[str, Any]] = Field(None, description="Additional options for the visualization") 115 | 116 | 117 | class DataTable(BaseModel): 118 | """Representation of a data table.""" 119 | title: str = Field(..., description="Title of the table") 120 | description: Optional[str] = Field(None, description="Description of what this table represents") 121 | columns: List[str] = Field(..., description="Column headers for the table") 122 | rows: List[List[Any]] = Field(..., description="Data rows for the table") 123 | metadata: Optional[Dict[str, Any]] = Field(None, description="Additional metadata for the table") 124 | 125 | 126 | class DocumentGenerationOptions(BaseModel): 127 | """Options for document generation.""" 128 | include_table_of_contents: Optional[bool] = Field(True, description="Whether to include a table of contents") 129 | include_cover_page: Optional[bool] = Field(True, description="Whether to include a cover page") 130 | include_visualizations: Optional[bool] = Field(True, description="Whether to include visualizations") 131 | include_data_tables: Optional[bool] = Field(True, description="Whether to include data tables") 132 | include_sources: Optional[bool] = Field(True, description="Whether to include sources") 133 | template: 
Optional[str] = Field(None, description="Template to use for the document") 134 | style: Optional[Dict[str, Any]] = Field(None, description="Style options for the document") 135 | 136 | 137 | class GenerateDocumentInput(BaseModel): 138 | """Input for document generation tasks.""" 139 | content: str = Field(..., description="Content to be included in the document") 140 | format: DocumentFormat = Field(..., description="Format of the document to generate") 141 | output_path: Optional[str] = Field(None, description="Optional output path for the document") 142 | title: Optional[str] = Field(None, description="Optional title for the document") 143 | metadata: Optional[Dict[str, Any]] = Field(None, description="Optional metadata for the document") 144 | visualizations: Optional[List[VisualizationData]] = Field(None, description="List of visualizations to include") 145 | data_tables: Optional[List[DataTable]] = Field(None, description="List of data tables to include") 146 | options: Optional[DocumentGenerationOptions] = Field(None, description="Options for document generation") 147 | -------------------------------------------------------------------------------- /app/tool/str_replace_editor.py: -------------------------------------------------------------------------------- 1 | import os 2 | import re 3 | from typing import Dict, List, Optional, Any 4 | 5 | from app.tool.base import BaseTool 6 | from app.exceptions import FileOperationError 7 | from app.config import config 8 | 9 | 10 | class StrReplaceEditorTool(BaseTool): 11 | """Tool for editing text using string replacement operations.""" 12 | 13 | def __init__(self): 14 | """Initialize the string replace editor tool.""" 15 | super().__init__( 16 | name="str_replace_editor", 17 | description="Edit text or files using string replacement operations" 18 | ) 19 | 20 | def _run(self, 21 | text: Optional[str] = None, 22 | file_path: Optional[str] = None, 23 | replacements: List[Dict[str, str]] = None, 24 | regex: bool = 
False, 25 | save_changes: bool = True, 26 | backup: bool = True) -> Dict[str, Any]: 27 | """ 28 | Edit text using string replacement operations. 29 | 30 | Args: 31 | text (str, optional): Text to edit (used if file_path not provided) 32 | file_path (str, optional): Path to file to edit 33 | replacements (List[Dict[str, str]]): List of replacement dictionaries 34 | regex (bool, optional): Use regex for replacements 35 | save_changes (bool, optional): Save changes to file 36 | backup (bool, optional): Create backup before editing file 37 | 38 | Returns: 39 | Dict[str, Any]: Editing results 40 | """ 41 | try: 42 | # Initialize content from text or file 43 | content = text 44 | if file_path and not content: 45 | content = self._read_file(file_path) 46 | 47 | if not content: 48 | raise FileOperationError("Either 'text' or 'file_path' must be provided") 49 | 50 | if not replacements: 51 | replacements = [] 52 | 53 | # Perform replacements 54 | original_content = content 55 | num_replacements = 0 56 | 57 | for replacement in replacements: 58 | old = replacement.get("old", "") 59 | new = replacement.get("new", "") 60 | 61 | if not old: 62 | continue 63 | 64 | if regex: 65 | # Use regex replacement 66 | pattern = re.compile(old, re.MULTILINE) 67 | result = pattern.subn(new, content) 68 | content = result[0] 69 | num_replacements += result[1] 70 | else: 71 | # Use simple string replacement 72 | if old in content: 73 | count = content.count(old) 74 | content = content.replace(old, new) 75 | num_replacements += count 76 | 77 | # Save changes to file if requested 78 | if file_path and save_changes: 79 | # Create backup if requested 80 | if backup: 81 | backup_path = f"{file_path}.bak" 82 | with open(backup_path, "w", encoding="utf-8") as f: 83 | f.write(original_content) 84 | 85 | # Write updated content 86 | with open(file_path, "w", encoding="utf-8") as f: 87 | f.write(content) 88 | 89 | self.logger.info(f"Successfully edited file {file_path} with {num_replacements} 
replacements") 90 | 91 | # Return results 92 | return { 93 | "original_content": original_content, 94 | "updated_content": content, 95 | "num_replacements": num_replacements, 96 | "file_path": file_path if file_path else None, 97 | "success": True 98 | } 99 | 100 | except Exception as e: 101 | error_msg = f"Failed to perform text replacements: {str(e)}" 102 | self.logger.error(error_msg) 103 | raise FileOperationError(error_msg) 104 | 105 | def _read_file(self, file_path: str) -> str: 106 | """ 107 | Read content from a file. 108 | 109 | Args: 110 | file_path (str): Path to the file 111 | 112 | Returns: 113 | str: File content 114 | """ 115 | try: 116 | with open(file_path, "r", encoding="utf-8") as f: 117 | return f.read() 118 | except Exception as e: 119 | error_msg = f"Failed to read file {file_path}: {str(e)}" 120 | self.logger.error(error_msg) 121 | raise FileOperationError(error_msg) 122 | 123 | def edit_lines(self, 124 | file_path: str, 125 | line_edits: List[Dict[str, Any]], 126 | save_changes: bool = True, 127 | backup: bool = True) -> Dict[str, Any]: 128 | """ 129 | Edit specific lines in a file. 
130 | 131 | Args: 132 | file_path (str): Path to the file 133 | line_edits (List[Dict[str, Any]]): List of line edit operations 134 | save_changes (bool, optional): Save changes to file 135 | backup (bool, optional): Create backup before editing file 136 | 137 | Returns: 138 | Dict[str, Any]: Editing results 139 | """ 140 | try: 141 | # Read file content 142 | with open(file_path, "r", encoding="utf-8") as f: 143 | lines = f.readlines() 144 | 145 | original_lines = lines.copy() 146 | num_edits = 0 147 | 148 | # Perform line edits 149 | for edit in line_edits: 150 | line_num = edit.get("line") 151 | action = edit.get("action", "replace") 152 | content = edit.get("content", "") 153 | 154 | if line_num is None: 155 | continue 156 | 157 | # Adjust for 0-indexed list 158 | idx = line_num - 1 if line_num > 0 else line_num 159 | 160 | if action == "replace" and 0 <= idx < len(lines): 161 | lines[idx] = content + ("\n" if not content.endswith("\n") else "") 162 | num_edits += 1 163 | elif action == "insert" and 0 <= idx <= len(lines): 164 | lines.insert(idx, content + ("\n" if not content.endswith("\n") else "")) 165 | num_edits += 1 166 | elif action == "delete" and 0 <= idx < len(lines): 167 | lines.pop(idx) 168 | num_edits += 1 169 | elif action == "append": 170 | lines.append(content + ("\n" if not content.endswith("\n") else "")) 171 | num_edits += 1 172 | 173 | # Save changes to file if requested 174 | if save_changes: 175 | # Create backup if requested 176 | if backup: 177 | backup_path = f"{file_path}.bak" 178 | with open(backup_path, "w", encoding="utf-8") as f: 179 | f.writelines(original_lines) 180 | 181 | # Write updated content 182 | with open(file_path, "w", encoding="utf-8") as f: 183 | f.writelines(lines) 184 | 185 | self.logger.info(f"Successfully edited file {file_path} with {num_edits} line edits") 186 | 187 | # Return results 188 | return { 189 | "original_lines": original_lines, 190 | "updated_lines": lines, 191 | "num_edits": num_edits, 192 | 
"file_path": file_path, 193 | "success": True 194 | } 195 | 196 | except Exception as e: 197 | error_msg = f"Failed to perform line edits: {str(e)}" 198 | self.logger.error(error_msg) 199 | raise FileOperationError(error_msg) 200 | 201 | 202 | def edit_text(text: Optional[str] = None, file_path: Optional[str] = None, 203 | replacements: List[Dict[str, str]] = None, regex: bool = False, 204 | save_changes: bool = True, backup: bool = True) -> Dict[str, Any]: 205 | """ 206 | Edit text using the StrReplaceEditorTool. 207 | 208 | Args: 209 | text (str, optional): Text to edit (used if file_path not provided) 210 | file_path (str, optional): Path to file to edit 211 | replacements (List[Dict[str, str]]): List of replacement dictionaries 212 | regex (bool, optional): Use regex for replacements 213 | save_changes (bool, optional): Save changes to file 214 | backup (bool, optional): Create backup before editing file 215 | 216 | Returns: 217 | Dict[str, Any]: Editing results 218 | """ 219 | tool = StrReplaceEditorTool() 220 | return tool.run( 221 | text=text, 222 | file_path=file_path, 223 | replacements=replacements, 224 | regex=regex, 225 | save_changes=save_changes, 226 | backup=backup 227 | ) 228 | -------------------------------------------------------------------------------- /app/tool/browser_use_tool.py: -------------------------------------------------------------------------------- 1 | import os 2 | import time 3 | from typing import Any, Dict, List, Optional, Tuple 4 | from selenium import webdriver 5 | from selenium.webdriver.chrome.service import Service as ChromeService 6 | from selenium.webdriver.firefox.service import Service as FirefoxService 7 | from selenium.webdriver.edge.service import Service as EdgeService 8 | from selenium.webdriver.safari.service import Service as SafariService 9 | from selenium.webdriver.common.by import By 10 | from selenium.webdriver.common.keys import Keys 11 | from selenium.webdriver.support.ui import WebDriverWait 12 | from 
selenium.webdriver.support import expected_conditions as EC 13 | from selenium.common.exceptions import TimeoutException, WebDriverException 14 | from webdriver_manager.chrome import ChromeDriverManager 15 | from webdriver_manager.firefox import GeckoDriverManager 16 | from webdriver_manager.microsoft import EdgeChromiumDriverManager 17 | 18 | from app.tool.base import BaseTool 19 | from app.schema import BrowserTaskInput, WebDriverType 20 | from app.exceptions import BrowserError 21 | from app.config import config 22 | 23 | 24 | class BrowserTool(BaseTool): 25 | """Tool for automating browser tasks using Selenium.""" 26 | 27 | def __init__(self): 28 | """Initialize the browser tool.""" 29 | super().__init__( 30 | name="browser", 31 | description="Automate browser tasks using Selenium", 32 | parameters={ 33 | "type": "object", 34 | "properties": { 35 | "url": { 36 | "type": "string", 37 | "description": "URL to navigate to (required)" 38 | }, 39 | "actions": { 40 | "type": "array", 41 | "description": "List of actions to perform (required)", 42 | "items": { 43 | "type": "object" 44 | } 45 | }, 46 | "query": { 47 | "type": "string", 48 | "description": "Search query (will create a search URL if provided)" 49 | }, 50 | "webdriver": { 51 | "type": "string", 52 | "description": "Web driver to use (chrome, firefox, edge, safari)", 53 | "default": "chrome" 54 | }, 55 | "headless": { 56 | "type": "boolean", 57 | "description": "Whether to run in headless mode", 58 | "default": True 59 | }, 60 | "timeout": { 61 | "type": "integer", 62 | "description": "Timeout in seconds", 63 | "default": 30 64 | } 65 | }, 66 | "required": ["url", "actions"] 67 | } 68 | ) 69 | self.driver = None 70 | 71 | def _initialize_driver(self, webdriver_type: str, headless: bool) -> None: 72 | """ 73 | Initialize the web driver. 
74 | 75 | Args: 76 | webdriver_type (str): Type of web driver to use 77 | headless (bool): Whether to run the browser in headless mode 78 | """ 79 | try: 80 | if webdriver_type.lower() == WebDriverType.CHROME.value: 81 | options = webdriver.ChromeOptions() 82 | if headless: 83 | options.add_argument("--headless") 84 | options.add_argument("--no-sandbox") 85 | options.add_argument("--disable-dev-shm-usage") 86 | self.driver = webdriver.Chrome( 87 | service=ChromeService(ChromeDriverManager().install()), 88 | options=options 89 | ) 90 | elif webdriver_type.lower() == WebDriverType.FIREFOX.value: 91 | options = webdriver.FirefoxOptions() 92 | if headless: 93 | options.add_argument("--headless") 94 | self.driver = webdriver.Firefox( 95 | service=FirefoxService(GeckoDriverManager().install()), 96 | options=options 97 | ) 98 | elif webdriver_type.lower() == WebDriverType.EDGE.value: 99 | options = webdriver.EdgeOptions() 100 | if headless: 101 | options.add_argument("--headless") 102 | self.driver = webdriver.Edge( 103 | service=EdgeService(EdgeChromiumDriverManager().install()), 104 | options=options 105 | ) 106 | elif webdriver_type.lower() == WebDriverType.SAFARI.value: 107 | self.driver = webdriver.Safari(service=SafariService()) 108 | else: 109 | raise BrowserError(f"Unsupported web driver type: {webdriver_type}") 110 | 111 | # Set timeout 112 | self.driver.set_page_load_timeout(60) 113 | 114 | except Exception as e: 115 | raise BrowserError(f"Failed to initialize web driver: {str(e)}") 116 | 117 | def _find_element(self, by_type: str, selector: str, timeout: int = 10) -> Any: 118 | """ 119 | Find an element on the page. 
120 | 121 | Args: 122 | by_type (str): Type of selector (e.g., "id", "xpath", "css") 123 | selector (str): Selector value 124 | timeout (int, optional): Timeout in seconds 125 | 126 | Returns: 127 | Any: Found element 128 | """ 129 | by_map = { 130 | "id": By.ID, 131 | "name": By.NAME, 132 | "xpath": By.XPATH, 133 | "css": By.CSS_SELECTOR, 134 | "class": By.CLASS_NAME, 135 | "tag": By.TAG_NAME, 136 | "link_text": By.LINK_TEXT, 137 | "partial_link_text": By.PARTIAL_LINK_TEXT, 138 | } 139 | 140 | by_value = by_map.get(by_type.lower()) 141 | if not by_value: 142 | raise BrowserError(f"Invalid selector type: {by_type}") 143 | 144 | try: 145 | element = WebDriverWait(self.driver, timeout).until( 146 | EC.presence_of_element_located((by_value, selector)) 147 | ) 148 | return element 149 | except TimeoutException: 150 | raise BrowserError(f"Timed out waiting for element with {by_type}='{selector}'") 151 | 152 | def _perform_action(self, action_type: str, params: Dict[str, Any]) -> Any: 153 | """ 154 | Perform a browser action. 
155 | 156 | Args: 157 | action_type (str): Type of action to perform 158 | params (Dict[str, Any]): Action parameters 159 | 160 | Returns: 161 | Any: Result of the action 162 | """ 163 | # Navigation actions 164 | if action_type == "navigate": 165 | url = params.get("url") 166 | if not url: 167 | raise BrowserError("URL is required for navigate action") 168 | self.driver.get(url) 169 | return {"status": "success", "message": f"Navigated to {url}"} 170 | 171 | # Click actions 172 | elif action_type == "click": 173 | by_type = params.get("by", "css") 174 | selector = params.get("selector") 175 | if not selector: 176 | raise BrowserError("Selector is required for click action") 177 | 178 | element = self._find_element(by_type, selector) 179 | element.click() 180 | return {"status": "success", "message": f"Clicked element {by_type}='{selector}'"} 181 | 182 | # Input actions 183 | elif action_type == "input": 184 | by_type = params.get("by", "css") 185 | selector = params.get("selector") 186 | text = params.get("text", "") 187 | clear = params.get("clear", True) 188 | 189 | if not selector: 190 | raise BrowserError("Selector is required for input action") 191 | 192 | element = self._find_element(by_type, selector) 193 | if clear: 194 | element.clear() 195 | element.send_keys(text) 196 | return {"status": "success", "message": f"Input text into {by_type}='{selector}'"} 197 | 198 | # Scroll actions 199 | elif action_type == "scroll": 200 | x = params.get("x", 0) 201 | y = params.get("y", 0) 202 | self.driver.execute_script(f"window.scrollTo({x}, {y});") 203 | return {"status": "success", "message": f"Scrolled to position ({x}, {y})"} 204 | 205 | # Wait actions 206 | elif action_type == "wait": 207 | seconds = params.get("seconds", 1) 208 | time.sleep(seconds) 209 | return {"status": "success", "message": f"Waited for {seconds} seconds"} 210 | 211 | # Get text action 212 | elif action_type == "get_text": 213 | by_type = params.get("by", "css") 214 | selector = 
params.get("selector") 215 | 216 | if not selector: 217 | raise BrowserError("Selector is required for get_text action") 218 | 219 | element = self._find_element(by_type, selector) 220 | text = element.text 221 | return {"status": "success", "text": text} 222 | 223 | # Screenshot action 224 | elif action_type == "screenshot": 225 | path = params.get("path", "screenshot.png") 226 | self.driver.save_screenshot(path) 227 | return {"status": "success", "message": f"Screenshot saved to {path}"} 228 | 229 | # Execute JavaScript action 230 | elif action_type == "execute_script": 231 | script = params.get("script") 232 | if not script: 233 | raise BrowserError("Script is required for execute_script action") 234 | 235 | result = self.driver.execute_script(script) 236 | return {"status": "success", "result": result} 237 | 238 | else: 239 | raise BrowserError(f"Unsupported action type: {action_type}") 240 | 241 | def _run(self, **kwargs) -> Any: 242 | """ 243 | This method has been removed. 244 | 245 | Args: 246 | **kwargs: Tool-specific arguments (ignored) 247 | 248 | Returns: 249 | Dict: A message indicating this tool has been disabled 250 | """ 251 | self.logger.warning("BrowserTool._run has been removed/disabled") 252 | return { 253 | "status": "error", 254 | "message": "The browser tool functionality has been removed. Please use an alternative tool." 255 | } 256 | 257 | 258 | def execute_browser_task(input_data: BrowserTaskInput) -> List[Dict[str, Any]]: 259 | """ 260 | This function previously executed a browser task from BrowserTaskInput, 261 | but the browser tool functionality has been removed. 262 | 263 | Args: 264 | input_data (BrowserTaskInput): Input data for browser task 265 | 266 | Returns: 267 | List[Dict[str, Any]]: Error message indicating removal 268 | """ 269 | return [{ 270 | "status": "error", 271 | "message": "The browser tool functionality has been removed. Please use an alternative tool." 
class BaseTool(ABC):
    """Base class for all tools in the system.

    Concrete tools implement :meth:`_run`. Callers go through :meth:`run`
    (which wraps failures in ``ToolError``) or :meth:`safe_run` (which also
    normalizes the several calling conventions LangChain uses when invoking
    tools).
    """

    def __init__(self, name: str, description: str, parameters: Optional[Dict[str, Any]] = None):
        """
        Initialize a tool.

        Args:
            name (str): Tool name
            description (str): Tool description
            parameters (Dict[str, Any], optional): JSON Schema for parameters
        """
        self.name = name
        self.description = description
        # Fall back to an empty JSON-schema object when no schema is given.
        self.parameters = parameters or self._default_parameters()
        self.logger = get_logger(f"tool.{name}")

    def _default_parameters(self) -> Dict[str, Any]:
        """
        Get default parameters schema for the tool.
        Override this in subclasses to provide a specific schema.

        Returns:
            Dict[str, Any]: Default parameters schema (an object accepting
            no properties)
        """
        return {
            "type": "object",
            "properties": {},
            "required": []
        }

    @abstractmethod
    def _run(self, **kwargs) -> Any:
        """
        Execute the tool with the given arguments.

        Args:
            **kwargs: Tool-specific arguments

        Returns:
            Any: Result of the tool execution
        """

    def run(self, **kwargs) -> Any:
        """
        Run the tool with error handling.

        Args:
            **kwargs: Tool-specific arguments

        Returns:
            Any: Result of the tool execution

        Raises:
            ToolError: If argument coercion or the underlying ``_run`` fails.
        """
        try:
            self.logger.debug(f"Running tool '{self.name}' with args: {kwargs}")

            # Special handling for firecrawl_research tool to ensure the
            # required 'query' parameter is present: coerce the first
            # string-valued alias found, in this fixed priority order.
            if self.name == "firecrawl_research" and "query" not in kwargs:
                query_param = None
                for param in ['input', 'text', 'content', 'search_query']:
                    if param in kwargs and isinstance(kwargs[param], str):
                        query_param = kwargs.pop(param)
                        self.logger.debug(f"Using '{param}' value as 'query' for firecrawl_research")
                        break

                if query_param:
                    kwargs['query'] = query_param
                else:
                    raise ValueError("Missing required parameter 'query' for firecrawl_research tool")

            result = self._run(**kwargs)
            self.logger.debug(f"Tool '{self.name}' completed successfully")
            return result
        except Exception as e:
            self.logger.error(f"Error running tool '{self.name}': {e}")
            # FIX: chain the original exception (`from e`) so the root cause
            # is preserved in tracebacks instead of being swallowed.
            raise ToolError(f"Error running tool '{self.name}': {str(e)}") from e

    def safe_run(self, *args, **kwargs) -> Any:
        """
        Safe run method that can be used with LangChain.
        This handles various ways LangChain might call tools: plain kwargs,
        a single positional string, or an 'args' kwarg wrapping a one-element
        list.

        Args:
            *args: Positional arguments (could include string input)
            **kwargs: Keyword arguments for the tool

        Returns:
            Any: Result of the tool execution
        """
        # Log what's being passed for debugging
        self.logger.debug(f"Safe run called with args: {args}, kwargs: {kwargs}")

        # Handle parameter name differences for the browser tool.
        if self.name == "browser":
            # Map search_query to query for browser tool
            if 'search_query' in kwargs and 'query' not in kwargs:
                self.logger.debug(f"Mapping 'search_query' to 'query' for browser tool")
                kwargs['query'] = kwargs.pop('search_query')

            # Set default URL if not provided: derive from query when we can.
            if 'url' not in kwargs:
                if 'query' in kwargs:
                    kwargs['url'] = f"https://www.google.com/search?q={kwargs['query']}"
                    self.logger.debug(f"Setting default URL from query: {kwargs['url']}")
                else:
                    kwargs['url'] = "https://www.google.com"
                    self.logger.debug("Setting default URL to google.com")

            # Ensure actions exists and is an empty list if not provided
            if 'actions' not in kwargs:
                kwargs['actions'] = []
                self.logger.debug("Setting default empty actions list")

        # Handle 'args' parameter which LangChain sometimes uses: a
        # one-element list of a string is routed to the tool-appropriate
        # parameter name.
        if 'args' in kwargs:
            args_value = kwargs.pop('args')
            self.logger.debug(f"Found 'args' in kwargs: {args_value}")

            if isinstance(args_value, list) and len(args_value) == 1 and isinstance(args_value[0], str):
                if self.name == "code_generator":
                    kwargs['description'] = args_value[0]
                    kwargs['language'] = 'python'  # Default to Python if not specified
                elif self.name in ["pdf_generator", "markdown_generator"]:
                    kwargs['content'] = args_value[0]
                elif self.name in ["firecrawl_research", "google_search"]:
                    kwargs['query'] = args_value[0]
                elif self.name == "browser":
                    # For browser tool, interpret the string as a URL
                    kwargs['url'] = args_value[0]
                    if 'actions' not in kwargs:
                        kwargs['actions'] = []
                else:
                    # For unknown tools, try a generic parameter name
                    kwargs['input'] = args_value[0]

        # Single-positional-string calling pattern: route the string to the
        # tool-appropriate parameter and dispatch immediately.
        if len(args) == 1 and isinstance(args[0], str) and not kwargs:
            self.logger.debug("Single string argument detected, using as content/query")

            if self.name == "pdf_generator" or self.name == "markdown_generator":
                return self.run(content=args[0])
            elif self.name == "code_generator":
                return self.run(description=args[0], language="python")
            elif self.name in ["firecrawl_research", "google_search"]:
                return self.run(query=args[0])
            elif self.name == "browser":
                # For browser tool, interpret the single string as a URL with no actions
                return self.run(url=args[0], actions=[])
            else:
                # Default for other tools
                return self.run(input=args[0])

        # Handle special case for firecrawl_research tool: salvage a query
        # from alias kwargs or a leading positional string.
        if self.name == "firecrawl_research":
            self.logger.debug(f"Handling firecrawl_research tool, kwargs: {kwargs}")

            if 'query' not in kwargs:
                potential_query_params = ['input', 'text', 'content', 'search_query', 'question', 'prompt']
                for param in potential_query_params:
                    if param in kwargs and isinstance(kwargs[param], str):
                        self.logger.debug(f"Mapping '{param}' to 'query' for firecrawl_research")
                        kwargs['query'] = kwargs.pop(param)
                        break

            if 'query' not in kwargs and len(args) > 0 and isinstance(args[0], str):
                self.logger.debug(f"Using first positional argument as query for firecrawl_research")
                kwargs['query'] = args[0]

            # If we still don't have a query, run() will raise a clear error.
            if 'query' not in kwargs:
                self.logger.warning(f"No suitable query parameter found for firecrawl_research: {kwargs}")

        # For most tools, just use the kwargs
        return self.run(**kwargs)

    def to_dict(self) -> Dict[str, Any]:
        """
        Convert the tool to a dictionary representation.

        Returns:
            Dict[str, Any]: Dictionary with name, description and parameters
        """
        return {
            "name": self.name,
            "description": self.description,
            "parameters": self.parameters
        }

    def to_openai_function(self) -> Dict[str, Any]:
        """
        Convert the tool to OpenAI function format.

        The OpenAI function schema happens to be the same mapping as
        :meth:`to_dict`, so delegate instead of duplicating it.

        Returns:
            Dict[str, Any]: OpenAI function representation
        """
        return self.to_dict()


class ToolRegistry:
    """Process-wide singleton registry mapping tool names to instances."""

    _instance = None

    def __new__(cls):
        """Singleton pattern for tool registry."""
        if cls._instance is None:
            cls._instance = super(ToolRegistry, cls).__new__(cls)
            cls._instance.tools = {}
        return cls._instance

    def register(self, tool: BaseTool) -> None:
        """
        Register a tool in the registry.

        Registering a name twice overwrites the previous entry (with a
        warning).

        Args:
            tool (BaseTool): Tool to register
        """
        if tool.name in self.tools:
            get_logger("tool_registry").warning(f"Tool '{tool.name}' already registered, overwriting")
        self.tools[tool.name] = tool

    def get(self, name: str) -> Optional[BaseTool]:
        """
        Get a tool by name.

        Args:
            name (str): Tool name

        Returns:
            Optional[BaseTool]: The tool if found, None otherwise
        """
        return self.tools.get(name)

    def list_tools(self) -> List[Dict[str, Any]]:
        """
        List all registered tools.

        Returns:
            List[Dict[str, Any]]: List of tool dictionaries (see
            :meth:`BaseTool.to_dict`)
        """
        return [tool.to_dict() for tool in self.tools.values()]

    def clear(self) -> None:
        """Clear all registered tools."""
        self.tools = {}
# Pydantic schema describing the arguments the markdown tool accepts; used
# as the LangChain ``args_schema`` for structured tool calls.
class MarkdownGeneratorParams(BaseModel):
    content: str = Field(..., description="Content for the Markdown document")
    output_path: Optional[str] = Field(None, description="Path to save the Markdown file (optional)")
    title: Optional[str] = Field(None, description="Document title (optional)")
    metadata: Optional[Dict[str, Any]] = Field(None, description="Additional metadata for the document (optional)")
    options: Optional[Dict[str, Any]] = Field(None, description="Additional options for document generation (optional)")


class MarkdownGeneratorTool(BaseTool):
    """Tool for generating Markdown documents.

    Writes the document via ``FileSaverTool``, copies it into the artifacts
    directory with a JSON metadata sidecar, and can optionally open the
    result in an editor.
    """

    def __init__(self):
        """Initialize the Markdown generator tool."""
        # JSON schema exposed to the LLM; only 'content' is required.
        parameters = {
            "type": "object",
            "properties": {
                "content": {
                    "type": "string",
                    "description": "Content for the Markdown document"
                },
                "output_path": {
                    "type": "string",
                    "description": "Path to save the Markdown file (optional)"
                },
                "title": {
                    "type": "string",
                    "description": "Document title (optional)"
                },
                "metadata": {
                    "type": "object",
                    "description": "Additional metadata for the document (optional)"
                },
                "options": {
                    "type": "object",
                    "description": "Additional options for document generation (optional)"
                }
            },
            "required": ["content"]
        }

        super().__init__(
            name="markdown_generator",
            description="Generate Markdown documents from text content",
            parameters=parameters
        )

        # Create artifacts directory if it doesn't exist
        self.artifacts_dir = config.get_nested_value(["artifacts", "base_dir"], "./artifacts")
        self.markdown_artifacts_dir = os.path.join(self.artifacts_dir, "markdown")
        os.makedirs(self.markdown_artifacts_dir, exist_ok=True)

        # Initialize the file saver tool
        self.file_saver = FileSaverTool()

    def _save_artifact(self, markdown_path: str, title: Optional[str] = None, metadata: Optional[Dict[str, Any]] = None) -> str:
        """
        Save Markdown as an artifact (a copy plus a JSON metadata sidecar).

        Args:
            markdown_path (str): Path to the Markdown file
            title (Optional[str]): Title of the document
            metadata (Optional[Dict[str, Any]]): Additional metadata

        Returns:
            str: Path to the saved artifact
        """
        # Generate a unique ID for this artifact
        artifact_id = str(uuid.uuid4())

        # Create filename if title is provided (sanitized, truncated title
        # plus a short unique suffix).
        if title:
            safe_title = "".join(c if c.isalnum() else "_" for c in title[:30]).lower()
            filename = f"{safe_title}_{artifact_id[:8]}.md"
        else:
            filename = f"markdown_{artifact_id[:8]}.md"

        # Create artifact path
        artifact_path = os.path.join(self.markdown_artifacts_dir, filename)

        # Copy the file to artifacts directory
        with open(markdown_path, "r", encoding="utf-8") as src_file:
            content = src_file.read()

        with open(artifact_path, "w", encoding="utf-8") as dst_file:
            dst_file.write(content)

        # BUG FIX: the metadata path previously used a constant name (an
        # f-string with no placeholder), so every artifact's metadata
        # overwrote the same file. Name the sidecar after the artifact file.
        metadata_path = os.path.join(self.markdown_artifacts_dir, f"{filename}.meta.json")

        # Prepare metadata
        meta = {
            "id": artifact_id,
            "title": title,
            "original_path": markdown_path,
            "timestamp": config.get_timestamp(),
            "type": "markdown"
        }

        if metadata:
            meta.update(metadata)

        # Save metadata
        with open(metadata_path, "w", encoding="utf-8") as f:
            import json
            json.dump(meta, f, indent=2)

        self.logger.info(f"Markdown artifact saved to {artifact_path}")
        return artifact_path

    def _open_markdown(self, markdown_path: str) -> None:
        """
        Open the Markdown file using the appropriate system command.

        Best-effort only: failures are logged as warnings, never raised.

        Args:
            markdown_path (str): Path to the Markdown file
        """
        try:
            # Check if markdown_path exists
            if not os.path.exists(markdown_path):
                self.logger.warning(f"Cannot open Markdown - file does not exist: {markdown_path}")
                return

            if os.name == 'nt':  # Windows
                os.startfile(markdown_path)
            elif os.name == 'posix':  # macOS or Linux
                platform = os.uname().sysname
                if platform == 'Darwin':  # macOS
                    # Try to use a markdown editor if available, otherwise just open with default app
                    try:
                        # Check if VS Code is available
                        subprocess.run(['which', 'code'], check=True, capture_output=True)
                        subprocess.Popen(['code', markdown_path])
                    except subprocess.CalledProcessError:
                        # Fall back to default app
                        subprocess.Popen(['open', markdown_path])
                else:  # Linux
                    # Try different editors in order of preference
                    editors = ['xdg-open', 'gedit', 'kate', 'nano', 'vim']
                    for editor in editors:
                        try:
                            subprocess.run(['which', editor], check=True, capture_output=True)
                            subprocess.Popen([editor, markdown_path])
                            break
                        except subprocess.CalledProcessError:
                            continue
            else:
                self.logger.warning(f"Unsupported operating system for auto-opening Markdown: {os.name}")
        except Exception as e:
            self.logger.warning(f"Error opening Markdown file: {str(e)}")

    def _run(self,
             content: str,
             output_path: Optional[str] = None,
             title: Optional[str] = None,
             metadata: Optional[Dict[str, Any]] = None,
             options: Optional[Dict[str, Any]] = None,
             action: Optional[str] = None,
             file_name: Optional[str] = None,
             file_path: Optional[str] = None,
             format: Optional[str] = None,
             text: Optional[str] = None,
             file: Optional[str] = None,
             **kwargs) -> Dict[str, Any]:
        """
        Generate a Markdown document.

        Args:
            content (str): Content for the Markdown file
            output_path (str, optional): Path to save the Markdown file
            title (str, optional): Document title
            metadata (Dict[str, Any], optional): Document metadata
            options (Dict[str, Any], optional): Additional options for document generation
            action (str, optional): Action parameter (ignored, for compatibility)
            file_name (str, optional): Alternative name for the output file (used only when output_path is absent)
            file_path (str, optional): Alternative path for the output file (used only when output_path is absent)
            format (str, optional): Format for the output (ignored; this tool always generates markdown)
            text (str, optional): Alternative parameter for content (used if content is empty)
            file (str, optional): Alternative parameter for the output file (used only when output_path is absent)
            **kwargs: Any other parameters (ignored for compatibility)

        Returns:
            Dict[str, Any]: Dictionary containing paths to the generated
            Markdown file/artifact plus the title and metadata

        Raises:
            DocumentGenerationError: If writing or archiving the file fails.
        """
        # Use text parameter as content if content is empty
        if not content and text:
            content = text
            self.logger.info(f"Using 'text' parameter as content")

        # Resolve the output path from the alias parameters, in priority
        # order: output_path, file, file_path, file_name (within the
        # configured output dir), then a title-derived default.
        if not output_path and file:
            output_path = file
            self.logger.info(f"Using 'file' parameter as output_path: {output_path}")

        if not output_path and file_path:
            output_path = file_path
            self.logger.info(f"Using file_path parameter as output_path: {output_path}")

        if not output_path and file_name:
            output_dir = config.get_nested_value(["document", "markdown_output_dir"], "./output/markdown")
            os.makedirs(output_dir, exist_ok=True)
            output_path = os.path.join(output_dir, file_name)
            self.logger.info(f"Using file_name parameter to construct output_path: {output_path}")

        if not output_path:
            # Use default output directory from config
            output_dir = config.get_nested_value(["document", "markdown_output_dir"], "./output/markdown")
            os.makedirs(output_dir, exist_ok=True)

            # Generate filename from title or use default
            filename = f"{title.lower().replace(' ', '_')}.md" if title else "generated_document.md"
            output_path = os.path.join(output_dir, filename)

        try:
            # Prepare content
            final_content = ""

            # Add title if provided
            if title:
                final_content += f"# {title}\n\n"

            # Add metadata if provided.
            # NOTE(review): this emits the YAML front-matter block AFTER the
            # H1 title; most renderers only honor front matter at the very
            # top of the file — confirm the intended ordering.
            if metadata:
                final_content += "---\n"
                for key, value in metadata.items():
                    final_content += f"{key}: {value}\n"
                final_content += "---\n\n"

            # Add main content
            final_content += content

            # Use the file_saver tool to save the content
            self.file_saver.run(
                content=final_content,
                file_path=output_path,
                mode="w",
                encoding="utf-8",
                create_dirs=True
            )

            # Save as artifact
            artifact_path = self._save_artifact(output_path, title, metadata)

            # Auto-open Markdown if specified in options
            auto_open = options.get('auto_open', False) if options else False
            if auto_open:
                try:
                    self._open_markdown(artifact_path)
                except Exception as e:
                    self.logger.warning(f"Could not automatically open Markdown: {str(e)}")

            self.logger.info(f"Markdown file successfully generated at {output_path}")

            return {
                "artifact_path": artifact_path,
                "original_path": output_path,
                "title": title,
                "metadata": metadata
            }

        except Exception as e:
            error_msg = f"Failed to generate Markdown file: {str(e)}"
            self.logger.error(error_msg)
            # Chain the original exception so the root cause is preserved.
            raise DocumentGenerationError(error_msg) from e
def create_markdown_from_input(input_data: GenerateDocumentInput) -> Dict[str, Any]:
    """
    Generate a Markdown file from a ``GenerateDocumentInput`` payload.

    Thin convenience wrapper: instantiates the tool, translates the input
    model's fields (plus any extra attributes) into tool arguments, and
    delegates to :meth:`MarkdownGeneratorTool.run`.

    Args:
        input_data (GenerateDocumentInput): Input data for document generation

    Returns:
        Dict[str, Any]: Dictionary containing path to the generated Markdown
        file and additional information
    """
    tool = MarkdownGeneratorTool()

    # Only the auto_open flag is forwarded from the incoming options.
    auto_open_options = None
    if input_data.options:
        auto_open_options = {"auto_open": input_data.options.get("auto_open", False)}

    # Forward any extra (non-private) attributes for compatibility with
    # fields this wrapper does not know about.
    known_fields = ('content', 'output_path', 'title', 'metadata', 'options', 'format')
    extra_kwargs = {
        key: value
        for key, value in input_data.__dict__.items()
        if key not in known_fields and not key.startswith('_')
    }

    fmt = input_data.format.value if hasattr(input_data, 'format') else None

    return tool.run(
        content=input_data.content,
        output_path=input_data.output_path,
        title=input_data.title,
        metadata=input_data.metadata,
        options=auto_open_options,
        format=fmt,
        **extra_kwargs
    )
from app.schema import AgentType, Conversation, TaskInput, TaskOutput, Message, WebResearchInput
from app.llm import llm_manager, get_llm_from_config
from app.logger import get_logger
from app.exceptions import AgentError
from app.prompt.manus import SYSTEM_PROMPT
# NOTE(review): pdf_generator, code_generator and agent.planning are imported
# below but do not appear in the visible repo tree — confirm these modules
# exist before relying on this module importing cleanly.
from app.tool.pdf_generator import PDFGeneratorParams
from app.tool.markdown_generator import MarkdownGeneratorParams
from app.tool.code_generator import CodeGeneratorParams
from app.agent.planning import PlanningAgent


class ManusAgent(BaseAgent):
    """
    The Manus Agent, based on the OpenManus project.
    Uses LangChain's OpenAI Functions Agent for task execution.
    """

    def __init__(self, tools: Optional[List[str]] = None):
        """
        Initialize the Manus agent.

        Args:
            tools (List[str], optional): List of tool names to use; when
                omitted, a default set of five tools is registered.
        """
        super().__init__(name=AgentType.MANUS.value, tools=tools)
        # Shared LLM instance from the global manager.
        self.llm = llm_manager.llm

        # Define default tools if none provided
        if not tools:
            default_tools = [
                "pdf_generator",
                "markdown_generator",
                "browser",
                "firecrawl_research",
                "code_generator"
            ]
            for tool_name in default_tools:
                # add_tool is provided by BaseAgent (not visible here).
                self.add_tool(tool_name)

    def _infer_task_and_plan(self, input_text: str) -> tuple[bool, Optional[List[str]]]:
        """
        Determine if the user input is a task and generate a plan if needed.

        Asks the LLM to classify the input; a "TASK" response is expected to
        be followed by a numbered list of steps, which is returned as the
        plan (numbering included, one step per line).

        Args:
            input_text (str): User input text

        Returns:
            tuple[bool, Optional[List[str]]]: (is_task, plan); plan is None
            when the input is classified as not a task.
        """
        # Ask the LLM to determine if this is a task requiring a plan
        prompt = f"""
Analyze the following user input and determine if it's a task that requires multiple steps to complete:

USER INPUT: {input_text}

First, determine if this is a task (requiring actions) or just a question/conversation:
- If it's just a question or conversation, respond with "NOT_A_TASK"
- If it's a task requiring actions, respond with "TASK" followed by a numbered list of clear, specific steps to complete it

Example response for a task:
TASK
1. Search for information about Python memory management
2. Generate a summary of key points
3. Create a PDF document with the findings

Example response for a non-task:
NOT_A_TASK
"""

        response = llm_manager.generate_text(prompt)

        # Process the response: anything not starting with "TASK" is treated
        # as conversational input.
        if response.strip().startswith("TASK"):
            # Extract the plan from the response (everything after the first line)
            plan_lines = response.strip().split("\n")[1:]
            # Clean up the plan steps (drop blank lines, trim whitespace)
            plan = [line.strip() for line in plan_lines if line.strip()]
            return True, plan
        else:
            return False, None

    def _create_agent_executor(self) -> AgentExecutor:
        """
        Create an agent executor with the agent's tools.

        Wraps each registered tool's ``safe_run`` in a LangChain
        ``StructuredTool`` (attaching a pydantic args schema for the tools
        that have one) and builds an OpenAI-functions agent around them.

        Returns:
            AgentExecutor: LangChain agent executor
        """
        # Convert tools to LangChain-compatible tools
        langchain_tools = []
        for tool_name, tool in self.tools.items():
            # Get the appropriate args schema for the tool; tools without a
            # dedicated schema fall back to None (free-form kwargs).
            args_schema = None
            if tool_name == "pdf_generator":
                args_schema = PDFGeneratorParams
            elif tool_name == "markdown_generator":
                args_schema = MarkdownGeneratorParams
            elif tool_name == "code_generator":
                args_schema = CodeGeneratorParams
            elif tool_name == "firecrawl_research":
                args_schema = WebResearchInput

            # Create a structured tool that properly handles multiple arguments
            structured_tool = StructuredTool.from_function(
                name=tool.name,
                description=tool.description,
                func=tool.safe_run,
                args_schema=args_schema
            )
            langchain_tools.append(structured_tool)

        # Create prompt: system message, prior conversation, current input,
        # then the scratchpad the agent uses for intermediate reasoning.
        prompt = ChatPromptTemplate.from_messages([
            ("system", SYSTEM_PROMPT),
            MessagesPlaceholder(variable_name="conversation"),
            ("human", "{input}"),
            MessagesPlaceholder(variable_name="agent_scratchpad"),
        ])

        # Create agent
        agent = create_openai_functions_agent(self.llm, langchain_tools, prompt)

        # Create agent executor; intermediate steps are returned so callers
        # can inspect tool results (see _run's artifact scan).
        agent_executor = AgentExecutor(
            agent=agent,
            tools=langchain_tools,
            verbose=True,
            handle_parsing_errors=True,
            return_intermediate_steps=True
        )

        return agent_executor
    def _run(self, task_input: TaskInput) -> TaskOutput:
        """
        Run the Manus agent.

        Classifies the request (conversation vs multi-step task), optionally
        prepends a generated plan to the prompt, executes the LangChain agent
        executor, and collects any produced artifacts into the output
        metadata.

        Args:
            task_input (TaskInput): Task input

        Returns:
            TaskOutput: Task output (success flag, result text, metadata)
        """
        # Extract inputs
        task_description = task_input.task_description
        parameters = task_input.parameters or {}

        # Get conversation history if provided
        conversation = parameters.get("conversation", None)

        # Convert conversation to the format expected by the agent
        # (LangChain Human/AI message objects); other roles are dropped.
        formatted_conversation = []
        if conversation and isinstance(conversation, Conversation):
            for message in conversation.messages:
                if message.role == "user":
                    formatted_conversation.append(HumanMessage(content=message.content))
                elif message.role == "assistant":
                    formatted_conversation.append(AIMessage(content=message.content))
        # If no conversation is provided, we'll use an empty list
        else:
            formatted_conversation = []

        # Check if we need a plan
        is_task, plan = self._infer_task_and_plan(task_description)

        if not is_task:
            # If this doesn't appear to be a task, handle as a regular query
            agent_executor = self._create_agent_executor()
            try:
                result = agent_executor.invoke({
                    "input": task_description,
                    "conversation": formatted_conversation
                })

                return TaskOutput(
                    content=result.get("output", ""),
                    success=True,
                    result=result.get("output", ""),
                    metadata={"agent_type": self.name}
                )
            except Exception as e:
                self.logger.error(f"Error in agent execution: {str(e)}")
                return TaskOutput(
                    content=f"There was an error processing your request: {str(e)}",
                    success=False,
                    result=None,
                    metadata={"agent_type": self.name, "error": str(e)}
                )

        # Run the agent with the task and plan
        agent_executor = self._create_agent_executor()

        # Format the input with the plan if available
        prompt_with_plan = task_description
        if plan:
            prompt_with_plan = f"""
Task: {task_description}

I've created a plan to help accomplish this task:
{self._format_plan_for_agent(plan)}

Please execute this plan step by step, using the available tools when needed.
For document generation, ensure high-quality, well-formatted Markdown content.
For code generation, write clean, well-documented code following best practices.
"""

        # Execute the agent
        try:
            self.logger.info(f"Executing task with {len(self.tools)} tools")
            result = agent_executor.invoke({
                "input": prompt_with_plan,
                "conversation": formatted_conversation
            })

            # Process the output
            raw_output = result.get("output", "")

            # Extract artifacts from the output (regex scan of the text)
            artifacts = self._extract_artifacts_from_output(raw_output)

            # Also check for artifacts in intermediate_steps tool results —
            # only consulted when the text scan found nothing.
            if not artifacts and "intermediate_steps" in result:
                for action, action_result in result["intermediate_steps"]:
                    if isinstance(action_result, dict) and "artifact_path" in action_result:
                        # Found an artifact in a tool result
                        artifact_path = action_result["artifact_path"]
                        ext = os.path.splitext(artifact_path)[1][1:]  # Get extension without the dot

                        # Determine artifact type based on extension
                        artifact_type = "code" if ext in ["py", "js", "ts", "jsx", "tsx", "html", "css", "sh"] else ext

                        # Create or update artifacts dict
                        if not artifacts:
                            artifacts = {}
                        if artifact_type not in artifacts:
                            artifacts[artifact_type] = []
                        artifacts[artifact_type].append(artifact_path)

            # NOTE(review): unlike the other TaskOutput constructions in this
            # method, this one passes no `content` kwarg — confirm TaskOutput
            # makes that field optional, or pass raw_output for consistency.
            return TaskOutput(
                success=True,
                result=raw_output,
                metadata={
                    "agent_type": self.name,
                    "plan": plan,
                    "artifacts": artifacts,
                    "tool_calls": self._count_tool_calls(result)
                }
            )
        except Exception as e:
            self.logger.error(f"Error in agent execution: {str(e)}")
            return TaskOutput(
                content=f"There was an error processing your request: {str(e)}",
                success=False,
                result=None,
                metadata={"agent_type": self.name, "error": str(e), "plan": plan}
            )

    def _format_plan_for_agent(self, plan: List[str]) -> str:
        """Format the plan as a bulleted list for inclusion in the agent prompt."""
        return "\n".join([f"- {step}" for step in plan])
    def _extract_artifacts_from_output(self, output: str) -> Optional[Dict[str, Any]]:
        """
        Extract artifacts information from the agent output.

        Scans the output text with a set of "generated/created/saved ..."
        regexes for file paths, grouping matches by extension (all code-like
        extensions are pooled under the single key 'code').

        Args:
            output (str): Agent output

        Returns:
            Optional[Dict[str, Any]]: Mapping of artifact type to list of
            paths, or None when nothing was found.
        """
        # Look for file paths in the output
        file_patterns = [
            r'generated (?:file|document|code file).*?:\s*([^\s]+\.(?:py|js|html|css|pdf|md|txt|json|sh|ts|jsx|tsx))',
            r'created (?:file|document|code).*?:\s*([^\s]+\.(?:py|js|html|css|pdf|md|txt|json|sh|ts|jsx|tsx))',
            r'saved (?:to|as).*?:\s*([^\s]+\.(?:py|js|html|css|pdf|md|txt|json|sh|ts|jsx|tsx))',
            r'file (?:is at|available at).*?:\s*([^\s]+\.(?:py|js|html|css|pdf|md|txt|json|sh|ts|jsx|tsx))',
            r'output (?:file|saved to).*?:\s*([^\s]+\.(?:py|js|html|css|pdf|md|txt|json|sh|ts|jsx|tsx))',
            r'code (?:file saved as|artifact saved).*?:\s*([^\s]+\.(?:py|js|html|css|md|txt|json|sh|ts|jsx|tsx))'
        ]

        artifacts = {}

        for pattern in file_patterns:
            matches = re.findall(pattern, output, re.IGNORECASE)
            if matches:
                for match in matches:
                    file_path = match.strip()
                    # Extract file extension
                    _, ext = os.path.splitext(file_path)
                    artifact_type = ext[1:] if ext else "file"

                    # Check if this is code
                    if artifact_type in ['py', 'js', 'ts', 'jsx', 'tsx', 'html', 'css', 'sh']:
                        if 'code' not in artifacts:
                            artifacts['code'] = []
                        artifacts['code'].append(file_path)
                    else:
                        if artifact_type not in artifacts:
                            artifacts[artifact_type] = []
                        artifacts[artifact_type].append(file_path)

        # Additional check: see if there are artifacts in tool results from
        # intermediate steps.
        # NOTE(review): `self.agent_executor` is never assigned as an
        # attribute in the visible code (_run keeps it as a local), so this
        # hasattr-guarded fallback appears to be dead code — confirm.
        if not artifacts and hasattr(self, 'agent_executor') and hasattr(self.agent_executor, 'intermediate_steps'):
            for step in self.agent_executor.intermediate_steps:
                if isinstance(step, tuple) and len(step) == 2:
                    result = step[1]
                    if isinstance(result, dict) and 'artifact_path' in result:
                        artifact_path = result['artifact_path']
                        _, ext = os.path.splitext(artifact_path)
                        artifact_type = ext[1:] if ext else "file"

                        # Classify code artifacts
                        if artifact_type in ['py', 'js', 'ts', 'jsx', 'tsx', 'html', 'css', 'sh']:
                            if 'code' not in artifacts:
                                artifacts['code'] = []
                            artifacts['code'].append(artifact_path)
                        else:
                            if artifact_type not in artifacts:
                                artifacts[artifact_type] = []
                            artifacts[artifact_type].append(artifact_path)

        if not artifacts:
            return None

        return artifacts

    def _count_tool_calls(self, result: Dict[str, Any]) -> int:
        """Count the number of tool calls made during execution (one per intermediate step)."""
        if "intermediate_steps" not in result:
            return 0

        return len(result["intermediate_steps"])


class Manus:
    """
    Main Manus class for interfacing with the OpenManus agent.
    """

    def __init__(self):
        """Initialize the Manus interface."""
        # The underlying LangChain-based agent that executes tasks.
        self.agent = ManusAgent()
        self.logger = get_logger("manus")
        # History of artifact dicts from previous runs (newest last).
        self.recent_artifacts = []
        # Full user/assistant message history for this session.
        self.conversation = Conversation(messages=[])
364 | 365 | Args: 366 | prompt (str): User input prompt 367 | """ 368 | # Add user message to conversation 369 | self.conversation.messages.append(Message(role="user", content=prompt)) 370 | 371 | # Check if the prompt is asking to open a previously generated artifact 372 | if await self._check_artifact_request(prompt): 373 | return 374 | 375 | # Create task input with conversation history 376 | task_input = TaskInput( 377 | task_description=prompt, 378 | parameters={"conversation": self.conversation} 379 | ) 380 | 381 | try: 382 | # Run the agent (asynchronously) 383 | loop = asyncio.get_event_loop() 384 | result = await loop.run_in_executor(None, lambda: self.agent.run(task_input)) 385 | 386 | # Process the result 387 | if result.success: 388 | # Get the output content 389 | output = result.result 390 | 391 | # Store any artifacts found in the output 392 | if result.metadata and "artifacts" in result.metadata: 393 | await self._process_artifacts(result.metadata["artifacts"]) 394 | 395 | # Display the output to the user 396 | print(f"\n{output}\n") 397 | 398 | # Add assistant message to conversation 399 | self.conversation.messages.append(Message(role="assistant", content=output)) 400 | else: 401 | # Handle error case 402 | error_msg = f"I'm sorry, I encountered an error: {result.metadata.get('error', 'Unknown error')}" 403 | print(f"\n{error_msg}\n") 404 | 405 | # Add error message to conversation 406 | self.conversation.messages.append(Message(role="assistant", content=error_msg)) 407 | 408 | except Exception as e: 409 | # Handle unexpected exceptions 410 | self.logger.error(f"Error executing Manus agent: {str(e)}") 411 | error_msg = f"I'm sorry, something went wrong: {str(e)}" 412 | print(f"\n{error_msg}\n") 413 | 414 | # Add error message to conversation 415 | self.conversation.messages.append(Message(role="assistant", content=error_msg)) 416 | 417 | async def _check_artifact_request(self, prompt: str) -> bool: 418 | """ 419 | Check if the user is requesting 
to open an artifact. 420 | 421 | Args: 422 | prompt (str): User prompt 423 | 424 | Returns: 425 | bool: True if the request was handled 426 | """ 427 | if not self.recent_artifacts: 428 | return False 429 | 430 | # Check if this might be a request to open a file 431 | open_terms = ["open", "show", "display", "view", "run", "execute"] 432 | if not any(term in prompt.lower() for term in open_terms): 433 | return False 434 | 435 | # Get a list of artifact file paths 436 | artifact_paths = [] 437 | for artifact_type, paths in self.recent_artifacts[-1].items(): 438 | artifact_paths.extend(paths) 439 | 440 | if not artifact_paths: 441 | return False 442 | 443 | # Look for file names or extensions in the prompt 444 | for path in artifact_paths: 445 | filename = os.path.basename(path) 446 | if filename.lower() in prompt.lower() or os.path.splitext(filename)[1][1:].lower() in prompt.lower(): 447 | await self._open_artifact(path) 448 | 449 | # Add response to conversation 450 | response = f"I've opened the file: {path}" 451 | print(f"\n{response}\n") 452 | self.conversation.messages.append(Message(role="assistant", content=response)) 453 | return True 454 | 455 | return False 456 | 457 | async def _process_artifacts(self, artifacts: Dict[str, Any]): 458 | """ 459 | Process and store artifacts. 
460 | 461 | Args: 462 | artifacts (Dict[str, Any]): Artifacts information 463 | """ 464 | if not artifacts: 465 | return 466 | 467 | # Store in recent artifacts 468 | self.recent_artifacts.append(artifacts) 469 | if len(self.recent_artifacts) > 5: # Keep only the 5 most recent sets 470 | self.recent_artifacts.pop(0) 471 | 472 | # Display artifacts to the user 473 | print("\nGenerated files:") 474 | for artifact_type, paths in artifacts.items(): 475 | for path in paths: 476 | print(f"- {path} ({artifact_type})") 477 | 478 | # Check if we should auto-open any artifacts 479 | await self._check_auto_open_artifacts(artifacts) 480 | 481 | async def _check_auto_open_artifacts(self, artifacts: Dict[str, Any]): 482 | """ 483 | Check if any artifacts should be automatically opened. 484 | 485 | Args: 486 | artifacts (Dict[str, Any]): Artifacts information 487 | """ 488 | # Auto-open PDF files if there's only one 489 | if "pdf" in artifacts and len(artifacts["pdf"]) == 1: 490 | pdf_path = artifacts["pdf"][0] 491 | await self._open_artifact(pdf_path) 492 | print(f"\nAutomatically opened: {pdf_path}\n") 493 | 494 | # Auto-open code files 495 | if "code" in artifacts and len(artifacts["code"]) > 0: 496 | code_path = artifacts["code"][0] # Open the first code file 497 | await self._open_artifact(code_path) 498 | print(f"\nAutomatically opened code file: {code_path}\n") 499 | 500 | async def _open_artifact(self, file_path: str): 501 | """ 502 | Open an artifact using the appropriate application. 
503 | 504 | Args: 505 | file_path (str): Path to the file 506 | """ 507 | if not os.path.exists(file_path): 508 | self.logger.error(f"File does not exist: {file_path}") 509 | return 510 | 511 | try: 512 | # Use appropriate command based on platform 513 | if sys.platform == "darwin": # macOS 514 | subprocess.Popen(["open", file_path]) 515 | elif sys.platform == "win32": # Windows 516 | os.startfile(file_path) 517 | else: # Linux 518 | subprocess.Popen(["xdg-open", file_path]) 519 | 520 | self.logger.info(f"Opened file: {file_path}") 521 | except Exception as e: 522 | self.logger.error(f"Error opening file: {str(e)}") 523 | -------------------------------------------------------------------------------- /app/tool/firecrawl_research.py: -------------------------------------------------------------------------------- 1 | import os 2 | import json 3 | import uuid 4 | import datetime 5 | from typing import Any, Dict, List, Optional, Union 6 | from pathlib import Path 7 | 8 | from app.tool.base import BaseTool 9 | from app.schema import WebResearchInput, DocumentFormat 10 | from app.exceptions import WebResearchError 11 | from app.config import config 12 | from app.llm import llm_manager 13 | 14 | # Import the firecrawl package 15 | try: 16 | import firecrawl 17 | # Check if FirecrawlApp is available (preferred modern API) 18 | if hasattr(firecrawl, 'FirecrawlApp'): 19 | FIRECRAWL_AVAILABLE = True 20 | # Check if older client implementations exist as fallback 21 | elif hasattr(firecrawl, 'FirecrawlClient') or hasattr(firecrawl, 'Client'): 22 | FIRECRAWL_AVAILABLE = True 23 | else: 24 | FIRECRAWL_AVAILABLE = False 25 | except ImportError: 26 | FIRECRAWL_AVAILABLE = False 27 | 28 | # Note: This implementation supports the modern FirecrawlApp API and falls back to older clients if necessary 29 | 30 | 31 | class FirecrawlResearchTool(BaseTool): 32 | """Tool for conducting web research using the Firecrawl API.""" 33 | 34 | def __init__(self): 35 | """Initialize the Firecrawl 
research tool.""" 36 | super().__init__( 37 | name="firecrawl_research", 38 | description="Conduct web research using the Firecrawl API to gather information, data, and visualizations" 39 | ) 40 | 41 | # Create artifacts directory if it doesn't exist 42 | self.artifacts_dir = config.get_nested_value(["artifacts", "base_dir"], "./artifacts") 43 | self.research_artifacts_dir = os.path.join(self.artifacts_dir, "research") 44 | self.visualizations_dir = os.path.join(self.artifacts_dir, "visualizations") 45 | os.makedirs(self.research_artifacts_dir, exist_ok=True) 46 | os.makedirs(self.visualizations_dir, exist_ok=True) 47 | 48 | def _save_research_artifact(self, content: Dict[str, Any], query: str) -> str: 49 | """ 50 | Save research results as an artifact. 51 | 52 | Args: 53 | content (Dict[str, Any]): Research content 54 | query (str): Research query 55 | 56 | Returns: 57 | str: Path to the saved artifact 58 | """ 59 | # Generate a unique ID for this artifact 60 | artifact_id = str(uuid.uuid4()) 61 | 62 | # Create a safe filename from query (first 30 chars) 63 | safe_query = "".join(c if c.isalnum() else "_" for c in query[:30]).lower() 64 | filename = f"research_{safe_query}_{artifact_id[:8]}.json" 65 | 66 | # Create artifact path 67 | artifact_path = os.path.join(self.research_artifacts_dir, filename) 68 | 69 | # Create metadata 70 | metadata = { 71 | "id": artifact_id, 72 | "type": "research", 73 | "query": query, 74 | "filename": filename, 75 | "created_at": datetime.datetime.now().isoformat(), 76 | } 77 | 78 | # Add metadata to content 79 | content["metadata"] = metadata 80 | 81 | # Save content file 82 | with open(artifact_path, "w", encoding="utf-8") as f: 83 | json.dump(content, f, indent=2) 84 | 85 | self.logger.info(f"Research artifact saved to {artifact_path}") 86 | return artifact_path 87 | 88 | def _extract_data_and_visualizations(self, query: str, content: str) -> Dict[str, Any]: 89 | """ 90 | Extract data tables and generate visualization suggestions 
from research content. 91 | 92 | Args: 93 | query (str): Research query 94 | content (str): Research content 95 | 96 | Returns: 97 | Dict[str, Any]: Data and visualization information 98 | """ 99 | # Prompt the LLM to identify data tables and visualization opportunities 100 | prompt = f""" 101 | Analyze the following research content on "{query}" and extract: 102 | 103 | 1. Any numerical or statistical data that could be presented in tables 104 | 2. Suggestions for visualizations (charts, graphs) that would enhance understanding of this data 105 | 106 | RESEARCH CONTENT: 107 | {content} 108 | 109 | Return your response as JSON with the following structure: 110 | {{ 111 | "data_tables": [ 112 | {{ 113 | "title": "Table title", 114 | "description": "Brief description of what this data represents", 115 | "columns": ["Column1", "Column2", ...], 116 | "rows": [ 117 | ["Value1", "Value2", ...], 118 | ["Value1", "Value2", ...], 119 | ... 120 | ] 121 | }} 122 | ], 123 | "visualization_suggestions": [ 124 | {{ 125 | "title": "Visualization title", 126 | "type": "bar_chart|line_chart|pie_chart|scatter_plot|etc", 127 | "description": "What this visualization would show and why it's useful", 128 | "data_source": "Which data table this visualization would use", 129 | "x_axis": "What the x-axis represents (if applicable)", 130 | "y_axis": "What the y-axis represents (if applicable)" 131 | }} 132 | ] 133 | }} 134 | 135 | If no suitable data or visualization opportunities are found, return empty arrays. 
136 | """ 137 | 138 | # Generate data and visualization suggestions 139 | try: 140 | response = llm_manager.generate_text(prompt) 141 | 142 | # Parse the JSON response 143 | data_viz = json.loads(response) 144 | 145 | # Save data and visualizations as artifacts 146 | if data_viz.get("data_tables") or data_viz.get("visualization_suggestions"): 147 | artifacts_dir = config.get_nested_value(["artifacts", "base_dir"], "./artifacts") 148 | research_artifacts_dir = os.path.join(artifacts_dir, "research") 149 | os.makedirs(research_artifacts_dir, exist_ok=True) 150 | 151 | # Clean query for filename 152 | clean_query = "".join(c.lower() if c.isalnum() else '_' for c in query[:30]) 153 | timestamp = datetime.datetime.now().strftime("%Y%m%d_%H%M%S") 154 | 155 | # Save to markdown file 156 | artifact_filename = f"{clean_query}_{timestamp}_data_viz.md" 157 | artifact_path = os.path.join(research_artifacts_dir, artifact_filename) 158 | 159 | # Create markdown content 160 | md_content = f"# Data and Visualization Analysis for: {query}\n\n" 161 | 162 | # Add data tables 163 | if data_viz.get("data_tables"): 164 | md_content += "## Data Tables\n\n" 165 | for i, table in enumerate(data_viz["data_tables"]): 166 | md_content += f"### {table.get('title', f'Table {i+1}')}\n\n" 167 | if table.get("description"): 168 | md_content += f"{table.get('description')}\n\n" 169 | 170 | # Format table 171 | if table.get("columns") and table.get("rows"): 172 | md_content += "| " + " | ".join(table["columns"]) + " |\n" 173 | md_content += "| " + " | ".join(["---" for _ in table["columns"]]) + " |\n" 174 | for row in table["rows"]: 175 | row_values = [str(cell) for cell in row] 176 | while len(row_values) < len(table["columns"]): 177 | row_values.append("") 178 | md_content += "| " + " | ".join(row_values) + " |\n" 179 | md_content += "\n" 180 | 181 | # Add visualization suggestions 182 | if data_viz.get("visualization_suggestions"): 183 | md_content += "## Visualization Suggestions\n\n" 184 | for 
i, viz in enumerate(data_viz["visualization_suggestions"]): 185 | md_content += f"### {viz.get('title', f'Visualization {i+1}')}\n\n" 186 | md_content += f"**Type**: {viz.get('type', 'N/A')}\n\n" 187 | if viz.get("description"): 188 | md_content += f"{viz.get('description')}\n\n" 189 | if viz.get("data_source"): 190 | md_content += f"**Data Source**: {viz.get('data_source')}\n\n" 191 | if viz.get("x_axis"): 192 | md_content += f"**X-Axis**: {viz.get('x_axis')}\n\n" 193 | if viz.get("y_axis"): 194 | md_content += f"**Y-Axis**: {viz.get('y_axis')}\n\n" 195 | 196 | # Save markdown file 197 | with open(artifact_path, 'w', encoding='utf-8') as f: 198 | f.write(md_content) 199 | 200 | # Update the data_viz dictionary with artifact info 201 | data_viz["artifact_path"] = artifact_path 202 | 203 | # Create summary information for the response 204 | data_viz["summary"] = { 205 | "data_tables_count": len(data_viz.get("data_tables", [])), 206 | "visualization_suggestions_count": len(data_viz.get("visualization_suggestions", [])), 207 | "artifact_file": artifact_path 208 | } 209 | 210 | # Remove full data from the response to avoid terminal output 211 | if "data_tables" in data_viz: 212 | data_tables_info = [] 213 | for table in data_viz["data_tables"]: 214 | data_tables_info.append({ 215 | "title": table.get("title", ""), 216 | "columns_count": len(table.get("columns", [])), 217 | "rows_count": len(table.get("rows", [])) 218 | }) 219 | data_viz["data_tables_info"] = data_tables_info 220 | del data_viz["data_tables"] 221 | 222 | if "visualization_suggestions" in data_viz: 223 | viz_info = [] 224 | for viz in data_viz["visualization_suggestions"]: 225 | viz_info.append({ 226 | "title": viz.get("title", ""), 227 | "type": viz.get("type", "") 228 | }) 229 | data_viz["visualization_suggestions_info"] = viz_info 230 | del data_viz["visualization_suggestions"] 231 | 232 | return data_viz 233 | except Exception as e: 234 | self.logger.warning(f"Error extracting data and 
visualizations: {str(e)}") 235 | return { 236 | "error": str(e), 237 | "message": "Failed to extract data and visualizations" 238 | } 239 | 240 | def _run(self, 241 | query: str, 242 | output_format: str = DocumentFormat.MARKDOWN.value, 243 | max_depth: int = 1, 244 | max_pages: int = 10, 245 | include_visualizations: bool = True) -> Dict[str, Any]: 246 | """ 247 | Conduct web research using the Firecrawl API. 248 | 249 | Args: 250 | query (str): Research query or URL to crawl 251 | output_format (str, optional): Format for the research output 252 | max_depth (int, optional): Maximum depth for web crawling 253 | max_pages (int, optional): Maximum number of pages to crawl 254 | include_visualizations (bool, optional): Whether to include visualization suggestions 255 | 256 | Returns: 257 | Dict[str, Any]: Research results 258 | """ 259 | try: 260 | # Get API key from config 261 | api_key = config.get_nested_value(["api", "firecrawl_api_key"]) 262 | if not api_key: 263 | api_key = os.environ.get("FIRECRAWL_API_KEY") 264 | if not api_key: 265 | raise WebResearchError("Firecrawl API key not found in config or environment variables") 266 | 267 | # Check if firecrawl is available 268 | if not FIRECRAWL_AVAILABLE: 269 | self.logger.warning("firecrawl-py package is not installed. 
Falling back to LLM simulation.") 270 | return self._simulate_with_llm(query, output_format, include_visualizations) 271 | 272 | self.logger.info(f"Conducting web research for query: {query}") 273 | 274 | try: 275 | # Prioritize using the newer FirecrawlApp API 276 | if hasattr(firecrawl, 'FirecrawlApp'): 277 | self.logger.info("Using FirecrawlApp API") 278 | app = firecrawl.FirecrawlApp(api_key=api_key) 279 | 280 | # Determine if the query is a URL or a search query 281 | if query.startswith('http://') or query.startswith('https://'): 282 | # For URL crawling, use crawl_url method 283 | crawl_status = app.crawl_url( 284 | query, 285 | params={ 286 | 'limit': max_pages, 287 | 'scrapeOptions': {'formats': [output_format.lower()]}, 288 | 'maxDepth': max_depth 289 | }, 290 | poll_interval=30 291 | ) 292 | 293 | # Wait for the crawl to complete 294 | if hasattr(crawl_status, 'wait_until_done'): 295 | crawl_status.wait_until_done() 296 | 297 | # Get the results 298 | if hasattr(app, 'get_crawl_results'): 299 | crawl_result = app.get_crawl_results(crawl_status.id) 300 | else: 301 | crawl_result = { 302 | 'content': f"Crawl completed with ID: {crawl_status.id if hasattr(crawl_status, 'id') else 'unknown'}", 303 | 'metadata': { 304 | 'query': query, 305 | 'timestamp': datetime.datetime.now().isoformat(), 306 | 'sources': [query] 307 | } 308 | } 309 | else: 310 | # For search queries, we'll convert to a crawl request for a search results page 311 | # This is a workaround since direct search isn't supported in FirecrawlApp 312 | search_url = f"https://www.google.com/search?q={query.replace(' ', '+')}" 313 | self.logger.info(f"Converting search query to URL crawl: {search_url}") 314 | 315 | try: 316 | crawl_status = app.crawl_url( 317 | search_url, 318 | params={ 319 | 'limit': max_pages, 320 | 'scrapeOptions': {'formats': [output_format.lower()]}, 321 | 'maxDepth': 1 # Keep depth shallow for search results 322 | }, 323 | poll_interval=30 324 | ) 325 | 326 | # Wait for the 
crawl to complete 327 | if hasattr(crawl_status, 'wait_until_done'): 328 | crawl_status.wait_until_done() 329 | 330 | # Get the results 331 | if hasattr(app, 'get_crawl_results'): 332 | crawl_result = app.get_crawl_results(crawl_status.id) 333 | else: 334 | crawl_result = { 335 | 'content': f"Search crawl completed with ID: {crawl_status.id if hasattr(crawl_status, 'id') else 'unknown'}", 336 | 'metadata': { 337 | 'query': query, 338 | 'timestamp': datetime.datetime.now().isoformat(), 339 | 'sources': [search_url] 340 | } 341 | } 342 | except Exception as e: 343 | self.logger.warning(f"Error during search crawl: {str(e)}. Falling back to LLM simulation.") 344 | return self._simulate_with_llm(query, output_format, include_visualizations) 345 | 346 | # Fall back to older client-based approaches if FirecrawlApp is not available 347 | elif hasattr(firecrawl, 'Client') or hasattr(firecrawl, 'FirecrawlClient'): 348 | self.logger.info("Using Client API - consider upgrading to FirecrawlApp API for better compatibility") 349 | # Create the appropriate client based on what's available 350 | if hasattr(firecrawl, 'Client'): 351 | client = firecrawl.Client(api_key=api_key) 352 | else: 353 | client = firecrawl.FirecrawlClient(api_key=api_key) 354 | 355 | # Determine if the query is a URL or a search query 356 | if query.startswith('http://') or query.startswith('https://'): 357 | crawl_result = client.crawl( 358 | url=query, 359 | max_depth=max_depth, 360 | max_pages=max_pages, 361 | format=output_format.lower() 362 | ) 363 | else: 364 | # If it's not a URL, use search functionality 365 | search_result = client.search( 366 | query=query, 367 | max_results=max_pages, 368 | format=output_format.lower() 369 | ) 370 | # Combine search results into a single document 371 | crawl_result = { 372 | 'content': "\n\n".join([r.get('content', '') for r in search_result.get('results', [])]), 373 | 'metadata': { 374 | 'query': query, 375 | 'timestamp': datetime.datetime.now().isoformat(), 376 
| 'sources': [r.get('url') for r in search_result.get('results', []) if 'url' in r] 377 | } 378 | } 379 | else: 380 | self.logger.warning("No recognized Firecrawl API implementation found. Falling back to LLM simulation.") 381 | return self._simulate_with_llm(query, output_format, include_visualizations) 382 | 383 | # Process the result to extract data and create visualizations if needed 384 | if include_visualizations: 385 | research_data = self._extract_data_and_visualizations(query, crawl_result.get('content', '')) 386 | crawl_result.update(research_data) 387 | 388 | # Save the research artifact 389 | artifact_path = self._save_research_artifact(crawl_result, query) 390 | crawl_result['artifact_path'] = artifact_path 391 | 392 | return crawl_result 393 | 394 | except Exception as e: 395 | self.logger.error(f"Error using firecrawl: {str(e)}") 396 | error_details = { 397 | "error_type": type(e).__name__, 398 | "error_message": str(e), 399 | "resolution": "Falling back to LLM simulation. To fix this error, check the firecrawl package installation and documentation." 400 | } 401 | 402 | # Provide specific advice based on error type 403 | if "API key" in str(e).lower() or "authentication" in str(e).lower(): 404 | error_details["resolution"] = ( 405 | "There seems to be an issue with your Firecrawl API key. Make sure it's correctly set " 406 | "in your config file or as an environment variable FIRECRAWL_API_KEY." 
407 | ) 408 | 409 | self.logger.warning(f"Resolution: {error_details['resolution']}") 410 | result = self._simulate_with_llm(query, output_format, include_visualizations) 411 | 412 | # Add error information to the result 413 | if isinstance(result, dict): 414 | result["error_info"] = error_details 415 | 416 | return result 417 | 418 | except Exception as e: 419 | raise WebResearchError(f"Error conducting web research: {str(e)}") 420 | 421 | def _simulate_with_llm(self, query: str, output_format: str, include_visualizations: bool) -> Dict[str, Any]: 422 | """ 423 | Simulate research results using the LLM when firecrawl is not available. 424 | """ 425 | self.logger.info(f"Simulating web research for query: {query} using LLM") 426 | 427 | # More detailed prompt for better simulation 428 | prompt = f""" 429 | You are simulating a web research tool that crawls the internet for information. 430 | 431 | Please generate realistic and comprehensive research results for the following query: 432 | "{query}" 433 | 434 | Your response should: 435 | 1. Be detailed, factual and up-to-date as of your training data 436 | 2. Include specific statistics, numbers, and data points where relevant 437 | 3. Present multiple perspectives or viewpoints on the topic when applicable 438 | 4. Cite fictional but plausible sources (like articles, research papers, websites) 439 | 5. Be structured with clear sections and headings 440 | 6. Be formatted in {output_format} 441 | 442 | Note: This is a simulation of web research results, but should appear as realistic as possible. 
443 | """ 444 | 445 | content = llm_manager.generate_text(prompt) 446 | 447 | # Extract data and visualization suggestions if requested 448 | data_viz = {} 449 | if include_visualizations: 450 | data_viz = self._extract_data_and_visualizations(query, content) 451 | 452 | # Create a simulated sources list 453 | simulated_sources = self._generate_simulated_sources(query) 454 | 455 | # Construct the result 456 | timestamp = datetime.datetime.now().isoformat() 457 | result = { 458 | 'content': content, 459 | 'metadata': { 460 | 'query': query, 461 | 'timestamp': timestamp, 462 | 'sources': simulated_sources, 463 | 'simulated': True 464 | } 465 | } 466 | 467 | # Add data and visualizations 468 | if data_viz: 469 | result.update(data_viz) 470 | 471 | # Save the simulated research as an artifact 472 | artifact_path = self._save_research_artifact(result, query) 473 | result['artifact_path'] = artifact_path 474 | 475 | return result 476 | 477 | def _generate_simulated_sources(self, query: str) -> List[str]: 478 | """ 479 | Generate a list of plausible simulated sources based on the query. 
480 | 481 | Args: 482 | query (str): The research query 483 | 484 | Returns: 485 | List[str]: List of simulated source URLs 486 | """ 487 | # Clean query for use in domain names 488 | clean_query = "".join(c.lower() if c.isalnum() else '-' for c in query) 489 | if len(clean_query) > 20: 490 | clean_query = clean_query[:20] 491 | 492 | # Create a list of simulated sources 493 | sources = [ 494 | f"https://en.wikipedia.org/wiki/{clean_query.replace('-', '_')}", 495 | f"https://www.{clean_query.split('-')[0]}research.org/articles/{clean_query}", 496 | f"https://academic.journals.com/research/{clean_query}-analysis", 497 | f"https://www.sciencedaily.com/releases/2023/topics/{clean_query}.htm", 498 | f"https://news.tech-review.com/insights/{clean_query}-latest-developments" 499 | ] 500 | 501 | return sources 502 | 503 | 504 | def conduct_web_research(input_data: WebResearchInput) -> Dict[str, Any]: 505 | """ 506 | Conduct web research from the WebResearchInput. 507 | 508 | Args: 509 | input_data (WebResearchInput): Input data for web research 510 | 511 | Returns: 512 | Dict[str, Any]: Research results 513 | """ 514 | tool = FirecrawlResearchTool() 515 | return tool.run( 516 | query=input_data.query, 517 | output_format=input_data.output_format, 518 | max_depth=input_data.max_depth, 519 | max_pages=input_data.max_pages, 520 | include_visualizations=True 521 | ) 522 | --------------------------------------------------------------------------------