├── mcp_tool
    ├── __init__.py
    ├── __main__.py
    ├── tools
    │   ├── loader.py
    │   ├── url_tool.py
    │   ├── __init__.py
    │   ├── excel_tool.py
    │   ├── csv_tool.py
    │   ├── file_tool.py
    │   ├── markdown_tool.py
    │   ├── pdf_tool.py
    │   ├── maxkb_tool.py
    │   └── word_tool.py
    └── server.py
├── cursor-run-mcp-server.sh
├── .env
├── .dockerignore
├── .lh
    ├── .lhignore
    ├── mcp_simple_tool
    │   ├── core
    │   │   ├── __init__.py.json
    │   │   ├── stdio_adapter.py.json
    │   │   └── sse_adapter.py.json
    │   ├── tools
    │   │   ├── utils
    │   │   │   ├── __init__.py.json
    │   │   │   └── pdf_helpers.py.json
    │   │   ├── url_tool.py.json
    │   │   ├── web_tool.py.json
    │   │   ├── __init__.py.json
    │   │   ├── base.py.json
    │   │   ├── loader.py.json
    │   │   ├── word_tool.py.json
    │   │   ├── README.md.json
    │   │   └── image_recognition_tool.py.json
    │   └── __main__.py.json
    ├── app.json.json
    ├── mcp_tool
    │   └── tools
    │   │   ├── README.md.json
    │   │   ├── loader.py.json
    │   │   └── __init__.py.json
    ├── pyproject.toml.json
    ├── .env.example.json
    └── Dockerfile.json
├── .gitignore
├── .env.example
├── Dockerfile
├── app.json
├── smithery.yaml
├── LICENSE
├── BUGFIX.md
├── docker-compose.yml
├── pyproject.toml
├── deploy.sh
├── DEPLOY.md
└── README.md


/mcp_tool/__init__.py:
--------------------------------------------------------------------------------
1 | 
2 | 


--------------------------------------------------------------------------------
/mcp_tool/__main__.py:
--------------------------------------------------------------------------------
1 | import sys
2 | 
3 | from .server import main
4 | 
5 | sys.exit(main())
6 | 


--------------------------------------------------------------------------------
/cursor-run-mcp-server.sh:
--------------------------------------------------------------------------------
#!/bin/bash

# Change to the directory where this script is located. Abort if that fails so
# the server is never started from an unexpected working directory.
cd "$(dirname "$0")" || exit 1

# Replace this shell with the server process
exec uv run mcp-simple-tool


--------------------------------------------------------------------------------
/.env:
--------------------------------------------------------------------------------
 1 | # MCP服务器配置
 2 | MCP_SERVER_PORT=8080
 3 | MCP_SERVER_HOST=0.0.0.0
 4 | DEBUG=true
 5 | # MaxKB配置（可选，如果不使用可以留空）
 6 | MAXKB_HOST=http://host.docker.internal:8080
 7 | MAXKB_CHAT_ID=
 8 | MAXKB_APPLICATION_ID=
 9 | MAXKB_AUTHORIZATION=
10 | # 本地文件挂载配置
11 | HOST_MOUNT_SOURCE=/Users/liuyanzhi/Documents
12 | HOST_MOUNT_TARGET=/host_files
13 | 


--------------------------------------------------------------------------------
/.dockerignore:
--------------------------------------------------------------------------------
 1 | # Git
 2 | .git
 3 | .gitignore
 4 | 
 5 | # Python
 6 | __pycache__
 7 | *.pyc
 8 | *.pyo
 9 | *.pyd
10 | .Python
11 | *.py[cod]
12 | *$py.class
13 | .pytest_cache
14 | .coverage
15 | htmlcov
16 | .env
17 | .venv
18 | venv/
19 | ENV/
20 | 
21 | # IDE
22 | .idea
23 | .vscode
24 | *.swp
25 | *.swo
26 | .DS_Store
27 | 
28 | # Project specific
29 | temp/
30 | *.log 


--------------------------------------------------------------------------------
/.lh/.lhignore:
--------------------------------------------------------------------------------
1 | # list file to not track by the local-history extension. comment line starts with a '#' character
2 | # each line describe a regular expression pattern (search for 'Javascript regex')
3 | # it will relate to the workspace directory root. for example:
4 | # '.*\.txt' ignores any file with 'txt' extension
5 | # '/test/.*' ignores all the files under the 'test' directory
6 | # '.*/test/.*' ignores all the files under any 'test' directory (even under sub-folders)
7 | 


--------------------------------------------------------------------------------
/.gitignore:
--------------------------------------------------------------------------------
 1 | # Python
 2 | __pycache__/
 3 | *.py[cod]
 4 | *$py.class
 5 | *.so
 6 | .Python
 7 | build/
 8 | develop-eggs/
 9 | dist/
10 | downloads/
11 | eggs/
12 | .eggs/
13 | lib/
14 | lib64/
15 | parts/
16 | sdist/
17 | var/
18 | wheels/
19 | *.egg-info/
20 | .installed.cfg
21 | *.egg
22 | 
23 | # IDE
24 | .idea/
25 | .vscode/
26 | *.swp
27 | *.swo
28 | .lh/
29 | 
30 | # Environment
31 | .venv
32 | env/
33 | venv/
34 | ENV/
35 | 
36 | # Logs
37 | *.log
38 | 
39 | # Local history
40 | .lh/
41 | 
42 | # OS
43 | .DS_Store
44 | .DS_Store?
45 | ._*
46 | .Spotlight-V100
47 | .Trashes
48 | ehthumbs.db
49 | Thumbs.db 


--------------------------------------------------------------------------------
/.env.example:
--------------------------------------------------------------------------------
 1 | # Server Configuration
 2 | MCP_SERVER_PORT=8000
 3 | MCP_SERVER_HOST=0.0.0.0
 4 | # 鉴权配置
 5 | MCP_AUTH_URL=http://170.106.105.206:4000/users
 6 | 
 7 | # MaxKB配置
 8 | MAXKB_HOST=http://host.docker.internal:8080
 9 | MAXKB_CHAT_ID=your_chat_id_here
10 | MAXKB_APPLICATION_ID=your_application_id_here
11 | 
12 | # Optional: Set to 'true' to enable debug mode
13 | DEBUG=false
14 | 
15 | # Optional: Set custom User-Agent for website fetching
16 | MCP_USER_AGENT="MCP Test Server (github.com/modelcontextprotocol/python-sdk)"
17 | 
18 | # 本地目录挂载配置
19 | HOST_MOUNT_SOURCE=/path/to/your/local/directory
20 | HOST_MOUNT_TARGET=/host_files 


--------------------------------------------------------------------------------
/.lh/mcp_simple_tool/core/__init__.py.json:
--------------------------------------------------------------------------------
 1 | {
 2 |     "sourceFile": "mcp_simple_tool/core/__init__.py",
 3 |     "activeCommit": 0,
 4 |     "commits": [
 5 |         {
 6 |             "activePatchIndex": 0,
 7 |             "patches": [
 8 |                 {
 9 |                     "date": 1741332330423,
10 |                     "content": "Index: \n===================================================================\n--- \n+++ \n"
11 |                 }
12 |             ],
13 |             "date": 1741332330423,
14 |             "name": "Commit-0",
15 |             "content": "\"\"\"\n核心功能模块\n\"\"\" "
16 |         }
17 |     ]
18 | }


--------------------------------------------------------------------------------
/Dockerfile:
--------------------------------------------------------------------------------
 1 | # Use Python 3.10 slim image as base
 2 | FROM python:3.10-slim
 3 | 
 4 | # Set working directory
 5 | WORKDIR /app
 6 | 
 7 | # Install build dependencies and required libraries
 8 | RUN apt-get update && apt-get install -y \
 9 |     build-essential \
10 |     curl \
11 |     poppler-utils \
12 |     tesseract-ocr \
13 |     tesseract-ocr-chi-sim \
14 |     tesseract-ocr-eng \
15 |     ffmpeg \
16 |     libsm6 \
17 |     libxext6 \
18 |     libreoffice \
19 |     && rm -rf /var/lib/apt/lists/*
20 | 
21 | # Copy project files
22 | COPY . .
23 | 
24 | # Install the package in editable mode with proper path
25 | RUN pip install --no-cache-dir -e .
26 | 
27 | # Expose the port
28 | EXPOSE 8000
29 | 
30 | # Run the server with SSE transport
31 | CMD ["python", "-m", "mcp_tool", "--transport", "sse", "--port", "8000"] 


--------------------------------------------------------------------------------
/.lh/mcp_simple_tool/tools/utils/__init__.py.json:
--------------------------------------------------------------------------------
 1 | {
 2 |     "sourceFile": "mcp_simple_tool/tools/utils/__init__.py",
 3 |     "activeCommit": 0,
 4 |     "commits": [
 5 |         {
 6 |             "activePatchIndex": 1,
 7 |             "patches": [
 8 |                 {
 9 |                     "date": 1741332185821,
10 |                     "content": "Index: \n===================================================================\n--- \n+++ \n"
11 |                 },
12 |                 {
13 |                     "date": 1741332589298,
14 |                     "content": "Index: \n===================================================================\n--- \n+++ \n@@ -1,3 +1,1 @@\n-\"\"\"\n-工具共享的工具函数和辅助类\n-\"\"\" \n\\ No newline at end of file\n+ \n\\ No newline at end of file\n"
15 |                 }
16 |             ],
17 |             "date": 1741332185821,
18 |             "name": "Commit-0",
19 |             "content": "\"\"\"\n工具共享的工具函数和辅助类\n\"\"\" "
20 |         }
21 |     ]
22 | }


--------------------------------------------------------------------------------
/.lh/mcp_simple_tool/__main__.py.json:
--------------------------------------------------------------------------------
 1 | {
 2 |     "sourceFile": "mcp_simple_tool/__main__.py",
 3 |     "activeCommit": 0,
 4 |     "commits": [
 5 |         {
 6 |             "activePatchIndex": 1,
 7 |             "patches": [
 8 |                 {
 9 |                     "date": 1741332487497,
10 |                     "content": "Index: \n===================================================================\n--- \n+++ \n"
11 |                 },
12 |                 {
13 |                     "date": 1741332594194,
14 |                     "content": "Index: \n===================================================================\n--- \n+++ \n@@ -1,8 +1,5 @@\n-\"\"\"\n-主入口文件\n-\"\"\"\n+import sys\n \n-from mcp_simple_tool.server import main\n+from .server import main\n \n-if __name__ == \"__main__\":\n-    main()\n+sys.exit(main())\n"
15 |                 }
16 |             ],
17 |             "date": 1741332487497,
18 |             "name": "Commit-0",
19 |             "content": "\"\"\"\n主入口文件\n\"\"\"\n\nfrom mcp_simple_tool.server import main\n\nif __name__ == \"__main__\":\n    main()\n"
20 |         }
21 |     ]
22 | }


--------------------------------------------------------------------------------
/app.json:
--------------------------------------------------------------------------------
 1 | {
 2 |   "name": "MCP Development Framework",
 3 |   "description": "A powerful framework for creating custom tools that interact with large language models",
 4 |   "repository": "https://github.com/your-username/mcp-development-framework",
 5 |   "keywords": ["python", "mcp", "llm", "pdf", "tool", "framework"],
 6 |   "env": {
 7 |     "MCP_SERVER_PORT": {
 8 |       "description": "Port to run the server on",
 9 |       "value": "8000",
10 |       "required": false
11 |     },
12 |     "MCP_SERVER_HOST": {
13 |       "description": "Host to bind the server to",
14 |       "value": "0.0.0.0",
15 |       "required": false
16 |     },
17 |     "DEBUG": {
18 |       "description": "Enable debug mode",
19 |       "value": "false",
20 |       "required": false
21 |     },
22 |     "MCP_USER_AGENT": {
23 |       "description": "Custom User-Agent for website fetching",
24 |       "value": "MCP Development Framework (github.com/modelcontextprotocol/python-sdk)",
25 |       "required": false
26 |     }
27 |   },
28 |   "stack": "container",
29 |   "formation": {
30 |     "web": {
31 |       "quantity": 1,
32 |       "size": "eco"
33 |     }
34 |   }
35 | } 


--------------------------------------------------------------------------------
/smithery.yaml:
--------------------------------------------------------------------------------
 1 | # Smithery configuration file: https://smithery.ai/docs/config#smitheryyaml
 2 | 
 3 | startCommand:
 4 |   type: stdio
 5 |   configSchema:
 6 |     # JSON Schema defining the configuration options for the MCP.
 7 |     type: object
 8 |     required: []
 9 |     properties:
10 |       mcpServerPort:
11 |         type: number
12 |         default: 8000
13 |         description: Port to run the MCP server on.
14 |       mcpServerHost:
15 |         type: string
16 |         default: 0.0.0.0
17 |         description: Host to bind the MCP server to.
18 |       debug:
19 |         type: boolean
20 |         default: false
21 |         description: Enable debug mode.
22 |       mcpUserAgent:
23 |         type: string
24 |         description: Custom User-Agent for website fetching.
25 |   commandFunction:
26 |     # A function that produces the CLI command to start the MCP on stdio.
27 |     |-
28 |     (config) => ({ command: 'mcp-simple-tool', args: ['--transport', 'sse', '--port', String(config.mcpServerPort)], env: { MCP_SERVER_HOST: config.mcpServerHost, DEBUG: config.debug ? 'true' : 'false', MCP_USER_AGENT: config.mcpUserAgent || '' } })
29 | 


--------------------------------------------------------------------------------
/LICENSE:
--------------------------------------------------------------------------------
 1 | MIT License
 2 | 
 3 | Copyright (c) 2025 Kirill Markin
 4 | 
 5 | Permission is hereby granted, free of charge, to any person obtaining a copy
 6 | of this software and associated documentation files (the "Software"), to deal
 7 | in the Software without restriction, including without limitation the rights
 8 | to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
 9 | copies of the Software, and to permit persons to whom the Software is
10 | furnished to do so, subject to the following conditions:
11 | 
12 | The above copyright notice and this permission notice shall be included in all
13 | copies or substantial portions of the Software.
14 | 
15 | THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
16 | IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
17 | FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
18 | AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
19 | LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
20 | OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
21 | SOFTWARE.
22 | 


--------------------------------------------------------------------------------
/BUGFIX.md:
--------------------------------------------------------------------------------
 1 | # Word工具Bug修复记录 (v1.0.1-bugfix)
 2 | 
 3 | 本次修复了Word文档处理工具的两个关键Bug，增强了工具的兼容性和稳定性。
 4 | 
 5 | ## Bug 1: Word 97-2003 (.doc) 文件无法解析
 6 | 
 7 | ### 问题描述
 8 | Word工具仅支持.docx格式，无法处理旧版的.doc文件，导致用户尝试解析.doc文件时失败。
 9 | 
10 | ### 解决方案
11 | 1. 增加LibreOffice依赖，实现.doc到.docx的格式转换
12 | 2. 添加完善的错误处理和用户提示
13 | 3. 优化临时文件管理和资源清理
14 | 
15 | ### 实现细节
16 | - 添加`_is_libreoffice_installed`方法检测系统是否安装LibreOffice
17 | - 实现`_convert_doc_to_docx`方法处理格式转换
18 | - 使用临时目录存放转换后的文件
19 | - 完善的异常处理和资源清理机制
20 | 
21 | ## Bug 2: 嵌入外部文档被误识别为图片导致会话终止
22 | 
23 | ### 问题描述
24 | Word文档中嵌入的外部文档（如.txt、.xlsx等）被错误识别为图片并尝试展示，导致返回无法解析的图片格式，进而引起会话中断。
25 | 
26 | ### 解决方案
27 | 1. 添加多层图片有效性验证机制
28 | 2. 实现图片文件头特征识别
29 | 3. 使用PIL库进行图片完整性验证
30 | 4. 改进异常处理机制，防止会话中断
31 | 
32 | ### 实现细节
33 | - 新增`_is_valid_image`方法，通过多重检查确保数据真的是图片：
34 |   - 检查文件大小是否合理
35 |   - 使用imghdr验证图片类型
36 |   - 验证常见图片格式的文件头特征（PNG、JPEG、GIF等）
37 |   - 尝试用PIL库加载并验证图片完整性
38 | - 改进`_extract_images_from_word`方法，只返回验证通过的真实图片
39 | - 完善错误处理和用户提示信息
40 | 
41 | ## 测试验证
42 | 1. 成功解析Word 97-2003 (.doc)格式文件
43 | 2. 正确处理包含嵌入外部文档的Word文件，不再将嵌入文档误识别为图片
44 | 3. 即使遇到异常情况，也能保持会话的正常进行
45 | 
46 | ## 影响范围
47 | - 增强了Word文档解析工具的兼容性和稳定性
48 | - 提高了解析含有嵌入对象的Word文档的成功率
49 | - 改善了错误提示信息的用户友好性 


--------------------------------------------------------------------------------
/docker-compose.yml:
--------------------------------------------------------------------------------
 1 | version: '3.8'
 2 | 
 3 | services:
 4 |   mcp-server:
 5 |     build: .
 6 |     container_name: mcp-framework-server
 7 |     ports:
 8 |       - "${MCP_SERVER_PORT:-8000}:8000"
 9 |     environment:
10 |       - MCP_SERVER_PORT=${MCP_SERVER_PORT:-8000}
11 |       - MCP_SERVER_HOST=${MCP_SERVER_HOST:-0.0.0.0}
12 |       - MCP_AUTH_URL=${MCP_AUTH_URL:-http://170.106.105.206:4000/users}
13 |       - DEBUG=${DEBUG:-false}
14 |       - MCP_USER_AGENT=${MCP_USER_AGENT:-"MCP Test Server (github.com/modelcontextprotocol/python-sdk)"}
15 |       # MaxKB配置
16 |       - MAXKB_HOST=${MAXKB_HOST:-http://host.docker.internal:8080}
17 |       - MAXKB_CHAT_ID=${MAXKB_CHAT_ID}
18 |       - MAXKB_APPLICATION_ID=${MAXKB_APPLICATION_ID}
19 |       - MAXKB_AUTHORIZATION=${MAXKB_AUTHORIZATION}
20 |       # 挂载目录配置
21 |       - HOST_MOUNT_SOURCE=${HOST_MOUNT_SOURCE:-/tmp}
22 |       - HOST_MOUNT_TARGET=${HOST_MOUNT_TARGET:-/host_files}
23 |     volumes:
24 |       # 挂载用户的本地目录到容器内的指定目录
25 |       - ${HOST_MOUNT_SOURCE:-/tmp}:${HOST_MOUNT_TARGET:-/host_files}
26 |     extra_hosts:
27 |       - "host.docker.internal:host-gateway"
28 |     restart: unless-stopped
29 |     healthcheck:
30 |       test: ["CMD", "curl", "-f", "http://localhost:8000/sse"]
31 |       interval: 30s
32 |       timeout: 10s
33 |       retries: 3
34 |       start_period: 10s 


--------------------------------------------------------------------------------
/mcp_tool/tools/loader.py:
--------------------------------------------------------------------------------
 1 | """
 2 | 工具加载器模块，用于自动加载所有已注册的工具
 3 | """
 4 | import importlib
 5 | import pkgutil
 6 | import inspect
 7 | import os
 8 | import sys
 9 | from typing import List, Type
10 | from . import BaseTool, ToolRegistry
11 | 
def load_tools() -> List[Type[BaseTool]]:
    """
    Import every tool module in this package and return the registered tools.

    Each module found next to this file (except ``loader``) is imported so
    that any registration it performs at import time takes effect. A module
    that fails with ``ImportError`` is skipped with a warning instead of
    aborting the whole load.

    Returns:
        List[Type[BaseTool]]: the tool classes currently held by ``ToolRegistry``
    """
    # Directory that contains this package's modules
    package_path = os.path.dirname(__file__)

    for _, name, _is_pkg in pkgutil.iter_modules([package_path]):
        # Skip the package initializer and this loader itself
        if name in ('__init__', 'loader'):
            continue

        module_name = f"{__package__}.{name}"
        try:
            importlib.import_module(module_name)
        except ImportError as e:
            # Diagnostics go to stderr: when the server runs over a stdio
            # transport, stdout carries the protocol stream and stray text
            # there would corrupt it.
            print(
                f"Warning: Failed to import module {module_name}: {e}",
                file=sys.stderr,
            )

    # Snapshot of every tool class registered so far
    return list(ToolRegistry._tools.values())
39 | 
def get_tool_instances() -> dict:
    """
    Instantiate every tool class returned by ``load_tools``.

    A tool whose constructor raises is skipped with a warning so that one
    broken tool does not prevent the others from being available.

    Returns:
        dict: mapping of tool name to tool instance
    """
    tool_instances = {}

    for tool_class in load_tools():
        try:
            tool_instances[tool_class.name] = tool_class()
        except Exception as e:
            # Diagnostics go to stderr: when the server runs over a stdio
            # transport, stdout carries the protocol stream and stray text
            # there would corrupt it.
            print(
                f"Warning: Failed to instantiate tool {tool_class.name}: {e}",
                file=sys.stderr,
            )

    return tool_instances


--------------------------------------------------------------------------------
/.lh/app.json.json:
--------------------------------------------------------------------------------
 1 | {
 2 |     "sourceFile": "app.json",
 3 |     "activeCommit": 0,
 4 |     "commits": [
 5 |         {
 6 |             "activePatchIndex": 0,
 7 |             "patches": [
 8 |                 {
 9 |                     "date": 1741252783974,
10 |                     "content": "Index: \n===================================================================\n--- \n+++ \n"
11 |                 }
12 |             ],
13 |             "date": 1741252783974,
14 |             "name": "Commit-0",
15 |             "content": "{\n  \"name\": \"MCP Development Framework\",\n  \"description\": \"A powerful framework for creating custom tools that interact with large language models\",\n  \"repository\": \"https://github.com/your-username/mcp-development-framework\",\n  \"keywords\": [\"python\", \"mcp\", \"llm\", \"pdf\", \"tool\", \"framework\"],\n  \"env\": {\n    \"MCP_SERVER_PORT\": {\n      \"description\": \"Port to run the server on\",\n      \"value\": \"8000\",\n      \"required\": false\n    },\n    \"MCP_SERVER_HOST\": {\n      \"description\": \"Host to bind the server to\",\n      \"value\": \"0.0.0.0\",\n      \"required\": false\n    },\n    \"DEBUG\": {\n      \"description\": \"Enable debug mode\",\n      \"value\": \"false\",\n      \"required\": false\n    },\n    \"MCP_USER_AGENT\": {\n      \"description\": \"Custom User-Agent for website fetching\",\n      \"value\": \"MCP Development Framework (github.com/modelcontextprotocol/python-sdk)\",\n      \"required\": false\n    }\n  },\n  \"stack\": \"container\",\n  \"formation\": {\n    \"web\": {\n      \"quantity\": 1,\n      \"size\": \"eco\"\n    }\n  }\n} "
16 |         }
17 |     ]
18 | }


--------------------------------------------------------------------------------
/pyproject.toml:
--------------------------------------------------------------------------------
 1 | [project]
 2 | name = "mcp-simple-tool"
 3 | version = "1.2.0"
 4 | description = "MCP工具集合，包含文件处理和网页获取功能"
 5 | readme = "README.md"
 6 | requires-python = ">=3.10"
 7 | authors = [{ name = "aigo666" }]
 8 | keywords = ["mcp", "llm", "automation", "web", "fetch", "pdf", "word", "excel", "csv"]
 9 | license = { text = "MIT" }
10 | classifiers = [
11 |     "Development Status :: 4 - Beta",
12 |     "Intended Audience :: Developers",
13 |     "License :: OSI Approved :: MIT License",
14 |     "Programming Language :: Python :: 3",
15 |     "Programming Language :: Python :: 3.10",
16 | ]
17 | dependencies = [
18 |     "anyio>=4.5", 
19 |     "click>=8.1.0", 
20 |     "httpx>=0.27", 
21 |     "mcp",
22 |     "PyPDF2>=3.0.0",
23 |     "pdf2image>=1.16.0",
24 |     "Pillow>=10.0.0",
25 |     "pymupdf4llm==0.0.17",
26 |     "PyMuPDF>=1.22.0",
27 |     "python-docx>=0.8.11",
28 |     "pandas>=2.0.0",
29 |     "openpyxl>=3.1.0",
30 |     "pytesseract>=0.3.10",
31 |     "chardet>=5.0.0",
32 | ]
33 | 
34 | [project.scripts]
35 | mcp-simple-tool = "mcp_tool.server:main"
36 | 
37 | [project.optional-dependencies]
38 | dev = ["pyright>=1.1.378", "pytest>=8.3.3", "ruff>=0.6.9", "pytest-asyncio>=0.23.5"]
39 | 
40 | [build-system]
41 | requires = ["hatchling"]
42 | build-backend = "hatchling.build"
43 | 
44 | [tool.hatch.build.targets.wheel]
45 | packages = ["mcp_tool"]
46 | 
47 | [tool.pyright]
48 | include = ["mcp_tool"]
49 | venvPath = "."
50 | venv = ".venv"
51 | 
52 | [tool.ruff.lint]
53 | select = ["E", "F", "I"]
54 | ignore = []
55 | 
56 | [tool.ruff]
57 | line-length = 88
58 | target-version = "py310"
59 | 
60 | [tool.uv]
61 | dev-dependencies = ["pyright>=1.1.378", "pytest>=8.3.3", "ruff>=0.6.9", "pytest-asyncio>=0.23.5"]
62 | 


--------------------------------------------------------------------------------
/mcp_tool/tools/url_tool.py:
--------------------------------------------------------------------------------
 1 | import httpx
 2 | import mcp.types as types
 3 | from . import BaseTool, ToolRegistry
 4 | 
 5 | @ToolRegistry.register
 6 | class UrlTool(BaseTool):
 7 |     """URL获取工具，用于获取网站内容"""
 8 |     name = "url"
 9 |     description = "Fetches a website and returns its content"
10 |     input_schema = {
11 |         "type": "object",
12 |         "required": ["url"],
13 |         "properties": {
14 |             "url": {
15 |                 "type": "string",
16 |                 "description": "URL to fetch",
17 |             }
18 |         },
19 |     }
20 |     
21 |     async def execute(self, arguments: dict) -> list[types.TextContent | types.ImageContent | types.EmbeddedResource]:
22 |         """获取网站内容"""
23 |         if "url" not in arguments:
24 |             return [types.TextContent(
25 |                 type="text",
26 |                 text="Error: Missing required argument 'url'"
27 |             )]
28 |             
29 |         url = arguments["url"]
30 |         headers = {
31 |             "User-Agent": "MCP Test Server (github.com/modelcontextprotocol/python-sdk)"
32 |         }
33 |         try:
34 |             timeout = httpx.Timeout(10.0, connect=5.0)
35 |             async with httpx.AsyncClient(
36 |                 follow_redirects=True, 
37 |                 headers=headers,
38 |                 timeout=timeout
39 |             ) as client:
40 |                 response = await client.get(url)
41 |                 response.raise_for_status()
42 |                 return [types.TextContent(type="text", text=response.text)]
43 |         except httpx.TimeoutException:
44 |             return [types.TextContent(
45 |                 type="text",
46 |                 text="Error: Request timed out while trying to fetch the website."
47 |             )]
48 |         except httpx.HTTPStatusError as e:
49 |             return [types.TextContent(
50 |                 type="text",
51 |                 text=(f"Error: HTTP {e.response.status_code} "
52 |                       "error while fetching the website.")
53 |             )]
54 |         except Exception as e:
55 |             return [types.TextContent(
56 |                 type="text",
57 |                 text=f"Error: Failed to fetch website: {str(e)}"
58 |             )] 


--------------------------------------------------------------------------------
/deploy.sh:
--------------------------------------------------------------------------------
 1 | #!/bin/bash
 2 | 
 3 | # 显示彩色输出的函数
 4 | print_green() {
 5 |     echo -e "\033[0;32m$1\033[0m"
 6 | }
 7 | 
 8 | print_yellow() {
 9 |     echo -e "\033[0;33m$1\033[0m"
10 | }
11 | 
# print_red MESSAGE: print MESSAGE in red, then reset the color.
print_red() {
    local message="$1"
    echo -e "\033[0;31m${message}\033[0m"
}
15 | 
# Check that Docker is installed
if ! [ -x "$(command -v docker)" ]; then
    print_red "错误: Docker未安装。请先安装Docker: https://docs.docker.com/get-docker/"
    exit 1
fi

# Check that Docker Compose is installed
if ! [ -x "$(command -v docker-compose)" ]; then
    print_red "错误: Docker Compose未安装。请先安装Docker Compose: https://docs.docker.com/compose/install/"
    exit 1
fi

# Create a default .env file when none exists
if [ ! -f .env ]; then
    print_yellow "未找到.env文件，正在创建默认配置..."
    cat > .env << EOL
# MCP服务器配置
MCP_SERVER_PORT=8000
MCP_SERVER_HOST=0.0.0.0
DEBUG=true

# MaxKB配置（可选，如果不使用可以留空）
MAXKB_HOST=http://host.docker.internal:8080
MAXKB_CHAT_ID=
MAXKB_APPLICATION_ID=
MAXKB_AUTHORIZATION=

# 本地文件挂载配置
# 修改HOST_MOUNT_SOURCE为您需要让MCP服务器访问的本地目录路径
HOST_MOUNT_SOURCE=/tmp
HOST_MOUNT_TARGET=/host_files
EOL
    print_green ".env文件已创建，请根据需要修改配置。"

    # Remind the user to point HOST_MOUNT_SOURCE at the directory to expose
    print_yellow "提示: 您可能需要编辑.env文件，设置HOST_MOUNT_SOURCE为您希望MCP服务器能够访问的目录。"
    print_yellow "例如: Mac/Linux用户可设置为 HOST_MOUNT_SOURCE=/Users/username/Documents"
    print_yellow "     Windows用户可设置为 HOST_MOUNT_SOURCE=C:/Users/username/Documents"
fi

# Build and start the Docker container
print_green "正在构建并启动MCP服务器..."
docker-compose down
# Stop here when the build fails; otherwise "up" could start a previously
# built image and the script would report a successful deployment.
if ! docker-compose build --no-cache; then
    print_red "MCP服务器镜像构建失败，请检查上面的构建输出。"
    exit 1
fi
docker-compose up -d

# Give the container a moment to start, then verify that it is running
sleep 5
container_id="$(docker-compose ps -q mcp-server)"
if [ -n "$container_id" ] && [ "$(docker inspect --format='{{.State.Running}}' "$container_id")" = "true" ]; then
    # Read the configured port from the real assignment only (not from
    # commented-out lines); fall back to 8000, the default used by
    # docker-compose.yml when the variable is missing.
    server_port="$(grep -E '^MCP_SERVER_PORT=' .env | tail -n 1 | cut -d '=' -f2)"
    print_green "MCP服务器已成功启动！"
    print_green "服务地址: http://localhost:${server_port:-8000}/sse"
    print_green "您现在可以在Claude Desktop或Cursor中配置MCP服务器。"
    print_green "查看DEPLOY.md了解详细配置步骤。"
else
    print_red "MCP服务器启动失败，请检查日志:"
    print_yellow "docker-compose logs -f"
    # Signal the failure to callers instead of exiting with status 0
    exit 1
fi


--------------------------------------------------------------------------------
/mcp_tool/tools/__init__.py:
--------------------------------------------------------------------------------
 1 | from typing import Dict, Type, List
 2 | import mcp.types as types
 3 | import os
 4 | 
 5 | # 工具基类
 6 | class BaseTool:
 7 |     """所有工具的基类"""
 8 |     name: str = ""
 9 |     description: str = ""
10 |     input_schema: dict = {}
11 |     
12 |     @classmethod
13 |     def get_tool_definition(cls) -> types.Tool:
14 |         """获取工具定义"""
15 |         return types.Tool(
16 |             name=cls.name,
17 |             description=cls.description,
18 |             inputSchema=cls.input_schema
19 |         )
20 |     
21 |     async def execute(self, arguments: dict) -> list[types.TextContent | types.ImageContent | types.EmbeddedResource]:
22 |         """执行工具逻辑，需要在子类中实现"""
23 |         raise NotImplementedError("Tool implementation must override execute method")
24 |     
25 |     def process_file_path(self, file_path: str) -> str:
26 |         """
27 |         处理文件路径，支持挂载目录的转换
28 |         
29 |         如果路径以HOST_MOUNT_SOURCE环境变量开头，则将其转换为容器内的路径
30 |         """
31 |         host_mount_source = os.environ.get('HOST_MOUNT_SOURCE', '')
32 |         host_mount_target = os.environ.get('HOST_MOUNT_TARGET', '/host_files')
33 |         
34 |         # 如果路径以挂载源目录开头，则替换为挂载目标目录
35 |         if host_mount_source and file_path.startswith(host_mount_source):
36 |             return file_path.replace(host_mount_source, host_mount_target, 1)
37 |         
38 |         return file_path
39 | 
40 | 
# Tool registry
class ToolRegistry:
    """Class-level registry that maps tool names to tool classes."""
    _tools: Dict[str, Type[BaseTool]] = {}

    @classmethod
    def register(cls, tool_class: Type[BaseTool]) -> Type[BaseTool]:
        """Store ``tool_class`` under its ``name`` and hand it back.

        Returning the class lets this method be used as a class decorator.
        """
        registry = cls._tools
        registry[tool_class.name] = tool_class
        return tool_class

    @classmethod
    def get_tool(cls, name: str) -> Type[BaseTool]:
        """Return the tool class registered as ``name``.

        Raises:
            ValueError: if no tool is registered under ``name``.
        """
        tool_class = cls._tools.get(name)
        if tool_class is None:
            raise ValueError(f"Unknown tool: {name}")
        return tool_class

    @classmethod
    def list_tools(cls) -> List[types.Tool]:
        """Return the tool definition of every registered tool."""
        definitions = []
        for tool_class in cls._tools.values():
            definitions.append(tool_class.get_tool_definition())
        return definitions

    @classmethod
    def has_tool(cls, name: str) -> bool:
        """Report whether a tool is registered under ``name``."""
        is_registered = name in cls._tools
        return is_registered


--------------------------------------------------------------------------------
/.lh/mcp_simple_tool/tools/url_tool.py.json:
--------------------------------------------------------------------------------
 1 | {
 2 |     "sourceFile": "mcp_simple_tool/tools/url_tool.py",
 3 |     "activeCommit": 0,
 4 |     "commits": [
 5 |         {
 6 |             "activePatchIndex": 0,
 7 |             "patches": [
 8 |                 {
 9 |                     "date": 1741333258090,
10 |                     "content": "Index: \n===================================================================\n--- \n+++ \n"
11 |                 }
12 |             ],
13 |             "date": 1741333258090,
14 |             "name": "Commit-0",
15 |             "content": "import httpx\nimport mcp.types as types\nfrom . import BaseTool, ToolRegistry\n\n@ToolRegistry.register\nclass UrlTool(BaseTool):\n    \"\"\"URL获取工具，用于获取网站内容\"\"\"\n    name = \"url\"\n    description = \"Fetches a website and returns its content\"\n    input_schema = {\n        \"type\": \"object\",\n        \"required\": [\"url\"],\n        \"properties\": {\n            \"url\": {\n                \"type\": \"string\",\n                \"description\": \"URL to fetch\",\n            }\n        },\n    }\n    \n    async def execute(self, arguments: dict) -> list[types.TextContent | types.ImageContent | types.EmbeddedResource]:\n        \"\"\"获取网站内容\"\"\"\n        if \"url\" not in arguments:\n            return [types.TextContent(\n                type=\"text\",\n                text=\"Error: Missing required argument 'url'\"\n            )]\n            \n        url = arguments[\"url\"]\n        headers = {\n            \"User-Agent\": \"MCP Test Server (github.com/modelcontextprotocol/python-sdk)\"\n        }\n        try:\n            timeout = httpx.Timeout(10.0, connect=5.0)\n            async with httpx.AsyncClient(\n                follow_redirects=True, \n                headers=headers,\n                timeout=timeout\n            ) as client:\n                response = await client.get(url)\n                response.raise_for_status()\n                return [types.TextContent(type=\"text\", text=response.text)]\n        except httpx.TimeoutException:\n            return [types.TextContent(\n                type=\"text\",\n                text=\"Error: Request timed out while trying to fetch the website.\"\n            )]\n        except httpx.HTTPStatusError as e:\n            return [types.TextContent(\n                type=\"text\",\n                text=(f\"Error: HTTP {e.response.status_code} \"\n                      \"error while fetching the website.\")\n            )]\n        except Exception as e:\n            
return [types.TextContent(\n                type=\"text\",\n                text=f\"Error: Failed to fetch website: {str(e)}\"\n            )] "
16 |         }
17 |     ]
18 | }


--------------------------------------------------------------------------------
/.lh/mcp_simple_tool/core/stdio_adapter.py.json:
--------------------------------------------------------------------------------
 1 | {
 2 |     "sourceFile": "mcp_simple_tool/core/stdio_adapter.py",
 3 |     "activeCommit": 0,
 4 |     "commits": [
 5 |         {
 6 |             "activePatchIndex": 0,
 7 |             "patches": [
 8 |                 {
 9 |                     "date": 1741332369492,
10 |                     "content": "Index: \n===================================================================\n--- \n+++ \n"
11 |                 }
12 |             ],
13 |             "date": 1741332369492,
14 |             "name": "Commit-0",
15 |             "content": "\"\"\"\nSTDIO适配器，用于处理MCP与STDIO的交互\n\"\"\"\n\nimport anyio\nfrom mcp.server.lowlevel import Server\nfrom mcp.server.stdio import stdio_server\nfrom typing import List, Dict, Any\nimport mcp.types as types\nfrom ..tools import BaseTool\n\n\nclass StdioAdapter:\n    \"\"\"\n    STDIO适配器，用于处理MCP与STDIO的交互\n    \"\"\"\n    \n    def __init__(self, app_name: str, tools: List[BaseTool]):\n        \"\"\"\n        初始化STDIO适配器\n        \n        Args:\n            app_name: 应用名称\n            tools: 工具列表\n        \"\"\"\n        self.app_name = app_name\n        self.tools = tools\n        self.app = Server(app_name)\n        \n    def setup(self):\n        \"\"\"\n        设置MCP服务器\n        \"\"\"\n        # 注册工具调用函数\n        @self.app.call_tool()\n        async def fetch_tool(name: str, arguments: dict) -> List[types.TextContent | types.ImageContent | types.EmbeddedResource]:\n            \"\"\"\n            工具调用函数\n            \n            Args:\n                name: 工具名称\n                arguments: 工具参数\n                \n            Returns:\n                工具执行结果\n            \"\"\"\n            # 查找匹配的工具\n            for tool in self.tools:\n                if tool.name == name:\n                    return await tool.execute(arguments)\n            \n            # 如果没有找到匹配的工具，返回错误信息\n            return [types.TextContent(\n                type=\"text\",\n                text=f\"错误: 未知工具: {name}\"\n            )]\n        \n        # 注册工具列表函数\n        @self.app.list_tools()\n        async def list_tools() -> List[types.Tool]:\n            \"\"\"\n            列出所有可用的工具\n            \n            Returns:\n                工具定义列表\n            \"\"\"\n            return [tool.to_tool_definition() for tool in self.tools]\n    \n    async def run_async(self):\n        \"\"\"\n        异步运行STDIO服务器\n        \"\"\"\n        # 设置MCP服务器\n        self.setup()\n        \n        # 运行服务器\n        async with stdio_server() as streams:\n            await 
self.app.run(\n                streams[0], streams[1], self.app.create_initialization_options()\n            )\n    \n    def run(self):\n        \"\"\"\n        运行STDIO服务器\n        \"\"\"\n        anyio.run(self.run_async) "
16 |         }
17 |     ]
18 | }


--------------------------------------------------------------------------------
/.lh/mcp_tool/tools/README.md.json:
--------------------------------------------------------------------------------
 1 | {
 2 |     "sourceFile": "mcp_tool/tools/README.md",
 3 |     "activeCommit": 0,
 4 |     "commits": [
 5 |         {
 6 |             "activePatchIndex": 0,
 7 |             "patches": [
 8 |                 {
 9 |                     "date": 1741345406172,
10 |                     "content": "Index: \n===================================================================\n--- \n+++ \n"
11 |                 }
12 |             ],
13 |             "date": 1741345406172,
14 |             "name": "Commit-0",
15 |             "content": "# SEE工具框架\n\n这是一个用于SEE（Standard Extension Environment）对接的模块化工具框架。该框架允许开发者轻松创建和扩展自定义工具，并通过MCP协议与模型交互。\n\n## 框架结构\n\n```\nmcp_tool/\n├── tools/\n│   ├── __init__.py        # 定义工具基类和注册器\n│   ├── loader.py          # 工具加载器，自动加载所有工具\n│   ├── url_tool.py        # URL工具实现\n│   ├── pdf_tool.py        # PDF解析工具实现\n│   └── quick_pdf_tool.py  # 快速PDF预览工具实现\n├── __init__.py\n├── __main__.py\n└── server.py              # MCP服务器实现\n```\n\n## 如何开发新工具\n\n1. 在`tools`目录下创建一个新的Python文件，如`your_tool.py`\n2. 导入必要的依赖和基类\n3. 创建一个继承自`BaseTool`的工具类\n4. 使用`@ToolRegistry.register`装饰器注册工具\n5. 实现工具的`execute`方法\n\n### 工具模板示例\n\n```python\nimport mcp.types as types\nfrom . import BaseTool, ToolRegistry\n\n@ToolRegistry.register\nclass YourTool(BaseTool):\n    \"\"\"您的工具描述\"\"\"\n    name = \"your_tool_name\"  # 工具的唯一标识符\n    description = \"您的工具描述\"  # 工具的描述信息，将显示给用户\n    input_schema = {\n        \"type\": \"object\",\n        \"required\": [\"param1\"],  # 必需的参数\n        \"properties\": {\n            \"param1\": {\n                \"type\": \"string\",\n                \"description\": \"参数1的描述\",\n            },\n            \"param2\": {\n                \"type\": \"integer\",\n                \"description\": \"参数2的描述（可选）\",\n            }\n        },\n    }\n    \n    async def execute(self, arguments: dict) -> list[types.TextContent | types.ImageContent | types.EmbeddedResource]:\n        \"\"\"执行工具逻辑\"\"\"\n        # 参数验证\n        if \"param1\" not in arguments:\n            return [types.TextContent(\n                type=\"text\",\n                text=\"Error: Missing required argument 'param1'\"\n            )]\n            \n        # 获取参数\n        param1 = arguments[\"param1\"]\n        param2 = arguments.get(\"param2\", 0)  # 获取可选参数，提供默认值\n        \n        # 执行工具逻辑\n        result = f\"处理参数: {param1}, {param2}\"\n        \n        # 返回结果\n        return [types.TextContent(\n            type=\"text\",\n            text=result\n        )]\n```\n\n## 
注意事项\n\n1. 每个工具必须有一个唯一的`name`\n2. 工具描述应该清晰地说明工具的用途和使用方法\n3. 输入模式应该准确描述所需的参数和类型\n4. 所有参数验证应在`execute`方法中处理\n5. 工具应返回适当的`TextContent`或`ImageContent`对象列表\n\n## 自动加载机制\n\n框架使用`loader.py`中的自动加载机制，在启动时自动发现和加载所有工具。您只需按照上述模板创建新工具，它将在下次启动服务器时自动注册。\n\n无需修改`server.py`或其他任何文件，框架会自动处理工具的注册和调用。\n\n## 扩展示例\n\n您可以参考现有的工具实现（`url_tool.py`, `pdf_tool.py`, `quick_pdf_tool.py`）作为开发新工具的参考。 "
16 |         }
17 |     ]
18 | }


--------------------------------------------------------------------------------
/DEPLOY.md:
--------------------------------------------------------------------------------
  1 | # MCP框架本地Docker部署指南
  2 | 
  3 | 本指南将帮助您使用Docker在本地部署支持图片处理的MCP服务器。
  4 | 
  5 | ## 先决条件
  6 | 
  7 | - 已安装Docker和Docker Compose
  8 | - 基本的命令行操作知识
  9 | 
 10 | ## 部署步骤
 11 | 
 12 | ### 1. 准备环境变量文件
 13 | 
 14 | 在项目根目录中创建一个名为`.env`的文件，内容如下：
 15 | 
 16 | ```
 17 | # MCP服务器配置
 18 | MCP_SERVER_PORT=8000
 19 | MCP_SERVER_HOST=0.0.0.0
 20 | DEBUG=true
 21 | 
 22 | # MaxKB配置（可选，如果不使用可以留空）
 23 | MAXKB_HOST=http://host.docker.internal:8080
 24 | MAXKB_CHAT_ID=
 25 | MAXKB_APPLICATION_ID=
 26 | MAXKB_AUTHORIZATION=
 27 | 
 28 | # 本地文件挂载配置
 29 | # 修改HOST_MOUNT_SOURCE为您需要让MCP服务器访问的本地目录路径
 30 | # Mac/Linux示例: HOST_MOUNT_SOURCE=/Users/username/Documents
 31 | # Windows示例: HOST_MOUNT_SOURCE=C:/Users/username/Documents
 32 | HOST_MOUNT_SOURCE=/tmp
 33 | HOST_MOUNT_TARGET=/host_files
 34 | ```
 35 | 
 36 | 请根据您的实际情况修改`HOST_MOUNT_SOURCE`，指向您希望MCP服务器能够访问的本地目录。
 37 | 
 38 | ### 2. 构建并启动Docker容器
 39 | 
 40 | 在项目根目录中，运行以下命令：
 41 | 
 42 | ```bash
 43 | # 构建Docker镜像并启动容器
 44 | docker-compose up -d
 45 | ```
 46 | 
 47 | 此命令会构建Docker镜像并在后台启动容器。
 48 | 
 49 | ### 3. 验证服务是否正常运行
 50 | 
 51 | ```bash
 52 | # 查看容器日志
 53 | docker-compose logs -f
 54 | ```
 55 | 
 56 | 如果看到如下类似输出，说明服务已成功启动：
 57 | 
 58 | ```
 59 | mcp-server    | INFO:     Started server process [1]
 60 | mcp-server    | INFO:     Waiting for application startup.
 61 | mcp-server    | INFO:     Application startup complete.
 62 | mcp-server    | INFO:     Uvicorn running on http://0.0.0.0:8000
 63 | ```
 64 | 
 65 | 您也可以通过访问`http://localhost:8000/sse`验证服务是否正常运行。
 66 | 
 67 | ### 4. 配置Claude或Cursor使用MCP服务器
 68 | 
 69 | #### 在Claude Desktop中配置
 70 | 
 71 | 1. 打开Claude Desktop应用
 72 | 2. 点击左下角头像，选择"Settings..."
 73 | 3. 点击左侧"Developer"
 74 | 4. 点击"Edit Config"
 75 | 5. 输入以下配置：
 76 | 
 77 | ```json
 78 | {
 79 |   "mcpServers": {
 80 |     "custom-mcp": {
 81 |       "url": "http://localhost:8000/sse"
 82 |     }
 83 |   }
 84 | }
 85 | ```
 86 | 
 87 | #### 在Cursor中配置
 88 | 
 89 | 1. 在Cursor中打开命令面板（Ctrl+Shift+P或Cmd+Shift+P）
 90 | 2. 输入"MCP"并选择"MCP: Configure MCP Server"
 91 | 3. 选择"Add New Server"
 92 | 4. 选择"SSE Server"类型
 93 | 5. 输入URL: `http://localhost:8000/sse`
 94 | 6. 输入名称，如"custom-mcp"
 95 | 
 96 | ### 5. 测试文件处理和图片支持
 97 | 
 98 | 在Claude Desktop或Cursor中，尝试以下操作：
 99 | 
100 | - 解析PDF文件：`请解析这个PDF文档：/path/to/your/document.pdf`
101 | - 解析Word文档：`请解析这个Word文档：/path/to/your/document.docx`
102 | 
103 | 如果一切正常，您应该能够看到文档内容以及其中包含的图片。
104 | 
105 | ## 管理Docker容器
106 | 
107 | ```bash
108 | # 停止服务
109 | docker-compose down
110 | 
111 | # 查看容器状态
112 | docker-compose ps
113 | 
114 | # 重启服务
115 | docker-compose restart
116 | ```
117 | 
118 | ## 常见问题排查
119 | 
120 | ### 1. 无法访问主机文件
121 | 
122 | 确保您在`.env`文件中正确设置了`HOST_MOUNT_SOURCE`，指向您需要访问的目录。
123 | 
124 | ### 2. 图片无法显示
125 | 
126 | 检查服务器日志是否有错误信息。可能是Tesseract OCR依赖问题，可以尝试手动进入容器安装：
127 | 
128 | ```bash
129 | docker exec -it mcp-framework_mcp-server_1 bash
130 | apt-get update && apt-get install -y tesseract-ocr tesseract-ocr-chi-sim tesseract-ocr-eng
131 | ```
132 | 
133 | ### 3. MCP服务器无法启动
134 | 
135 | 检查端口是否被占用：
136 | 
137 | ```bash
138 | lsof -i :8000
139 | ```
140 | 
141 | 如果端口被占用，可以在`.env`文件中修改`MCP_SERVER_PORT`为其他值。 


--------------------------------------------------------------------------------
/.lh/mcp_tool/tools/loader.py.json:
--------------------------------------------------------------------------------
 1 | {
 2 |     "sourceFile": "mcp_tool/tools/loader.py",
 3 |     "activeCommit": 0,
 4 |     "commits": [
 5 |         {
 6 |             "activePatchIndex": 1,
 7 |             "patches": [
 8 |                 {
 9 |                     "date": 1741345451458,
10 |                     "content": "Index: \n===================================================================\n--- \n+++ \n"
11 |                 },
12 |                 {
13 |                     "date": 1741764393435,
14 |                     "content": "Index: \n===================================================================\n--- \n+++ \n@@ -19,26 +19,23 @@\n     # 获取当前模块的路径\n     package_path = os.path.dirname(__file__)\n     \n     # 获取所有子模块\n-    tool_modules = []\n     for _, name, is_pkg in pkgutil.iter_modules([package_path]):\n         # 跳过__init__.py和loader.py\n         if name in ['__init__', 'loader']:\n             continue\n         \n         # 导入模块\n         module_name = f\"{__package__}.{name}\"\n         try:\n-            module = importlib.import_module(module_name)\n-            tool_modules.append(module)\n+            importlib.import_module(module_name)\n         except ImportError as e:\n             print(f\"Warning: Failed to import module {module_name}: {e}\")\n     \n     # 收集所有已注册的工具类\n     tools = list(ToolRegistry._tools.values())\n     \n-    # 返回工具类列表\n     return tools\n \n def get_tool_instances() -> dict:\n     \"\"\"\n"
15 |                 }
16 |             ],
17 |             "date": 1741345451458,
18 |             "name": "Commit-0",
19 |             "content": "\"\"\"\n工具加载器模块，用于自动加载所有已注册的工具\n\"\"\"\nimport importlib\nimport pkgutil\nimport inspect\nimport os\nimport sys\nfrom typing import List, Type\nfrom . import BaseTool, ToolRegistry\n\ndef load_tools() -> List[Type[BaseTool]]:\n    \"\"\"\n    自动加载tools目录下的所有工具模块\n    \n    Returns:\n        List[Type[BaseTool]]: 已加载的工具类列表\n    \"\"\"\n    # 获取当前模块的路径\n    package_path = os.path.dirname(__file__)\n    \n    # 获取所有子模块\n    tool_modules = []\n    for _, name, is_pkg in pkgutil.iter_modules([package_path]):\n        # 跳过__init__.py和loader.py\n        if name in ['__init__', 'loader']:\n            continue\n        \n        # 导入模块\n        module_name = f\"{__package__}.{name}\"\n        try:\n            module = importlib.import_module(module_name)\n            tool_modules.append(module)\n        except ImportError as e:\n            print(f\"Warning: Failed to import module {module_name}: {e}\")\n    \n    # 收集所有已注册的工具类\n    tools = list(ToolRegistry._tools.values())\n    \n    # 返回工具类列表\n    return tools\n\ndef get_tool_instances() -> dict:\n    \"\"\"\n    创建所有工具类的实例\n    \n    Returns:\n        dict: 工具名称到工具实例的映射\n    \"\"\"\n    tools = load_tools()\n    tool_instances = {}\n    \n    for tool_class in tools:\n        try:\n            tool_instance = tool_class()\n            tool_instances[tool_class.name] = tool_instance\n        except Exception as e:\n            print(f\"Warning: Failed to instantiate tool {tool_class.name}: {e}\")\n    \n    return tool_instances "
20 |         }
21 |     ]
22 | }


--------------------------------------------------------------------------------
/.lh/mcp_simple_tool/tools/web_tool.py.json:
--------------------------------------------------------------------------------
 1 | {
 2 |     "sourceFile": "mcp_simple_tool/tools/web_tool.py",
 3 |     "activeCommit": 0,
 4 |     "commits": [
 5 |         {
 6 |             "activePatchIndex": 0,
 7 |             "patches": [
 8 |                 {
 9 |                     "date": 1741332226149,
10 |                     "content": "Index: \n===================================================================\n--- \n+++ \n"
11 |                 }
12 |             ],
13 |             "date": 1741332226149,
14 |             "name": "Commit-0",
15 |             "content": "\"\"\"\n网页内容获取工具\n\"\"\"\n\nimport httpx\nfrom typing import Dict, List, Any\nimport mcp.types as types\nfrom .base import BaseTool\n\n\nclass WebTool(BaseTool):\n    \"\"\"\n    用于获取网页内容的工具\n    \"\"\"\n    \n    @property\n    def name(self) -> str:\n        return \"url\"\n    \n    @property\n    def description(self) -> str:\n        return \"获取指定URL的网页内容\"\n    \n    @property\n    def input_schema(self) -> Dict[str, Any]:\n        return {\n            \"type\": \"object\",\n            \"required\": [\"url\"],\n            \"properties\": {\n                \"url\": {\n                    \"type\": \"string\",\n                    \"description\": \"要获取内容的网站URL\",\n                }\n            },\n        }\n    \n    async def execute(self, arguments: Dict[str, Any]) -> List[types.TextContent | types.ImageContent | types.EmbeddedResource]:\n        \"\"\"\n        获取网页内容\n        \n        Args:\n            arguments: 参数字典，必须包含'url'键\n            \n        Returns:\n            网页内容列表\n        \"\"\"\n        if \"url\" not in arguments:\n            return [types.TextContent(\n                type=\"text\",\n                text=\"错误: 缺少必要参数 'url'\"\n            )]\n        \n        return await self._fetch_website(arguments[\"url\"])\n    \n    async def _fetch_website(self, url: str) -> List[types.TextContent | types.ImageContent | types.EmbeddedResource]:\n        \"\"\"\n        获取指定URL的网页内容\n        \n        Args:\n            url: 要获取内容的网站URL\n            \n        Returns:\n            网页内容列表\n        \"\"\"\n        headers = {\n            \"User-Agent\": \"MCP Development Framework (github.com/modelcontextprotocol/python-sdk)\"\n        }\n        \n        try:\n            timeout = httpx.Timeout(10.0, connect=5.0)\n            async with httpx.AsyncClient(\n                follow_redirects=True, \n                headers=headers,\n                timeout=timeout\n            ) as client:\n                
response = await client.get(url)\n                response.raise_for_status()\n                return [types.TextContent(type=\"text\", text=response.text)]\n        except httpx.TimeoutException:\n            return [types.TextContent(\n                type=\"text\",\n                text=\"错误: 请求超时，无法获取网站内容。\"\n            )]\n        except httpx.HTTPStatusError as e:\n            return [types.TextContent(\n                type=\"text\",\n                text=f\"错误: HTTP {e.response.status_code} 错误，无法获取网站内容。\"\n            )]\n        except Exception as e:\n            return [types.TextContent(\n                type=\"text\",\n                text=f\"错误: 获取网站内容失败: {str(e)}\"\n            )] "
16 |         }
17 |     ]
18 | }


--------------------------------------------------------------------------------
/.lh/mcp_tool/tools/__init__.py.json:
--------------------------------------------------------------------------------
 1 | {
 2 |     "sourceFile": "mcp_tool/tools/__init__.py",
 3 |     "activeCommit": 0,
 4 |     "commits": [
 5 |         {
 6 |             "activePatchIndex": 1,
 7 |             "patches": [
 8 |                 {
 9 |                     "date": 1741576383120,
10 |                     "content": "Index: \n===================================================================\n--- \n+++ \n"
11 |                 },
12 |                 {
13 |                     "date": 1741944085567,
14 |                     "content": "Index: \n===================================================================\n--- \n+++ \n@@ -1,6 +1,7 @@\n from typing import Dict, Type, List\n import mcp.types as types\n+import os\n \n # 工具基类\n class BaseTool:\n     \"\"\"所有工具的基类\"\"\"\n@@ -19,8 +20,23 @@\n     \n     async def execute(self, arguments: dict) -> list[types.TextContent | types.ImageContent | types.EmbeddedResource]:\n         \"\"\"执行工具逻辑，需要在子类中实现\"\"\"\n         raise NotImplementedError(\"Tool implementation must override execute method\")\n+    \n+    def process_file_path(self, file_path: str) -> str:\n+        \"\"\"\n+        处理文件路径，支持挂载目录的转换\n+        \n+        如果路径以HOST_MOUNT_SOURCE环境变量开头，则将其转换为容器内的路径\n+        \"\"\"\n+        host_mount_source = os.environ.get('HOST_MOUNT_SOURCE', '')\n+        host_mount_target = os.environ.get('HOST_MOUNT_TARGET', '/host_files')\n+        \n+        # 如果路径以挂载源目录开头，则替换为挂载目标目录\n+        if host_mount_source and file_path.startswith(host_mount_source):\n+            return file_path.replace(host_mount_source, host_mount_target, 1)\n+        \n+        return file_path\n \n \n # 工具注册器\n class ToolRegistry:\n"
15 |                 }
16 |             ],
17 |             "date": 1741576383120,
18 |             "name": "Commit-0",
19 |             "content": "from typing import Dict, Type, List\nimport mcp.types as types\n\n# 工具基类\nclass BaseTool:\n    \"\"\"所有工具的基类\"\"\"\n    name: str = \"\"\n    description: str = \"\"\n    input_schema: dict = {}\n    \n    @classmethod\n    def get_tool_definition(cls) -> types.Tool:\n        \"\"\"获取工具定义\"\"\"\n        return types.Tool(\n            name=cls.name,\n            description=cls.description,\n            inputSchema=cls.input_schema\n        )\n    \n    async def execute(self, arguments: dict) -> list[types.TextContent | types.ImageContent | types.EmbeddedResource]:\n        \"\"\"执行工具逻辑，需要在子类中实现\"\"\"\n        raise NotImplementedError(\"Tool implementation must override execute method\")\n\n\n# 工具注册器\nclass ToolRegistry:\n    \"\"\"工具注册器，用于管理所有可用工具\"\"\"\n    _tools: Dict[str, Type[BaseTool]] = {}\n    \n    @classmethod\n    def register(cls, tool_class: Type[BaseTool]) -> Type[BaseTool]:\n        \"\"\"注册工具\"\"\"\n        cls._tools[tool_class.name] = tool_class\n        return tool_class\n    \n    @classmethod\n    def get_tool(cls, name: str) -> Type[BaseTool]:\n        \"\"\"获取工具类\"\"\"\n        if name not in cls._tools:\n            raise ValueError(f\"Unknown tool: {name}\")\n        return cls._tools[name]\n    \n    @classmethod\n    def list_tools(cls) -> List[types.Tool]:\n        \"\"\"列出所有可用工具\"\"\"\n        return [tool_class.get_tool_definition() for tool_class in cls._tools.values()]\n    \n    @classmethod\n    def has_tool(cls, name: str) -> bool:\n        \"\"\"检查工具是否存在\"\"\"\n        return name in cls._tools "
20 |         }
21 |     ]
22 | }


--------------------------------------------------------------------------------
/.lh/mcp_simple_tool/tools/__init__.py.json:
--------------------------------------------------------------------------------
 1 | {
 2 |     "sourceFile": "mcp_simple_tool/tools/__init__.py",
 3 |     "activeCommit": 0,
 4 |     "commits": [
 5 |         {
 6 |             "activePatchIndex": 4,
 7 |             "patches": [
 8 |                 {
 9 |                     "date": 1741332178163,
10 |                     "content": "Index: \n===================================================================\n--- \n+++ \n"
11 |                 },
12 |                 {
13 |                     "date": 1741332320523,
14 |                     "content": "Index: \n===================================================================\n--- \n+++ \n@@ -2,8 +2,17 @@\n 工具包，包含所有可用的工具\n \"\"\"\n \n from .base import BaseTool\n-# 工具将在各自的模块中创建并在这里导入\n+from .web_tool import WebTool\n+from .pdf_tool import PdfTool\n+from .quick_pdf_tool import QuickPdfTool\n\\ No newline at end of file\n \n-# 导出的工具列表\n-__all__ = ['BaseTool'] \n+# 导出的所有工具类\n+__all__ = ['BaseTool', 'WebTool', 'PdfTool', 'QuickPdfTool']\n+\n+# 所有工具的实例列表，用于自动注册\n+all_tools = [\n+    WebTool(),\n+    PdfTool(),\n+    QuickPdfTool()\n+] \n\\ No newline at end of file\n"
15 |                 },
16 |                 {
17 |                     "date": 1741332588660,
18 |                     "content": "Index: \n===================================================================\n--- \n+++ \n@@ -1,18 +1,1 @@\n-\"\"\"\n-工具包，包含所有可用的工具\n-\"\"\"\n-\n-from .base import BaseTool\n-from .web_tool import WebTool\n-from .pdf_tool import PdfTool\n-from .quick_pdf_tool import QuickPdfTool\n-\n-# 导出的所有工具类\n-__all__ = ['BaseTool', 'WebTool', 'PdfTool', 'QuickPdfTool']\n-\n-# 所有工具的实例列表，用于自动注册\n-all_tools = [\n-    WebTool(),\n-    PdfTool(),\n-    QuickPdfTool()\n-] \n\\ No newline at end of file\n+ \n\\ No newline at end of file\n"
19 |                 },
20 |                 {
21 |                     "date": 1741333243581,
22 |                     "content": "Index: \n===================================================================\n--- \n+++ \n@@ -1,4 +1,12 @@\n+'''\n+Author: 刘彦志 yanzhiliu@trip.com\n+Date: 2025-03-07 15:40:36\n+LastEditors: 刘彦志 yanzhiliu@trip.com\n+LastEditTime: 2025-03-07 15:40:42\n+FilePath: /mcp-development-framework/mcp_simple_tool/tools/__init__.py\n+Description: 这是默认设置,请设置`customMade`, 打开koroFileHeader查看配置 进行设置: https://github.com/OBKoro1/koro1FileHeader/wiki/%E9%85%8D%E7%BD%AE\n+'''\n from typing import Dict, Type, List\n import mcp.types as types\n \n # 工具基类\n"
23 |                 },
24 |                 {
25 |                     "date": 1741337157527,
26 |                     "content": "Index: \n===================================================================\n--- \n+++ \n@@ -1,9 +1,9 @@\n '''\n Author: 刘彦志 yanzhiliu@trip.com\n Date: 2025-03-07 15:40:36\n LastEditors: 刘彦志 yanzhiliu@trip.com\n-LastEditTime: 2025-03-07 16:45:18\n+LastEditTime: 2025-03-07 16:45:57\n FilePath: /mcp-development-framework/mcp_simple_tool/tools/__init__.py\n Description: 这是默认设置,请设置`customMade`, 打开koroFileHeader查看配置 进行设置: https://github.com/OBKoro1/koro1FileHeader/wiki/%E9%85%8D%E7%BD%AE\n '''\n from typing import Dict, Type, List\n"
27 |                 }
28 |             ],
29 |             "date": 1741332178163,
30 |             "name": "Commit-0",
31 |             "content": "\"\"\"\n工具包，包含所有可用的工具\n\"\"\"\n\nfrom .base import BaseTool\n# 工具将在各自的模块中创建并在这里导入\n\n# 导出的工具列表\n__all__ = ['BaseTool'] "
32 |         }
33 |     ]
34 | }


--------------------------------------------------------------------------------
/.lh/mcp_simple_tool/tools/base.py.json:
--------------------------------------------------------------------------------
 1 | {
 2 |     "sourceFile": "mcp_simple_tool/tools/base.py",
 3 |     "activeCommit": 0,
 4 |     "commits": [
 5 |         {
 6 |             "activePatchIndex": 1,
 7 |             "patches": [
 8 |                 {
 9 |                     "date": 1741332169421,
10 |                     "content": "Index: \n===================================================================\n--- \n+++ \n"
11 |                 },
12 |                 {
13 |                     "date": 1741332588115,
14 |                     "content": "Index: \n===================================================================\n--- \n+++ \n@@ -1,57 +1,1 @@\n-\"\"\"\n-基础工具类，所有工具都应该继承这个类\n-\"\"\"\n-\n-import abc\n-from typing import Dict, List, Any, Optional\n-import mcp.types as types\n-\n-\n-class BaseTool(abc.ABC):\n-    \"\"\"\n-    所有MCP工具的基类，定义了工具的基本接口\n-    \"\"\"\n-    \n-    @property\n-    @abc.abstractmethod\n-    def name(self) -> str:\n-        \"\"\"工具名称\"\"\"\n-        pass\n-    \n-    @property\n-    @abc.abstractmethod\n-    def description(self) -> str:\n-        \"\"\"工具描述\"\"\"\n-        pass\n-    \n-    @property\n-    @abc.abstractmethod\n-    def input_schema(self) -> Dict[str, Any]:\n-        \"\"\"工具输入参数模式定义\"\"\"\n-        pass\n-    \n-    @abc.abstractmethod\n-    async def execute(self, arguments: Dict[str, Any]) -> List[types.TextContent | types.ImageContent | types.EmbeddedResource]:\n-        \"\"\"\n-        执行工具逻辑\n-        \n-        Args:\n-            arguments: 工具参数\n-            \n-        Returns:\n-            执行结果列表，可以包含文本、图像或其他类型的内容\n-        \"\"\"\n-        pass\n-    \n-    def to_tool_definition(self) -> types.Tool:\n-        \"\"\"\n-        转换为MCP工具定义\n-        \n-        Returns:\n-            MCP工具定义对象\n-        \"\"\"\n-        return types.Tool(\n-            name=self.name,\n-            description=self.description,\n-            inputSchema=self.input_schema\n-        ) \n\\ No newline at end of file\n+ \n\\ No newline at end of file\n"
15 |                 }
16 |             ],
17 |             "date": 1741332169421,
18 |             "name": "Commit-0",
19 |             "content": "\"\"\"\n基础工具类，所有工具都应该继承这个类\n\"\"\"\n\nimport abc\nfrom typing import Dict, List, Any, Optional\nimport mcp.types as types\n\n\nclass BaseTool(abc.ABC):\n    \"\"\"\n    所有MCP工具的基类，定义了工具的基本接口\n    \"\"\"\n    \n    @property\n    @abc.abstractmethod\n    def name(self) -> str:\n        \"\"\"工具名称\"\"\"\n        pass\n    \n    @property\n    @abc.abstractmethod\n    def description(self) -> str:\n        \"\"\"工具描述\"\"\"\n        pass\n    \n    @property\n    @abc.abstractmethod\n    def input_schema(self) -> Dict[str, Any]:\n        \"\"\"工具输入参数模式定义\"\"\"\n        pass\n    \n    @abc.abstractmethod\n    async def execute(self, arguments: Dict[str, Any]) -> List[types.TextContent | types.ImageContent | types.EmbeddedResource]:\n        \"\"\"\n        执行工具逻辑\n        \n        Args:\n            arguments: 工具参数\n            \n        Returns:\n            执行结果列表，可以包含文本、图像或其他类型的内容\n        \"\"\"\n        pass\n    \n    def to_tool_definition(self) -> types.Tool:\n        \"\"\"\n        转换为MCP工具定义\n        \n        Returns:\n            MCP工具定义对象\n        \"\"\"\n        return types.Tool(\n            name=self.name,\n            description=self.description,\n            inputSchema=self.input_schema\n        ) "
20 |         }
21 |     ]
22 | }


--------------------------------------------------------------------------------
/mcp_tool/tools/excel_tool.py:
--------------------------------------------------------------------------------
 1 | import os
 2 | import pandas as pd
 3 | import json
 4 | import mcp.types as types
 5 | from . import BaseTool, ToolRegistry
 6 | 
 7 | @ToolRegistry.register
 8 | class ExcelTool(BaseTool):
 9 |     """Excel解析工具，用于解析Excel文件内容"""
10 |     name = "parse_excel"
11 |     description = "Parses an Excel file and returns its content including all sheets"
12 |     input_schema = {
13 |         "type": "object",
14 |         "required": ["file_path"],
15 |         "properties": {
16 |             "file_path": {
17 |                 "type": "string",
18 |                 "description": "Path to the Excel file to parse",
19 |             }
20 |         },
21 |     }
22 |     
23 |     async def execute(self, arguments: dict) -> list[types.TextContent | types.ImageContent | types.EmbeddedResource]:
24 |         """解析Excel文件并返回内容"""
25 |         if "file_path" not in arguments:
26 |             return [types.TextContent(
27 |                 type="text",
28 |                 text="Error: Missing required argument 'file_path'"
29 |             )]
30 |         
31 |         file_path = arguments["file_path"]
32 |         # 处理文件路径，支持挂载目录的转换
33 |         file_path = self.process_file_path(file_path)
34 |         
35 |         if not os.path.exists(file_path):
36 |             return [types.TextContent(
37 |                 type="text",
38 |                 text=f"Error: File not found at path: {file_path}"
39 |             )]
40 |         
41 |         if not file_path.lower().endswith(('.xlsx', '.xls', '.xlsm')):
42 |             return [types.TextContent(
43 |                 type="text",
44 |                 text=f"Error: File is not an Excel file: {file_path}"
45 |             )]
46 |         
47 |         try:
48 |             # 读取Excel文件中的所有sheet
49 |             excel_file = pd.ExcelFile(file_path)
50 |             sheet_names = excel_file.sheet_names
51 |             
52 |             result = {
53 |                 "file_name": os.path.basename(file_path),
54 |                 "sheet_count": len(sheet_names),
55 |                 "sheets": {}
56 |             }
57 |             
58 |             # 解析每个sheet
59 |             for sheet_name in sheet_names:
60 |                 df = pd.read_excel(excel_file, sheet_name=sheet_name)
61 |                 
62 |                 # 将DataFrame转换为字典
63 |                 sheet_data = df.to_dict(orient='records')
64 |                 
65 |                 # 获取列名
66 |                 columns = df.columns.tolist()
67 |                 
68 |                 # 获取行数和列数
69 |                 row_count = len(df)
70 |                 column_count = len(columns)
71 |                 
72 |                 result["sheets"][sheet_name] = {
73 |                     "row_count": row_count,
74 |                     "column_count": column_count,
75 |                     "columns": columns,
76 |                     "data": sheet_data
77 |                 }
78 |             
79 |             # 将结果转换为JSON字符串，并格式化输出
80 |             result_json = json.dumps(result, ensure_ascii=False, indent=2, default=str)
81 |             
82 |             return [types.TextContent(
83 |                 type="text",
84 |                 text=result_json
85 |             )]
86 |             
87 |         except Exception as e:
88 |             return [types.TextContent(
89 |                 type="text",
90 |                 text=f"Error: Failed to parse Excel file: {str(e)}"
91 |             )] 


--------------------------------------------------------------------------------
/mcp_tool/tools/csv_tool.py:
--------------------------------------------------------------------------------
  1 | """
  2 | CSV文件处理工具
  3 | """
  4 | 
  5 | import os
  6 | import pandas as pd
  7 | import traceback
  8 | from typing import Dict, List, Any
  9 | import mcp.types as types
 10 | from . import BaseTool, ToolRegistry
 11 | 
 12 | @ToolRegistry.register
 13 | class CsvTool(BaseTool):
 14 |     """
 15 |     CSV文件处理工具，用于解析CSV文件内容
 16 |     """
 17 |     
 18 |     name = "parse_csv"
 19 |     description = "解析CSV文件内容，支持各种编码格式"
 20 |     input_schema = {
 21 |         "type": "object",
 22 |         "required": ["file_path"],
 23 |         "properties": {
 24 |             "file_path": {
 25 |                 "type": "string",
 26 |                 "description": "CSV文件的本地路径，例如'/path/to/data.csv'",
 27 |             },
 28 |             "encoding": {
 29 |                 "type": "string",
 30 |                 "description": "文件编码格式，例如'utf-8'、'gbk'等，默认自动检测",
 31 |             }
 32 |         },
 33 |     }
 34 |     
 35 |     async def execute(self, arguments: Dict[str, Any]) -> List[types.TextContent]:
 36 |         """
 37 |         解析CSV文件内容
 38 |         
 39 |         Args:
 40 |             arguments: 参数字典，必须包含'file_path'键，可选'encoding'键
 41 |         
 42 |         Returns:
 43 |             解析结果列表
 44 |         """
 45 |         if "file_path" not in arguments:
 46 |             return [types.TextContent(
 47 |                 type="text",
 48 |                 text="错误: 缺少必要参数 'file_path'"
 49 |             )]
 50 |         
 51 |         file_path = arguments["file_path"]
 52 |         # 处理文件路径，支持挂载目录的转换
 53 |         file_path = self.process_file_path(file_path)
 54 |         
 55 |         if not os.path.exists(file_path):
 56 |             return [types.TextContent(
 57 |                 type="text",
 58 |                 text=f"错误: 文件不存在: {file_path}"
 59 |             )]
 60 |         
 61 |         try:
 62 |             # 尝试自动检测编码
 63 |             encoding = arguments.get("encoding", None)
 64 |             if encoding is None:
 65 |                 try:
 66 |                     import chardet
 67 |                     with open(file_path, 'rb') as f:
 68 |                         raw_data = f.read()
 69 |                         encoding = chardet.detect(raw_data)['encoding']
 70 |                 except ImportError:
 71 |                     encoding = 'utf-8'  # 如果没有chardet，默认使用utf-8
 72 |             
 73 |             # 读取CSV文件
 74 |             df = pd.read_csv(file_path, encoding=encoding)
 75 |             
 76 |             # 获取基本信息
 77 |             info = {
 78 |                 "文件名": os.path.basename(file_path),
 79 |                 "行数": len(df),
 80 |                 "列数": len(df.columns),
 81 |                 "列名": list(df.columns),
 82 |                 "数据预览": df.head().to_string()
 83 |             }
 84 |             
 85 |             # 生成描述性统计
 86 |             stats = df.describe().to_string()
 87 |             
 88 |             # 组合结果
 89 |             result = (
 90 |                 f"CSV文件解析结果:\n\n"
 91 |                 f"基本信息:\n"
 92 |                 f"- 文件名: {info['文件名']}\n"
 93 |                 f"- 行数: {info['行数']}\n"
 94 |                 f"- 列数: {info['列数']}\n"
 95 |                 f"- 列名: {', '.join(info['列名'])}\n\n"
 96 |                 f"数据预览:\n{info['数据预览']}\n\n"
 97 |                 f"描述性统计:\n{stats}"
 98 |             )
 99 |             
100 |             return [types.TextContent(
101 |                 type="text",
102 |                 text=result
103 |             )]
104 |             
105 |         except Exception as e:
106 |             error_details = traceback.format_exc()
107 |             return [types.TextContent(
108 |                 type="text",
109 |                 text=f"错误: 处理CSV文件时发生错误: {str(e)}\n{error_details}"
110 |             )] 


--------------------------------------------------------------------------------
/mcp_tool/tools/file_tool.py:
--------------------------------------------------------------------------------
  1 | '''
  2 | Author: 刘彦志 lyzgithub@163.com
  3 | Date: 2025-03-11 11:25:58
  4 | LastEditors: 刘彦志 lyzgithub@163.com
  5 | LastEditTime: 2025-04-01 17:54:16
  6 | FilePath: /mcp-framework/mcp_tool/tools/file_tool.py
  7 | Description: 这是默认设置,请设置`customMade`, 打开koroFileHeader查看配置 进行设置: https://github.com/OBKoro1/koro1FileHeader/wiki/%E9%85%8D%E7%BD%AE
  8 | '''
  9 | """
 10 | 综合文件处理工具，根据文件类型自动选择合适的处理方式
 11 | """
 12 | 
 13 | import os
 14 | import traceback
 15 | from typing import Dict, List, Any
 16 | import mcp.types as types
 17 | from . import BaseTool, ToolRegistry
 18 | from .pdf_tool import PdfTool
 19 | from .word_tool import WordTool
 20 | from .excel_tool import ExcelTool
 21 | from .csv_tool import CsvTool
 22 | from .markdown_tool import MarkdownTool
 23 | 
 24 | @ToolRegistry.register
 25 | class FileTool(BaseTool):
 26 |     """
 27 |     综合文件处理工具，根据文件扩展名自动选择合适的处理方式
 28 |     支持的文件类型：
 29 |     - PDF文件 (.pdf)
 30 |     - Word文档 (.doc, .docx)
 31 |     - Excel文件 (.xls, .xlsx, .xlsm)
 32 |     - CSV文件 (.csv)
 33 |     - Markdown文件 (.md)
 34 |     """
 35 |     
 36 |     name = "parse_file"
 37 |     description = "解析文件内容，支持PDF、Word、Excel、CSV和Markdown格式"
 38 |     input_schema = {
 39 |         "type": "object",
 40 |         "required": ["file_path"],
 41 |         "properties": {
 42 |             "file_path": {
 43 |                 "type": "string",
 44 |                 "description": "文件的本地路径，例如'/path/to/document.pdf'",
 45 |             }
 46 |         },
 47 |     }
 48 |     
 49 |     def __init__(self):
 50 |         """初始化各种文件处理工具"""
 51 |         super().__init__()
 52 |         self.pdf_tool = PdfTool()
 53 |         self.word_tool = WordTool()
 54 |         self.excel_tool = ExcelTool()
 55 |         self.csv_tool = CsvTool()
 56 |         self.markdown_tool = MarkdownTool()
 57 |     
 58 |     async def execute(self, arguments: Dict[str, Any]) -> List[types.TextContent | types.ImageContent | types.EmbeddedResource]:
 59 |         """
 60 |         解析文件内容
 61 |         
 62 |         Args:
 63 |             arguments: 参数字典，必须包含'file_path'键
 64 |         
 65 |         Returns:
 66 |             解析结果列表
 67 |         """
 68 |         if "file_path" not in arguments:
 69 |             return [types.TextContent(
 70 |                 type="text",
 71 |                 text="错误: 缺少必要参数 'file_path'"
 72 |             )]
 73 |         
 74 |         file_path = arguments["file_path"]
 75 |         # 处理文件路径，支持挂载目录的转换
 76 |         file_path = self.process_file_path(file_path)
 77 |         
 78 |         if not os.path.exists(file_path):
 79 |             return [types.TextContent(
 80 |                 type="text",
 81 |                 text=f"错误: 文件不存在: {file_path}"
 82 |             )]
 83 |         
 84 |         # 获取文件扩展名（转换为小写）
 85 |         file_ext = os.path.splitext(file_path)[1].lower()
 86 |         
 87 |         try:
 88 |             # 根据文件扩展名选择处理工具
 89 |             if file_ext == '.pdf':
 90 |                 return await self.pdf_tool.execute(arguments)
 91 |             elif file_ext in ['.doc', '.docx']:
 92 |                 return await self.word_tool.execute(arguments)
 93 |             elif file_ext in ['.xls', '.xlsx', '.xlsm']:
 94 |                 return await self.excel_tool.execute(arguments)
 95 |             elif file_ext == '.csv':
 96 |                 return await self.csv_tool.execute(arguments)
 97 |             elif file_ext == '.md':
 98 |                 return await self.markdown_tool.execute(arguments)
 99 |             else:
100 |                 return [types.TextContent(
101 |                     type="text",
102 |                     text=f"错误: 不支持的文件类型: {file_ext}"
103 |                 )]
104 |         except Exception as e:
105 |             error_details = traceback.format_exc()
106 |             return [types.TextContent(
107 |                 type="text",
108 |                 text=f"错误: 处理文件时发生错误: {str(e)}\n{error_details}"
109 |             )] 


--------------------------------------------------------------------------------
/.lh/mcp_simple_tool/tools/loader.py.json:
--------------------------------------------------------------------------------
 1 | {
 2 |     "sourceFile": "mcp_simple_tool/tools/loader.py",
 3 |     "activeCommit": 0,
 4 |     "commits": [
 5 |         {
 6 |             "activePatchIndex": 3,
 7 |             "patches": [
 8 |                 {
 9 |                     "date": 1741333350811,
10 |                     "content": "Index: \n===================================================================\n--- \n+++ \n"
11 |                 },
12 |                 {
13 |                     "date": 1741333377660,
14 |                     "content": "Index: \n===================================================================\n--- \n+++ \n@@ -1,4 +1,12 @@\n+'''\n+Author: 刘彦志 yanzhiliu@trip.com\n+Date: 2025-03-07 15:42:27\n+LastEditors: 刘彦志 yanzhiliu@trip.com\n+LastEditTime: 2025-03-07 15:42:56\n+FilePath: /mcp-development-framework/mcp_simple_tool/tools/loader.py\n+Description: 这是默认设置,请设置`customMade`, 打开koroFileHeader查看配置 进行设置: https://github.com/OBKoro1/koro1FileHeader/wiki/%E9%85%8D%E7%BD%AE\n+'''\n \"\"\"\n 工具加载器模块，用于自动加载所有已注册的工具\n \"\"\"\n import importlib\n"
15 |                 },
16 |                 {
17 |                     "date": 1741335072941,
18 |                     "content": "Index: \n===================================================================\n--- \n+++ \n@@ -1,9 +1,9 @@\n '''\n Author: 刘彦志 yanzhiliu@trip.com\n Date: 2025-03-07 15:42:27\n LastEditors: 刘彦志 yanzhiliu@trip.com\n-LastEditTime: 2025-03-07 15:42:56\n+LastEditTime: 2025-03-07 16:11:12\n FilePath: /mcp-development-framework/mcp_simple_tool/tools/loader.py\n Description: 这是默认设置,请设置`customMade`, 打开koroFileHeader查看配置 进行设置: https://github.com/OBKoro1/koro1FileHeader/wiki/%E9%85%8D%E7%BD%AE\n '''\n \"\"\"\n"
19 |                 },
20 |                 {
21 |                     "date": 1741335095758,
22 |                     "content": "Index: \n===================================================================\n--- \n+++ \n@@ -1,9 +1,9 @@\n '''\n Author: 刘彦志 yanzhiliu@trip.com\n Date: 2025-03-07 15:42:27\n LastEditors: 刘彦志 yanzhiliu@trip.com\n-LastEditTime: 2025-03-07 16:11:12\n+LastEditTime: 2025-03-07 16:11:35\n FilePath: /mcp-development-framework/mcp_simple_tool/tools/loader.py\n Description: 这是默认设置,请设置`customMade`, 打开koroFileHeader查看配置 进行设置: https://github.com/OBKoro1/koro1FileHeader/wiki/%E9%85%8D%E7%BD%AE\n '''\n \"\"\"\n"
23 |                 }
24 |             ],
25 |             "date": 1741333350811,
26 |             "name": "Commit-0",
27 |             "content": "\"\"\"\n工具加载器模块，用于自动加载所有已注册的工具\n\"\"\"\nimport importlib\nimport pkgutil\nimport inspect\nimport os\nimport sys\nfrom typing import List, Type\nfrom . import BaseTool, ToolRegistry\n\ndef load_tools() -> List[Type[BaseTool]]:\n    \"\"\"\n    自动加载tools目录下的所有工具模块\n    \n    Returns:\n        List[Type[BaseTool]]: 已加载的工具类列表\n    \"\"\"\n    # 获取当前模块的路径\n    package_path = os.path.dirname(__file__)\n    \n    # 获取所有子模块\n    tool_modules = []\n    for _, name, is_pkg in pkgutil.iter_modules([package_path]):\n        # 跳过__init__.py和loader.py\n        if name in ['__init__', 'loader']:\n            continue\n        \n        # 导入模块\n        module_name = f\"{__package__}.{name}\"\n        try:\n            module = importlib.import_module(module_name)\n            tool_modules.append(module)\n        except ImportError as e:\n            print(f\"Warning: Failed to import module {module_name}: {e}\")\n    \n    # 收集所有已注册的工具类\n    tools = list(ToolRegistry._tools.values())\n    \n    # 返回工具类列表\n    return tools\n\ndef get_tool_instances() -> dict:\n    \"\"\"\n    创建所有工具类的实例\n    \n    Returns:\n        dict: 工具名称到工具实例的映射\n    \"\"\"\n    tools = load_tools()\n    tool_instances = {}\n    \n    for tool_class in tools:\n        try:\n            tool_instance = tool_class()\n            tool_instances[tool_class.name] = tool_instance\n        except Exception as e:\n            print(f\"Warning: Failed to instantiate tool {tool_class.name}: {e}\")\n    \n    return tool_instances "
28 |         }
29 |     ]
30 | }


--------------------------------------------------------------------------------
/mcp_tool/server.py:
--------------------------------------------------------------------------------
  1 | import anyio
  2 | import click
  3 | import mcp.types as types
  4 | from mcp.server.lowlevel import Server
  5 | import os
  6 | import httpx
  7 | from starlette.responses import JSONResponse
  8 | from urllib.parse import parse_qs
  9 | 
 10 | # 导入工具注册器和工具加载器
 11 | from .tools import ToolRegistry
 12 | from .tools.loader import get_tool_instances
 13 | 
 14 | @click.command()
 15 | @click.option("--port", default=8000, help="Port to listen on for SSE")
 16 | @click.option(
 17 |     "--transport",
 18 |     type=click.Choice(["stdio", "sse"]),
 19 |     default="stdio",
 20 |     help="Transport type",
 21 | )
 22 | def main(port: int, transport: str) -> int:
 23 |     app = Server("mcp-website-fetcher")
 24 |     
 25 |     # 加载所有工具实例
 26 |     tool_instances = get_tool_instances()
 27 | 
 28 |     @app.call_tool()
 29 |     async def fetch_tool( # type: ignore[unused-function]
 30 |         name: str, arguments: dict
 31 |     ) -> list[types.TextContent | types.ImageContent | types.EmbeddedResource]:
 32 |         if ToolRegistry.has_tool(name):
 33 |             tool_instance = tool_instances.get(name)
 34 |             if tool_instance:
 35 |                 try:
 36 |                     return await tool_instance.execute(arguments)
 37 |                 except Exception as e:
 38 |                     import traceback
 39 |                     error_details = traceback.format_exc()
 40 |                     return [types.TextContent(
 41 |                         type="text",
 42 |                         text=f"Error executing tool {name}: {str(e)}\n{error_details}"
 43 |                     )]
 44 |             else:
 45 |                 return [types.TextContent(
 46 |                     type="text",
 47 |                     text=f"Error: Tool instance for {name} not found"
 48 |                 )]
 49 |         else:
 50 |             return [types.TextContent(
 51 |                 type="text",
 52 |                 text=f"Error: Unknown tool: {name}"
 53 |             )]
 54 | 
 55 |     @app.list_tools()
 56 |     async def list_tools() -> list[types.Tool]: # type: ignore[unused-function]
 57 |         return ToolRegistry.list_tools()
 58 | 
 59 |     if transport == "sse":
 60 |         from mcp.server.sse import SseServerTransport
 61 |         from starlette.applications import Starlette
 62 |         from starlette.routing import Mount, Route
 63 |         from starlette.middleware import Middleware
 64 |         from starlette.middleware.cors import CORSMiddleware
 65 | 
 66 |         sse = SseServerTransport("/messages/")
 67 | 
 68 |         # 鉴权函数
 69 |         async def verify_auth(request):
 70 |             """验证请求的鉴权信息"""
 71 |             # 获取鉴权地址，默认为 http://170.106.105.206:4000/users
 72 |             auth_url = os.environ.get("MCP_AUTH_URL", "http://170.106.105.206:4000/users")
 73 |             
 74 |             # 从URL查询参数中获取token
 75 |             query_params = parse_qs(request.scope.get("query_string", b"").decode())
 76 |             token = query_params.get("token", [None])[0]
 77 |             
 78 |             if not token:
 79 |                 return False, "Token parameter is missing in URL"
 80 |             
 81 |             try:
 82 |                 # 构建Authorization头
 83 |                 auth_header = f"Bearer {token}"
 84 |                 
 85 |                 # 发送请求到鉴权服务
 86 |                 async with httpx.AsyncClient() as client:
 87 |                     headers = {"Authorization": auth_header}
 88 |                     response = await client.get(auth_url, headers=headers, timeout=10.0)
 89 |                     
 90 |                     # 检查响应状态码
 91 |                     if response.status_code == 200:
 92 |                         return True, "Authentication successful"
 93 |                     else:
 94 |                         return False, f"Authentication failed with status code: {response.status_code}"
 95 |             except Exception as e:
 96 |                 return False, f"Authentication error: {str(e)}"
 97 | 
 98 |         async def handle_sse(request):
 99 |             # 验证鉴权
100 |             is_authenticated, message = await verify_auth(request)
101 |             if not is_authenticated:
102 |                 return JSONResponse(
103 |                     status_code=401,
104 |                     content={"error": "Unauthorized", "message": message}
105 |                 )
106 |             
107 |             # 增加超时时间，以便处理大型文件
108 |             request.scope["timeout"] = 300  # 设置为5分钟
109 |             async with sse.connect_sse(
110 |                 request.scope, request.receive, request._send
111 |             ) as streams:
112 |                 await app.run(
113 |                     streams[0], streams[1], app.create_initialization_options()
114 |                 )
115 | 
116 |         # 添加CORS中间件以允许跨域请求
117 |         middleware = [
118 |             Middleware(
119 |                 CORSMiddleware,
120 |                 allow_origins=["*"],
121 |                 allow_methods=["*"],
122 |                 allow_headers=["*"],
123 |             )
124 |         ]
125 | 
126 |         starlette_app = Starlette(
127 |             debug=True,
128 |             routes=[
129 |                 Route("/sse", endpoint=handle_sse),
130 |                 Mount("/messages/", app=sse.handle_post_message),
131 |             ],
132 |             middleware=middleware,
133 |         )
134 | 
135 |         import uvicorn
136 | 
137 |         # 增加uvicorn的超时设置
138 |         uvicorn.run(
139 |             starlette_app, 
140 |             host="0.0.0.0", 
141 |             port=port,
142 |             timeout_keep_alive=300,  # 增加保持连接的超时时间
143 |         )
144 |     else:
145 |         from mcp.server.stdio import stdio_server
146 | 
147 |         async def arun():
148 |             async with stdio_server() as streams:
149 |                 await app.run(
150 |                     streams[0], streams[1], app.create_initialization_options()
151 |                 )
152 | 
153 |         anyio.run(arun)
154 | 
155 |     return 0
156 | 


--------------------------------------------------------------------------------
/.lh/mcp_simple_tool/tools/word_tool.py.json:
--------------------------------------------------------------------------------
 1 | {
 2 |     "sourceFile": "mcp_simple_tool/tools/word_tool.py",
 3 |     "activeCommit": 0,
 4 |     "commits": [
 5 |         {
 6 |             "activePatchIndex": 0,
 7 |             "patches": [
 8 |                 {
 9 |                     "date": 1741337157526,
10 |                     "content": "Index: \n===================================================================\n--- \n+++ \n"
11 |                 }
12 |             ],
13 |             "date": 1741337157526,
14 |             "name": "Commit-0",
15 |             "content": "\"\"\"\nWord文档解析工具，用于解析Word文档内容\n\"\"\"\n\nimport os\nimport traceback\nfrom typing import Dict, List, Any\nimport docx\nimport mcp.types as types\nfrom . import BaseTool, ToolRegistry\n\n@ToolRegistry.register\nclass WordTool(BaseTool):\n    \"\"\"\n    用于解析Word文档的工具，提取文本内容、表格和图片信息\n    \"\"\"\n    \n    name = \"word\"\n    description = \"解析Word文档内容，提取文本、表格和图片信息\"\n    input_schema = {\n        \"type\": \"object\",\n        \"required\": [\"file_path\"],\n        \"properties\": {\n            \"file_path\": {\n                \"type\": \"string\",\n                \"description\": \"Word文档的本地路径，例如'/path/to/document.docx'\",\n            }\n        },\n    }\n    \n    async def execute(self, arguments: Dict[str, Any]) -> List[types.TextContent | types.ImageContent | types.EmbeddedResource]:\n        \"\"\"\n        解析Word文档\n        \n        Args:\n            arguments: 参数字典，必须包含'file_path'键\n            \n        Returns:\n            Word文档内容列表\n        \"\"\"\n        if \"file_path\" not in arguments:\n            return [types.TextContent(\n                type=\"text\",\n                text=\"错误: 缺少必要参数 'file_path'\"\n            )]\n        \n        return await self._parse_word_document(arguments[\"file_path\"])\n    \n    async def _parse_word_document(self, file_path: str) -> List[types.TextContent | types.ImageContent | types.EmbeddedResource]:\n        \"\"\"\n        解析Word文档内容\n        \n        Args:\n            file_path: Word文档路径\n            \n        Returns:\n            Word文档内容列表\n        \"\"\"\n        results = []\n        \n        # 检查文件是否存在\n        if not os.path.exists(file_path):\n            return [types.TextContent(\n                type=\"text\",\n                text=f\"错误: 文件不存在: {file_path}\\n请检查路径是否正确，并确保文件可访问。\"\n            )]\n        \n        # 检查文件扩展名\n        if not file_path.lower().endswith(('.docx', '.doc')):\n            return [types.TextContent(\n                
type=\"text\",\n                text=f\"错误: 不支持的文件格式: {file_path}\\n仅支持.docx和.doc格式的Word文档。\"\n            )]\n        \n        try:\n            # 添加文件信息\n            file_size_mb = os.path.getsize(file_path) / (1024 * 1024)\n            results.append(types.TextContent(\n                type=\"text\",\n                text=f\"# Word文档解析\\n\\n文件大小: {file_size_mb:.2f} MB\"\n            ))\n            \n            # 打开Word文档\n            doc = docx.Document(file_path)\n            \n            # 提取文档属性\n            properties = {}\n            if hasattr(doc.core_properties, 'title') and doc.core_properties.title:\n                properties['标题'] = doc.core_properties.title\n            if hasattr(doc.core_properties, 'author') and doc.core_properties.author:\n                properties['作者'] = doc.core_properties.author\n            if hasattr(doc.core_properties, 'created') and doc.core_properties.created:\n                properties['创建时间'] = str(doc.core_properties.created)\n            if hasattr(doc.core_properties, 'modified') and doc.core_properties.modified:\n                properties['修改时间'] = str(doc.core_properties.modified)\n            if hasattr(doc.core_properties, 'comments') and doc.core_properties.comments:\n                properties['备注'] = doc.core_properties.comments\n            \n            # 添加文档属性信息\n            if properties:\n                properties_text = \"## 文档属性\\n\\n\"\n                for key, value in properties.items():\n                    properties_text += f\"- {key}: {value}\\n\"\n                results.append(types.TextContent(\n                    type=\"text\",\n                    text=properties_text\n                ))\n            \n            # 提取文档内容\n            content_text = \"## 文档内容\\n\\n\"\n            \n            # 处理段落\n            paragraphs_count = len(doc.paragraphs)\n            content_text += f\"### 段落 (共{paragraphs_count}个)\\n\\n\"\n            \n            for i, para in 
enumerate(doc.paragraphs):\n                if para.text.strip():  # 只处理非空段落\n                    content_text += f\"{para.text}\\n\\n\"\n            \n            # 处理表格\n            tables_count = len(doc.tables)\n            if tables_count > 0:\n                content_text += f\"### 表格 (共{tables_count}个)\\n\\n\"\n                \n                for i, table in enumerate(doc.tables):\n                    content_text += f\"#### 表格 {i+1}\\n\\n\"\n                    \n                    # 创建Markdown表格\n                    rows = []\n                    for row in table.rows:\n                        cells = [cell.text.replace('\\n', ' ').strip() for cell in row.cells]\n                        rows.append(cells)\n                    \n                    if rows:\n                        # 表头\n                        content_text += \"| \" + \" | \".join(rows[0]) + \" |\\n\"\n                        # 分隔线\n                        content_text += \"| \" + \" | \".join([\"---\"] * len(rows[0])) + \" |\\n\"\n                        # 表格内容\n                        for row in rows[1:]:\n                            content_text += \"| \" + \" | \".join(row) + \" |\\n\"\n                        \n                        content_text += \"\\n\"\n            \n            # 添加文档内容\n            results.append(types.TextContent(\n                type=\"text\",\n                text=content_text\n            ))\n            \n            # 提取图片信息\n            try:\n                # 计算文档中的图片数量\n                image_count = 0\n                for rel in doc.part.rels.values():\n                    if \"image\" in rel.target_ref:\n                        image_count += 1\n                \n                if image_count > 0:\n                    image_info = f\"## 图片信息\\n\\n文档中包含 {image_count} 张图片。\\n\\n\"\n                    image_info += \"注意：当前仅提供图片数量信息，不提取图片内容。如需查看图片，请直接打开原始文档。\\n\"\n                    \n                    results.append(types.TextContent(\n         
               type=\"text\",\n                        text=image_info\n                    ))\n            except Exception as img_error:\n                results.append(types.TextContent(\n                    type=\"text\",\n                    text=f\"警告: 提取图片信息时出错: {str(img_error)}\"\n                ))\n            \n            # 添加处理完成的提示\n            results.append(types.TextContent(\n                type=\"text\",\n                text=\"Word文档处理完成！\"\n            ))\n            \n            return results\n        except Exception as e:\n            error_details = traceback.format_exc()\n            return [types.TextContent(\n                type=\"text\",\n                text=f\"错误: 解析Word文档失败: {str(e)}\\n\"\n                     f\"可能的原因:\\n\"\n                     f\"1. 文件格式不兼容或已损坏\\n\"\n                     f\"2. 文件受密码保护\\n\"\n                     f\"3. 文件包含不支持的内容\\n\\n\"\n                     f\"详细错误信息: {error_details}\"\n            )] "
16 |         }
17 |     ]
18 | }


--------------------------------------------------------------------------------
/.lh/mcp_simple_tool/tools/utils/pdf_helpers.py.json:
--------------------------------------------------------------------------------
 1 | {
 2 |     "sourceFile": "mcp_simple_tool/tools/utils/pdf_helpers.py",
 3 |     "activeCommit": 0,
 4 |     "commits": [
 5 |         {
 6 |             "activePatchIndex": 1,
 7 |             "patches": [
 8 |                 {
 9 |                     "date": 1741332207363,
10 |                     "content": "Index: \n===================================================================\n--- \n+++ \n"
11 |                 },
12 |                 {
13 |                     "date": 1741332589773,
14 |                     "content": "Index: \n===================================================================\n--- \n+++ \n@@ -1,101 +1,1 @@\n-\"\"\"\n-PDF处理相关的辅助函数\n-\"\"\"\n-\n-import os\n-import tempfile\n-import fitz  # PyMuPDF\n-from typing import List, Dict, Any\n-from PIL import Image\n-\n-\n-async def extract_images_from_pdf(file_path: str, output_dir: str) -> List[Dict[str, Any]]:\n-    \"\"\"\n-    使用PyMuPDF (fitz) 从PDF中提取图片，这比pdf2image更高效且能提取嵌入图片\n-    \n-    Args:\n-        file_path: PDF文件路径\n-        output_dir: 图片输出目录\n-        \n-    Returns:\n-        提取的图片信息列表\n-    \"\"\"\n-    image_info = []\n-    \n-    try:\n-        # 打开PDF文件\n-        pdf_document = fitz.open(file_path)\n-        \n-        # 遍历每一页\n-        for page_index in range(len(pdf_document)):\n-            page = pdf_document[page_index]\n-            \n-            # 获取页面上的图片\n-            image_list = page.get_images(full=True)\n-            \n-            # 遍历页面上的每个图片\n-            for img_index, img in enumerate(image_list):\n-                xref = img[0]  # 图片的xref号\n-                base_image = pdf_document.extract_image(xref)\n-                image_bytes = base_image[\"image\"]\n-                image_ext = base_image[\"ext\"]  # 图片扩展名\n-                \n-                # 保存图片到文件\n-                image_filename = f\"page_{page_index + 1}_img_{img_index + 1}.{image_ext}\"\n-                image_path = os.path.join(output_dir, image_filename)\n-                \n-                with open(image_path, \"wb\") as img_file:\n-                    img_file.write(image_bytes)\n-                \n-                # 获取图片信息\n-                with Image.open(image_path) as pil_img:\n-                    width, height = pil_img.size\n-                    format_name = pil_img.format\n-                \n-                # 添加图片信息到列表\n-                image_info.append({\n-                    \"filename\": image_filename,\n-                    \"path\": image_path,\n-             
       \"page\": page_index + 1,\n-                    \"width\": width,\n-                    \"height\": height,\n-                    \"format\": format_name,\n-                    \"size_bytes\": len(image_bytes)\n-                })\n-        \n-        # 如果没有找到嵌入图片，尝试渲染页面为图片\n-        if not image_info:\n-            for page_index in range(len(pdf_document)):\n-                page = pdf_document[page_index]\n-                \n-                # 将页面渲染为图片\n-                pix = page.get_pixmap(matrix=fitz.Matrix(2, 2))  # 2x缩放以获得更好的质量\n-                image_filename = f\"page_{page_index + 1}_rendered.png\"\n-                image_path = os.path.join(output_dir, image_filename)\n-                \n-                # 保存渲染的图片\n-                pix.save(image_path)\n-                \n-                # 获取图片信息\n-                with Image.open(image_path) as pil_img:\n-                    width, height = pil_img.size\n-                    format_name = pil_img.format\n-                \n-                # 添加图片信息到列表\n-                image_info.append({\n-                    \"filename\": image_filename,\n-                    \"path\": image_path,\n-                    \"page\": page_index + 1,\n-                    \"width\": width,\n-                    \"height\": height,\n-                    \"format\": format_name,\n-                    \"size_bytes\": os.path.getsize(image_path),\n-                    \"type\": \"rendered_page\"\n-                })\n-        \n-        pdf_document.close()\n-        return image_info\n-    \n-    except Exception as e:\n-        print(f\"提取图片时出错: {str(e)}\")\n-        return [] \n\\ No newline at end of file\n+ \n\\ No newline at end of file\n"
15 |                 }
16 |             ],
17 |             "date": 1741332207363,
18 |             "name": "Commit-0",
19 |             "content": "\"\"\"\nPDF处理相关的辅助函数\n\"\"\"\n\nimport os\nimport tempfile\nimport fitz  # PyMuPDF\nfrom typing import List, Dict, Any\nfrom PIL import Image\n\n\nasync def extract_images_from_pdf(file_path: str, output_dir: str) -> List[Dict[str, Any]]:\n    \"\"\"\n    使用PyMuPDF (fitz) 从PDF中提取图片，这比pdf2image更高效且能提取嵌入图片\n    \n    Args:\n        file_path: PDF文件路径\n        output_dir: 图片输出目录\n        \n    Returns:\n        提取的图片信息列表\n    \"\"\"\n    image_info = []\n    \n    try:\n        # 打开PDF文件\n        pdf_document = fitz.open(file_path)\n        \n        # 遍历每一页\n        for page_index in range(len(pdf_document)):\n            page = pdf_document[page_index]\n            \n            # 获取页面上的图片\n            image_list = page.get_images(full=True)\n            \n            # 遍历页面上的每个图片\n            for img_index, img in enumerate(image_list):\n                xref = img[0]  # 图片的xref号\n                base_image = pdf_document.extract_image(xref)\n                image_bytes = base_image[\"image\"]\n                image_ext = base_image[\"ext\"]  # 图片扩展名\n                \n                # 保存图片到文件\n                image_filename = f\"page_{page_index + 1}_img_{img_index + 1}.{image_ext}\"\n                image_path = os.path.join(output_dir, image_filename)\n                \n                with open(image_path, \"wb\") as img_file:\n                    img_file.write(image_bytes)\n                \n                # 获取图片信息\n                with Image.open(image_path) as pil_img:\n                    width, height = pil_img.size\n                    format_name = pil_img.format\n                \n                # 添加图片信息到列表\n                image_info.append({\n                    \"filename\": image_filename,\n                    \"path\": image_path,\n                    \"page\": page_index + 1,\n                    \"width\": width,\n                    \"height\": height,\n                    \"format\": format_name,\n                
    \"size_bytes\": len(image_bytes)\n                })\n        \n        # 如果没有找到嵌入图片，尝试渲染页面为图片\n        if not image_info:\n            for page_index in range(len(pdf_document)):\n                page = pdf_document[page_index]\n                \n                # 将页面渲染为图片\n                pix = page.get_pixmap(matrix=fitz.Matrix(2, 2))  # 2x缩放以获得更好的质量\n                image_filename = f\"page_{page_index + 1}_rendered.png\"\n                image_path = os.path.join(output_dir, image_filename)\n                \n                # 保存渲染的图片\n                pix.save(image_path)\n                \n                # 获取图片信息\n                with Image.open(image_path) as pil_img:\n                    width, height = pil_img.size\n                    format_name = pil_img.format\n                \n                # 添加图片信息到列表\n                image_info.append({\n                    \"filename\": image_filename,\n                    \"path\": image_path,\n                    \"page\": page_index + 1,\n                    \"width\": width,\n                    \"height\": height,\n                    \"format\": format_name,\n                    \"size_bytes\": os.path.getsize(image_path),\n                    \"type\": \"rendered_page\"\n                })\n        \n        pdf_document.close()\n        return image_info\n    \n    except Exception as e:\n        print(f\"提取图片时出错: {str(e)}\")\n        return [] "
20 |         }
21 |     ]
22 | }


--------------------------------------------------------------------------------
/.lh/mcp_simple_tool/core/sse_adapter.py.json:
--------------------------------------------------------------------------------
 1 | {
 2 |     "sourceFile": "mcp_simple_tool/core/sse_adapter.py",
 3 |     "activeCommit": 0,
 4 |     "commits": [
 5 |         {
 6 |             "activePatchIndex": 1,
 7 |             "patches": [
 8 |                 {
 9 |                     "date": 1741332352813,
10 |                     "content": "Index: \n===================================================================\n--- \n+++ \n"
11 |                 },
12 |                 {
13 |                     "date": 1741332593156,
14 |                     "content": "Index: \n===================================================================\n--- \n+++ \n@@ -1,125 +1,1 @@\n-\"\"\"\n-SSE适配器，用于处理MCP与SSE的交互\n-\"\"\"\n-\n-import anyio\n-from mcp.server.lowlevel import Server\n-from mcp.server.sse import SseServerTransport\n-from starlette.applications import Starlette\n-from starlette.routing import Mount, Route\n-from starlette.middleware import Middleware\n-from starlette.middleware.cors import CORSMiddleware\n-import uvicorn\n-from typing import List, Optional, Dict, Any\n-import mcp.types as types\n-from ..tools import BaseTool\n-\n-\n-class SseAdapter:\n-    \"\"\"\n-    SSE适配器，用于处理MCP与SSE的交互\n-    \"\"\"\n-    \n-    def __init__(self, app_name: str, tools: List[BaseTool], host: str = \"0.0.0.0\", port: int = 8000):\n-        \"\"\"\n-        初始化SSE适配器\n-        \n-        Args:\n-            app_name: 应用名称\n-            tools: 工具列表\n-            host: 主机地址\n-            port: 端口号\n-        \"\"\"\n-        self.app_name = app_name\n-        self.tools = tools\n-        self.host = host\n-        self.port = port\n-        self.app = Server(app_name)\n-        \n-    def setup(self):\n-        \"\"\"\n-        设置MCP服务器\n-        \"\"\"\n-        # 注册工具调用函数\n-        @self.app.call_tool()\n-        async def fetch_tool(name: str, arguments: dict) -> List[types.TextContent | types.ImageContent | types.EmbeddedResource]:\n-            \"\"\"\n-            工具调用函数\n-            \n-            Args:\n-                name: 工具名称\n-                arguments: 工具参数\n-                \n-            Returns:\n-                工具执行结果\n-            \"\"\"\n-            # 查找匹配的工具\n-            for tool in self.tools:\n-                if tool.name == name:\n-                    return await tool.execute(arguments)\n-            \n-            # 如果没有找到匹配的工具，返回错误信息\n-            return [types.TextContent(\n-                type=\"text\",\n-                text=f\"错误: 未知工具: {name}\"\n-            )]\n-     
   \n-        # 注册工具列表函数\n-        @self.app.list_tools()\n-        async def list_tools() -> List[types.Tool]:\n-            \"\"\"\n-            列出所有可用的工具\n-            \n-            Returns:\n-                工具定义列表\n-            \"\"\"\n-            return [tool.to_tool_definition() for tool in self.tools]\n-    \n-    def run(self):\n-        \"\"\"\n-        运行SSE服务器\n-        \"\"\"\n-        # 设置MCP服务器\n-        self.setup()\n-        \n-        # 创建SSE传输\n-        sse = SseServerTransport(\"/messages/\")\n-        \n-        # 处理SSE请求\n-        async def handle_sse(request):\n-            # 增加超时时间，以便处理大型文件\n-            request.scope[\"timeout\"] = 300  # 设置为5分钟\n-            async with sse.connect_sse(\n-                request.scope, request.receive, request._send\n-            ) as streams:\n-                await self.app.run(\n-                    streams[0], streams[1], self.app.create_initialization_options()\n-                )\n-        \n-        # 添加CORS中间件以允许跨域请求\n-        middleware = [\n-            Middleware(\n-                CORSMiddleware,\n-                allow_origins=[\"*\"],\n-                allow_methods=[\"*\"],\n-                allow_headers=[\"*\"],\n-            )\n-        ]\n-        \n-        # 创建Starlette应用\n-        starlette_app = Starlette(\n-            debug=True,\n-            routes=[\n-                Route(\"/sse\", endpoint=handle_sse),\n-                Mount(\"/messages/\", app=sse.handle_post_message),\n-            ],\n-            middleware=middleware,\n-        )\n-        \n-        # 运行服务器\n-        uvicorn.run(\n-            starlette_app, \n-            host=self.host, \n-            port=self.port,\n-            timeout_keep_alive=300,  # 增加保持连接的超时时间\n-        ) \n\\ No newline at end of file\n+ \n\\ No newline at end of file\n"
15 |                 }
16 |             ],
17 |             "date": 1741332352813,
18 |             "name": "Commit-0",
19 |             "content": "\"\"\"\nSSE适配器，用于处理MCP与SSE的交互\n\"\"\"\n\nimport anyio\nfrom mcp.server.lowlevel import Server\nfrom mcp.server.sse import SseServerTransport\nfrom starlette.applications import Starlette\nfrom starlette.routing import Mount, Route\nfrom starlette.middleware import Middleware\nfrom starlette.middleware.cors import CORSMiddleware\nimport uvicorn\nfrom typing import List, Optional, Dict, Any\nimport mcp.types as types\nfrom ..tools import BaseTool\n\n\nclass SseAdapter:\n    \"\"\"\n    SSE适配器，用于处理MCP与SSE的交互\n    \"\"\"\n    \n    def __init__(self, app_name: str, tools: List[BaseTool], host: str = \"0.0.0.0\", port: int = 8000):\n        \"\"\"\n        初始化SSE适配器\n        \n        Args:\n            app_name: 应用名称\n            tools: 工具列表\n            host: 主机地址\n            port: 端口号\n        \"\"\"\n        self.app_name = app_name\n        self.tools = tools\n        self.host = host\n        self.port = port\n        self.app = Server(app_name)\n        \n    def setup(self):\n        \"\"\"\n        设置MCP服务器\n        \"\"\"\n        # 注册工具调用函数\n        @self.app.call_tool()\n        async def fetch_tool(name: str, arguments: dict) -> List[types.TextContent | types.ImageContent | types.EmbeddedResource]:\n            \"\"\"\n            工具调用函数\n            \n            Args:\n                name: 工具名称\n                arguments: 工具参数\n                \n            Returns:\n                工具执行结果\n            \"\"\"\n            # 查找匹配的工具\n            for tool in self.tools:\n                if tool.name == name:\n                    return await tool.execute(arguments)\n            \n            # 如果没有找到匹配的工具，返回错误信息\n            return [types.TextContent(\n                type=\"text\",\n                text=f\"错误: 未知工具: {name}\"\n            )]\n        \n        # 注册工具列表函数\n        @self.app.list_tools()\n        async def list_tools() -> List[types.Tool]:\n            \"\"\"\n            列出所有可用的工具\n            \n            
Returns:\n                工具定义列表\n            \"\"\"\n            return [tool.to_tool_definition() for tool in self.tools]\n    \n    def run(self):\n        \"\"\"\n        运行SSE服务器\n        \"\"\"\n        # 设置MCP服务器\n        self.setup()\n        \n        # 创建SSE传输\n        sse = SseServerTransport(\"/messages/\")\n        \n        # 处理SSE请求\n        async def handle_sse(request):\n            # 增加超时时间，以便处理大型文件\n            request.scope[\"timeout\"] = 300  # 设置为5分钟\n            async with sse.connect_sse(\n                request.scope, request.receive, request._send\n            ) as streams:\n                await self.app.run(\n                    streams[0], streams[1], self.app.create_initialization_options()\n                )\n        \n        # 添加CORS中间件以允许跨域请求\n        middleware = [\n            Middleware(\n                CORSMiddleware,\n                allow_origins=[\"*\"],\n                allow_methods=[\"*\"],\n                allow_headers=[\"*\"],\n            )\n        ]\n        \n        # 创建Starlette应用\n        starlette_app = Starlette(\n            debug=True,\n            routes=[\n                Route(\"/sse\", endpoint=handle_sse),\n                Mount(\"/messages/\", app=sse.handle_post_message),\n            ],\n            middleware=middleware,\n        )\n        \n        # 运行服务器\n        uvicorn.run(\n            starlette_app, \n            host=self.host, \n            port=self.port,\n            timeout_keep_alive=300,  # 增加保持连接的超时时间\n        ) "
20 |         }
21 |     ]
22 | }


--------------------------------------------------------------------------------
/mcp_tool/tools/markdown_tool.py:
--------------------------------------------------------------------------------
  1 | """
  2 | Markdown文件解析工具，用于解析和提取Markdown文件内容
  3 | """
  4 | 
  5 | import os
  6 | import traceback
  7 | from typing import Dict, List, Any
  8 | import mcp.types as types
  9 | from . import BaseTool, ToolRegistry
 10 | 
 11 | @ToolRegistry.register
 12 | class MarkdownTool(BaseTool):
 13 |     """
 14 |     用于解析Markdown文件的工具，提取文本内容、标题结构和列表等信息
 15 |     """
 16 |     
 17 |     name = "parse_markdown"
 18 |     description = "解析Markdown文件内容，提取标题结构、列表和文本内容"
 19 |     input_schema = {
 20 |         "type": "object",
 21 |         "required": ["file_path"],
 22 |         "properties": {
 23 |             "file_path": {
 24 |                 "type": "string",
 25 |                 "description": "Markdown文件的本地路径，例如'/path/to/document.md'",
 26 |             }
 27 |         },
 28 |     }
 29 |     
 30 |     async def execute(self, arguments: Dict[str, Any]) -> List[types.TextContent | types.ImageContent | types.EmbeddedResource]:
 31 |         """
 32 |         解析Markdown文件
 33 |         
 34 |         Args:
 35 |             arguments: 参数字典，必须包含'file_path'键
 36 |             
 37 |         Returns:
 38 |             解析结果列表
 39 |         """
 40 |         if "file_path" not in arguments:
 41 |             return [types.TextContent(
 42 |                 type="text",
 43 |                 text="错误: 缺少必要参数 'file_path'"
 44 |             )]
 45 |         
 46 |         # 处理文件路径，支持挂载目录的转换
 47 |         file_path = self.process_file_path(arguments["file_path"])
 48 |         
 49 |         return await self._parse_markdown_file(file_path)
 50 |     
 51 |     async def _parse_markdown_file(self, file_path: str) -> List[types.TextContent | types.ImageContent | types.EmbeddedResource]:
 52 |         """
 53 |         解析Markdown文件内容
 54 |         
 55 |         Args:
 56 |             file_path: Markdown文件路径
 57 |             
 58 |         Returns:
 59 |             Markdown文件内容解析结果列表
 60 |         """
 61 |         results = []
 62 |         
 63 |         # 检查文件是否存在
 64 |         if not os.path.exists(file_path):
 65 |             return [types.TextContent(
 66 |                 type="text",
 67 |                 text=f"错误: 文件不存在: {file_path}\n请检查路径是否正确，并确保文件可访问。"
 68 |             )]
 69 |         
 70 |         # 检查文件扩展名
 71 |         if not file_path.lower().endswith('.md'):
 72 |             return [types.TextContent(
 73 |                 type="text",
 74 |                 text=f"错误: 不支持的文件格式: {file_path}\n仅支持.md格式的Markdown文件。"
 75 |             )]
 76 |         
 77 |         try:
 78 |             # 添加文件信息
 79 |             file_size_kb = os.path.getsize(file_path) / 1024
 80 |             results.append(types.TextContent(
 81 |                 type="text",
 82 |                 text=f"# Markdown文件解析\n\n文件大小: {file_size_kb:.2f} KB"
 83 |             ))
 84 |             
 85 |             # 读取文件内容
 86 |             with open(file_path, 'r', encoding='utf-8') as file:
 87 |                 content = file.read()
 88 |             
 89 |             # 基本文件信息
 90 |             file_info = f"## 文件基本信息\n\n"
 91 |             file_info += f"- 文件名: {os.path.basename(file_path)}\n"
 92 |             file_info += f"- 路径: {file_path}\n"
 93 |             file_info += f"- 大小: {file_size_kb:.2f} KB\n"
 94 |             file_info += f"- 最后修改时间: {os.path.getmtime(file_path)}\n"
 95 |             
 96 |             results.append(types.TextContent(
 97 |                 type="text",
 98 |                 text=file_info
 99 |             ))
100 |             
101 |             # 解析Markdown内容结构
102 |             structure = self._analyze_markdown_structure(content)
103 |             results.append(types.TextContent(
104 |                 type="text",
105 |                 text=structure
106 |             ))
107 |             
108 |             # 添加原始内容
109 |             results.append(types.TextContent(
110 |                 type="text",
111 |                 text=f"## 原始Markdown内容\n\n```markdown\n{content}\n```"
112 |             ))
113 |             
114 |             # 添加处理完成的提示
115 |             results.append(types.TextContent(
116 |                 type="text",
117 |                 text="Markdown文件处理完成！"
118 |             ))
119 |             
120 |             return results
121 |         except Exception as e:
122 |             error_details = traceback.format_exc()
123 |             return [types.TextContent(
124 |                 type="text",
125 |                 text=f"错误: 解析Markdown文件失败: {str(e)}\n"
126 |                      f"可能的原因:\n"
127 |                      f"1. 文件编码不兼容\n"
128 |                      f"2. 文件已损坏\n"
129 |                      f"3. 文件内容格式异常\n\n"
130 |                      f"详细错误信息: {error_details}"
131 |             )]
132 |     
133 |     def _analyze_markdown_structure(self, content: str) -> str:
134 |         """
135 |         分析Markdown文件结构
136 |         
137 |         Args:
138 |             content: Markdown文件内容
139 |             
140 |         Returns:
141 |             结构分析结果
142 |         """
143 |         lines = content.split('\n')
144 |         
145 |         # 分析标题
146 |         headings = {
147 |             "h1": [],
148 |             "h2": [],
149 |             "h3": [],
150 |             "h4": [],
151 |             "h5": [],
152 |             "h6": []
153 |         }
154 |         
155 |         # 计数
156 |         code_blocks = 0
157 |         lists = 0
158 |         links = 0
159 |         images = 0
160 |         tables = 0
161 |         
162 |         in_code_block = False
163 |         
164 |         for line in lines:
165 |             line = line.strip()
166 |             
167 |             # 检测代码块
168 |             if line.startswith('```'):
169 |                 in_code_block = not in_code_block
170 |                 if not in_code_block:
171 |                     code_blocks += 1
172 |                 continue
173 |                 
174 |             if in_code_block:
175 |                 continue
176 |                 
177 |             # 检测标题
178 |             if line.startswith('# '):
179 |                 headings["h1"].append(line[2:])
180 |             elif line.startswith('## '):
181 |                 headings["h2"].append(line[3:])
182 |             elif line.startswith('### '):
183 |                 headings["h3"].append(line[4:])
184 |             elif line.startswith('#### '):
185 |                 headings["h4"].append(line[5:])
186 |             elif line.startswith('##### '):
187 |                 headings["h5"].append(line[6:])
188 |             elif line.startswith('###### '):
189 |                 headings["h6"].append(line[7:])
190 |                 
191 |             # 检测列表
192 |             if line.startswith('- ') or line.startswith('* ') or line.startswith('+ ') or \
193 |                (line and line[0].isdigit() and '.' in line[:3]):
194 |                 lists += 1
195 |                 
196 |             # 检测链接和图片
197 |             if '](' in line:
198 |                 if line.count('![') > 0:
199 |                     images += line.count('![')
200 |                 links += line.count('](') - line.count('![')
201 |                 
202 |             # 检测表格
203 |             if line.startswith('|') and line.endswith('|'):
204 |                 tables += 1
205 |                 
206 |         # 生成结构报告
207 |         structure = "## Markdown结构分析\n\n"
208 |         
209 |         # 标题结构
210 |         structure += "### 标题结构\n\n"
211 |         has_headings = False
212 |         for level, titles in headings.items():
213 |             if titles:
214 |                 has_headings = True
215 |                 indent = "  " * (int(level[1]) - 1)
216 |                 for title in titles:
217 |                     structure += f"{indent}- {title}\n"
218 |                     
219 |         if not has_headings:
220 |             structure += "文档中未检测到标题结构\n"
221 |             
222 |         # 内容元素统计
223 |         structure += "\n### 内容元素统计\n\n"
224 |         structure += f"- 代码块: {code_blocks} 个\n"
225 |         structure += f"- 列表项: {lists} 个\n"
226 |         structure += f"- 链接: {links} 个\n"
227 |         structure += f"- 图片: {images} 个\n"
228 |         structure += f"- 表格行: {tables} 行\n"
229 |         
230 |         return structure 


--------------------------------------------------------------------------------
/mcp_tool/tools/pdf_tool.py:
--------------------------------------------------------------------------------
  1 | """
  2 | PDF解析工具，用于解析PDF文件内容，支持快速预览和完整解析两种模式
  3 | """
  4 | 
  5 | import os
  6 | import tempfile
  7 | import shutil
  8 | import fitz  # PyMuPDF
  9 | import PyPDF2
 10 | import pymupdf4llm
 11 | import traceback
 12 | from typing import Dict, List, Any
 13 | import mcp.types as types
 14 | from . import BaseTool, ToolRegistry
 15 | from PIL import Image
 16 | import io
 17 | import pytesseract
 18 | import base64
 19 | import imghdr
 20 | 
 21 | @ToolRegistry.register
 22 | class PdfTool(BaseTool):
 23 |     """
 24 |     PDF解析工具，支持两种模式：
 25 |     1. 快速预览模式：仅提取文本内容，适用于大型PDF文件
 26 |     2. 完整解析模式：提取文本和图片内容，提供更详细的文档分析
 27 |     """
 28 |     
 29 |     name = "parse_pdf"
 30 |     description = "解析PDF文件内容，支持快速预览和完整解析两种模式"
 31 |     input_schema = {
 32 |         "type": "object",
 33 |         "required": ["file_path"],
 34 |         "properties": {
 35 |             "file_path": {
 36 |                 "type": "string",
 37 |                 "description": "PDF文件的本地路径，例如'/path/to/document.pdf'",
 38 |             },
 39 |             "mode": {
 40 |                 "type": "string",
 41 |                 "description": "解析模式：'quick'（仅文本）或'full'（文本和图片），默认为'full'",
 42 |                 "enum": ["quick", "full"],
 43 |                 "default": "full"
 44 |             }
 45 |         },
 46 |     }
 47 |     
 48 |     async def execute(self, arguments: Dict[str, Any]) -> List[types.TextContent | types.ImageContent | types.EmbeddedResource]:
 49 |         """
 50 |         解析PDF文件
 51 |         
 52 |         Args:
 53 |             arguments: 参数字典，必须包含'file_path'键，可选'mode'键
 54 |         
 55 |         Returns:
 56 |             解析结果列表
 57 |         """
 58 |         if "file_path" not in arguments:
 59 |             return [types.TextContent(
 60 |                 type="text",
 61 |                 text="错误: 缺少必要参数 'file_path'"
 62 |             )]
 63 |         
 64 |         file_path = arguments["file_path"]
 65 |         # 处理文件路径，支持挂载目录的转换
 66 |         file_path = self.process_file_path(file_path)
 67 |         
 68 |         if not os.path.exists(file_path):
 69 |             return [types.TextContent(
 70 |                 type="text",
 71 |                 text=f"错误: 文件不存在: {file_path}"
 72 |             )]
 73 |         
 74 |         if not file_path.lower().endswith('.pdf'):
 75 |             return [types.TextContent(
 76 |                 type="text",
 77 |                 text=f"错误: 文件不是PDF格式: {file_path}"
 78 |             )]
 79 |         
 80 |         mode = arguments.get("mode", "full")
 81 |         
 82 |         if mode == "quick":
 83 |             return await self._quick_preview_pdf(file_path)
 84 |         else:
 85 |             return await self._full_parse_pdf(file_path)
 86 |     
 87 |     async def _quick_preview_pdf(self, file_path: str) -> List[types.TextContent | types.ImageContent | types.EmbeddedResource]:
 88 |         """
 89 |         快速预览PDF文件，仅提取文本内容
 90 |         """
 91 |         try:
 92 |             # 使用PyMuPDF提取文本
 93 |             doc = fitz.open(file_path)
 94 |             text_content = []
 95 |             
 96 |             # 添加文件信息
 97 |             text_content.append(f"文件名: {os.path.basename(file_path)}")
 98 |             text_content.append(f"页数: {doc.page_count}")
 99 |             text_content.append("---")
100 |             
101 |             # 提取每页文本
102 |             for page_num in range(doc.page_count):
103 |                 page = doc[page_num]
104 |                 text = page.get_text()
105 |                 if text.strip():
106 |                     text_content.append(f"第{page_num + 1}页:")
107 |                     text_content.append(text)
108 |                     text_content.append("---")
109 |             
110 |             doc.close()
111 |             
112 |             return [types.TextContent(
113 |                 type="text",
114 |                 text="\n".join(text_content)
115 |             )]
116 |             
117 |         except Exception as e:
118 |             error_details = traceback.format_exc()
119 |             return [types.TextContent(
120 |                 type="text",
121 |                 text=f"错误: 快速预览PDF时发生错误: {str(e)}\n{error_details}"
122 |             )]
123 |     
124 |     def _get_image_mime_type(self, image_bytes: bytes) -> str:
125 |         """
126 |         获取图片的MIME类型
127 |         """
128 |         image_type = imghdr.what(None, image_bytes)
129 |         if image_type:
130 |             return f"image/{image_type}"
131 |         return "image/png"  # 默认返回PNG类型
132 | 
133 |     def _encode_image_base64(self, image_bytes: bytes) -> str:
134 |         """
135 |         将图片编码为base64格式
136 |         """
137 |         return base64.b64encode(image_bytes).decode('utf-8')
138 | 
139 |     async def _analyze_image(self, image_bytes: bytes, lang: str = 'chi_sim+eng') -> str:
140 |         """
141 |         分析图片内容，识别文字和场景
142 | 
143 |         Args:
144 |             image_bytes: 图片的二进制数据
145 |             lang: OCR语言，默认中文简体+英文
146 | 
147 |         Returns:
148 |             str: 图片分析结果
149 |         """
150 |         try:
151 |             # 将二进制数据转换为PIL Image对象
152 |             image = Image.open(io.BytesIO(image_bytes))
153 |             
154 |             # 进行OCR文字识别
155 |             text = pytesseract.image_to_string(image, lang=lang)
156 |             
157 |             # 如果识别出文字，返回结果
158 |             if text.strip():
159 |                 return f"图片中识别出的文字：\n{text.strip()}"
160 |             else:
161 |                 return "未在图片中识别出文字"
162 |                 
163 |         except Exception as e:
164 |             return f"图片分析失败: {str(e)}"
165 | 
166 |     async def _full_parse_pdf(self, file_path: str) -> List[types.TextContent | types.ImageContent | types.EmbeddedResource]:
167 |         """
168 |         完整解析PDF文件，提取文本和图片内容
169 |         """
170 |         results = []
171 |         
172 |         try:
173 |             # 使用PyMuPDF提取文本和图片
174 |             doc = fitz.open(file_path)
175 |             
176 |             # 添加文件信息
177 |             results.append(types.TextContent(
178 |                 type="text",
179 |                 text=f"文件名: {os.path.basename(file_path)}\n页数: {doc.page_count}\n---"
180 |             ))
181 |             
182 |             # 处理每一页
183 |             for page_num in range(doc.page_count):
184 |                 page = doc[page_num]
185 |                 
186 |                 # 提取文本
187 |                 text = page.get_text()
188 |                 if text.strip():
189 |                     results.append(types.TextContent(
190 |                         type="text",
191 |                         text=f"第{page_num + 1}页:\n{text}\n---"
192 |                     ))
193 |                 
194 |                 # 提取图片
195 |                 image_list = page.get_images()
196 |                 if image_list:
197 |                     results.append(types.TextContent(
198 |                         type="text",
199 |                         text=f"第{page_num + 1}页包含{len(image_list)}张图片"
200 |                     ))
201 |                     
202 |                     # 处理各页的图片
203 |                     skipped_images = 0
204 |                     successful_images = 0
205 |                     
206 |                     for img_idx, img_info in enumerate(image_list):
207 |                         try:
208 |                             xref = img_info[0]
209 |                             base_image = doc.extract_image(xref)
210 |                             image_bytes = base_image["image"]
211 |                             
212 |                             # 获取图片MIME类型并检查是否支持
213 |                             mime_type = self._get_image_mime_type(image_bytes)
214 |                             supported_mime_types = ["image/jpeg", "image/png", "image/gif", "image/webp"]
215 |                             
216 |                             # 如果格式不受支持，则跳过该图片
217 |                             if mime_type not in supported_mime_types:
218 |                                 skipped_images += 1
219 |                                 continue
220 |                             
221 |                             # 添加图片OCR识别结果
222 |                             image_analysis = await self._analyze_image(image_bytes)
223 |                             results.append(types.TextContent(
224 |                                 type="text",
225 |                                 text=f"第{page_num + 1}页 图片{successful_images + 1}分析结果：\n{image_analysis}\n---"
226 |                             ))
227 |                             
228 |                             # 添加图片内容，直接返回图片而非只返回OCR文本
229 |                             image_base64 = self._encode_image_base64(image_bytes)
230 |                             results.append(types.ImageContent(
231 |                                 type="image",
232 |                                 data=image_base64,
233 |                                 mimeType=mime_type
234 |                             ))
235 |                             
236 |                             successful_images += 1
237 |                         except Exception:
238 |                             # 捕获所有异常，但不中断处理流程
239 |                             skipped_images += 1
240 |                     
241 |                     # 如果有跳过的图片，添加简单提示
242 |                     if skipped_images > 0:
243 |                         results.append(types.TextContent(
244 |                             type="text",
245 |                             text=f"注意: 第{page_num + 1}页有 {skipped_images} 张图片因格式问题已跳过处理。"
246 |                         ))
247 |             
248 |             doc.close()
249 |             return results
250 |             
251 |         except Exception as e:
252 |             error_details = traceback.format_exc()
253 |             return [types.TextContent(
254 |                 type="text",
255 |                 text=f"错误: 完整解析PDF时发生错误: {str(e)}\n{error_details}"
256 |             )] 


--------------------------------------------------------------------------------
/mcp_tool/tools/maxkb_tool.py:
--------------------------------------------------------------------------------
  1 | """
  2 | MaxKB工具，用于请求MaxKB API并处理返回结果
  3 | """
  4 | 
  5 | import httpx
  6 | import json
  7 | import os
  8 | import traceback
  9 | import mcp.types as types
 10 | from . import BaseTool, ToolRegistry
 11 | import logging
 12 | import asyncio
 13 | import socket
 14 | 
# Configure logging: root logger at DEBUG level, plus this module's logger.
logging.basicConfig(level=logging.DEBUG)
logger = logging.getLogger(__name__)

# Set timeout-related environment variables for httpcore / httpx.
# NOTE(review): nothing in this file shows that httpx/httpcore read these
# variables — confirm; the request code below passes an explicit httpx.Timeout.
os.environ['HTTPCORE_TIMEOUT'] = '60'
os.environ['HTTPX_TIMEOUT'] = '60'
 22 | 
 23 | @ToolRegistry.register
 24 | class MaxKbTool(BaseTool):
 25 |     """MaxKB API请求工具"""
 26 |     name = "maxkb"
 27 |     description = "请求MaxKB API并返回处理后的结果"
 28 |     input_schema = {
 29 |         "type": "object",
 30 |         "required": ["message"],
 31 |         "properties": {
 32 |             "message": {
 33 |                 "type": "string",
 34 |                 "description": "要发送的消息内容",
 35 |             },
 36 |             "re_chat": {
 37 |                 "type": "boolean",
 38 |                 "description": "是否重新开始对话",
 39 |                 "default": False
 40 |             },
 41 |             "stream": {
 42 |                 "type": "boolean",
 43 |                 "description": "是否使用流式响应",
 44 |                 "default": True
 45 |             }
 46 |         },
 47 |     }
 48 |     
 49 |     def _check_env_variables(self):
 50 |         """检查必要的环境变量是否存在"""
 51 |         required_vars = [
 52 |             'MAXKB_HOST',
 53 |             'MAXKB_CHAT_ID',
 54 |             'MAXKB_APPLICATION_ID',
 55 |             'MAXKB_AUTHORIZATION'
 56 |         ]
 57 |         env_values = {}
 58 |         missing_vars = []
 59 |         for var in required_vars:
 60 |             value = os.getenv(var)
 61 |             if not value:
 62 |                 missing_vars.append(var)
 63 |             else:
 64 |                 env_values[var] = value
 65 |                 
 66 |         if missing_vars:
 67 |             raise ValueError(f"缺少必要的环境变量: {', '.join(missing_vars)}")
 68 |             
 69 |         logger.debug(f"环境变量检查通过: {env_values}")
 70 |         return env_values
 71 |     
 72 |     async def execute(self, arguments: dict) -> list[types.TextContent | types.ImageContent | types.EmbeddedResource]:
 73 |         """执行API请求并处理返回结果"""
 74 |         try:
 75 |             # 等待服务器初始化完成
 76 |             logger.debug("等待服务器初始化...")
 77 |             await asyncio.sleep(2)
 78 |             
 79 |             logger.debug(f"收到请求参数: {arguments}")
 80 |             
 81 |             if "message" not in arguments:
 82 |                 return [types.TextContent(
 83 |                     type="text",
 84 |                     text="错误: 缺少必要参数 'message'"
 85 |                 )]
 86 |                 
 87 |             # 检查环境变量
 88 |             env_vars = self._check_env_variables()
 89 |             
 90 |             # 准备请求参数
 91 |             url = f"{env_vars['MAXKB_HOST']}/api/application/chat_message/{env_vars['MAXKB_CHAT_ID']}"
 92 |             headers = {
 93 |                 "accept": "application/json",
 94 |                 "AUTHORIZATION": env_vars['MAXKB_AUTHORIZATION'],
 95 |                 "Content-Type": "application/json"
 96 |             }
 97 |             data = {
 98 |                 "message": arguments["message"],
 99 |                 "re_chat": arguments.get("re_chat", False),
100 |                 "stream": arguments.get("stream", True)
101 |             }
102 |             
103 |             logger.debug(f"准备发送请求到: {url}")
104 |             logger.debug(f"请求头: {headers}")
105 |             logger.debug(f"请求数据: {data}")
106 |             
107 |             try:
108 |                 # 发送请求
109 |                 logger.debug("开始创建HTTP客户端，超时设置为60秒...")
110 |                 limits = httpx.Limits(max_keepalive_connections=5, max_connections=10, keepalive_expiry=60.0)
111 |                 timeout = httpx.Timeout(
112 |                     timeout=60.0,
113 |                     connect=60.0,
114 |                     read=60.0,
115 |                     write=60.0,
116 |                     pool=60.0
117 |                 )
118 |                 async with httpx.AsyncClient(
119 |                     timeout=timeout,
120 |                     limits=limits,
121 |                     transport=httpx.AsyncHTTPTransport(
122 |                         retries=1,
123 |                         verify=False,
124 |                         http1=True,
125 |                         http2=False,
126 |                         socket_options=[(socket.SOL_SOCKET, socket.SO_KEEPALIVE, 1)]
127 |                     )
128 |                 ) as client:
129 |                     logger.debug("开始发送POST请求...")
130 |                     try:
131 |                         response = await client.post(url, headers=headers, json=data)
132 |                         logger.debug(f"收到响应状态码: {response.status_code}")
133 |                         logger.debug(f"响应头: {response.headers}")
134 |                         
135 |                         # 处理响应内容
136 |                         content_parts = []
137 |                         last_content = ""
138 |                         received_data = []
139 |                         
140 |                         async for line in response.aiter_lines():
141 |                             logger.debug(f"收到行: {line}")
142 |                             if line.startswith('data: '):
143 |                                 try:
144 |                                     # 解析JSON数据
145 |                                     data = json.loads(line[6:])  # 去掉'data: '前缀
146 |                                     logger.debug(f"解析到的数据: {data}")
147 |                                     received_data.append(data)
148 |                                     
149 |                                     # 检查是否有非空的content
150 |                                     if isinstance(data, dict):
151 |                                         current_content = data.get("content", "")
152 |                                         if current_content and current_content != last_content:
153 |                                             last_content = current_content
154 |                                             content_parts.append(current_content)
155 |                                             logger.debug(f"添加新内容: {current_content}")
156 |                                         
157 |                                         # 检查reasoning_content
158 |                                         reasoning_content = data.get("reasoning_content", "")
159 |                                         if reasoning_content and reasoning_content != last_content:
160 |                                             last_content = reasoning_content
161 |                                             content_parts.append(reasoning_content)
162 |                                             logger.debug(f"添加推理内容: {reasoning_content}")
163 |                                             
164 |                                 except json.JSONDecodeError as e:
165 |                                     logger.error(f"JSON解析错误: {e}, 行内容: {line}")
166 |                                     continue  # 忽略无法解析的行
167 |                         
168 |                         # 拼接所有内容
169 |                         result = ''.join(content_parts) if content_parts else ""
170 |                         logger.debug(f"最终结果: {result}")
171 |                         
172 |                         if not result:
173 |                             logger.warning("未获取到有效内容")
174 |                             error_details = f"收到 {len(received_data)} 条数据"
175 |                             if received_data:
176 |                                 error_details += "\n最后一条数据:\n" + json.dumps(received_data[-1], ensure_ascii=False, indent=2)
177 |                             return [types.TextContent(
178 |                                 type="text",
179 |                                 text=f"请求错误: {error_details}"
180 |                             )]
181 |                         
182 |                         return [types.TextContent(
183 |                             type="text",
184 |                             text=result
185 |                         )]
186 |                         
187 |                     except httpx.TimeoutException as e:
188 |                         logger.error(f"请求超时: {str(e)}")
189 |                         logger.error(f"超时配置: {client.timeout}")
190 |                         return [types.TextContent(
191 |                             type="text",
192 |                             text=f"请求超时(60秒): {str(e)}"
193 |                         )]
194 |                     except httpx.ConnectError as e:
195 |                         logger.error(f"连接错误: {str(e)}")
196 |                         logger.error(f"目标URL: {url}")
197 |                         return [types.TextContent(
198 |                             type="text",
199 |                             text=f"连接错误({url}): {str(e)}"
200 |                         )]
201 |                     
202 |                     try:
203 |                         response.raise_for_status()
204 |                     except httpx.HTTPStatusError as e:
205 |                         logger.error(f"HTTP状态错误: {str(e)}")
206 |                         logger.error(f"响应状态码: {e.response.status_code}")
207 |                         logger.error(f"响应内容: {e.response.text}")
208 |                         return [types.TextContent(
209 |                             type="text",
210 |                             text=f"HTTP状态错误: {str(e)}"
211 |                         )]
212 |                         
213 |             except httpx.HTTPError as e:
214 |                 error_msg = f"HTTP请求错误: {str(e)}\n状态码: {getattr(e.response, 'status_code', 'N/A')}\n响应内容: {getattr(e.response, 'text', 'N/A')}"
215 |                 logger.error(error_msg)
216 |                 return [types.TextContent(
217 |                     type="text",
218 |                     text=error_msg
219 |                 )]
220 |                 
221 |         except ValueError as e:
222 |             error_msg = f"配置错误: {str(e)}"
223 |             logger.error(error_msg)
224 |             return [types.TextContent(
225 |                 type="text",
226 |                 text=error_msg
227 |             )]
228 |         except Exception as e:
229 |             error_msg = f"处理错误: {str(e)}\n{traceback.format_exc()}"
230 |             logger.error(error_msg)
231 |             return [types.TextContent(
232 |                 type="text",
233 |                 text=error_msg
234 |             )] 


--------------------------------------------------------------------------------
/.lh/mcp_simple_tool/tools/README.md.json:
--------------------------------------------------------------------------------
 1 | {
 2 |     "sourceFile": "mcp_simple_tool/tools/README.md",
 3 |     "activeCommit": 0,
 4 |     "commits": [
 5 |         {
 6 |             "activePatchIndex": 1,
 7 |             "patches": [
 8 |                 {
 9 |                     "date": 1741333431551,
10 |                     "content": "Index: \n===================================================================\n--- \n+++ \n"
11 |                 },
12 |                 {
13 |                     "date": 1741408412235,
14 |                     "content": "Index: \n===================================================================\n--- \n+++ \n@@ -10,28 +10,112 @@\n │   ├── __init__.py        # 定义工具基类和注册器\n │   ├── loader.py          # 工具加载器，自动加载所有工具\n │   ├── url_tool.py        # URL工具实现\n │   ├── pdf_tool.py        # PDF解析工具实现\n-│   └── quick_pdf_tool.py  # 快速PDF预览工具实现\n+│   ├── quick_pdf_tool.py  # 快速PDF预览工具实现\n+│   └── word_tool.py       # Word文档解析工具实现\n ├── __init__.py\n ├── __main__.py\n └── server.py              # MCP服务器实现\n ```\n \n+## 核心组件\n+\n+### 1. BaseTool 基类\n+\n+所有工具都继承自`BaseTool`基类，它定义了工具的基本接口：\n+\n+```python\n+class BaseTool:\n+    \"\"\"所有工具的基类\"\"\"\n+    name: str = \"\"                # 工具名称\n+    description: str = \"\"         # 工具描述\n+    input_schema: dict = {}       # 输入参数模式\n+    \n+    @classmethod\n+    def get_tool_definition(cls) -> types.Tool:\n+        \"\"\"获取工具定义\"\"\"\n+        return types.Tool(\n+            name=cls.name,\n+            description=cls.description,\n+            inputSchema=cls.input_schema\n+        )\n+    \n+    async def execute(self, arguments: dict) -> list[types.TextContent | types.ImageContent | types.EmbeddedResource]:\n+        \"\"\"执行工具逻辑，需要在子类中实现\"\"\"\n+        raise NotImplementedError(\"Tool implementation must override execute method\")\n+```\n+\n+### 2. 
ToolRegistry 工具注册器\n+\n+`ToolRegistry`负责管理所有已注册的工具：\n+\n+```python\n+class ToolRegistry:\n+    \"\"\"工具注册器，用于管理所有可用工具\"\"\"\n+    _tools: Dict[str, Type[BaseTool]] = {}\n+    \n+    @classmethod\n+    def register(cls, tool_class: Type[BaseTool]) -> Type[BaseTool]:\n+        \"\"\"注册工具\"\"\"\n+        cls._tools[tool_class.name] = tool_class\n+        return tool_class\n+    \n+    @classmethod\n+    def get_tool(cls, name: str) -> Type[BaseTool]:\n+        \"\"\"获取工具类\"\"\"\n+        if name not in cls._tools:\n+            raise ValueError(f\"Unknown tool: {name}\")\n+        return cls._tools[name]\n+    \n+    @classmethod\n+    def list_tools(cls) -> List[types.Tool]:\n+        \"\"\"列出所有可用工具\"\"\"\n+        return [tool_class.get_tool_definition() for tool_class in cls._tools.values()]\n+```\n+\n+### 3. 工具加载器\n+\n+`loader.py`中的工具加载器负责自动发现和加载所有工具：\n+\n+```python\n+def load_tools() -> List[Type[BaseTool]]:\n+    \"\"\"自动加载tools目录下的所有工具模块\"\"\"\n+    # 获取当前模块的路径\n+    package_path = os.path.dirname(__file__)\n+    \n+    # 获取所有子模块\n+    for _, name, is_pkg in pkgutil.iter_modules([package_path]):\n+        # 跳过__init__.py和loader.py\n+        if name in ['__init__', 'loader']:\n+            continue\n+        \n+        # 导入模块\n+        module_name = f\"{__package__}.{name}\"\n+        importlib.import_module(module_name)\n+    \n+    # 返回所有已注册的工具类\n+    return list(ToolRegistry._tools.values())\n+```\n+\n ## 如何开发新工具\n \n-1. 在`tools`目录下创建一个新的Python文件，如`your_tool.py`\n-2. 导入必要的依赖和基类\n-3. 创建一个继承自`BaseTool`的工具类\n-4. 使用`@ToolRegistry.register`装饰器注册工具\n-5. 实现工具的`execute`方法\n+### 步骤1: 创建工具文件\n \n-### 工具模板示例\n+在`tools`目录下创建一个新的Python文件，如`your_tool.py`。\n \n+### 步骤2: 导入必要的依赖\n+\n ```python\n import mcp.types as types\n from . 
import BaseTool, ToolRegistry\n+```\n \n+### 步骤3: 创建工具类\n+\n+创建一个继承自`BaseTool`的工具类，并使用`@ToolRegistry.register`装饰器注册工具：\n+\n+```python\n @ToolRegistry.register\n class YourTool(BaseTool):\n     \"\"\"您的工具描述\"\"\"\n     name = \"your_tool_name\"  # 工具的唯一标识符\n@@ -49,32 +133,147 @@\n                 \"description\": \"参数2的描述（可选）\",\n             }\n         },\n     }\n+```\n+\n+### 步骤4: 实现execute方法\n+\n+实现工具的`execute`方法，处理输入参数并返回结果：\n+\n+```python\n+async def execute(self, arguments: dict) -> list[types.TextContent | types.ImageContent | types.EmbeddedResource]:\n+    \"\"\"执行工具逻辑\"\"\"\n+    # 参数验证\n+    if \"param1\" not in arguments:\n+        return [types.TextContent(\n+            type=\"text\",\n+            text=\"Error: Missing required argument 'param1'\"\n+        )]\n+        \n+    # 获取参数\n+    param1 = arguments[\"param1\"]\n+    param2 = arguments.get(\"param2\", 0)  # 获取可选参数，提供默认值\n     \n+    # 执行工具逻辑\n+    result = f\"处理参数: {param1}, {param2}\"\n+    \n+    # 返回结果\n+    return [types.TextContent(\n+        type=\"text\",\n+        text=result\n+    )]\n+```\n+\n+## 返回类型\n+\n+工具可以返回以下类型的内容：\n+\n+1. **TextContent** - 文本内容\n+   ```python\n+   types.TextContent(type=\"text\", text=\"这是文本内容\")\n+   ```\n+\n+2. **ImageContent** - 图像内容\n+   ```python\n+   types.ImageContent(\n+       type=\"image\",\n+       format=\"jpeg\",\n+       data=base64.b64encode(image_data).decode(\"utf-8\")\n+   )\n+   ```\n+\n+3. **EmbeddedResource** - 嵌入资源\n+   ```python\n+   types.EmbeddedResource(\n+       type=\"embedded\",\n+       format=\"pdf\",\n+       data=base64.b64encode(pdf_data).decode(\"utf-8\")\n+   )\n+   ```\n+\n+## 最佳实践\n+\n+1. **参数验证**：始终验证必需的参数是否存在，并提供清晰的错误消息。\n+\n+2. **错误处理**：使用try-except块捕获异常，并返回用户友好的错误消息。\n+\n+3. **进度反馈**：对于长时间运行的任务，提供进度反馈。\n+\n+4. **资源清理**：确保在函数结束时清理所有资源（如临时文件）。\n+\n+5. **类型注解**：使用类型注解提高代码可读性和可维护性。\n+\n+6. 
**文档字符串**：为类和方法提供详细的文档字符串。\n+\n+## 示例工具\n+\n+### URL工具\n+\n+```python\n+@ToolRegistry.register\n+class UrlTool(BaseTool):\n+    \"\"\"URL获取工具，用于获取网站内容\"\"\"\n+    name = \"url\"\n+    description = \"Fetches a website and returns its content\"\n+    input_schema = {\n+        \"type\": \"object\",\n+        \"required\": [\"url\"],\n+        \"properties\": {\n+            \"url\": {\n+                \"type\": \"string\",\n+                \"description\": \"URL to fetch\",\n+            }\n+        },\n+    }\n+    \n     async def execute(self, arguments: dict) -> list[types.TextContent | types.ImageContent | types.EmbeddedResource]:\n-        \"\"\"执行工具逻辑\"\"\"\n-        # 参数验证\n-        if \"param1\" not in arguments:\n+        \"\"\"获取网站内容\"\"\"\n+        if \"url\" not in arguments:\n             return [types.TextContent(\n                 type=\"text\",\n-                text=\"Error: Missing required argument 'param1'\"\n+                text=\"Error: Missing required argument 'url'\"\n             )]\n             \n-        # 获取参数\n-        param1 = arguments[\"param1\"]\n-        param2 = arguments.get(\"param2\", 0)  # 获取可选参数，提供默认值\n-        \n-        # 执行工具逻辑\n-        result = f\"处理参数: {param1}, {param2}\"\n-        \n-        # 返回结果\n-        return [types.TextContent(\n-            type=\"text\",\n-            text=result\n-        )]\n+        url = arguments[\"url\"]\n+        # 实现获取网站内容的逻辑\n+        # ...\n+        return [types.TextContent(type=\"text\", text=response_text)]\n ```\n \n+### PDF工具\n+\n+```python\n+@ToolRegistry.register\n+class PdfTool(BaseTool):\n+    \"\"\"PDF解析工具，用于解析PDF文件并提取文本和图片\"\"\"\n+    name = \"file\"\n+    description = \"解析PDF文件并提取文本和图片内容\"\n+    input_schema = {\n+        \"type\": \"object\",\n+        \"required\": [\"file_path\"],\n+        \"properties\": {\n+            \"file_path\": {\n+                \"type\": \"string\",\n+                \"description\": \"PDF文件的本地路径，例如'/path/to/document.pdf'\",\n+       
     }\n+        },\n+    }\n+    \n+    async def execute(self, arguments: dict) -> list[types.TextContent | types.ImageContent | types.EmbeddedResource]:\n+        \"\"\"解析PDF文件并提取文本和图片\"\"\"\n+        if \"file_path\" not in arguments:\n+            return [types.TextContent(\n+                type=\"text\",\n+                text=\"Error: Missing required argument 'file_path'\"\n+            )]\n+            \n+        file_path = arguments[\"file_path\"]\n+        # 实现PDF解析逻辑\n+        # ...\n+        return results\n+```\n+\n ## 注意事项\n \n 1. 每个工具必须有一个唯一的`name`\n 2. 工具描述应该清晰地说明工具的用途和使用方法\n@@ -85,9 +284,5 @@\n ## 自动加载机制\n \n 框架使用`loader.py`中的自动加载机制，在启动时自动发现和加载所有工具。您只需按照上述模板创建新工具，它将在下次启动服务器时自动注册。\n \n-无需修改`server.py`或其他任何文件，框架会自动处理工具的注册和调用。\n-\n-## 扩展示例\n-\n-您可以参考现有的工具实现（`url_tool.py`, `pdf_tool.py`, `quick_pdf_tool.py`）作为开发新工具的参考。 \n\\ No newline at end of file\n+无需修改`server.py`或其他任何文件，框架会自动处理工具的注册和调用。 \n\\ No newline at end of file\n"
15 |                 }
16 |             ],
17 |             "date": 1741333431551,
18 |             "name": "Commit-0",
19 |             "content": "# SEE工具框架\n\n这是一个用于SEE（Standard Extension Environment）对接的模块化工具框架。该框架允许开发者轻松创建和扩展自定义工具，并通过MCP协议与模型交互。\n\n## 框架结构\n\n```\nmcp_simple_tool/\n├── tools/\n│   ├── __init__.py        # 定义工具基类和注册器\n│   ├── loader.py          # 工具加载器，自动加载所有工具\n│   ├── url_tool.py        # URL工具实现\n│   ├── pdf_tool.py        # PDF解析工具实现\n│   └── quick_pdf_tool.py  # 快速PDF预览工具实现\n├── __init__.py\n├── __main__.py\n└── server.py              # MCP服务器实现\n```\n\n## 如何开发新工具\n\n1. 在`tools`目录下创建一个新的Python文件，如`your_tool.py`\n2. 导入必要的依赖和基类\n3. 创建一个继承自`BaseTool`的工具类\n4. 使用`@ToolRegistry.register`装饰器注册工具\n5. 实现工具的`execute`方法\n\n### 工具模板示例\n\n```python\nimport mcp.types as types\nfrom . import BaseTool, ToolRegistry\n\n@ToolRegistry.register\nclass YourTool(BaseTool):\n    \"\"\"您的工具描述\"\"\"\n    name = \"your_tool_name\"  # 工具的唯一标识符\n    description = \"您的工具描述\"  # 工具的描述信息，将显示给用户\n    input_schema = {\n        \"type\": \"object\",\n        \"required\": [\"param1\"],  # 必需的参数\n        \"properties\": {\n            \"param1\": {\n                \"type\": \"string\",\n                \"description\": \"参数1的描述\",\n            },\n            \"param2\": {\n                \"type\": \"integer\",\n                \"description\": \"参数2的描述（可选）\",\n            }\n        },\n    }\n    \n    async def execute(self, arguments: dict) -> list[types.TextContent | types.ImageContent | types.EmbeddedResource]:\n        \"\"\"执行工具逻辑\"\"\"\n        # 参数验证\n        if \"param1\" not in arguments:\n            return [types.TextContent(\n                type=\"text\",\n                text=\"Error: Missing required argument 'param1'\"\n            )]\n            \n        # 获取参数\n        param1 = arguments[\"param1\"]\n        param2 = arguments.get(\"param2\", 0)  # 获取可选参数，提供默认值\n        \n        # 执行工具逻辑\n        result = f\"处理参数: {param1}, {param2}\"\n        \n        # 返回结果\n        return [types.TextContent(\n            type=\"text\",\n            text=result\n        
)]\n```\n\n## 注意事项\n\n1. 每个工具必须有一个唯一的`name`\n2. 工具描述应该清晰地说明工具的用途和使用方法\n3. 输入模式应该准确描述所需的参数和类型\n4. 所有参数验证应在`execute`方法中处理\n5. 工具应返回适当的`TextContent`或`ImageContent`对象列表\n\n## 自动加载机制\n\n框架使用`loader.py`中的自动加载机制，在启动时自动发现和加载所有工具。您只需按照上述模板创建新工具，它将在下次启动服务器时自动注册。\n\n无需修改`server.py`或其他任何文件，框架会自动处理工具的注册和调用。\n\n## 扩展示例\n\n您可以参考现有的工具实现（`url_tool.py`, `pdf_tool.py`, `quick_pdf_tool.py`）作为开发新工具的参考。 "
20 |         }
21 |     ]
22 | }


--------------------------------------------------------------------------------
/README.md:
--------------------------------------------------------------------------------
  1 | # MCP开发框架
  2 | [![Verified on MseeP](https://mseep.ai/badge.svg)](https://mseep.ai/app/34780cde-ee17-4a7b-b9ee-356f41fc9e77) [![smithery badge](https://smithery.ai/badge/@aigo666/mcp-framework)](https://smithery.ai/server/@aigo666/mcp-framework)
  3 | 
  4 | 一个强大的MCP（Model Context Protocol）开发框架，用于创建与大语言模型交互的自定义工具。该框架提供了一套完整的工具集，可以轻松地扩展Cursor IDE的功能，实现网页内容获取、文件处理（PDF、Word、Excel、CSV、Markdown）以及AI对话等高级功能。它具有强大的MCP工具扩展能力，使开发者能够快速构建和集成各种自定义工具。
  5 | 
  6 | <a href="https://glama.ai/mcp/servers/@aigo666/mcp-framework">
  7 |   <img width="380" height="200" src="https://glama.ai/mcp/servers/@aigo666/mcp-framework/badge" />
  8 | </a>
  9 | 
 10 | <details>
 11 | <summary>🔥 最新特性：文档图片内容显示与理解</summary>
 12 | 
 13 | 最新版本在处理PDF和Word文档时，支持直接返回原始图片内容并进行OCR识别，使大语言模型能够同时理解文档中的文本和图像内容：
 14 | 
 15 | - **图片内容直接显示**：文档中的图表、图像等可以直接在对话中显示，无需额外工具
 16 | - **OCR文本识别**：自动提取图片中的文字内容，支持中英文多语言
 17 | - **图片内容理解**：大模型可以"看到"文档中的图片，并基于图片内容进行分析和回答
 18 | - **完整文档内容返回**：真正实现文档的全内容理解，包括文本、表格和图像
 19 | 
 20 | 这使得AI模型能够更全面地理解和分析文档内容，特别是对于包含图表、表单、流程图或其他可视化信息的文档尤为有价值。
 21 | </details>
 22 | 
 23 | ## 主要功能
 24 | 
 25 | <details>
 26 | <summary>点击展开查看框架提供的核心功能</summary>
 27 | 
 28 | 本框架提供了以下核心功能：
 29 | 
 30 | ### 1. 综合文件处理
 31 | 
 32 | 使用`parse_file`工具可以自动识别文件类型并选择合适的处理方式，支持PDF、Word、Excel、CSV和Markdown文件。
 33 | 
 34 | - **用法**: `parse_file /path/to/document`
 35 | - **支持格式**: 
 36 |   - PDF文件 (.pdf)
 37 |   - Word文档 (.doc, .docx)
 38 |   - Excel文件 (.xls, .xlsx, .xlsm)
 39 |   - CSV文件 (.csv)
 40 |   - Markdown文件 (.md)
 41 | - **参数**: `file_path` - 文件的本地路径
 42 | - **返回**: 根据文件类型返回相应的处理结果
 43 | 
 44 | ### 2. PDF文档处理
 45 | 
 46 | 使用`parse_pdf`工具可以处理PDF文档，支持两种处理模式：
 47 | 
 48 | - **用法**: `parse_pdf /path/to/document.pdf [mode]`
 49 | - **参数**: 
 50 |   - `file_path` - PDF文件的本地路径
 51 |   - `mode` - 处理模式（可选）：
 52 |     - `quick` - 快速预览模式，仅提取文本内容
 53 |     - `full` - 完整解析模式，提取文本、图片内容和OCR文本（默认）
 54 | - **返回**: 
 55 |   - 快速预览模式：文档的文本内容
 56 |   - 完整解析模式：文档的文本内容、原始图片和OCR识别结果
 57 | 
 58 | ### 3. Word文档解析
 59 | 
 60 | 使用`parse_word`工具可以解析Word文档，提取文本、表格和图片信息。
 61 | 
 62 | - **用法**: `parse_word /path/to/document.docx`
 63 | - **功能**: 解析Word文档并提取文本内容、表格和图片
 64 | - **参数**: `file_path` - Word文档的本地路径
 65 | - **返回**: 文档的文本内容、表格和原始图片
 66 | - **特点**: 同时提供文档内嵌图像的显示和分析功能
 67 | 
 68 | ### 4. Excel文件处理
 69 | 
 70 | 使用`parse_excel`工具可以解析Excel文件，提供完整的表格数据和结构信息。
 71 | 
 72 | - **用法**: `parse_excel /path/to/spreadsheet.xlsx`
 73 | - **功能**: 解析Excel文件的所有工作表
 74 | - **参数**: `file_path` - Excel文件的本地路径
 75 | - **返回**: 
 76 |   - 文件基本信息（文件名、工作表数量）
 77 |   - 每个工作表的详细信息：
 78 |     - 行数和列数
 79 |     - 列名列表
 80 |     - 完整的表格数据
 81 | - **特点**: 
 82 |   - 使用pandas和openpyxl提供高质量的表格数据处理
 83 |   - 支持多工作表处理
 84 |   - 自动处理数据类型转换
 85 | 
 86 | ### 5. CSV文件处理
 87 | 
 88 | 使用`parse_csv`工具可以解析CSV文件，提供完整的数据分析和预览功能。
 89 | 
 90 | - **用法**: `parse_csv /path/to/data.csv`
 91 | - **功能**: 解析CSV文件并提供数据分析
 92 | - **参数**: 
 93 |   - `file_path` - CSV文件的本地路径
 94 |   - `encoding` - 文件编码格式（可选，默认自动检测）
 95 | - **返回**: 
 96 |   - 文件基本信息（文件名、行数、列数）
 97 |   - 列名列表
 98 |   - 数据预览（前5行）
 99 |   - 描述性统计信息
100 | - **特点**: 
101 |   - 自动编码检测
102 |   - 支持多种编码格式（UTF-8、GBK等）
103 |   - 提供数据统计分析
104 |   - 智能数据类型处理
105 | 
106 | ### 6. Markdown文件解析
107 | 
108 | 使用`parse_markdown`工具可以解析Markdown文件，提取文本内容、标题结构和列表等信息。
109 | 
110 | - **用法**: `parse_markdown /path/to/document.md`
111 | - **功能**: 解析Markdown文件并提取标题结构、列表和文本内容
112 | - **参数**: `file_path` - Markdown文件的本地路径
113 | - **返回**: 
114 |   - 文件基本信息（文件名、大小、修改时间等）
115 |   - 标题结构层级展示
116 |   - 内容元素统计（代码块、列表、链接、图片、表格等）
117 |   - 原始Markdown内容
118 | - **特点**: 
119 |   - 自动识别各级标题和结构
120 |   - 智能统计内容元素
121 |   - 完整的标题层级展示
122 | 
123 | ### 7. 网页内容获取
124 | 
125 | 使用`url`工具可以获取任何网页的内容。
126 | 
127 | - **用法**: `url https://example.com`
128 | - **参数**: `url` - 要获取内容的网站URL
129 | - **返回**: 网页的文本内容
130 | - **特点**: 
131 |   - 完整的HTTP错误处理
132 |   - 超时管理
133 |   - 自动编码处理
134 | 
135 | ### 8. MaxKB AI对话
136 | 
137 | 使用`maxkb`工具可以与MaxKB API进行交互，实现智能对话功能。
138 | 
139 | - **用法**: `maxkb "您的问题或指令"`
140 | - **功能**: 发送消息到MaxKB API并获取AI回复
141 | - **参数**: 
142 |   - `message` - 要发送的消息内容（必需）
143 |   - `re_chat` - 是否重新开始对话（可选，默认false）
144 |   - `stream` - 是否使用流式响应（可选，默认true）
145 | - **返回**: AI的回复内容
146 | - **特点**: 
147 |   - 支持流式响应
148 |   - 自动重试机制
149 |   - 完整的错误处理
150 |   - 60秒超时保护
151 |   - 保持连接配置优化
152 | 
153 | </details>
154 | 
155 | ## 技术特点
156 | 
157 | 本框架在文件处理、工具扩展、内存管理和错误处理等方面具有以下技术特点：
158 | 
159 | 1. **智能文件类型识别**
160 |    - 自动根据文件扩展名选择合适的处理工具
161 |    - 提供统一的文件处理接口
162 | 
163 | 2. **高效的文档处理**
164 |    - PDF处理：支持快速预览和完整解析两种模式
165 |    - Word处理：精确提取文本、表格和图片
166 |    - Excel处理：高效处理大型表格数据
167 | 
168 | 3. **强大的MCP工具扩展能力**
169 |    - 插件化架构设计，易于扩展
170 |    - 统一的工具注册和调用接口
171 |    - 支持同步和异步工具开发
172 |    - 丰富的工具开发API和辅助函数
173 | 
174 | 4. **内存优化**
175 |    - 使用临时文件管理大型文件
176 |    - 自动清理临时资源
177 |    - 分块处理大型文档
178 | 
179 | 5. **错误处理**
180 |    - 完整的异常捕获和处理
181 |    - 详细的错误信息反馈
182 |    - 优雅的失败处理机制
183 | 
184 | ## 项目结构
185 | 
186 | 本框架采用模块化设计，便于扩展和维护：
187 | 
188 | ```
189 | mcp_tool/
190 | ├── tools/
191 | │   ├── __init__.py        # 定义工具基类和注册器
192 | │   ├── loader.py          # 工具加载器，自动加载所有工具
193 | │   ├── file_tool.py       # 综合文件处理工具
194 | │   ├── pdf_tool.py        # PDF解析工具
195 | │   ├── word_tool.py       # Word文档解析工具
196 | │   ├── excel_tool.py      # Excel文件处理工具
197 | │   ├── csv_tool.py        # CSV文件处理工具
198 | │   ├── markdown_tool.py   # Markdown文件解析工具
199 | │   ├── url_tool.py        # URL工具实现
200 | │   └── maxkb_tool.py      # MaxKB AI对话工具
201 | ├── __init__.py
202 | ├── __main__.py
203 | └── server.py              # MCP服务器实现
204 | ```
205 | 
206 | ## 开发指南
207 | 
208 | ### 如何开发新工具
209 | 
210 | 1. 在`tools`目录下创建一个新的Python文件，如`your_tool.py`
211 | 2. 导入必要的依赖和基类
212 | 3. 创建一个继承自`BaseTool`的工具类
213 | 4. 使用`@ToolRegistry.register`装饰器注册工具
214 | 5. 实现工具的`execute`方法
215 | 
216 | ### 工具模板示例
217 | 
218 | ```python
219 | import mcp.types as types
220 | from . import BaseTool, ToolRegistry
221 | 
222 | @ToolRegistry.register
223 | class YourTool(BaseTool):
224 |     """您的工具描述"""
225 |     name = "your_tool_name"  # 工具的唯一标识符
226 |     description = "您的工具描述"  # 工具的描述信息，将显示给用户
227 |     input_schema = {
228 |         "type": "object",
229 |         "required": ["param1"],  # 必需的参数
230 |         "properties": {
231 |             "param1": {
232 |                 "type": "string",
233 |                 "description": "参数1的描述",
234 |             },
235 |             "param2": {
236 |                 "type": "integer",
237 |                 "description": "参数2的描述（可选）",
238 |             }
239 |         },
240 |     }
241 |   
242 |     async def execute(self, arguments: dict) -> list[types.TextContent | types.ImageContent | types.EmbeddedResource]:
243 |         """执行工具逻辑"""
244 |         # 参数验证
245 |         if "param1" not in arguments:
246 |             return [types.TextContent(
247 |                 type="text",
248 |                 text="Error: Missing required argument 'param1'"
249 |             )]
250 |           
251 |         # 获取参数
252 |         param1 = arguments["param1"]
253 |         param2 = arguments.get("param2", 0)  # 获取可选参数，提供默认值
254 |       
255 |         # 执行工具逻辑
256 |         result = f"处理参数: {param1}, {param2}"
257 |       
258 |         # 返回结果
259 |         return [types.TextContent(
260 |             type="text",
261 |             text=result
262 |         )]
263 | ```
264 | 
265 | ## 部署指南
266 | 
267 | ### 环境变量配置
268 | 
269 | 在`.env`文件中配置以下环境变量：
270 | 
271 | ```bash
272 | # Server Configuration
273 | MCP_SERVER_PORT=8000        # 服务器端口
274 | MCP_SERVER_HOST=0.0.0.0     # 服务器主机
275 | 
276 | # 鉴权配置
277 | MCP_AUTH_URL=http://170.106.105.206:4000/users  # 鉴权服务地址
278 | 
279 | # MaxKB配置
280 | MAXKB_HOST=http://host.docker.internal:8080  # MaxKB API主机地址
281 | MAXKB_CHAT_ID=your_chat_id_here              # MaxKB聊天ID
282 | MAXKB_APPLICATION_ID=your_application_id_here # MaxKB应用ID
283 | MAXKB_AUTHORIZATION=your_authorization_key    # MaxKB授权密钥
284 | 
285 | # 调试模式
286 | DEBUG=false                 # 是否启用调试模式
287 | 
288 | # 用户代理
289 | MCP_USER_AGENT="MCP Test Server (github.com/modelcontextprotocol/python-sdk)"
290 | 
291 | # 本地目录挂载配置
292 | HOST_MOUNT_SOURCE=/path/to/your/local/directory  # 本地目录路径
293 | HOST_MOUNT_TARGET=/host_files                    # 容器内挂载路径
294 | ```
295 | 
296 | ### 本地目录挂载
297 | 
298 | 框架支持将本地目录挂载到容器中，以便工具可以访问本地文件。配置方法：
299 | 
300 | 1. 在`.env`文件中设置`HOST_MOUNT_SOURCE`和`HOST_MOUNT_TARGET`环境变量
301 | 2. `HOST_MOUNT_SOURCE`是你本地机器上的目录路径
302 | 3. `HOST_MOUNT_TARGET`是容器内的挂载路径（默认为`/host_files`）
303 | 
304 | 使用工具时，可以直接引用本地文件路径，框架会自动将其转换为容器内的路径。例如：
305 | 
306 | ```
307 | # 使用PDF工具处理本地文件
308 | parse_pdf "/Users/username/Documents/example.pdf"
309 | 
310 | # 框架会自动将路径转换为容器内路径
311 | # 例如："/host_files/example.pdf"
312 | ```
313 | 
314 | 这样，你就可以在不修改工具代码的情况下，轻松访问本地文件。
315 | 
316 | ### Docker部署（推荐）
317 | 
318 | 1. 初始设置：
319 | ```bash
320 | # 克隆仓库
321 | git clone https://github.com/aigo666/mcp-framework.git
322 | cd mcp-framework
323 | 
324 | # 创建环境文件
325 | cp .env.example .env
326 | ```
327 | 
328 | 2. 使用Docker Compose：
329 | ```bash
330 | # 构建并启动
331 | docker compose up --build -d
332 | 
333 | # 查看日志
334 | docker compose logs -f
335 | 
336 | # 管理容器
337 | docker compose ps
338 | docker compose pause
339 | docker compose unpause
340 | docker compose down
341 | ```
342 | 
343 | 3. 访问服务：
344 |    - SSE端点: http://localhost:8000/sse
345 | 
346 | 4. Cursor IDE配置：
347 |    - 设置 → 功能 → 添加MCP服务器
348 |    - 类型: "sse"
349 |    - URL: `http://localhost:8000/sse?token=<your-token>`（将 `<your-token>` 替换为您的 JWT Token）
350 | 
351 | ## 鉴权配置
352 | 
353 | <details>
354 | <summary>点击展开查看详细的鉴权配置信息</summary>
355 | 
356 | SSE 服务现在支持 API 鉴权机制，每个请求都需要携带有效的认证信息：
357 | 
358 | 1. 配置鉴权服务地址：
359 |    - 在 `.env` 文件中设置 `MCP_AUTH_URL` 环境变量（默认为 `http://170.106.105.206:4000/users`。该默认地址仅供测试，不保证长期稳定，建议参考下文第 4 点推荐的项目自行部署鉴权服务）
360 | 
361 | 2. 客户端配置：
362 |    - 在 Cursor 插件中配置时，需要在 URL 中添加 `token` 查询参数
363 |    - 格式为 `http://your-server:8000/sse?token=<your-token>`
364 |    - 服务器会自动将 token 转换为 `Bearer <your-token>` 格式发送到鉴权服务
365 | 
366 | 3. 鉴权流程：
367 |    - 当 SSE 服务收到请求时，会从 URL 中提取 token 参数
368 |    - 然后向配置的鉴权地址发送请求，并传递 `Authorization: Bearer <your-token>` 头
369 |    - 只有鉴权成功（返回 200 状态码）的请求才会被处理
370 |    - 鉴权失败的请求会收到 401 Unauthorized 响应
371 | 
372 | 4. 推荐JWT鉴权服务：
373 |    - 我们推荐使用Jason Watmore的Node.js JWT鉴权服务作为参考实现
374 |    - 详细文档和示例代码：https://jasonwatmore.com/nodejs-jwt-authentication-tutorial-with-example-api
375 |    - 该实现提供了完整的用户注册、登录、令牌生成和验证功能
376 |    - 可以无缝集成到本框架的鉴权流程中
377 | 
378 | </details>
379 | 
380 | ## 部署方式
381 | 
382 | ### 传统Python部署
383 | 
384 | 1. 安装系统依赖：
385 | ```bash
386 | # Ubuntu/Debian
387 | sudo apt-get update
388 | sudo apt-get install -y poppler-utils tesseract-ocr tesseract-ocr-chi-sim
389 | 
390 | # macOS
391 | brew install poppler tesseract tesseract-lang
392 | 
393 | # Windows
394 | # 1. 下载并安装Tesseract: https://github.com/UB-Mannheim/tesseract/wiki
395 | # 2. 将Tesseract添加到系统PATH
396 | ```
397 | 
398 | 2. 安装Python依赖：
399 | ```bash
400 | # 创建虚拟环境
401 | python -m venv venv
402 | source venv/bin/activate  # Linux/Mac
403 | # 或
404 | .\venv\Scripts\activate  # Windows
405 | 
406 | # 安装依赖
407 | pip install .  # 依赖声明在 pyproject.toml 中（仓库中没有 requirements.txt）
408 | ```
409 | 
410 | 3. 启动服务：
411 | ```bash
412 | python -m mcp_tool
413 | ```
414 | 
415 | ## 依赖项
416 | 
417 | 主要依赖：
418 | - `mcp`: Model Context Protocol实现
419 | - `PyMuPDF`: PDF文档处理
420 | - `python-docx`: Word文档处理
421 | - `pandas`和`openpyxl`: Excel文件处理
422 | - `httpx`: 异步HTTP客户端
423 | - `anyio`: 异步I/O支持
424 | - `click`: 命令行接口
425 | 
426 | ## 贡献指南
427 | 
428 | 1. Fork仓库
429 | 2. 创建功能分支 (`git checkout -b feature/amazing-feature`)
430 | 3. 提交更改 (`git commit -m 'Add some amazing feature'`)
431 | 4. 推送到分支 (`git push origin feature/amazing-feature`)
432 | 5. 打开Pull Request
433 | 
434 | ## 许可证
435 | 
436 | 本项目采用MIT许可证 - 详情请参阅[LICENSE](LICENSE)文件。


--------------------------------------------------------------------------------
/.lh/mcp_simple_tool/tools/image_recognition_tool.py.json:
--------------------------------------------------------------------------------
 1 | {
 2 |     "sourceFile": "mcp_simple_tool/tools/image_recognition_tool.py",
 3 |     "activeCommit": 0,
 4 |     "commits": [
 5 |         {
 6 |             "activePatchIndex": 1,
 7 |             "patches": [
 8 |                 {
 9 |                     "date": 1741494708460,
10 |                     "content": "Index: \n===================================================================\n--- \n+++ \n"
11 |                 },
12 |                 {
13 |                     "date": 1741495204420,
14 |                     "content": "Index: \n===================================================================\n--- \n+++ \n@@ -1,191 +1,1 @@\n-\"\"\"\n-大模型图像识别工具，用于与大模型API交互进行图像识别\n-\"\"\"\n-\n-import os\n-import json\n-import http.client\n-import traceback\n-from typing import Dict, List, Any, Optional\n-import mcp.types as types\n-from . import BaseTool, ToolRegistry\n-\n-class ImageRecognizer:\n-    \"\"\"图像识别器，负责与大模型API交互进行图像识别\"\"\"\n-    \n-    def __init__(self, base_url: str, api_key: str, model: str = \"gpt-4o\"):\n-        \"\"\"\n-        初始化图像识别器\n-        \n-        Args:\n-            base_url: API基础URL，不包含路径\n-            api_key: API密钥\n-            model: 使用的模型名称\n-        \"\"\"\n-        self.base_url = base_url\n-        self.api_key = api_key\n-        self.model = model\n-    \n-    async def recognize_image(self, image_path: str, prompt: str = \"请详细描述这张图片的内容\") -> Optional[str]:\n-        \"\"\"\n-        识别图片内容\n-        \n-        Args:\n-            image_path: 图片路径\n-            prompt: 提示词\n-            \n-        Returns:\n-            识别结果文本，如果失败则返回None\n-        \"\"\"\n-        try:\n-            # 检查图片是否存在\n-            if not os.path.exists(image_path):\n-                print(f\"错误: 图片不存在: {image_path}\")\n-                return None\n-                \n-            # 构建图片URL（本地文件路径或HTTP URL）\n-            if image_path.startswith(('http://', 'https://')):\n-                image_url = image_path\n-            else:\n-                # 如果是本地文件，需要确保它是可访问的URL\n-                # 这里假设图片已经被放置在可通过HTTP访问的位置\n-                # 实际应用中可能需要上传图片到临时存储服务\n-                image_url = f\"file://{os.path.abspath(image_path)}\"\n-            \n-            # 创建HTTP连接\n-            conn = http.client.HTTPSConnection(self.base_url)\n-            \n-            # 构建请求负载\n-            payload = json.dumps({\n-                \"model\": self.model,\n-                \"stream\": False,\n-                \"messages\": [\n-                    {\n-   
                     \"role\": \"user\",\n-                        \"content\": [\n-                            {\n-                                \"type\": \"text\",\n-                                \"text\": prompt\n-                            },\n-                            {\n-                                \"type\": \"image_url\",\n-                                \"image_url\": {\n-                                    \"url\": image_url\n-                                }\n-                            }\n-                        ]\n-                    }\n-                ],\n-                \"max_tokens\": 400\n-            })\n-            \n-            # 设置请求头\n-            headers = {\n-                'Accept': 'application/json',\n-                'Authorization': f'Bearer {self.api_key}',\n-                'Content-Type': 'application/json'\n-            }\n-            \n-            # 发送请求\n-            conn.request(\"POST\", \"/v1/chat/completions\", payload, headers)\n-            \n-            # 获取响应\n-            res = conn.getresponse()\n-            data = res.read().decode(\"utf-8\")\n-            \n-            # 解析响应\n-            response = json.loads(data)\n-            \n-            # 提取回复内容\n-            if 'choices' in response and len(response['choices']) > 0:\n-                content = response['choices'][0]['message']['content']\n-                return content\n-            else:\n-                print(f\"错误: 无法从响应中提取内容: {response}\")\n-                return None\n-                \n-        except Exception as e:\n-            print(f\"图像识别过程中出错: {str(e)}\")\n-            traceback.print_exc()\n-            return None\n-\n-@ToolRegistry.register\n-class ImageRecognitionTool(BaseTool):\n-    \"\"\"图像识别工具，用于识别图片内容\"\"\"\n-    \n-    name = \"image_recognition\"\n-    description = \"使用大模型识别图片内容\"\n-    input_schema = {\n-        \"type\": \"object\",\n-        \"required\": [\"image_path\"],\n-        \"properties\": {\n- 
           \"image_path\": {\n-                \"type\": \"string\",\n-                \"description\": \"图片的本地路径或URL\",\n-            },\n-            \"prompt\": {\n-                \"type\": \"string\",\n-                \"description\": \"提示词，指导模型如何描述图片\",\n-            },\n-            \"base_url\": {\n-                \"type\": \"string\",\n-                \"description\": \"API基础URL，不包含路径\",\n-            },\n-            \"api_key\": {\n-                \"type\": \"string\",\n-                \"description\": \"API密钥\",\n-            },\n-            \"model\": {\n-                \"type\": \"string\",\n-                \"description\": \"使用的模型名称\",\n-            }\n-        },\n-    }\n-    \n-    async def execute(self, arguments: Dict[str, Any]) -> List[types.TextContent | types.ImageContent | types.EmbeddedResource]:\n-        \"\"\"\n-        执行图像识别\n-        \n-        Args:\n-            arguments: 参数字典，必须包含'image_path'键\n-            \n-        Returns:\n-            识别结果\n-        \"\"\"\n-        # 参数验证\n-        if \"image_path\" not in arguments:\n-            return [types.TextContent(\n-                type=\"text\",\n-                text=\"错误: 缺少必要参数 'image_path'\"\n-            )]\n-        \n-        # 获取参数\n-        image_path = arguments[\"image_path\"]\n-        prompt = arguments.get(\"prompt\", \"请详细描述这张图片的内容\")\n-        base_url = arguments.get(\"base_url\", os.environ.get(\"LLM_API_BASE_URL\", \"api.openai.com\"))\n-        api_key = arguments.get(\"api_key\", os.environ.get(\"LLM_API_KEY\", \"\"))\n-        model = arguments.get(\"model\", os.environ.get(\"LLM_MODEL\", \"gpt-4o\"))\n-        \n-        # 验证API密钥\n-        if not api_key:\n-            return [types.TextContent(\n-                type=\"text\",\n-                text=\"错误: 缺少API密钥，请通过参数提供或设置环境变量 LLM_API_KEY\"\n-            )]\n-        \n-        # 创建图像识别器\n-        recognizer = ImageRecognizer(base_url, api_key, model)\n-        \n-        # 执行图像识别\n-        
result = await recognizer.recognize_image(image_path, prompt)\n-        \n-        if result:\n-            return [types.TextContent(\n-                type=\"text\",\n-                text=f\"# 图像识别结果\\n\\n{result}\"\n-            )]\n-        else:\n-            return [types.TextContent(\n-                type=\"text\",\n-                text=\"图像识别失败，请检查图片路径和API配置。\"\n-            )] \n\\ No newline at end of file\n+ \n\\ No newline at end of file\n"
15 |                 }
16 |             ],
17 |             "date": 1741494708460,
18 |             "name": "Commit-0",
19 |             "content": "\"\"\"\n大模型图像识别工具，用于与大模型API交互进行图像识别\n\"\"\"\n\nimport os\nimport json\nimport http.client\nimport traceback\nfrom typing import Dict, List, Any, Optional\nimport mcp.types as types\nfrom . import BaseTool, ToolRegistry\n\nclass ImageRecognizer:\n    \"\"\"图像识别器，负责与大模型API交互进行图像识别\"\"\"\n    \n    def __init__(self, base_url: str, api_key: str, model: str = \"gpt-4o\"):\n        \"\"\"\n        初始化图像识别器\n        \n        Args:\n            base_url: API基础URL，不包含路径\n            api_key: API密钥\n            model: 使用的模型名称\n        \"\"\"\n        self.base_url = base_url\n        self.api_key = api_key\n        self.model = model\n    \n    async def recognize_image(self, image_path: str, prompt: str = \"请详细描述这张图片的内容\") -> Optional[str]:\n        \"\"\"\n        识别图片内容\n        \n        Args:\n            image_path: 图片路径\n            prompt: 提示词\n            \n        Returns:\n            识别结果文本，如果失败则返回None\n        \"\"\"\n        try:\n            # 检查图片是否存在\n            if not os.path.exists(image_path):\n                print(f\"错误: 图片不存在: {image_path}\")\n                return None\n                \n            # 构建图片URL（本地文件路径或HTTP URL）\n            if image_path.startswith(('http://', 'https://')):\n                image_url = image_path\n            else:\n                # 如果是本地文件，需要确保它是可访问的URL\n                # 这里假设图片已经被放置在可通过HTTP访问的位置\n                # 实际应用中可能需要上传图片到临时存储服务\n                image_url = f\"file://{os.path.abspath(image_path)}\"\n            \n            # 创建HTTP连接\n            conn = http.client.HTTPSConnection(self.base_url)\n            \n            # 构建请求负载\n            payload = json.dumps({\n                \"model\": self.model,\n                \"stream\": False,\n                \"messages\": [\n                    {\n                        \"role\": \"user\",\n                        \"content\": [\n                            {\n                                \"type\": \"text\",\n               
                 \"text\": prompt\n                            },\n                            {\n                                \"type\": \"image_url\",\n                                \"image_url\": {\n                                    \"url\": image_url\n                                }\n                            }\n                        ]\n                    }\n                ],\n                \"max_tokens\": 400\n            })\n            \n            # 设置请求头\n            headers = {\n                'Accept': 'application/json',\n                'Authorization': f'Bearer {self.api_key}',\n                'Content-Type': 'application/json'\n            }\n            \n            # 发送请求\n            conn.request(\"POST\", \"/v1/chat/completions\", payload, headers)\n            \n            # 获取响应\n            res = conn.getresponse()\n            data = res.read().decode(\"utf-8\")\n            \n            # 解析响应\n            response = json.loads(data)\n            \n            # 提取回复内容\n            if 'choices' in response and len(response['choices']) > 0:\n                content = response['choices'][0]['message']['content']\n                return content\n            else:\n                print(f\"错误: 无法从响应中提取内容: {response}\")\n                return None\n                \n        except Exception as e:\n            print(f\"图像识别过程中出错: {str(e)}\")\n            traceback.print_exc()\n            return None\n\n@ToolRegistry.register\nclass ImageRecognitionTool(BaseTool):\n    \"\"\"图像识别工具，用于识别图片内容\"\"\"\n    \n    name = \"image_recognition\"\n    description = \"使用大模型识别图片内容\"\n    input_schema = {\n        \"type\": \"object\",\n        \"required\": [\"image_path\"],\n        \"properties\": {\n            \"image_path\": {\n                \"type\": \"string\",\n                \"description\": \"图片的本地路径或URL\",\n            },\n            \"prompt\": {\n                \"type\": \"string\",\n                \"description\": 
\"提示词，指导模型如何描述图片\",\n            },\n            \"base_url\": {\n                \"type\": \"string\",\n                \"description\": \"API基础URL，不包含路径\",\n            },\n            \"api_key\": {\n                \"type\": \"string\",\n                \"description\": \"API密钥\",\n            },\n            \"model\": {\n                \"type\": \"string\",\n                \"description\": \"使用的模型名称\",\n            }\n        },\n    }\n    \n    async def execute(self, arguments: Dict[str, Any]) -> List[types.TextContent | types.ImageContent | types.EmbeddedResource]:\n        \"\"\"\n        执行图像识别\n        \n        Args:\n            arguments: 参数字典，必须包含'image_path'键\n            \n        Returns:\n            识别结果\n        \"\"\"\n        # 参数验证\n        if \"image_path\" not in arguments:\n            return [types.TextContent(\n                type=\"text\",\n                text=\"错误: 缺少必要参数 'image_path'\"\n            )]\n        \n        # 获取参数\n        image_path = arguments[\"image_path\"]\n        prompt = arguments.get(\"prompt\", \"请详细描述这张图片的内容\")\n        base_url = arguments.get(\"base_url\", os.environ.get(\"LLM_API_BASE_URL\", \"api.openai.com\"))\n        api_key = arguments.get(\"api_key\", os.environ.get(\"LLM_API_KEY\", \"\"))\n        model = arguments.get(\"model\", os.environ.get(\"LLM_MODEL\", \"gpt-4o\"))\n        \n        # 验证API密钥\n        if not api_key:\n            return [types.TextContent(\n                type=\"text\",\n                text=\"错误: 缺少API密钥，请通过参数提供或设置环境变量 LLM_API_KEY\"\n            )]\n        \n        # 创建图像识别器\n        recognizer = ImageRecognizer(base_url, api_key, model)\n        \n        # 执行图像识别\n        result = await recognizer.recognize_image(image_path, prompt)\n        \n        if result:\n            return [types.TextContent(\n                type=\"text\",\n                text=f\"# 图像识别结果\\n\\n{result}\"\n            )]\n        else:\n            return [types.TextContent(\n            
    type=\"text\",\n                text=\"图像识别失败，请检查图片路径和API配置。\"\n            )] "
20 |         }
21 |     ]
22 | }


--------------------------------------------------------------------------------
/.lh/pyproject.toml.json:
--------------------------------------------------------------------------------
  1 | {
  2 |     "sourceFile": "pyproject.toml",
  3 |     "activeCommit": 0,
  4 |     "commits": [
  5 |         {
  6 |             "activePatchIndex": 23,
  7 |             "patches": [
  8 |                 {
  9 |                     "date": 1741245105746,
 10 |                     "content": "Index: \n===================================================================\n--- \n+++ \n"
 11 |                 },
 12 |                 {
 13 |                     "date": 1741252123512,
 14 |                     "content": "Index: \n===================================================================\n--- \n+++ \n@@ -24,9 +24,10 @@\n     \"httpx>=0.27\", \n     \"mcp\",\n     \"PyPDF2>=3.0.0\",\n     \"pdf2image>=1.16.0\",\n-    \"Pillow>=9.0.0\"\n+    \"Pillow>=9.0.0\",\n+    \"pymupdf4llm>=0.1.0\"\n ]\n \n [project.scripts]\n mcp-simple-tool = \"mcp_simple_tool.server:main\"\n"
 15 |                 },
 16 |                 {
 17 |                     "date": 1741252296913,
 18 |                     "content": "Index: \n===================================================================\n--- \n+++ \n@@ -25,9 +25,9 @@\n     \"mcp\",\n     \"PyPDF2>=3.0.0\",\n     \"pdf2image>=1.16.0\",\n     \"Pillow>=9.0.0\",\n-    \"pymupdf4llm>=0.1.0\"\n+    \"pymupdf4llm==0.0.17\"\n ]\n \n [project.scripts]\n mcp-simple-tool = \"mcp_simple_tool.server:main\"\n"
 19 |                 },
 20 |                 {
 21 |                     "date": 1741258774131,
 22 |                     "content": "Index: \n===================================================================\n--- \n+++ \n@@ -25,9 +25,11 @@\n     \"mcp\",\n     \"PyPDF2>=3.0.0\",\n     \"pdf2image>=1.16.0\",\n     \"Pillow>=9.0.0\",\n-    \"pymupdf4llm==0.0.17\"\n+    \"pymupdf4llm==0.0.17\",\n+    \"pytesseract>=0.3.10\",\n+    \"opencv-python-headless>=4.8.0\"\n ]\n \n [project.scripts]\n mcp-simple-tool = \"mcp_simple_tool.server:main\"\n"
 23 |                 },
 24 |                 {
 25 |                     "date": 1741259378551,
 26 |                     "content": "Index: \n===================================================================\n--- \n+++ \n@@ -25,11 +25,9 @@\n     \"mcp\",\n     \"PyPDF2>=3.0.0\",\n     \"pdf2image>=1.16.0\",\n     \"Pillow>=9.0.0\",\n-    \"pymupdf4llm==0.0.17\",\n-    \"pytesseract>=0.3.10\",\n-    \"opencv-python-headless>=4.8.0\"\n+    \"pymupdf4llm==0.0.17\"\n ]\n \n [project.scripts]\n mcp-simple-tool = \"mcp_simple_tool.server:main\"\n"
 27 |                 },
 28 |                 {
 29 |                     "date": 1741259515121,
 30 |                     "content": "Index: \n===================================================================\n--- \n+++ \n@@ -25,9 +25,12 @@\n     \"mcp\",\n     \"PyPDF2>=3.0.0\",\n     \"pdf2image>=1.16.0\",\n     \"Pillow>=9.0.0\",\n-    \"pymupdf4llm==0.0.17\"\n+    \"pymupdf4llm==0.0.17\",\n+    \"opencv-python-headless>=4.5.0\",\n+    \"pytesseract>=0.3.8\",\n+    \"numpy>=1.20.0\"\n ]\n \n [project.scripts]\n mcp-simple-tool = \"mcp_simple_tool.server:main\"\n"
 31 |                 },
 32 |                 {
 33 |                     "date": 1741259590629,
 34 |                     "content": "Index: \n===================================================================\n--- \n+++ \n@@ -25,12 +25,9 @@\n     \"mcp\",\n     \"PyPDF2>=3.0.0\",\n     \"pdf2image>=1.16.0\",\n     \"Pillow>=9.0.0\",\n-    \"pymupdf4llm==0.0.17\",\n-    \"opencv-python-headless>=4.5.0\",\n-    \"pytesseract>=0.3.8\",\n-    \"numpy>=1.20.0\"\n+    \"pymupdf4llm==0.0.17\"\n ]\n \n [project.scripts]\n mcp-simple-tool = \"mcp_simple_tool.server:main\"\n"
 35 |                 },
 36 |                 {
 37 |                     "date": 1741260619388,
 38 |                     "content": "Index: \n===================================================================\n--- \n+++ \n@@ -25,9 +25,10 @@\n     \"mcp\",\n     \"PyPDF2>=3.0.0\",\n     \"pdf2image>=1.16.0\",\n     \"Pillow>=9.0.0\",\n-    \"pymupdf4llm==0.0.17\"\n+    \"pymupdf4llm==0.0.17\",\n+    \"PyMuPDF>=1.22.0\"\n ]\n \n [project.scripts]\n mcp-simple-tool = \"mcp_simple_tool.server:main\"\n"
 39 |                 },
 40 |                 {
 41 |                     "date": 1741332586798,
 42 |                     "content": "Index: \n===================================================================\n--- \n+++ \n@@ -25,10 +25,9 @@\n     \"mcp\",\n     \"PyPDF2>=3.0.0\",\n     \"pdf2image>=1.16.0\",\n     \"Pillow>=9.0.0\",\n-    \"pymupdf4llm==0.0.17\",\n-    \"PyMuPDF>=1.22.0\"\n+    \"pymupdf4llm==0.0.17\"\n ]\n \n [project.scripts]\n mcp-simple-tool = \"mcp_simple_tool.server:main\"\n"
 43 |                 },
 44 |                 {
 45 |                     "date": 1741335095744,
 46 |                     "content": "Index: \n===================================================================\n--- \n+++ \n@@ -25,9 +25,10 @@\n     \"mcp\",\n     \"PyPDF2>=3.0.0\",\n     \"pdf2image>=1.16.0\",\n     \"Pillow>=9.0.0\",\n-    \"pymupdf4llm==0.0.17\"\n+    \"pymupdf4llm==0.0.17\",\n+    \"PyMuPDF>=1.22.0\"\n ]\n \n [project.scripts]\n mcp-simple-tool = \"mcp_simple_tool.server:main\"\n"
 47 |                 },
 48 |                 {
 49 |                     "date": 1741337118402,
 50 |                     "content": "Index: \n===================================================================\n--- \n+++ \n@@ -26,9 +26,10 @@\n     \"PyPDF2>=3.0.0\",\n     \"pdf2image>=1.16.0\",\n     \"Pillow>=9.0.0\",\n     \"pymupdf4llm==0.0.17\",\n-    \"PyMuPDF>=1.22.0\"\n+    \"PyMuPDF>=1.22.0\",\n+    \"python-docx>=0.8.11\"\n ]\n \n [project.scripts]\n mcp-simple-tool = \"mcp_simple_tool.server:main\"\n"
 51 |                 },
 52 |                 {
 53 |                     "date": 1741523876756,
 54 |                     "content": "Index: \n===================================================================\n--- \n+++ \n@@ -27,9 +27,10 @@\n     \"pdf2image>=1.16.0\",\n     \"Pillow>=9.0.0\",\n     \"pymupdf4llm==0.0.17\",\n     \"PyMuPDF>=1.22.0\",\n-    \"python-docx>=0.8.11\"\n+    \"python-docx>=0.8.11\",\n+    \"requests>=2.28.0\"\n ]\n \n [project.scripts]\n mcp-simple-tool = \"mcp_simple_tool.server:main\"\n"
 55 |                 },
 56 |                 {
 57 |                     "date": 1741529379889,
 58 |                     "content": "Index: \n===================================================================\n--- \n+++ \n@@ -27,10 +27,9 @@\n     \"pdf2image>=1.16.0\",\n     \"Pillow>=9.0.0\",\n     \"pymupdf4llm==0.0.17\",\n     \"PyMuPDF>=1.22.0\",\n-    \"python-docx>=0.8.11\",\n-    \"requests>=2.28.0\"\n+    \"python-docx>=0.8.11\"\n ]\n \n [project.scripts]\n mcp-simple-tool = \"mcp_simple_tool.server:main\"\n"
 59 |                 },
 60 |                 {
 61 |                     "date": 1741660115759,
 62 |                     "content": "Index: \n===================================================================\n--- \n+++ \n@@ -27,9 +27,11 @@\n     \"pdf2image>=1.16.0\",\n     \"Pillow>=9.0.0\",\n     \"pymupdf4llm==0.0.17\",\n     \"PyMuPDF>=1.22.0\",\n-    \"python-docx>=0.8.11\"\n+    \"python-docx>=0.8.11\",\n+    \"pandas>=2.0.0\",\n+    \"openpyxl>=3.1.0\"\n ]\n \n [project.scripts]\n mcp-simple-tool = \"mcp_simple_tool.server:main\"\n"
 63 |                 },
 64 |                 {
 65 |                     "date": 1741665039026,
 66 |                     "content": "Index: \n===================================================================\n--- \n+++ \n@@ -24,14 +24,15 @@\n     \"httpx>=0.27\", \n     \"mcp\",\n     \"PyPDF2>=3.0.0\",\n     \"pdf2image>=1.16.0\",\n-    \"Pillow>=9.0.0\",\n+    \"Pillow>=10.0.0\",\n     \"pymupdf4llm==0.0.17\",\n     \"PyMuPDF>=1.22.0\",\n     \"python-docx>=0.8.11\",\n     \"pandas>=2.0.0\",\n-    \"openpyxl>=3.1.0\"\n+    \"openpyxl>=3.1.0\",\n+    \"pytesseract>=0.3.10\",\n ]\n \n [project.scripts]\n mcp-simple-tool = \"mcp_simple_tool.server:main\"\n"
 67 |                 },
 68 |                 {
 69 |                     "date": 1741667967285,
 70 |                     "content": "Index: \n===================================================================\n--- \n+++ \n@@ -31,8 +31,11 @@\n     \"python-docx>=0.8.11\",\n     \"pandas>=2.0.0\",\n     \"openpyxl>=3.1.0\",\n     \"pytesseract>=0.3.10\",\n+    \"easyocr>=1.7.1\",\n+    \"torch>=2.0.0\",\n+    \"numpy>=1.24.0\",\n ]\n \n [project.scripts]\n mcp-simple-tool = \"mcp_simple_tool.server:main\"\n"
 71 |                 },
 72 |                 {
 73 |                     "date": 1741668087536,
 74 |                     "content": "Index: \n===================================================================\n--- \n+++ \n@@ -22,17 +22,14 @@\n     \"anyio>=4.5\", \n     \"click>=8.1.0\", \n     \"httpx>=0.27\", \n     \"mcp\",\n-    \"PyPDF2>=3.0.0\",\n-    \"pdf2image>=1.16.0\",\n     \"Pillow>=10.0.0\",\n     \"pymupdf4llm==0.0.17\",\n     \"PyMuPDF>=1.22.0\",\n     \"python-docx>=0.8.11\",\n     \"pandas>=2.0.0\",\n     \"openpyxl>=3.1.0\",\n-    \"pytesseract>=0.3.10\",\n     \"easyocr>=1.7.1\",\n     \"torch>=2.0.0\",\n     \"numpy>=1.24.0\",\n ]\n"
 75 |                 },
 76 |                 {
 77 |                     "date": 1741669852349,
 78 |                     "content": "Index: \n===================================================================\n--- \n+++ \n@@ -22,17 +22,17 @@\n     \"anyio>=4.5\", \n     \"click>=8.1.0\", \n     \"httpx>=0.27\", \n     \"mcp\",\n+    \"PyPDF2>=3.0.0\",\n+    \"pdf2image>=1.16.0\",\n     \"Pillow>=10.0.0\",\n     \"pymupdf4llm==0.0.17\",\n     \"PyMuPDF>=1.22.0\",\n     \"python-docx>=0.8.11\",\n     \"pandas>=2.0.0\",\n     \"openpyxl>=3.1.0\",\n-    \"easyocr>=1.7.1\",\n-    \"torch>=2.0.0\",\n-    \"numpy>=1.24.0\",\n+    \"pytesseract>=0.3.10\",\n ]\n \n [project.scripts]\n mcp-simple-tool = \"mcp_simple_tool.server:main\"\n"
 79 |                 },
 80 |                 {
 81 |                     "date": 1741764667021,
 82 |                     "content": "Index: \n===================================================================\n--- \n+++ \n@@ -1,16 +1,12 @@\n [project]\n-name = \"mcp-simple-tool\"\n+name = \"mcp-tool\"\n version = \"0.1.0\"\n-description = \"A simple MCP server exposing a website fetching tool\"\n+description = \"MCP工具集合，包含文件处理和网页获取功能\"\n readme = \"README.md\"\n requires-python = \">=3.10\"\n-authors = [{ name = \"Anthropic, PBC.\" }]\n-maintainers = [\n-    { name = \"David Soria Parra\", email = \"davidsp@anthropic.com\" },\n-    { name = \"Justin Spahr-Summers\", email = \"justin@anthropic.com\" },\n-]\n-keywords = [\"mcp\", \"llm\", \"automation\", \"web\", \"fetch\"]\n+authors = [{ name = \"aigo666\" }]\n+keywords = [\"mcp\", \"llm\", \"automation\", \"web\", \"fetch\", \"pdf\", \"word\", \"excel\"]\n license = { text = \"MIT\" }\n classifiers = [\n     \"Development Status :: 4 - Beta\",\n     \"Intended Audience :: Developers\",\n@@ -34,9 +30,9 @@\n     \"pytesseract>=0.3.10\",\n ]\n \n [project.scripts]\n-mcp-simple-tool = \"mcp_simple_tool.server:main\"\n+mcp-tool = \"mcp_tool.server:main\"\n \n [project.optional-dependencies]\n dev = [\"pyright>=1.1.378\", \"pytest>=8.3.3\", \"ruff>=0.6.9\", \"pytest-asyncio>=0.23.5\"]\n \n@@ -44,12 +40,12 @@\n requires = [\"hatchling\"]\n build-backend = \"hatchling.build\"\n \n [tool.hatch.build.targets.wheel]\n-packages = [\"mcp_simple_tool\"]\n+packages = [\"mcp_tool\"]\n \n [tool.pyright]\n-include = [\"mcp_simple_tool\"]\n+include = [\"mcp_tool\"]\n venvPath = \".\"\n venv = \".venv\"\n \n [tool.ruff.lint]\n"
 83 |                 },
 84 |                 {
 85 |                     "date": 1741766950590,
 86 |                     "content": "Index: \n===================================================================\n--- \n+++ \n@@ -1,8 +1,8 @@\n [project]\n-name = \"mcp-tool\"\n+name = \"mcp-simple-tool\"\n version = \"0.1.0\"\n-description = \"MCP工具集合，包含文件处理和网页获取功能\"\n+description = \"A simple MCP server exposing a website fetching tool\"\n readme = \"README.md\"\n requires-python = \">=3.10\"\n authors = [{ name = \"aigo666\" }]\n keywords = [\"mcp\", \"llm\", \"automation\", \"web\", \"fetch\", \"pdf\", \"word\", \"excel\"]\n@@ -30,9 +30,9 @@\n     \"pytesseract>=0.3.10\",\n ]\n \n [project.scripts]\n-mcp-tool = \"mcp_tool.server:main\"\n+mcp-simple-tool = \"mcp_tool.server:main\"\n \n [project.optional-dependencies]\n dev = [\"pyright>=1.1.378\", \"pytest>=8.3.3\", \"ruff>=0.6.9\", \"pytest-asyncio>=0.23.5\"]\n \n"
 87 |                 },
 88 |                 {
 89 |                     "date": 1741767018758,
 90 |                     "content": "Index: \n===================================================================\n--- \n+++ \n@@ -1,8 +1,8 @@\n [project]\n name = \"mcp-simple-tool\"\n version = \"0.1.0\"\n-description = \"A simple MCP server exposing a website fetching tool\"\n+description = \"MCP工具集合，包含文件处理和网页获取功能\"\n readme = \"README.md\"\n requires-python = \">=3.10\"\n authors = [{ name = \"aigo666\" }]\n keywords = [\"mcp\", \"llm\", \"automation\", \"web\", \"fetch\", \"pdf\", \"word\", \"excel\"]\n"
 91 |                 },
 92 |                 {
 93 |                     "date": 1742295692409,
 94 |                     "content": "Index: \n===================================================================\n--- \n+++ \n@@ -4,9 +4,9 @@\n description = \"MCP工具集合，包含文件处理和网页获取功能\"\n readme = \"README.md\"\n requires-python = \">=3.10\"\n authors = [{ name = \"aigo666\" }]\n-keywords = [\"mcp\", \"llm\", \"automation\", \"web\", \"fetch\", \"pdf\", \"word\", \"excel\"]\n+keywords = [\"mcp\", \"llm\", \"automation\", \"web\", \"fetch\", \"pdf\", \"word\", \"excel\", \"csv\"]\n license = { text = \"MIT\" }\n classifiers = [\n     \"Development Status :: 4 - Beta\",\n     \"Intended Audience :: Developers\",\n@@ -27,8 +27,9 @@\n     \"python-docx>=0.8.11\",\n     \"pandas>=2.0.0\",\n     \"openpyxl>=3.1.0\",\n     \"pytesseract>=0.3.10\",\n+    \"chardet>=5.0.0\",\n ]\n \n [project.scripts]\n mcp-simple-tool = \"mcp_tool.server:main\"\n"
 95 |                 },
 96 |                 {
 97 |                     "date": 1745752379988,
 98 |                     "content": "Index: \n===================================================================\n--- \n+++ \n@@ -1,7 +1,7 @@\n [project]\n name = \"mcp-simple-tool\"\n-version = \"0.1.0\"\n+version = \"1.1.0\"\n description = \"MCP工具集合，包含文件处理和网页获取功能\"\n readme = \"README.md\"\n requires-python = \">=3.10\"\n authors = [{ name = \"aigo666\" }]\n"
 99 |                 },
100 |                 {
101 |                     "date": 1745752399616,
102 |                     "content": "Index: \n===================================================================\n--- \n+++ \n@@ -1,7 +1,7 @@\n [project]\n name = \"mcp-simple-tool\"\n-version = \"1.1.0\"\n+version = \"1.2.0\"\n description = \"MCP工具集合，包含文件处理和网页获取功能\"\n readme = \"README.md\"\n requires-python = \">=3.10\"\n authors = [{ name = \"aigo666\" }]\n"
103 |                 }
104 |             ],
105 |             "date": 1741245105746,
106 |             "name": "Commit-0",
107 |             "content": "[project]\nname = \"mcp-simple-tool\"\nversion = \"0.1.0\"\ndescription = \"A simple MCP server exposing a website fetching tool\"\nreadme = \"README.md\"\nrequires-python = \">=3.10\"\nauthors = [{ name = \"Anthropic, PBC.\" }]\nmaintainers = [\n    { name = \"David Soria Parra\", email = \"davidsp@anthropic.com\" },\n    { name = \"Justin Spahr-Summers\", email = \"justin@anthropic.com\" },\n]\nkeywords = [\"mcp\", \"llm\", \"automation\", \"web\", \"fetch\"]\nlicense = { text = \"MIT\" }\nclassifiers = [\n    \"Development Status :: 4 - Beta\",\n    \"Intended Audience :: Developers\",\n    \"License :: OSI Approved :: MIT License\",\n    \"Programming Language :: Python :: 3\",\n    \"Programming Language :: Python :: 3.10\",\n]\ndependencies = [\n    \"anyio>=4.5\", \n    \"click>=8.1.0\", \n    \"httpx>=0.27\", \n    \"mcp\",\n    \"PyPDF2>=3.0.0\",\n    \"pdf2image>=1.16.0\",\n    \"Pillow>=9.0.0\"\n]\n\n[project.scripts]\nmcp-simple-tool = \"mcp_simple_tool.server:main\"\n\n[project.optional-dependencies]\ndev = [\"pyright>=1.1.378\", \"pytest>=8.3.3\", \"ruff>=0.6.9\", \"pytest-asyncio>=0.23.5\"]\n\n[build-system]\nrequires = [\"hatchling\"]\nbuild-backend = \"hatchling.build\"\n\n[tool.hatch.build.targets.wheel]\npackages = [\"mcp_simple_tool\"]\n\n[tool.pyright]\ninclude = [\"mcp_simple_tool\"]\nvenvPath = \".\"\nvenv = \".venv\"\n\n[tool.ruff.lint]\nselect = [\"E\", \"F\", \"I\"]\nignore = []\n\n[tool.ruff]\nline-length = 88\ntarget-version = \"py310\"\n\n[tool.uv]\ndev-dependencies = [\"pyright>=1.1.378\", \"pytest>=8.3.3\", \"ruff>=0.6.9\", \"pytest-asyncio>=0.23.5\"]\n"
108 |         }
109 |     ]
110 | }


--------------------------------------------------------------------------------
/.lh/.env.example.json:
--------------------------------------------------------------------------------
  1 | {
  2 |     "sourceFile": ".env.example",
  3 |     "activeCommit": 0,
  4 |     "commits": [
  5 |         {
  6 |             "activePatchIndex": 26,
  7 |             "patches": [
  8 |                 {
  9 |                     "date": 1741252766715,
 10 |                     "content": "Index: \n===================================================================\n--- \n+++ \n"
 11 |                 },
 12 |                 {
 13 |                     "date": 1741252784152,
 14 |                     "content": "Index: \n===================================================================\n--- \n+++ \n@@ -1,9 +1,9 @@\n ###\n  # @Author: 刘彦志 yanzhiliu@trip.com\n  # @Date: 2025-03-06 14:57:07\n  # @LastEditors: 刘彦志 yanzhiliu@trip.com\n- # @LastEditTime: 2025-03-06 17:19:26\n+ # @LastEditTime: 2025-03-06 17:19:42\n  # @FilePath: /weaviate-mcp-server/.env.example\n  # @Description: 这是默认设置,请设置`customMade`, 打开koroFileHeader查看配置 进行设置: https://github.com/OBKoro1/koro1FileHeader/wiki/%E9%85%8D%E7%BD%AE\n ### \n # Server Configuration\n"
 15 |                 },
 16 |                 {
 17 |                     "date": 1741252909928,
 18 |                     "content": "Index: \n===================================================================\n--- \n+++ \n@@ -1,9 +1,9 @@\n ###\n  # @Author: 刘彦志 yanzhiliu@trip.com\n  # @Date: 2025-03-06 14:57:07\n  # @LastEditors: 刘彦志 yanzhiliu@trip.com\n- # @LastEditTime: 2025-03-06 17:19:42\n+ # @LastEditTime: 2025-03-06 17:21:49\n  # @FilePath: /weaviate-mcp-server/.env.example\n  # @Description: 这是默认设置,请设置`customMade`, 打开koroFileHeader查看配置 进行设置: https://github.com/OBKoro1/koro1FileHeader/wiki/%E9%85%8D%E7%BD%AE\n ### \n # Server Configuration\n"
 19 |                 },
 20 |                 {
 21 |                     "date": 1741253024653,
 22 |                     "content": "Index: \n===================================================================\n--- \n+++ \n@@ -1,12 +1,4 @@\n-###\n- # @Author: 刘彦志 yanzhiliu@trip.com\n- # @Date: 2025-03-06 14:57:07\n- # @LastEditors: 刘彦志 yanzhiliu@trip.com\n- # @LastEditTime: 2025-03-06 17:21:49\n- # @FilePath: /weaviate-mcp-server/.env.example\n- # @Description: 这是默认设置,请设置`customMade`, 打开koroFileHeader查看配置 进行设置: https://github.com/OBKoro1/koro1FileHeader/wiki/%E9%85%8D%E7%BD%AE\n-### \n # Server Configuration\n MCP_SERVER_PORT=8000\n MCP_SERVER_HOST=0.0.0.0\n \n"
 23 |                 },
 24 |                 {
 25 |                     "date": 1741494832888,
 26 |                     "content": "Index: \n===================================================================\n--- \n+++ \n@@ -5,5 +5,10 @@\n # Optional: Set to 'true' to enable debug mode\n DEBUG=false\n \n # Optional: Set custom User-Agent for website fetching\n-MCP_USER_AGENT=\"MCP Test Server (github.com/modelcontextprotocol/python-sdk)\" \n\\ No newline at end of file\n+MCP_USER_AGENT=\"MCP Test Server (github.com/modelcontextprotocol/python-sdk)\"\n+\n+# Large Language Model API Configuration (for image recognition)\n+LLM_API_KEY=your_api_key_here\n+LLM_API_BASE_URL=api.openai.com\n+LLM_MODEL=gpt-4o \n\\ No newline at end of file\n"
 27 |                 },
 28 |                 {
 29 |                     "date": 1741495204664,
 30 |                     "content": "Index: \n===================================================================\n--- \n+++ \n@@ -5,10 +5,5 @@\n # Optional: Set to 'true' to enable debug mode\n DEBUG=false\n \n # Optional: Set custom User-Agent for website fetching\n-MCP_USER_AGENT=\"MCP Test Server (github.com/modelcontextprotocol/python-sdk)\"\n-\n-# Large Language Model API Configuration (for image recognition)\n-LLM_API_KEY=your_api_key_here\n-LLM_API_BASE_URL=api.openai.com\n-LLM_MODEL=gpt-4o \n\\ No newline at end of file\n+MCP_USER_AGENT=\"MCP Test Server (github.com/modelcontextprotocol/python-sdk)\" \n\\ No newline at end of file\n"
 31 |                 },
 32 |                 {
 33 |                     "date": 1741521142056,
 34 |                     "content": "Index: \n===================================================================\n--- \n+++ \n@@ -5,5 +5,17 @@\n # Optional: Set to 'true' to enable debug mode\n DEBUG=false\n \n # Optional: Set custom User-Agent for website fetching\n-MCP_USER_AGENT=\"MCP Test Server (github.com/modelcontextprotocol/python-sdk)\" \n\\ No newline at end of file\n+MCP_USER_AGENT=\"MCP Test Server (github.com/modelcontextprotocol/python-sdk)\"\n+\n+# Image Recognition Configuration\n+# 大模型API基础URL\n+LLM_API_BASE_URL=api.openai.com\n+# 大模型API密钥\n+LLM_API_KEY=your_api_key_here\n+# 使用的模型名称\n+LLM_MODEL=gpt-4o\n+# 最大输出token数\n+LLM_MAX_TOKENS=400\n+# 是否启用图像识别功能\n+ENABLE_IMAGE_RECOGNITION=true \n\\ No newline at end of file\n"
 35 |                 },
 36 |                 {
 37 |                     "date": 1741521329620,
 38 |                     "content": "Index: \n===================================================================\n--- \n+++ \n@@ -17,5 +17,7 @@\n LLM_MODEL=gpt-4o\n # 最大输出token数\n LLM_MAX_TOKENS=400\n # 是否启用图像识别功能\n-ENABLE_IMAGE_RECOGNITION=true \n\\ No newline at end of file\n+ENABLE_IMAGE_RECOGNITION=true\n+# API服务的图像路径映射\n+API_IMAGE_PATH=/host_images \n\\ No newline at end of file\n"
 39 |                 },
 40 |                 {
 41 |                     "date": 1741522053556,
 42 |                     "content": "Index: \n===================================================================\n--- \n+++ \n@@ -19,5 +19,9 @@\n LLM_MAX_TOKENS=400\n # 是否启用图像识别功能\n ENABLE_IMAGE_RECOGNITION=true\n # API服务的图像路径映射\n-API_IMAGE_PATH=/host_images \n\\ No newline at end of file\n+API_IMAGE_PATH=/host_images\n+# API请求超时时间（秒）\n+LLM_API_TIMEOUT=120\n+# API请求最大重试次数\n+LLM_API_MAX_RETRIES=2 \n\\ No newline at end of file\n"
 43 |                 },
 44 |                 {
 45 |                     "date": 1741522367543,
 46 |                     "content": "Index: \n===================================================================\n--- \n+++ \n@@ -23,5 +23,7 @@\n API_IMAGE_PATH=/host_images\n # API请求超时时间（秒）\n LLM_API_TIMEOUT=120\n # API请求最大重试次数\n-LLM_API_MAX_RETRIES=2 \n\\ No newline at end of file\n+LLM_API_MAX_RETRIES=2\n+# 最大处理图像数量\n+MAX_IMAGES_TO_PROCESS=5 \n\\ No newline at end of file\n"
 47 |                 },
 48 |                 {
 49 |                     "date": 1741523067968,
 50 |                     "content": "Index: \n===================================================================\n--- \n+++ \n@@ -25,5 +25,9 @@\n LLM_API_TIMEOUT=120\n # API请求最大重试次数\n LLM_API_MAX_RETRIES=2\n # 最大处理图像数量\n-MAX_IMAGES_TO_PROCESS=5 \n\\ No newline at end of file\n+MAX_IMAGES_TO_PROCESS=5\n+# PDF处理总超时时间（秒）\n+PDF_PROCESSING_TIMEOUT=300\n+# 是否默认启用PDF快速处理模式\n+PDF_FAST_MODE=true \n\\ No newline at end of file\n"
 51 |                 },
 52 |                 {
 53 |                     "date": 1741523185742,
 54 |                     "content": "Index: \n===================================================================\n--- \n+++ \n@@ -29,5 +29,7 @@\n MAX_IMAGES_TO_PROCESS=5\n # PDF处理总超时时间（秒）\n PDF_PROCESSING_TIMEOUT=300\n # 是否默认启用PDF快速处理模式\n-PDF_FAST_MODE=true \n\\ No newline at end of file\n+PDF_FAST_MODE=true\n+# 快速PDF处理超时时间（秒）\n+QUICK_PDF_TIMEOUT=120 \n\\ No newline at end of file\n"
 55 |                 },
 56 |                 {
 57 |                     "date": 1741523195647,
 58 |                     "content": "Index: \n===================================================================\n--- \n+++ \n@@ -5,31 +5,5 @@\n # Optional: Set to 'true' to enable debug mode\n DEBUG=false\n \n # Optional: Set custom User-Agent for website fetching\n-MCP_USER_AGENT=\"MCP Test Server (github.com/modelcontextprotocol/python-sdk)\"\n-\n-# Image Recognition Configuration\n-# 大模型API基础URL\n-LLM_API_BASE_URL=api.openai.com\n-# 大模型API密钥\n-LLM_API_KEY=your_api_key_here\n-# 使用的模型名称\n-LLM_MODEL=gpt-4o\n-# 最大输出token数\n-LLM_MAX_TOKENS=400\n-# 是否启用图像识别功能\n-ENABLE_IMAGE_RECOGNITION=true\n-# API服务的图像路径映射\n-API_IMAGE_PATH=/host_images\n-# API请求超时时间（秒）\n-LLM_API_TIMEOUT=120\n-# API请求最大重试次数\n-LLM_API_MAX_RETRIES=2\n-# 最大处理图像数量\n-MAX_IMAGES_TO_PROCESS=5\n-# PDF处理总超时时间（秒）\n-PDF_PROCESSING_TIMEOUT=300\n-# 是否默认启用PDF快速处理模式\n-PDF_FAST_MODE=true\n-# 快速PDF处理超时时间（秒）\n-QUICK_PDF_TIMEOUT=120 \n\\ No newline at end of file\n+MCP_USER_AGENT=\"MCP Test Server (github.com/modelcontextprotocol/python-sdk)\" \n\\ No newline at end of file\n"
 59 |                 },
 60 |                 {
 61 |                     "date": 1741525164543,
 62 |                     "content": "Index: \n===================================================================\n--- \n+++ \n@@ -1,4 +1,12 @@\n+###\n+ # @Author: 刘彦志 yanzhiliu@trip.com\n+ # @Date: 2025-03-06 14:57:07\n+ # @LastEditors: 刘彦志 yanzhiliu@trip.com\n+ # @LastEditTime: 2025-03-09 20:59:24\n+ # @FilePath: /mcp-development-framework/.env.example\n+ # @Description: 这是默认设置,请设置`customMade`, 打开koroFileHeader查看配置 进行设置: https://github.com/OBKoro1/koro1FileHeader/wiki/%E9%85%8D%E7%BD%AE\n+### \n # Server Configuration\n MCP_SERVER_PORT=8000\n MCP_SERVER_HOST=0.0.0.0\n \n"
 63 |                 },
 64 |                 {
 65 |                     "date": 1741525471713,
 66 |                     "content": "Index: \n===================================================================\n--- \n+++ \n@@ -1,9 +1,9 @@\n ###\n  # @Author: 刘彦志 yanzhiliu@trip.com\n  # @Date: 2025-03-06 14:57:07\n  # @LastEditors: 刘彦志 yanzhiliu@trip.com\n- # @LastEditTime: 2025-03-09 20:59:24\n+ # @LastEditTime: 2025-03-09 21:04:31\n  # @FilePath: /mcp-development-framework/.env.example\n  # @Description: 这是默认设置,请设置`customMade`, 打开koroFileHeader查看配置 进行设置: https://github.com/OBKoro1/koro1FileHeader/wiki/%E9%85%8D%E7%BD%AE\n ### \n # Server Configuration\n"
 67 |                 },
 68 |                 {
 69 |                     "date": 1741525735591,
 70 |                     "content": "Index: \n===================================================================\n--- \n+++ \n@@ -1,9 +1,9 @@\n ###\n  # @Author: 刘彦志 yanzhiliu@trip.com\n  # @Date: 2025-03-06 14:57:07\n  # @LastEditors: 刘彦志 yanzhiliu@trip.com\n- # @LastEditTime: 2025-03-09 21:04:31\n+ # @LastEditTime: 2025-03-09 21:08:55\n  # @FilePath: /mcp-development-framework/.env.example\n  # @Description: 这是默认设置,请设置`customMade`, 打开koroFileHeader查看配置 进行设置: https://github.com/OBKoro1/koro1FileHeader/wiki/%E9%85%8D%E7%BD%AE\n ### \n # Server Configuration\n"
 71 |                 },
 72 |                 {
 73 |                     "date": 1741526027814,
 74 |                     "content": "Index: \n===================================================================\n--- \n+++ \n@@ -1,9 +1,9 @@\n ###\n  # @Author: 刘彦志 yanzhiliu@trip.com\n  # @Date: 2025-03-06 14:57:07\n  # @LastEditors: 刘彦志 yanzhiliu@trip.com\n- # @LastEditTime: 2025-03-09 21:08:55\n+ # @LastEditTime: 2025-03-09 21:13:47\n  # @FilePath: /mcp-development-framework/.env.example\n  # @Description: 这是默认设置,请设置`customMade`, 打开koroFileHeader查看配置 进行设置: https://github.com/OBKoro1/koro1FileHeader/wiki/%E9%85%8D%E7%BD%AE\n ### \n # Server Configuration\n"
 75 |                 },
 76 |                 {
 77 |                     "date": 1741576383122,
 78 |                     "content": "Index: \n===================================================================\n--- \n+++ \n@@ -1,12 +1,4 @@\n-###\n- # @Author: 刘彦志 yanzhiliu@trip.com\n- # @Date: 2025-03-06 14:57:07\n- # @LastEditors: 刘彦志 yanzhiliu@trip.com\n- # @LastEditTime: 2025-03-09 21:13:47\n- # @FilePath: /mcp-development-framework/.env.example\n- # @Description: 这是默认设置,请设置`customMade`, 打开koroFileHeader查看配置 进行设置: https://github.com/OBKoro1/koro1FileHeader/wiki/%E9%85%8D%E7%BD%AE\n-### \n # Server Configuration\n MCP_SERVER_PORT=8000\n MCP_SERVER_HOST=0.0.0.0\n \n"
 79 |                 },
 80 |                 {
 81 |                     "date": 1741763393097,
 82 |                     "content": "Index: \n===================================================================\n--- \n+++ \n@@ -1,9 +1,12 @@\n-# Server Configuration\n+# MaxKB配置\n+MAXKB_HOST=http://localhost:8080\n+MAXKB_CHAT_ID=4a99a706-ff0f-11ef-be75-0242ac110002\n+MAXKB_APPLICATION_ID=application-e689000edd89acb58572482651fa88e0\n+\n+# 其他配置\n+DEBUG=false\n MCP_SERVER_PORT=8000\n MCP_SERVER_HOST=0.0.0.0\n \n-# Optional: Set to 'true' to enable debug mode\n-DEBUG=false\n-\n # Optional: Set custom User-Agent for website fetching\n MCP_USER_AGENT=\"MCP Test Server (github.com/modelcontextprotocol/python-sdk)\" \n\\ No newline at end of file\n"
 83 |                 },
 84 |                 {
 85 |                     "date": 1741763853889,
 86 |                     "content": "Index: \n===================================================================\n--- \n+++ \n@@ -1,6 +1,12 @@\n # MaxKB配置\n-MAXKB_HOST=http://localhost:8080\n+\n+# 方案1：通过host.docker.internal访问宿主机（推荐）\n+MAXKB_HOST=http://host.docker.internal:8080\n+\n+# 方案2：直接访问MaxKB容器（需要在同一个Docker网络中）\n+# MAXKB_HOST=http://maxkb:8080\n+\n MAXKB_CHAT_ID=4a99a706-ff0f-11ef-be75-0242ac110002\n MAXKB_APPLICATION_ID=application-e689000edd89acb58572482651fa88e0\n \n # 其他配置\n"
 87 |                 },
 88 |                 {
 89 |                     "date": 1741765355187,
 90 |                     "content": "Index: \n===================================================================\n--- \n+++ \n@@ -1,18 +1,14 @@\n+# Server Configuration\n+MCP_SERVER_PORT=8000\n+MCP_SERVER_HOST=0.0.0.0\n+\n # MaxKB配置\n-\n-# 方案1：通过host.docker.internal访问宿主机（推荐）\n MAXKB_HOST=http://host.docker.internal:8080\n+MAXKB_CHAT_ID=your_chat_id_here\n+MAXKB_APPLICATION_ID=your_application_id_here\n \n-# 方案2：直接访问MaxKB容器（需要在同一个Docker网络中）\n-# MAXKB_HOST=http://maxkb:8080\n-\n-MAXKB_CHAT_ID=4a99a706-ff0f-11ef-be75-0242ac110002\n-MAXKB_APPLICATION_ID=application-e689000edd89acb58572482651fa88e0\n-\n-# 其他配置\n+# Optional: Set to 'true' to enable debug mode\n DEBUG=false\n-MCP_SERVER_PORT=8000\n-MCP_SERVER_HOST=0.0.0.0\n \n # Optional: Set custom User-Agent for website fetching\n MCP_USER_AGENT=\"MCP Test Server (github.com/modelcontextprotocol/python-sdk)\" \n\\ No newline at end of file\n"
 91 |                 },
 92 |                 {
 93 |                     "date": 1741944033744,
 94 |                     "content": "Index: \n===================================================================\n--- \n+++ \n@@ -10,5 +10,9 @@\n # Optional: Set to 'true' to enable debug mode\n DEBUG=false\n \n # Optional: Set custom User-Agent for website fetching\n-MCP_USER_AGENT=\"MCP Test Server (github.com/modelcontextprotocol/python-sdk)\" \n\\ No newline at end of file\n+MCP_USER_AGENT=\"MCP Test Server (github.com/modelcontextprotocol/python-sdk)\"\n+\n+# 本地目录挂载配置\n+HOST_MOUNT_SOURCE=/path/to/your/local/directory\n+HOST_MOUNT_TARGET=/host_files \n\\ No newline at end of file\n"
 95 |                 },
 96 |                 {
 97 |                     "date": 1744091461682,
 98 |                     "content": "Index: \n===================================================================\n--- \n+++ \n@@ -1,7 +1,9 @@\n # Server Configuration\n MCP_SERVER_PORT=8000\n MCP_SERVER_HOST=0.0.0.0\n+# 鉴权配置\n+MCP_AUTH_URL=http://170.106.105.206:4000/users\n \n # MaxKB配置\n MAXKB_HOST=http://host.docker.internal:8080\n MAXKB_CHAT_ID=your_chat_id_here\n"
 99 |                 },
100 |                 {
101 |                     "date": 1744096070549,
102 |                     "content": "Index: \n===================================================================\n--- \n+++ \n@@ -1,9 +1,7 @@\n # Server Configuration\n MCP_SERVER_PORT=8000\n MCP_SERVER_HOST=0.0.0.0\n-# 鉴权配置\n-MCP_AUTH_URL=http://170.106.105.206:4000/users\n \n # MaxKB配置\n MAXKB_HOST=http://host.docker.internal:8080\n MAXKB_CHAT_ID=your_chat_id_here\n"
103 |                 },
104 |                 {
105 |                     "date": 1744098373299,
106 |                     "content": "Index: \n===================================================================\n--- \n+++ \n@@ -1,7 +1,9 @@\n # Server Configuration\n MCP_SERVER_PORT=8000\n MCP_SERVER_HOST=0.0.0.0\n+# 鉴权配置\n+MCP_AUTH_URL=http://170.106.105.206:4000/users\n \n # MaxKB配置\n MAXKB_HOST=http://host.docker.internal:8080\n MAXKB_CHAT_ID=your_chat_id_here\n"
107 |                 },
108 |                 {
109 |                     "date": 1744180913503,
110 |                     "content": "Index: \n===================================================================\n--- \n+++ \n@@ -2,8 +2,9 @@\n MCP_SERVER_PORT=8000\n MCP_SERVER_HOST=0.0.0.0\n # 鉴权配置\n MCP_AUTH_URL=http://170.106.105.206:4000/users\n+MCP_API_KEY=your-api-key-here\n \n # MaxKB配置\n MAXKB_HOST=http://host.docker.internal:8080\n MAXKB_CHAT_ID=your_chat_id_here\n"
111 |                 },
112 |                 {
113 |                     "date": 1744181141810,
114 |                     "content": "Index: \n===================================================================\n--- \n+++ \n@@ -2,9 +2,8 @@\n MCP_SERVER_PORT=8000\n MCP_SERVER_HOST=0.0.0.0\n # 鉴权配置\n MCP_AUTH_URL=http://170.106.105.206:4000/users\n-MCP_API_KEY=your-api-key-here\n \n # MaxKB配置\n MAXKB_HOST=http://host.docker.internal:8080\n MAXKB_CHAT_ID=your_chat_id_here\n"
115 |                 }
116 |             ],
117 |             "date": 1741252766715,
118 |             "name": "Commit-0",
119 |             "content": "###\n # @Author: 刘彦志 yanzhiliu@trip.com\n # @Date: 2025-03-06 14:57:07\n # @LastEditors: 刘彦志 yanzhiliu@trip.com\n # @LastEditTime: 2025-03-06 17:19:26\n # @FilePath: /weaviate-mcp-server/.env.example\n # @Description: 这是默认设置,请设置`customMade`, 打开koroFileHeader查看配置 进行设置: https://github.com/OBKoro1/koro1FileHeader/wiki/%E9%85%8D%E7%BD%AE\n### \n# Server Configuration\nMCP_SERVER_PORT=8000\nMCP_SERVER_HOST=0.0.0.0\n\n# Optional: Set to 'true' to enable debug mode\nDEBUG=false\n\n# Optional: Set custom User-Agent for website fetching\nMCP_USER_AGENT=\"MCP Test Server (github.com/modelcontextprotocol/python-sdk)\" "
120 |         }
121 |     ]
122 | }


--------------------------------------------------------------------------------
/mcp_tool/tools/word_tool.py:
--------------------------------------------------------------------------------
  1 | """
  2 | Word文档解析工具，用于解析Word文档内容
  3 | """
  4 | 
  5 | import os
  6 | import traceback
  7 | import io
  8 | import base64
  9 | import imghdr
 10 | import tempfile
 11 | import subprocess
 12 | import shutil
 13 | from typing import Dict, List, Any, Tuple, Optional
 14 | import docx
 15 | from docx.document import Document
 16 | from docx.parts.document import DocumentPart
 17 | from docx.package import Package
 18 | import mcp.types as types
 19 | from . import BaseTool, ToolRegistry
 20 | 
 21 | @ToolRegistry.register
 22 | class WordTool(BaseTool):
 23 |     """
 24 |     用于解析Word文档的工具，提取文本内容、表格和图片信息
 25 |     支持.docx和.doc(Word 97-2003)格式
 26 |     """
 27 |     
 28 |     name = "parse_word"
 29 |     description = "解析Word文档内容，提取文本、表格和图片信息"
 30 |     input_schema = {
 31 |         "type": "object",
 32 |         "required": ["file_path"],
 33 |         "properties": {
 34 |             "file_path": {
 35 |                 "type": "string",
 36 |                 "description": "Word文档的本地路径，例如'/path/to/document.docx'",
 37 |             }
 38 |         },
 39 |     }
 40 |     
 41 |     async def execute(self, arguments: Dict[str, Any]) -> List[types.TextContent | types.ImageContent | types.EmbeddedResource]:
 42 |         """
 43 |         解析Word文档
 44 |         
 45 |         Args:
 46 |             arguments: 参数字典，必须包含'file_path'键
 47 |             
 48 |         Returns:
 49 |             解析结果列表
 50 |         """
 51 |         if "file_path" not in arguments:
 52 |             return [types.TextContent(
 53 |                 type="text",
 54 |                 text="错误: 缺少必要参数 'file_path'"
 55 |             )]
 56 |         
 57 |         # 处理文件路径，支持挂载目录的转换
 58 |         file_path = self.process_file_path(arguments["file_path"])
 59 |         
 60 |         return await self._parse_word_document(file_path)
 61 |     
 62 |     def _get_image_mime_type(self, image_bytes: bytes) -> str:
 63 |         """
 64 |         获取图片的MIME类型
 65 |         """
 66 |         image_type = imghdr.what(None, image_bytes)
 67 |         if image_type:
 68 |             return f"image/{image_type}"
 69 |         return "image/png"  # 默认返回PNG类型
 70 |     
 71 |     def _encode_image_base64(self, image_bytes: bytes) -> str:
 72 |         """
 73 |         将图片编码为base64格式
 74 |         """
 75 |         return base64.b64encode(image_bytes).decode('utf-8')
 76 |     
 77 |     def _is_valid_image(self, image_bytes: bytes) -> bool:
 78 |         """
 79 |         检查数据是否为有效的图片
 80 |         
 81 |         Args:
 82 |             image_bytes: 图片二进制数据
 83 |             
 84 |         Returns:
 85 |             是否为有效图片
 86 |         """
 87 |         # 检查常见图片格式的文件头特征
 88 |         if len(image_bytes) < 12:
 89 |             return False  # 文件太小，不可能是有效图片
 90 |             
 91 |         # 使用imghdr识别图片类型
 92 |         image_type = imghdr.what(None, image_bytes)
 93 |         if not image_type:
 94 |             return False
 95 |             
 96 |         # 进一步验证常见图片格式的文件头特征
 97 |         file_signatures = {
 98 |             'jpeg': [bytes([0xFF, 0xD8, 0xFF])],  # JPEG
 99 |             'png': [bytes([0x89, 0x50, 0x4E, 0x47, 0x0D, 0x0A, 0x1A, 0x0A])],  # PNG
100 |             'gif': [bytes([0x47, 0x49, 0x46, 0x38, 0x37, 0x61]), bytes([0x47, 0x49, 0x46, 0x38, 0x39, 0x61])],  # GIF
101 |             'bmp': [bytes([0x42, 0x4D])],  # BMP
102 |             'webp': [bytes([0x52, 0x49, 0x46, 0x46]) + b'....WEBP'],  # WEBP (使用模式匹配)
103 |         }
104 |         
105 |         # 检查文件头是否匹配任何已知图片格式
106 |         if image_type in file_signatures:
107 |             for signature in file_signatures[image_type]:
108 |                 if len(signature) <= len(image_bytes):
109 |                     # 对于WEBP这种需要模式匹配的格式特殊处理
110 |                     if image_type == 'webp':
111 |                         if image_bytes.startswith(b'RIFF') and b'WEBP' in image_bytes[0:12]:
112 |                             return True
113 |                     # 直接比较字节序列
114 |                     elif image_bytes.startswith(signature):
115 |                         return True
116 |             return False
117 |         
118 |         # 未知格式但imghdr认为是图片，需要更严格的验证
119 |         try:
120 |             from PIL import Image
121 |             img = Image.open(io.BytesIO(image_bytes))
122 |             img.verify()  # 验证图片完整性
123 |             return True
124 |         except Exception:
125 |             return False
126 |     
127 |     def _extract_images_from_word(self, doc: Document) -> List[Tuple[str, bytes]]:
128 |         """
129 |         从Word文档中提取图片，过滤掉嵌入的外部文档
130 |         
131 |         Args:
132 |             doc: Word文档对象
133 |             
134 |         Returns:
135 |             图片列表，每项包含图片ID和二进制数据
136 |         """
137 |         images = []
138 |         document_part = doc.part
139 |         rels = document_part.rels
140 |         
141 |         for rel in rels.values():
142 |             try:
143 |                 # 只处理图片类型的关系
144 |                 if "image" in rel.reltype:
145 |                     image_part = rel.target_part
146 |                     image_bytes = image_part.blob
147 |                     image_id = rel.rId
148 |                     
149 |                     # 验证是否为真实图片，过滤掉嵌入的外部文档
150 |                     if self._is_valid_image(image_bytes):
151 |                         images.append((image_id, image_bytes))
152 |             except Exception:
153 |                 continue
154 |                     
155 |         return images
156 |     
157 |     def _is_libreoffice_installed(self) -> bool:
158 |         """
159 |         检查系统是否安装了LibreOffice
160 |         
161 |         Returns:
162 |             布尔值，表示LibreOffice是否可用
163 |         """
164 |         try:
165 |             # 尝试执行LibreOffice命令，检查是否安装
166 |             result = subprocess.run(
167 |                 ["soffice", "--version"], 
168 |                 stdout=subprocess.PIPE, 
169 |                 stderr=subprocess.PIPE,
170 |                 timeout=5  # 设置超时，避免长时间等待
171 |             )
172 |             return result.returncode == 0
173 |         except (subprocess.SubprocessError, FileNotFoundError, OSError):
174 |             return False
175 |     
176 |     def _convert_doc_to_docx(self, file_path: str) -> Optional[str]:
177 |         """
178 |         使用LibreOffice将.doc文件转换为.docx格式
179 |         
180 |         Args:
181 |             file_path: .doc文档路径
182 |             
183 |         Returns:
184 |             转换后的.docx文件路径，如果转换失败则返回None
185 |             
186 |         Raises:
187 |             Exception: 当转换过程中出现错误时
188 |         """
189 |         # 创建临时目录用于存放转换结果
190 |         temp_dir = tempfile.mkdtemp(prefix="word_convert_")
191 |         
192 |         try:
193 |             # 获取文件名（不含路径）
194 |             file_name = os.path.basename(file_path)
195 |             # 基础文件名（不含扩展名）
196 |             base_name = os.path.splitext(file_name)[0]
197 |             # 预期的转换结果文件路径
198 |             output_docx = os.path.join(temp_dir, f"{base_name}.docx")
199 |             
200 |             # 调用LibreOffice进行格式转换
201 |             process = subprocess.run(
202 |                 [
203 |                     "soffice",
204 |                     "--headless",
205 |                     "--convert-to", "docx",
206 |                     "--outdir", temp_dir,
207 |                     file_path
208 |                 ],
209 |                 capture_output=True,
210 |                 text=True,
211 |                 timeout=60  # 设置一分钟超时
212 |             )
213 |             
214 |             # 检查转换是否成功
215 |             if process.returncode != 0:
216 |                 error_msg = process.stderr.strip() or "未知错误"
217 |                 raise Exception(f"LibreOffice转换失败: {error_msg}")
218 |             
219 |             # 验证输出文件是否存在
220 |             if not os.path.exists(output_docx):
221 |                 raise Exception("转换后的文件不存在")
222 |                 
223 |             # 检查文件大小，确保不是空文件
224 |             if os.path.getsize(output_docx) == 0:
225 |                 raise Exception("转换结果为空文件")
226 |             
227 |             return output_docx
228 |             
229 |         except subprocess.TimeoutExpired:
230 |             raise Exception("文档转换超时，可能是文档过大或复杂")
231 |         except Exception as e:
232 |             raise Exception(f"转换.doc文件时出错: {str(e)}")
233 |         
234 |     async def _parse_word_document(self, file_path: str) -> List[types.TextContent | types.ImageContent | types.EmbeddedResource]:
235 |         """
236 |         解析Word文档内容，支持.docx和.doc格式
237 |         
238 |         Args:
239 |             file_path: Word文档路径
240 |             
241 |         Returns:
242 |             Word文档内容列表
243 |         """
244 |         results = []
245 |         temp_docx_path = None
246 |         
247 |         # 检查文件是否存在
248 |         if not os.path.exists(file_path):
249 |             return [types.TextContent(
250 |                 type="text",
251 |                 text=f"错误: 文件不存在: {file_path}\n请检查路径是否正确，并确保文件可访问。"
252 |             )]
253 |         
254 |         # 检查文件扩展名
255 |         if not file_path.lower().endswith(('.docx', '.doc')):
256 |             return [types.TextContent(
257 |                 type="text",
258 |                 text=f"错误: 不支持的文件格式: {file_path}\n仅支持.docx和.doc格式的Word文档。"
259 |             )]
260 |         
261 |         try:
262 |             # 添加文件信息
263 |             file_size_mb = os.path.getsize(file_path) / (1024 * 1024)
264 |             
265 |             # 处理.doc格式（Word 97-2003文档）
266 |             if file_path.lower().endswith('.doc'):
267 |                 results.append(types.TextContent(
268 |                     type="text",
269 |                     text=f"# Word文档解析 (Word 97-2003 格式)\n\n文件大小: {file_size_mb:.2f} MB"
270 |                 ))
271 |                 
272 |                 # 检查LibreOffice是否可用
273 |                 if not self._is_libreoffice_installed():
274 |                     return [types.TextContent(
275 |                         type="text",
276 |                         text="错误: 无法解析Word 97-2003 (.doc)格式。\n"
277 |                              "系统未安装LibreOffice，无法进行格式转换。\n"
278 |                              "请安装LibreOffice后重试，或将文档另存为.docx格式。"
279 |                     )]
280 |                 
281 |                 try:
282 |                     # 显示转换提示
283 |                     results.append(types.TextContent(
284 |                         type="text",
285 |                         text="正在使用LibreOffice转换文档格式，请稍候..."
286 |                     ))
287 |                     
288 |                     # 转换.doc到.docx
289 |                     temp_docx_path = self._convert_doc_to_docx(file_path)
290 |                     
291 |                     # 更新文件路径为转换后的文件
292 |                     file_path = temp_docx_path
293 |                     
294 |                     results.append(types.TextContent(
295 |                         type="text",
296 |                         text="文档格式转换完成，继续解析...\n"
297 |                     ))
298 |                 except Exception as e:
299 |                     return results + [types.TextContent(
300 |                         type="text",
301 |                         text=f"错误: {str(e)}\n"
302 |                              f"建议:\n"
303 |                              f"1. 确保已正确安装LibreOffice且可通过命令行访问\n"
304 |                              f"2. 尝试手动将文档转换为.docx格式后重试\n"
305 |                              f"3. 检查文档是否加密或损坏"
306 |                     )]
307 |             else:
308 |                 results.append(types.TextContent(
309 |                     type="text",
310 |                     text=f"# Word文档解析\n\n文件大小: {file_size_mb:.2f} MB"
311 |                 ))
312 |             
313 |             # 打开Word文档
314 |             doc = docx.Document(file_path)
315 |             
316 |             # 提取文档属性
317 |             properties = {}
318 |             if hasattr(doc.core_properties, 'title') and doc.core_properties.title:
319 |                 properties['标题'] = doc.core_properties.title
320 |             if hasattr(doc.core_properties, 'author') and doc.core_properties.author:
321 |                 properties['作者'] = doc.core_properties.author
322 |             if hasattr(doc.core_properties, 'created') and doc.core_properties.created:
323 |                 properties['创建时间'] = str(doc.core_properties.created)
324 |             if hasattr(doc.core_properties, 'modified') and doc.core_properties.modified:
325 |                 properties['修改时间'] = str(doc.core_properties.modified)
326 |             if hasattr(doc.core_properties, 'comments') and doc.core_properties.comments:
327 |                 properties['备注'] = doc.core_properties.comments
328 |             
329 |             # 添加文档属性信息
330 |             if properties:
331 |                 properties_text = "## 文档属性\n\n"
332 |                 for key, value in properties.items():
333 |                     properties_text += f"- {key}: {value}\n"
334 |                 results.append(types.TextContent(
335 |                     type="text",
336 |                     text=properties_text
337 |                 ))
338 |             
339 |             # 提取文档内容
340 |             content_text = "## 文档内容\n\n"
341 |             
342 |             # 处理段落
343 |             paragraphs_count = len(doc.paragraphs)
344 |             content_text += f"### 段落 (共{paragraphs_count}个)\n\n"
345 |             
346 |             for i, para in enumerate(doc.paragraphs):
347 |                 if para.text.strip():  # 只处理非空段落
348 |                     content_text += f"{para.text}\n\n"
349 |             
350 |             # 处理表格
351 |             tables_count = len(doc.tables)
352 |             if tables_count > 0:
353 |                 content_text += f"### 表格 (共{tables_count}个)\n\n"
354 |                 
355 |                 for i, table in enumerate(doc.tables):
356 |                     content_text += f"#### 表格 {i+1}\n\n"
357 |                     
358 |                     # 创建Markdown表格
359 |                     rows = []
360 |                     for row in table.rows:
361 |                         cells = [cell.text.replace('\n', ' ').strip() for cell in row.cells]
362 |                         rows.append(cells)
363 |                     
364 |                     if rows:
365 |                         # 表头
366 |                         content_text += "| " + " | ".join(rows[0]) + " |\n"
367 |                         # 分隔线
368 |                         content_text += "| " + " | ".join(["---"] * len(rows[0])) + " |\n"
369 |                         # 表格内容
370 |                         for row in rows[1:]:
371 |                             content_text += "| " + " | ".join(row) + " |\n"
372 |                         
373 |                         content_text += "\n"
374 |             
375 |             # 添加文档内容
376 |             results.append(types.TextContent(
377 |                 type="text",
378 |                 text=content_text
379 |             ))
380 |             
381 |             # 提取图片信息和内容
382 |             try:
383 |                 # 提取文档中的所有图片，并过滤掉嵌入的外部文档
384 |                 images = self._extract_images_from_word(doc)
385 |                 
386 |                 if images:
387 |                     image_info = f"## 图片信息\n\n文档中包含 {len(images)} 张图片。\n\n"
388 |                     results.append(types.TextContent(
389 |                         type="text",
390 |                         text=image_info
391 |                     ))
392 |                     
393 |                     # 返回图片内容
394 |                     for i, (image_id, image_bytes) in enumerate(images):
395 |                         try:
396 |                             # 获取图片MIME类型
397 |                             mime_type = self._get_image_mime_type(image_bytes)
398 |                             
399 |                             # 将图片添加到结果中
400 |                             image_base64 = self._encode_image_base64(image_bytes)
401 |                             results.append(types.TextContent(
402 |                                 type="text",
403 |                                 text=f"### 图片 {i+1}\n\n"
404 |                             ))
405 |                             results.append(types.ImageContent(
406 |                                 type="image",
407 |                                 data=image_base64,
408 |                                 mimeType=mime_type
409 |                             ))
410 |                         except Exception as e:
411 |                             # 记录图片处理错误但不中断
412 |                             results.append(types.TextContent(
413 |                                 type="text",
414 |                                 text=f"注意: 图片 {i+1} 处理失败: {str(e)}"
415 |                             ))
416 |                 else:
417 |                     results.append(types.TextContent(
418 |                         type="text",
419 |                         text="## 图片信息\n\n文档中未包含图片或嵌入对象均不是有效图片。"
420 |                     ))
421 |             except Exception as img_error:
422 |                 results.append(types.TextContent(
423 |                     type="text",
424 |                     text=f"警告: 提取图片信息时出错: {str(img_error)}"
425 |                 ))
426 |             
427 |             # 添加处理完成的提示
428 |             results.append(types.TextContent(
429 |                 type="text",
430 |                 text="Word文档处理完成！"
431 |             ))
432 |             
433 |             return results
434 |         except Exception as e:
435 |             error_details = traceback.format_exc()
436 |             return [types.TextContent(
437 |                 type="text",
438 |                 text=f"错误: 解析Word文档失败: {str(e)}\n"
439 |                      f"可能的原因:\n"
440 |                      f"1. 文件格式不兼容或已损坏\n"
441 |                      f"2. 文件受密码保护\n"
442 |                      f"3. 文件包含不支持的内容\n\n"
443 |                      f"详细错误信息: {error_details}"
444 |             )]
445 |         finally:
446 |             # 清理临时文件
447 |             if temp_docx_path and os.path.exists(temp_docx_path):
448 |                 try:
449 |                     # 删除临时文件
450 |                     temp_dir = os.path.dirname(temp_docx_path)
451 |                     shutil.rmtree(temp_dir, ignore_errors=True)
452 |                 except Exception:
453 |                     # 忽略清理过程中的错误
454 |                     pass 


--------------------------------------------------------------------------------
/.lh/Dockerfile.json:
--------------------------------------------------------------------------------
  1 | {
  2 |     "sourceFile": "Dockerfile",
  3 |     "activeCommit": 0,
  4 |     "commits": [
  5 |         {
  6 |             "activePatchIndex": 28,
  7 |             "patches": [
  8 |                 {
  9 |                     "date": 1741245157547,
 10 |                     "content": "Index: \n===================================================================\n--- \n+++ \n"
 11 |                 },
 12 |                 {
 13 |                     "date": 1741258783838,
 14 |                     "content": "Index: \n===================================================================\n--- \n+++ \n@@ -8,8 +8,11 @@\n RUN apt-get update && apt-get install -y \\\n     build-essential \\\n     curl \\\n     poppler-utils \\\n+    tesseract-ocr \\\n+    tesseract-ocr-eng \\\n+    tesseract-ocr-chi-sim \\\n     && rm -rf /var/lib/apt/lists/*\n \n # Copy project files\n COPY . .\n"
 15 |                 },
 16 |                 {
 17 |                     "date": 1741259379637,
 18 |                     "content": "Index: \n===================================================================\n--- \n+++ \n@@ -8,11 +8,8 @@\n RUN apt-get update && apt-get install -y \\\n     build-essential \\\n     curl \\\n     poppler-utils \\\n-    tesseract-ocr \\\n-    tesseract-ocr-eng \\\n-    tesseract-ocr-chi-sim \\\n     && rm -rf /var/lib/apt/lists/*\n \n # Copy project files\n COPY . .\n"
 19 |                 },
 20 |                 {
 21 |                     "date": 1741259527600,
 22 |                     "content": "Index: \n===================================================================\n--- \n+++ \n@@ -8,8 +8,11 @@\n RUN apt-get update && apt-get install -y \\\n     build-essential \\\n     curl \\\n     poppler-utils \\\n+    tesseract-ocr \\\n+    tesseract-ocr-chi-sim \\\n+    tesseract-ocr-eng \\\n     && rm -rf /var/lib/apt/lists/*\n \n # Copy project files\n COPY . .\n"
 23 |                 },
 24 |                 {
 25 |                     "date": 1741259592267,
 26 |                     "content": "Index: \n===================================================================\n--- \n+++ \n@@ -8,11 +8,8 @@\n RUN apt-get update && apt-get install -y \\\n     build-essential \\\n     curl \\\n     poppler-utils \\\n-    tesseract-ocr \\\n-    tesseract-ocr-chi-sim \\\n-    tesseract-ocr-eng \\\n     && rm -rf /var/lib/apt/lists/*\n \n # Copy project files\n COPY . .\n"
 27 |                 },
 28 |                 {
 29 |                     "date": 1741496348552,
 30 |                     "content": "Index: \n===================================================================\n--- \n+++ \n@@ -13,12 +13,12 @@\n \n # Copy project files\n COPY . .\n \n-# Install the package in editable mode\n-RUN pip install --no-cache-dir -e \".[dev]\"\n+# Install the package in editable mode with proper path\n+RUN pip install --no-cache-dir -e .\n \n # Expose the port\n EXPOSE 8000\n \n\\ No newline at end of file\n # Run the server with SSE transport\n-CMD [\"mcp-simple-tool\", \"--transport\", \"sse\", \"--port\", \"8000\"] \n+CMD [\"python\", \"-m\", \"mcp_simple_tool\", \"--transport\", \"sse\", \"--port\", \"8000\"] \n\\ No newline at end of file\n"
 31 |                 },
 32 |                 {
 33 |                     "date": 1741496544769,
 34 |                     "content": "Index: \n===================================================================\n--- \n+++ \n@@ -20,5 +20,5 @@\n # Expose the port\n EXPOSE 8000\n \n # Run the server with SSE transport\n-CMD [\"python\", \"-m\", \"mcp_simple_tool\", \"--transport\", \"sse\", \"--port\", \"8000\"] \n\\ No newline at end of file\n+CMD [\"python\", \"-m\", \"mcp_tool\", \"--transport\", \"sse\", \"--port\", \"8000\"] \n\\ No newline at end of file\n"
 35 |                 },
 36 |                 {
 37 |                     "date": 1741529804201,
 38 |                     "content": "Index: \n===================================================================\n--- \n+++ \n@@ -20,5 +20,5 @@\n # Expose the port\n EXPOSE 8000\n \n # Run the server with SSE transport\n-CMD [\"python\", \"-m\", \"mcp_tool\", \"--transport\", \"sse\", \"--port\", \"8000\"] \n\\ No newline at end of file\n+CMD [\"python\", \"-m\", \"mcp_simple_tool\", \"--transport\", \"sse\", \"--port\", \"8000\"] \n\\ No newline at end of file\n"
 39 |                 },
 40 |                 {
 41 |                     "date": 1741529967474,
 42 |                     "content": "Index: \n===================================================================\n--- \n+++ \n@@ -20,5 +20,5 @@\n # Expose the port\n EXPOSE 8000\n \n # Run the server with SSE transport\n-CMD [\"python\", \"-m\", \"mcp_simple_tool\", \"--transport\", \"sse\", \"--port\", \"8000\"] \n\\ No newline at end of file\n+CMD [\"python\", \"-m\", \"mcp_tool\", \"--transport\", \"sse\", \"--port\", \"8000\"] \n\\ No newline at end of file\n"
 43 |                 },
 44 |                 {
 45 |                     "date": 1741668945541,
 46 |                     "content": "Index: \n===================================================================\n--- \n+++ \n@@ -3,13 +3,20 @@\n \n # Set working directory\n WORKDIR /app\n \n-# Install build dependencies and curl for healthcheck\n+# Install build dependencies and required packages\n RUN apt-get update && apt-get install -y \\\n     build-essential \\\n     curl \\\n     poppler-utils \\\n+    libgl1-mesa-glx \\\n+    libglib2.0-0 \\\n+    libsm6 \\\n+    libxext6 \\\n+    libxrender-dev \\\n+    libpango-1.0-0 \\\n+    libpangocairo-1.0-0 \\\n     && rm -rf /var/lib/apt/lists/*\n \n # Copy project files\n COPY . .\n"
 47 |                 },
 48 |                 {
 49 |                     "date": 1741669852360,
 50 |                     "content": "Index: \n===================================================================\n--- \n+++ \n@@ -3,20 +3,13 @@\n \n # Set working directory\n WORKDIR /app\n \n-# Install build dependencies and required packages\n+# Install build dependencies and curl for healthcheck\n RUN apt-get update && apt-get install -y \\\n     build-essential \\\n     curl \\\n     poppler-utils \\\n-    libgl1-mesa-glx \\\n-    libglib2.0-0 \\\n-    libsm6 \\\n-    libxext6 \\\n-    libxrender-dev \\\n-    libpango-1.0-0 \\\n-    libpangocairo-1.0-0 \\\n     && rm -rf /var/lib/apt/lists/*\n \n # Copy project files\n COPY . .\n"
 51 |                 },
 52 |                 {
 53 |                     "date": 1741765604403,
 54 |                     "content": "Index: \n===================================================================\n--- \n+++ \n@@ -8,8 +8,9 @@\n RUN apt-get update && apt-get install -y \\\n     build-essential \\\n     curl \\\n     poppler-utils \\\n+    tesseract-ocr \\\n     && rm -rf /var/lib/apt/lists/*\n \n # Copy project files\n COPY . .\n"
 55 |                 },
 56 |                 {
 57 |                     "date": 1741765639526,
 58 |                     "content": "Index: \n===================================================================\n--- \n+++ \n@@ -11,8 +11,13 @@\n     poppler-utils \\\n     tesseract-ocr \\\n     && rm -rf /var/lib/apt/lists/*\n \n+# Configure pip to use Aliyun mirror and increase timeout\n+RUN pip config set global.index-url https://mirrors.aliyun.com/pypi/simple/ \\\n+    && pip config set global.trusted-host mirrors.aliyun.com \\\n+    && pip config set global.timeout 120\n+\n # Copy project files\n COPY . .\n \n # Install the package in editable mode with proper path\n"
 59 |                 },
 60 |                 {
 61 |                     "date": 1741765649689,
 62 |                     "content": "Index: \n===================================================================\n--- \n+++ \n@@ -8,16 +8,10 @@\n RUN apt-get update && apt-get install -y \\\n     build-essential \\\n     curl \\\n     poppler-utils \\\n-    tesseract-ocr \\\n     && rm -rf /var/lib/apt/lists/*\n \n-# Configure pip to use Aliyun mirror and increase timeout\n-RUN pip config set global.index-url https://mirrors.aliyun.com/pypi/simple/ \\\n-    && pip config set global.trusted-host mirrors.aliyun.com \\\n-    && pip config set global.timeout 120\n-\n # Copy project files\n COPY . .\n \n # Install the package in editable mode with proper path\n"
 63 |                 },
 64 |                 {
 65 |                     "date": 1741766545615,
 66 |                     "content": "Index: \n===================================================================\n--- \n+++ \n@@ -7,8 +7,11 @@\n # Install build dependencies and curl for healthcheck\n RUN apt-get update && apt-get install -y \\\n     build-essential \\\n     curl \\\n+    python3-dev \\\n+    libxml2-dev \\\n+    libxslt-dev \\\n     poppler-utils \\\n     && rm -rf /var/lib/apt/lists/*\n \n # Copy project files\n"
 67 |                 },
 68 |                 {
 69 |                     "date": 1741767018759,
 70 |                     "content": "Index: \n===================================================================\n--- \n+++ \n@@ -7,11 +7,8 @@\n # Install build dependencies and curl for healthcheck\n RUN apt-get update && apt-get install -y \\\n     build-essential \\\n     curl \\\n-    python3-dev \\\n-    libxml2-dev \\\n-    libxslt-dev \\\n     poppler-utils \\\n     && rm -rf /var/lib/apt/lists/*\n \n # Copy project files\n"
 71 |                 },
 72 |                 {
 73 |                     "date": 1742297240834,
 74 |                     "content": "Index: \n===================================================================\n--- \n+++ \n@@ -5,8 +5,11 @@\n WORKDIR /app\n \n # Install build dependencies and curl for healthcheck\n RUN apt-get update && apt-get install -y \\\n+    gnupg \\\n+    && apt-key adv --keyserver keyserver.ubuntu.com --recv-keys 0E98404D386FA1D9 \\\n+    && apt-get update && apt-get install -y \\\n     build-essential \\\n     curl \\\n     poppler-utils \\\n     && rm -rf /var/lib/apt/lists/*\n"
 75 |                 },
 76 |                 {
 77 |                     "date": 1742297392893,
 78 |                     "content": "Index: \n===================================================================\n--- \n+++ \n@@ -4,12 +4,12 @@\n # Set working directory\n WORKDIR /app\n \n # Install build dependencies and curl for healthcheck\n-RUN apt-get update && apt-get install -y \\\n-    gnupg \\\n-    && apt-key adv --keyserver keyserver.ubuntu.com --recv-keys 0E98404D386FA1D9 \\\n-    && apt-get update && apt-get install -y \\\n+RUN apt-get clean && \\\n+    rm -rf /var/lib/apt/lists/* && \\\n+    apt-get update --fix-missing && \\\n+    apt-get install -y \\\n     build-essential \\\n     curl \\\n     poppler-utils \\\n     && rm -rf /var/lib/apt/lists/*\n"
 79 |                 },
 80 |                 {
 81 |                     "date": 1742297503072,
 82 |                     "content": "Index: \n===================================================================\n--- \n+++ \n@@ -1,15 +1,12 @@\n # Use Python 3.10 slim image as base\n-FROM python:3.10-slim\n+FROM python:3.10.13-slim-bullseye\n \n # Set working directory\n WORKDIR /app\n \n # Install build dependencies and curl for healthcheck\n-RUN apt-get clean && \\\n-    rm -rf /var/lib/apt/lists/* && \\\n-    apt-get update --fix-missing && \\\n-    apt-get install -y \\\n+RUN apt-get update && apt-get install -y \\\n     build-essential \\\n     curl \\\n     poppler-utils \\\n     && rm -rf /var/lib/apt/lists/*\n"
 83 |                 },
 84 |                 {
 85 |                     "date": 1742297532430,
 86 |                     "content": "Index: \n===================================================================\n--- \n+++ \n@@ -1,6 +1,6 @@\n # Use Python 3.10 slim image as base\n-FROM python:3.10.13-slim-bullseye\n+FROM python:3.10-slim\n \n # Set working directory\n WORKDIR /app\n \n"
 87 |                 },
 88 |                 {
 89 |                     "date": 1742297908270,
 90 |                     "content": "Index: \n===================================================================\n--- \n+++ \n@@ -4,9 +4,13 @@\n # Set working directory\n WORKDIR /app\n \n # Install build dependencies and curl for healthcheck\n-RUN apt-get update && apt-get install -y \\\n+RUN apt-get update && \\\n+    apt-get install -y --no-install-recommends gnupg2 && \\\n+    apt-key update && \\\n+    apt-get update && \\\n+    apt-get install -y \\\n     build-essential \\\n     curl \\\n     poppler-utils \\\n     && rm -rf /var/lib/apt/lists/*\n"
 91 |                 },
 92 |                 {
 93 |                     "date": 1742298011438,
 94 |                     "content": "Index: \n===================================================================\n--- \n+++ \n@@ -1,16 +1,12 @@\n-# Use Python 3.10 slim image as base\n-FROM python:3.10-slim\n+# Use Python 3.10 bullseye image as base\n+FROM python:3.10-bullseye\n \n # Set working directory\n WORKDIR /app\n \n # Install build dependencies and curl for healthcheck\n-RUN apt-get update && \\\n-    apt-get install -y --no-install-recommends gnupg2 && \\\n-    apt-key update && \\\n-    apt-get update && \\\n-    apt-get install -y \\\n+RUN apt-get update && apt-get install -y \\\n     build-essential \\\n     curl \\\n     poppler-utils \\\n     && rm -rf /var/lib/apt/lists/*\n"
 95 |                 },
 96 |                 {
 97 |                     "date": 1742298072974,
 98 |                     "content": "Index: \n===================================================================\n--- \n+++ \n@@ -1,6 +1,6 @@\n-# Use Python 3.10 bullseye image as base\n-FROM python:3.10-bullseye\n+# Use Python 3.10 slim image as base\n+FROM python:3.10-slim\n \n # Set working directory\n WORKDIR /app\n \n"
 99 |                 },
100 |                 {
101 |                     "date": 1744091992789,
102 |                     "content": "Index: \n===================================================================\n--- \n+++ \n@@ -1,6 +1,6 @@\n # Use Python 3.10 slim image as base\n-FROM python:3.10-slim\n+FROM mirrors.ustc.edu.cn/python:3.10-slim\n \n # Set working directory\n WORKDIR /app\n \n"
103 |                 },
104 |                 {
105 |                     "date": 1744092113363,
106 |                     "content": "Index: \n===================================================================\n--- \n+++ \n@@ -1,6 +1,6 @@\n # Use Python 3.10 slim image as base\n-FROM mirrors.ustc.edu.cn/python:3.10-slim\n+FROM python:3.10-slim\n \n # Set working directory\n WORKDIR /app\n \n"
107 |                 },
108 |                 {
109 |                     "date": 1745551240589,
110 |                     "content": "Index: \n===================================================================\n--- \n+++ \n@@ -0,0 +1,30 @@\n+# Use Python 3.10 slim image as base\n+FROM python:3.10-slim\n+\n+# Set working directory\n+WORKDIR /app\n+\n+# Install build dependencies and required libraries\n+RUN apt-get update && apt-get install -y \\\n+    build-essential \\\n+    curl \\\n+    poppler-utils \\\n+    tesseract-ocr \\\n+    tesseract-ocr-chi-sim \\\n+    tesseract-ocr-eng \\\n+    ffmpeg \\\n+    libsm6 \\\n+    libxext6 \\\n+    && rm -rf /var/lib/apt/lists/*\n+\n+# Copy project files\n+COPY . .\n+\n+# Install the package in editable mode with proper path\n+RUN pip install --no-cache-dir -e .\n+\n+# Expose the port\n+EXPOSE 8000\n+\n+# Run the server with SSE transport\n+CMD [\"python\", \"-m\", \"mcp_tool\", \"--transport\", \"sse\", \"--port\", \"8000\"] \n\\ No newline at end of file\n"
111 |                 },
112 |                 {
113 |                     "date": 1745917875171,
114 |                     "content": "Index: \n===================================================================\n--- \n+++ \n@@ -22,32 +22,14 @@\n \n # Install the package in editable mode with proper path\n RUN pip install --no-cache-dir -e .\n \n-# Expose the port\n-EXPOSE 8000\n+# Install dependencies\n+RUN pip install --no-cache-dir -r requirements.txt\n \n-# Run the server with SSE transport\n-CMD [\"python\", \"-m\", \"mcp_tool\", \"--transport\", \"sse\", \"--port\", \"8000\"] \n-# Use Python 3.10 slim image as base\n-FROM python:3.10-slim\n+# Install image processing library Pillow\n+RUN pip install --no-cache-dir Pillow\n \n-# Set working directory\n-WORKDIR /app\n-\n-# Install build dependencies and curl for healthcheck\n-RUN apt-get update && apt-get install -y \\\n-    build-essential \\\n-    curl \\\n-    poppler-utils \\\n-    && rm -rf /var/lib/apt/lists/*\n-\n-# Copy project files\n-COPY . .\n-\n-# Install the package in editable mode with proper path\n-RUN pip install --no-cache-dir -e .\n-\n # Expose the port\n EXPOSE 8000\n \n # Run the server with SSE transport\n"
115 |                 },
116 |                 {
117 |                     "date": 1745917895343,
118 |                     "content": "Index: \n===================================================================\n--- \n+++ \n@@ -22,14 +22,8 @@\n \n # Install the package in editable mode with proper path\n RUN pip install --no-cache-dir -e .\n \n-# Install dependencies\n-RUN pip install --no-cache-dir -r requirements.txt\n-\n-# Install image processing library Pillow\n-RUN pip install --no-cache-dir Pillow\n-\n # Expose the port\n EXPOSE 8000\n \n # Run the server with SSE transport\n"
119 |                 },
120 |                 {
121 |                     "date": 1745992866896,
122 |                     "content": "Index: \n===================================================================\n--- \n+++ \n@@ -14,8 +14,9 @@\n     tesseract-ocr-eng \\\n     ffmpeg \\\n     libsm6 \\\n     libxext6 \\\n+    libreoffice \\\n     && rm -rf /var/lib/apt/lists/*\n \n # Copy project files\n COPY . .\n"
123 |                 }
124 |             ],
125 |             "date": 1741245157547,
126 |             "name": "Commit-0",
127 |             "content": "# Use Python 3.10 slim image as base\nFROM python:3.10-slim\n\n# Set working directory\nWORKDIR /app\n\n# Install build dependencies and curl for healthcheck\nRUN apt-get update && apt-get install -y \\\n    build-essential \\\n    curl \\\n    poppler-utils \\\n    && rm -rf /var/lib/apt/lists/*\n\n# Copy project files\nCOPY . .\n\n# Install the package in editable mode\nRUN pip install --no-cache-dir -e \".[dev]\"\n\n# Expose the port\nEXPOSE 8000\n\n# Run the server with SSE transport\nCMD [\"mcp-simple-tool\", \"--transport\", \"sse\", \"--port\", \"8000\"] "
128 |         }
129 |     ]
130 | }


--------------------------------------------------------------------------------