├── app.ico ├── config ├── version.json ├── proxy_api.json ├── prompts.json └── models.json ├── requirements.txt ├── models ├── __init__.py ├── base.py ├── baidu_ocr.py ├── openai.py ├── google.py ├── factory.py ├── alibaba.py ├── doubao.py ├── mathpix.py ├── deepseek.py └── anthropic.py ├── .gitignore ├── AGENTS.md ├── README.md ├── docs └── beginner-tutorial.md ├── static └── js │ └── ui.js ├── LICENSE └── app.py /app.ico: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Zippland/Snap-Solver/HEAD/app.ico -------------------------------------------------------------------------------- /config/version.json: -------------------------------------------------------------------------------- 1 | { 2 | "version": "1.5.1", 3 | "build_date": "2025-04-11", 4 | "github_repo": "Zippland/Snap-Solver" 5 | } -------------------------------------------------------------------------------- /config/proxy_api.json: -------------------------------------------------------------------------------- 1 | { 2 | "apis": { 3 | "alibaba": "", 4 | "anthropic": "", 5 | "deepseek": "", 6 | "doubao": "", 7 | "google": "", 8 | "openai": "" 9 | }, 10 | "enabled": true 11 | } -------------------------------------------------------------------------------- /requirements.txt: -------------------------------------------------------------------------------- 1 | flask==3.1.0 2 | pyautogui==0.9.54 3 | pyperclip==1.8.2 4 | Pillow==11.1.0 5 | flask-socketio==5.5.1 6 | python-engineio==4.11.2 7 | python-socketio==5.12.1 8 | requests==2.32.3 9 | openai==1.61.0 10 | google-generativeai==0.7.0 11 | -------------------------------------------------------------------------------- /models/__init__.py: -------------------------------------------------------------------------------- 1 | from .base import BaseModel 2 | from .anthropic import AnthropicModel 3 | from .openai import OpenAIModel 4 | from .deepseek import DeepSeekModel 5 | from .alibaba 
import AlibabaModel 6 | from .google import GoogleModel 7 | from .doubao import DoubaoModel 8 | from .factory import ModelFactory 9 | 10 | __all__ = [ 11 | 'BaseModel', 12 | 'AnthropicModel', 13 | 'OpenAIModel', 14 | 'DeepSeekModel', 15 | 'AlibabaModel', 16 | 'GoogleModel', 17 | 'DoubaoModel', 18 | 'ModelFactory' 19 | ] 20 | -------------------------------------------------------------------------------- /models/base.py: -------------------------------------------------------------------------------- 1 | from abc import ABC, abstractmethod 2 | from typing import Generator, Any 3 | 4 | class BaseModel(ABC): 5 | def __init__(self, api_key: str, temperature: float = 0.7, system_prompt: str = None, language: str = None, api_base_url: str = None): 6 | self.api_key = api_key 7 | self.temperature = temperature 8 | self.language = language 9 | self.system_prompt = system_prompt or self.get_default_system_prompt() 10 | self.api_base_url = api_base_url 11 | 12 | @abstractmethod 13 | def analyze_image(self, image_data: str, proxies: dict = None) -> Generator[dict, None, None]: 14 | """ 15 | Analyze the given image and yield response chunks. 16 | 17 | Args: 18 | image_data: Base64 encoded image data 19 | proxies: Optional proxy configuration 20 | 21 | Yields: 22 | dict: Response chunks with status and content 23 | """ 24 | pass 25 | 26 | @abstractmethod 27 | def analyze_text(self, text: str, proxies: dict = None) -> Generator[dict, None, None]: 28 | """ 29 | Analyze the given text and yield response chunks. 
30 | 31 | Args: 32 | text: Text to analyze 33 | proxies: Optional proxy configuration 34 | 35 | Yields: 36 | dict: Response chunks with status and content 37 | """ 38 | pass 39 | 40 | def get_default_system_prompt(self) -> str: 41 | """返回默认的系统提示词,子类可覆盖但不再是必须实现的方法""" 42 | return "您是一位专业的问题解决专家。请逐步分析问题,找出问题所在,并提供详细的解决方案。始终使用用户偏好的语言回答。" 43 | 44 | @abstractmethod 45 | def get_model_identifier(self) -> str: 46 | """Return the model identifier used in API calls""" 47 | pass 48 | -------------------------------------------------------------------------------- /config/prompts.json: -------------------------------------------------------------------------------- 1 | { "ACM_hard": { 2 | "name": "ACM编程题(困难)", 3 | "content":"你是一个顶尖的算法竞赛选手 + 程序员。你的任务是接收一道 ACM / 编程题目(包含题目描述、输入输出格式、约束)并输出一份完整可运行的解法。请严格按照以下步骤:\n1. 题目复述;\n2. 复杂度与限制分析;\n3. 思路与算法设计;\n4. 伪代码 / 算法框架;\n5. 最终可运行python代码(带注释);\n6. 时间复杂度 / 空间复杂度总结 + 边界 / 特殊输入测试。输出格式必须包含这些部分,不得省略分析或直接跳到代码。", 4 | "description": "专为ACM编程竞赛题设计的提示词" 5 | }, 6 | "a_default": { 7 | "name": "默认提示词", 8 | "content": "如果给的是图片,请先识别图片上面的题目,并输出完整题干;如果给的不是图片,直接诠释一下题目。然后解决该问题,如果是编程题,请输出最终可运行代码(带注释)。", 9 | "description": "通用问题解决提示词" 10 | }, 11 | "single_choice": { 12 | "name": "单选题提示词", 13 | "content": "您是一位专业的单选题解析专家。当看到一个单选题时,请:\n1. 仔细阅读题目要求和选项\n2. 分析每个选项的正确性\n3. 明确指出正确选项\n4. 解释为什么该选项正确\n5. 简要说明其他选项错误的原因\n6. 总结相关知识点", 14 | "description": "专为单选题分析设计的提示词" 15 | }, 16 | "multiple_choice": { 17 | "name": "多选题提示词", 18 | "content": "您是一位专业的多选题解析专家。当看到一个多选题时,请:\n1. 仔细阅读题目要求和所有选项\n2. 逐一分析每个选项的正确性\n3. 明确列出所有正确选项\n4. 详细解释每个正确选项的理由\n5. 说明错误选项的问题所在\n6. 归纳总结相关知识点", 19 | "description": "专为多选题分析设计的提示词" 20 | }, 21 | "programming": { 22 | "name": "ACM编程题提示词", 23 | "content": "您是一位专业的ACM编程竞赛解题专家。当看到一个编程题时,请:\n1. 分析题目要求、输入输出格式和约束条件\n2. 确定解题思路和算法策略\n3. 分析算法复杂度\n4. 提供完整、可运行的代码实现\n5. 解释代码中的关键部分\n6. 提供一些测试用例及其输出\n7. 讨论可能的优化方向", 24 | "description": "专为ACM编程竞赛题设计的提示词" 25 | }, 26 | "pattern_reasoning": { 27 | "name": "图形推理题提示词", 28 | "content": "您是一位专业的图形推理题解析专家。当看到一个图形推理题时,请:\n1. 
观察并描述题目给出的图形序列\n2. 分析图形之间的变化规律\n3. 归纳可能的变化模式(如旋转、翻转、数量变化等)\n4. 应用发现的规律预测下一个图形\n5. 在多个选项中确定符合规律的答案\n6. 详细解释推理过程", 29 | "description": "专为图形推理题设计的提示词" 30 | }, 31 | "chart_calculation": { 32 | "name": "图表计算题提示词", 33 | "content": "您是一位专业的图表数据分析专家。当看到一个包含图表的计算题时,请:\n1. 仔细阅读并描述图表包含的信息(表格、柱状图、折线图等)\n2. 确定题目要求计算的具体内容\n3. 从图表中提取相关数据\n4. 设计合适的计算方法\n5. 进行准确的计算过程\n6. 清晰呈现计算结果\n7. 必要时解释数据的含义和趋势", 34 | "description": "专为图表数据分析和计算题设计的提示词" 35 | } 36 | } -------------------------------------------------------------------------------- /.gitignore: -------------------------------------------------------------------------------- 1 | # Dependencies 2 | node_modules/ 3 | /.pnp 4 | .pnp.js 5 | yarn.lock 6 | package-lock.json 7 | .npm 8 | .yarn-integrity 9 | 10 | # Python 11 | __pycache__/ 12 | *.py[cod] 13 | *$py.class 14 | *.so 15 | .Python 16 | env/ 17 | build/ 18 | develop-eggs/ 19 | dist/ 20 | downloads/ 21 | eggs/ 22 | .eggs/ 23 | lib/ 24 | lib64/ 25 | parts/ 26 | sdist/ 27 | var/ 28 | wheels/ 29 | *.egg-info/ 30 | .installed.cfg 31 | *.egg 32 | 33 | # Testing & Coverage 34 | /coverage 35 | .nyc_output 36 | 37 | # Production & Build 38 | /build 39 | /dist 40 | /out 41 | .next/ 42 | out/ 43 | 44 | # Development & Environment 45 | .env 46 | .env.local 47 | .env.development.local 48 | .env.test.local 49 | .env.production.local 50 | config.local.js 51 | config.dev.js 52 | 53 | # Logs 54 | logs/ 55 | *.log 56 | npm-debug.log* 57 | yarn-debug.log* 58 | yarn-error.log* 59 | 60 | # IDEs and Editors 61 | /.idea/ 62 | .project 63 | .classpath 64 | .c9/ 65 | *.launch 66 | .settings/ 67 | *.sublime-workspace 68 | .vscode/* 69 | !.vscode/settings.json 70 | !.vscode/tasks.json 71 | !.vscode/launch.json 72 | !.vscode/extensions.json 73 | 74 | # Java 75 | *.class 76 | *.war 77 | *.ear 78 | *.jar 79 | target/ 80 | 81 | # Gradle 82 | .gradle 83 | /build/ 84 | 85 | # Maven 86 | target/ 87 | pom.xml.tag 88 | pom.xml.releaseBackup 89 | pom.xml.versionsBackup 90 | pom.xml.next 91 | release.properties 92 
| dependency-reduced-pom.xml 93 | 94 | # TypeScript 95 | *.tsbuildinfo 96 | 97 | # OS Generated Files 98 | .DS_Store 99 | .DS_Store? 100 | ._* 101 | .Spotlight-V100 102 | .Trashes 103 | ehthumbs.db 104 | Thumbs.db 105 | 106 | # Backup Files 107 | *.bak 108 | *.swp 109 | *.swo 110 | *~ 111 | 112 | # Optional REPL history 113 | .node_repl_history 114 | 115 | # Media & Large Files 116 | *.mp4 117 | *.tiff 118 | *.avi 119 | *.flv 120 | *.mov 121 | *.wmv 122 | *.tgz 123 | 124 | # Optional eslint cache 125 | .eslintcache 126 | 127 | # Project specific 128 | config/update_info.json 129 | config/api_keys.json 130 | config/api_base_urls.json 131 | .venv/ 132 | venv/ 133 | 134 | # uv 135 | .python-version 136 | pyproject.toml 137 | uv.lock 138 | -------------------------------------------------------------------------------- /AGENTS.md: -------------------------------------------------------------------------------- 1 | # Repository Guidelines 2 | 3 | ## Project Structure & Module Organization 4 | Snap-Solver is a Flask web app served from `app.py`, which wires Socket.IO streaming, screenshot capture, and model dispatch. Model adapters live in `models/`, with `factory.py` loading provider metadata from `config/models.json` and creating the appropriate client (OpenAI, Anthropic, DeepSeek, Qwen, etc.). User-facing templates live under `templates/`, with shared assets in `static/`. Runtime configuration and secrets are JSON files in `config/`; treat these as local-only overrides even if sample values exist in the repo. Python dependencies are listed in `requirements.txt` (lockfile: `uv.lock`). 5 | 6 | ## Build, Test, and Development Commands 7 | - `python -m venv .venv && source .venv/bin/activate` sets up an isolated environment. 8 | - `pip install -r requirements.txt` or `uv sync` installs Flask, provider SDKs, and Socket.IO. 9 | - `python app.py` boots the development server at `http://localhost:5000` with verbose engine logs. 
10 | - `FLASK_ENV=development python app.py` enables auto-reload during active development. 11 | 12 | ## Coding Style & Naming Conventions 13 | Follow PEP 8: 4-space indentation, `snake_case` for Python functions, and descriptive class names that match provider roles (see `models/openai.py`). JSON configs use lowerCamelCase keys so the web client can consume them directly; keep that convention when adding settings. Client scripts in `static/js/` should stay modular and avoid sprawling event handlers. 14 | 15 | ## Testing Guidelines 16 | There is no automated test suite yet; whenever you add features, verify end-to-end by launching `python app.py`, triggering a screenshot from the UI, and confirming Socket.IO events stream without tracebacks. When integrating a new model, seed a temporary key in `config/api_keys.json`, exercise one request, and capture console logs before reverting secrets. If you introduce automated tests, place them in `tests/` and gate external calls behind mocks so the suite can run offline. 17 | 18 | ## Commit & Pull Request Guidelines 19 | The history favors concise, imperative commit subjects in Chinese (e.g., `修复发送按钮保存裁剪框数据`). Keep messages under 70 characters, enumerate multi-part changes in the body, and reference related issues with `#123` when applicable. Pull requests should outline the user-visible impact, note any config updates or new dependencies, attach UI screenshots for front-end tweaks, and list manual verification steps so reviewers can reproduce them quickly. 20 | 21 | ## Configuration & Security Tips 22 | Never commit real API keys—`.gitignore` already excludes `config/api_keys.json` and other volatile files, so create local copies (`config/api_keys.local.json`) for experimentation. When sharing deployment instructions, direct operators to set API credentials via environment variables or secure vaults and only populate JSON stubs during runtime startup logic. 
23 | -------------------------------------------------------------------------------- /README.md: -------------------------------------------------------------------------------- 1 |

Snap-Solver 版本

2 | 3 | 4 |

5 | 🔍 一键截屏,自动解题 - 线上考试,从未如此简单 6 |

7 | 8 |

9 | Python 10 | Flask 11 | AI 12 | License 13 |

14 | 15 | 16 |

17 | 核心特性 • 18 | 快速开始 • 19 | 新手教程 • 20 | 使用指南 • 21 | 技术架构 • 22 | 高级配置 • 23 | 常见问题 • 24 | 获取帮助 25 |

26 | 27 |
28 | 29 | 获取Release 30 | 31 |      32 | 33 | 阅读新手教程 34 | 35 |      36 | 37 | 联系我们 38 | 39 |
40 | 43 | 44 | ## 💫 项目简介 45 | 46 | **Snap-Solver** 是一个革命性的AI笔试测评工具,专为学生、考生和自学者设计。只需**按下快捷键**,即可自动截取屏幕上的任何题目,通过AI进行分析并提供详细解答。 47 | 48 | 无论是复杂的数学公式、物理难题、编程问题,还是其他学科的挑战,Snap-Solver都能提供清晰、准确、有条理的解决方案,帮助您更好地理解和掌握知识点。 49 | 50 | ## 📚 新手教程 51 | 52 | 第一次使用?按照我们的 [《新手教程》](docs/beginner-tutorial.md) 完成环境准备、模型配置和首次解题演练,全程图文指引,几分钟即可上手。 53 | 54 | ## 🔧 技术架构 55 | 56 | ```mermaid 57 | graph TD 58 | A[用户界面] --> B[Flask Web服务] 59 | B --> C{API路由} 60 | C --> D[截图服务] 61 | C --> E[OCR识别] 62 | C --> F[AI分析] 63 | E --> |Mathpix API| G[文本提取] 64 | F --> |模型选择| H1[OpenAI] 65 | F --> |模型选择| H2[Anthropic] 66 | F --> |模型选择| H3[DeepSeek] 67 | F --> |模型选择| H4[Alibaba] 68 | F --> |模型选择| H5[Google] 69 | F --> |模型选择| H6[Doubao] 70 | D --> I[Socket.IO实时通信] 71 | I --> A 72 | ``` 73 | 74 | ## ✨ 核心特性 75 | 76 | 77 | 78 | 85 | 95 | 96 | 97 | 104 | 111 | 112 | 113 | 120 | 127 | 128 |
79 |

📱 跨设备协同

80 |
    81 |
  • 一键截图:按下快捷键,即可在移动设备上查看和分析电脑屏幕
  • 82 |
  • 局域网共享:一处部署,多设备访问,提升学习效率
  • 83 |
84 |
86 |

🧠 多模型AI支持

87 |
    88 |
  • GPT 家族:OpenAI强大的推理能力
  • 89 |
  • Claude 家族:Anthropic的高级理解与解释
  • 90 |
  • DeepSeek 家族:专为中文场景优化的模型
  • 91 |
  • QVQ 和 Qwen 家族:以视觉推理闻名的国产AI
  • 92 |
  • Gemini 家族:智商130的非推理AI
  • 93 |
94 |
98 |

🔍 精准识别

99 |
    100 |
  • OCR文字识别:准确捕捉图片中的文本
  • 101 |
  • 数学公式支持:通过Mathpix精确识别复杂数学符号
  • 102 |
103 |
105 |

🌐 全球无障碍

106 |
    107 |
  • VPN代理支持:自定义代理设置,解决网络访问限制
  • 108 |
  • 多语言响应:支持定制AI回复语言
  • 109 |
110 |
114 |

💻 全平台兼容

115 |
    116 |
  • 桌面支持:Windows、macOS、Linux
  • 117 |
  • 移动访问:手机、平板通过浏览器直接使用
  • 118 |
119 |
121 |

⚙️ 高度可定制

122 |
    123 |
  • 思考深度控制:调整AI的分析深度
  • 124 |
  • 自定义提示词:针对特定学科优化提示
  • 125 |
126 |
129 | 130 | ## 🚀 快速开始 131 | 132 | ### 📋 前置要求 133 | 134 | - Python 3.x 135 | - 至少以下一个API Key: 136 | - OpenAI API Key 137 | - Anthropic API Key (推荐✅) 138 | - DeepSeek API Key 139 | - Alibaba API Key (国内用户首选) 140 | - Google API Key 141 | - Mathpix API Key (推荐OCR识别✅) 142 | 143 | ### 📥 开始使用 144 | 145 | ```bash 146 | # 启动应用 147 | python app.py 148 | ``` 149 | 150 | ### 📱 访问方式 151 | 152 | - **本机访问**:打开浏览器,访问 http://localhost:5000 153 | - **局域网设备访问**:在同一网络的任何设备上访问 `http://[电脑IP]:5000` 154 | 155 | ### 🎯 使用场景示例 156 | 157 | - **课后习题**:截取教材或作业中的难题,获取步骤详解 158 | - **编程调试**:截取代码错误信息,获取修复建议 159 | - **考试复习**:分析错题并理解解题思路 160 | - **文献研究**:截取复杂论文段落,获取简化解释 161 | 162 | ### 🧩 组件详情 163 | 164 | - **前端**:响应式HTML/CSS/JS界面,支持移动设备 165 | - **后端**:Flask + SocketIO,提供RESTful API和WebSocket 166 | - **AI接口**:多模型支持,统一接口标准 167 | - **图像处理**:高效的截图和裁剪功能 168 | 169 | ## ⚙️ 高级可调参数 170 | 171 | - **温度**:调整回答的创造性与确定性(0.1-1.0) 172 | - **最大输出Token**:控制回答长度 173 | - **推理深度**:标准模式(快速)或深度思考(详细) 174 | - **思考预算占比**:平衡思考过程与最终答案的详细程度 175 | - **系统提示词**:自定义AI的基础行为与专业领域 176 | 177 | ## ❓ 常见问题 178 | 179 |
180 | 如何获得最佳识别效果? 181 |

182 | 确保截图清晰,包含完整题目和必要上下文。对于数学公式,建议使用Mathpix OCR以获得更准确的识别结果。 183 |

184 |
185 | 186 |
187 | 无法连接到服务怎么办? 188 |

189 | 1. 检查防火墙设置是否允许5000端口
190 | 2. 确认设备在同一局域网内
191 | 3. 尝试重启应用程序
192 | 4. 查看控制台日志获取错误信息 193 |

194 |
195 | 196 |
197 | API调用失败的原因? 198 |

199 | 1. API密钥可能无效或余额不足
200 | 2. 网络连接问题,特别是国际API
201 | 3. 代理设置不正确
202 | 4. API服务可能临时不可用 203 |

204 |
205 | 206 |
207 | 如何优化AI回答质量? 208 |

209 | 1. 调整系统提示词,添加特定学科的指导
210 | 2. 根据问题复杂度选择合适的模型
211 | 3. 对于复杂题目,使用"深度思考"模式
212 | 4. 确保截取的题目包含完整信息 213 |

214 |
215 | 216 | ## 🤝 获取帮助 217 | 218 | - **代部署服务**:如果您不擅长编程,需要代部署服务,请联系 [zylanjian@outlook.com](mailto:zylanjian@outlook.com) 219 | - **问题报告**:在GitHub仓库提交Issue 220 | - **功能建议**:欢迎通过Issue或邮件提供改进建议 221 | 222 | ## 📜 开源协议 223 | 224 | 本项目采用 [Apache 2.0](LICENSE) 协议。 225 | -------------------------------------------------------------------------------- /models/baidu_ocr.py: -------------------------------------------------------------------------------- 1 | import base64 2 | import json 3 | import time 4 | import urllib.request 5 | import urllib.parse 6 | from typing import Generator, Dict, Any 7 | from .base import BaseModel 8 | 9 | class BaiduOCRModel(BaseModel): 10 | """ 11 | 百度OCR模型,用于图像文字识别 12 | """ 13 | 14 | def __init__(self, api_key: str, secret_key: str = None, temperature: float = 0.7, system_prompt: str = None): 15 | """ 16 | 初始化百度OCR模型 17 | 18 | Args: 19 | api_key: 百度API Key 20 | secret_key: 百度Secret Key(可以在api_key中用冒号分隔传入) 21 | temperature: 不用于OCR但保持BaseModel兼容性 22 | system_prompt: 不用于OCR但保持BaseModel兼容性 23 | 24 | Raises: 25 | ValueError: 如果API密钥格式无效 26 | """ 27 | super().__init__(api_key, temperature, system_prompt) 28 | 29 | # 支持两种格式:单独传递或在api_key中用冒号分隔 30 | if secret_key: 31 | self.api_key = api_key 32 | self.secret_key = secret_key 33 | else: 34 | try: 35 | self.api_key, self.secret_key = api_key.split(':') 36 | except ValueError: 37 | raise ValueError("百度OCR API密钥必须是 'API_KEY:SECRET_KEY' 格式或单独传递secret_key参数") 38 | 39 | # 百度API URLs 40 | self.token_url = "https://aip.baidubce.com/oauth/2.0/token" 41 | self.ocr_url = "https://aip.baidubce.com/rest/2.0/ocr/v1/accurate_basic" 42 | 43 | # 缓存access_token 44 | self._access_token = None 45 | self._token_expires = 0 46 | 47 | def get_access_token(self) -> str: 48 | """获取百度API的access_token""" 49 | # 检查是否需要刷新token(提前5分钟刷新) 50 | if self._access_token and time.time() < self._token_expires - 300: 51 | return self._access_token 52 | 53 | # 请求新的access_token 54 | params = { 55 | 'grant_type': 'client_credentials', 56 | 
'client_id': self.api_key, 57 | 'client_secret': self.secret_key 58 | } 59 | 60 | data = urllib.parse.urlencode(params).encode('utf-8') 61 | request = urllib.request.Request(self.token_url, data=data) 62 | request.add_header('Content-Type', 'application/x-www-form-urlencoded') 63 | 64 | try: 65 | with urllib.request.urlopen(request) as response: 66 | result = json.loads(response.read().decode('utf-8')) 67 | 68 | if 'access_token' in result: 69 | self._access_token = result['access_token'] 70 | # 设置过期时间(默认30天,但我们提前刷新) 71 | self._token_expires = time.time() + result.get('expires_in', 2592000) 72 | return self._access_token 73 | else: 74 | raise Exception(f"获取access_token失败: {result.get('error_description', '未知错误')}") 75 | 76 | except Exception as e: 77 | raise Exception(f"请求access_token失败: {str(e)}") 78 | 79 | def ocr_image(self, image_data: str) -> str: 80 | """ 81 | 对图像进行OCR识别 82 | 83 | Args: 84 | image_data: Base64编码的图像数据 85 | 86 | Returns: 87 | str: 识别出的文字内容 88 | """ 89 | access_token = self.get_access_token() 90 | 91 | # 准备请求数据 92 | params = { 93 | 'image': image_data, 94 | 'language_type': 'auto_detect', # 自动检测语言 95 | 'detect_direction': 'true', # 检测图像朝向 96 | 'probability': 'false' # 不返回置信度(减少响应大小) 97 | } 98 | 99 | data = urllib.parse.urlencode(params).encode('utf-8') 100 | url = f"{self.ocr_url}?access_token={access_token}" 101 | 102 | request = urllib.request.Request(url, data=data) 103 | request.add_header('Content-Type', 'application/x-www-form-urlencoded') 104 | 105 | try: 106 | with urllib.request.urlopen(request) as response: 107 | result = json.loads(response.read().decode('utf-8')) 108 | 109 | if 'error_code' in result: 110 | raise Exception(f"百度OCR API错误: {result.get('error_msg', '未知错误')}") 111 | 112 | # 提取识别的文字 113 | words_result = result.get('words_result', []) 114 | text_lines = [item['words'] for item in words_result] 115 | 116 | return '\n'.join(text_lines) 117 | 118 | except Exception as e: 119 | raise Exception(f"OCR识别失败: {str(e)}") 120 | 121 | 
def extract_full_text(self, image_data: str) -> str: 122 | """ 123 | 提取图像中的完整文本(与Mathpix兼容的接口) 124 | 125 | Args: 126 | image_data: Base64编码的图像数据 127 | 128 | Returns: 129 | str: 提取的文本内容 130 | """ 131 | return self.ocr_image(image_data) 132 | 133 | def analyze_image(self, image_data: str, proxies: dict = None) -> Generator[Dict[str, Any], None, None]: 134 | """ 135 | 分析图像并返回OCR结果(流式输出以保持接口一致性) 136 | 137 | Args: 138 | image_data: Base64编码的图像数据 139 | proxies: 代理配置(未使用) 140 | 141 | Yields: 142 | dict: 包含OCR结果的响应 143 | """ 144 | try: 145 | text = self.ocr_image(image_data) 146 | yield { 147 | 'status': 'completed', 148 | 'content': text, 149 | 'model': 'baidu-ocr' 150 | } 151 | except Exception as e: 152 | yield { 153 | 'status': 'error', 154 | 'content': f'OCR识别失败: {str(e)}', 155 | 'model': 'baidu-ocr' 156 | } 157 | 158 | def analyze_text(self, text: str, proxies: dict = None) -> Generator[Dict[str, Any], None, None]: 159 | """ 160 | 分析文本(OCR模型不支持文本分析) 161 | 162 | Args: 163 | text: 输入文本 164 | proxies: 代理配置(未使用) 165 | 166 | Yields: 167 | dict: 错误响应 168 | """ 169 | yield { 170 | 'status': 'error', 171 | 'content': 'OCR模型不支持文本分析功能', 172 | 'model': 'baidu-ocr' 173 | } 174 | 175 | def get_model_identifier(self) -> str: 176 | """返回模型标识符""" 177 | return "baidu-ocr" 178 | -------------------------------------------------------------------------------- /config/models.json: -------------------------------------------------------------------------------- 1 | { 2 | "providers": { 3 | "anthropic": { 4 | "name": "Anthropic", 5 | "api_key_id": "AnthropicApiKey", 6 | "class_name": "AnthropicModel" 7 | }, 8 | "openai": { 9 | "name": "OpenAI", 10 | "api_key_id": "OpenaiApiKey", 11 | "class_name": "OpenAIModel" 12 | }, 13 | "deepseek": { 14 | "name": "DeepSeek", 15 | "api_key_id": "DeepseekApiKey", 16 | "class_name": "DeepSeekModel" 17 | }, 18 | "alibaba": { 19 | "name": "Alibaba", 20 | "api_key_id": "AlibabaApiKey", 21 | "class_name": "AlibabaModel" 22 | }, 23 | "google": { 24 | "name": 
"Google", 25 | "api_key_id": "GoogleApiKey", 26 | "class_name": "GoogleModel" 27 | }, 28 | "doubao": { 29 | "name": "Doubao", 30 | "api_key_id": "DoubaoApiKey", 31 | "class_name": "DoubaoModel" 32 | } 33 | }, 34 | "models": { 35 | "claude-opus-4-20250514": { 36 | "name": "Claude 4 Opus", 37 | "provider": "anthropic", 38 | "supportsMultimodal": true, 39 | "isReasoning": true, 40 | "version": "20250514", 41 | "description": "最强大的Claude 4 Opus模型,支持图像理解和深度思考过程" 42 | }, 43 | "claude-opus-4-1-20250805": { 44 | "name": "Claude 4.1 Opus", 45 | "provider": "anthropic", 46 | "supportsMultimodal": true, 47 | "isReasoning": false, 48 | "version": "20250805", 49 | "description": "Claude Opus 4.1 最新标准模式,快速响应并支持多模态输入" 50 | }, 51 | "claude-opus-4-1-20250805-thinking": { 52 | "name": "Claude 4.1 Opus (Thinking)", 53 | "provider": "anthropic", 54 | "supportsMultimodal": true, 55 | "isReasoning": true, 56 | "version": "20250805", 57 | "description": "Claude Opus 4.1 思考模式,启用更长思考过程以提升推理质量" 58 | }, 59 | "claude-sonnet-4-20250514": { 60 | "name": "Claude 4 Sonnet", 61 | "provider": "anthropic", 62 | "supportsMultimodal": true, 63 | "isReasoning": true, 64 | "version": "20250514", 65 | "description": "高性能的Claude 4 Sonnet模型,支持图像理解和思考过程" 66 | }, 67 | "claude-sonnet-4-5-20250929": { 68 | "name": "Claude 4.5 Sonnet", 69 | "provider": "anthropic", 70 | "supportsMultimodal": true, 71 | "isReasoning": true, 72 | "version": "20250929", 73 | "description": "Claude Sonnet 4.5 版,兼具多模态理解与最新推理能力" 74 | }, 75 | "gpt-4o-2024-11-20": { 76 | "name": "GPT-4o", 77 | "provider": "openai", 78 | "supportsMultimodal": true, 79 | "isReasoning": false, 80 | "version": "2024-11-20", 81 | "description": "OpenAI的GPT-4o模型,支持图像理解" 82 | }, 83 | "gpt-5-2025-08-07": { 84 | "name": "GPT-5", 85 | "provider": "openai", 86 | "supportsMultimodal": true, 87 | "isReasoning": true, 88 | "version": "2025-08-07", 89 | "description": "OpenAI旗舰级GPT-5模型,支持多模态输入与高级推理" 90 | }, 91 | "gpt-5-1": { 92 | "name": "GPT-5.1", 93 | "provider": 
"openai", 94 | "supportsMultimodal": true, 95 | "isReasoning": true, 96 | "version": "latest", 97 | "description": "GPT-5.1 新版旗舰模型,强化长上下文与推理表现" 98 | }, 99 | "gpt-5-codex-high": { 100 | "name": "GPT Codex High", 101 | "provider": "openai", 102 | "supportsMultimodal": false, 103 | "isReasoning": true, 104 | "version": "latest", 105 | "description": "OpenAI高性能代码模型Codex High,侧重复杂代码生成与重构" 106 | }, 107 | "o3-mini": { 108 | "name": "o3-mini", 109 | "provider": "openai", 110 | "supportsMultimodal": false, 111 | "isReasoning": true, 112 | "version": "latest", 113 | "description": "OpenAI的o3-mini模型,支持图像理解和思考过程" 114 | }, 115 | "deepseek-chat": { 116 | "name": "DeepSeek-V3", 117 | "provider": "deepseek", 118 | "supportsMultimodal": false, 119 | "isReasoning": false, 120 | "version": "latest", 121 | "description": "DeepSeek最新大模型,671B MoE模型,支持60 tokens/秒的高速生成" 122 | }, 123 | "deepseek-reasoner": { 124 | "name": "DeepSeek-R1", 125 | "provider": "deepseek", 126 | "supportsMultimodal": false, 127 | "isReasoning": true, 128 | "version": "latest", 129 | "description": "DeepSeek推理模型,提供详细思考过程(仅支持文本)" 130 | }, 131 | "QVQ-Max-2025-03-25": { 132 | "name": "QVQ-Max", 133 | "provider": "alibaba", 134 | "supportsMultimodal": true, 135 | "isReasoning": true, 136 | "version": "2025-03-25", 137 | "description": "阿里巴巴通义千问-QVQ-Max版本,支持图像理解和思考过程" 138 | }, 139 | "qwen-vl-max-latest": { 140 | "name": "Qwen-VL-MAX", 141 | "provider": "alibaba", 142 | "supportsMultimodal": true, 143 | "isReasoning": false, 144 | "version": "latest", 145 | "description": "阿里通义千问VL-MAX模型,视觉理解能力最强,支持图像理解和复杂任务" 146 | }, 147 | "gemini-2.5-pro": { 148 | "name": "Gemini 2.5 Pro", 149 | "provider": "google", 150 | "supportsMultimodal": true, 151 | "isReasoning": true, 152 | "version": "latest", 153 | "description": "Google最强大的Gemini 2.5 Pro模型,支持图像理解(需要付费API密钥)" 154 | }, 155 | "gemini-2.5-flash": { 156 | "name": "Gemini 2.5 Flash", 157 | "provider": "google", 158 | "supportsMultimodal": true, 159 | "isReasoning": false, 160 | 
"version": "latest", 161 | "description": "Google最新的Gemini 2.5 Flash模型,支持图像理解,速度更快,性能更好" 162 | }, 163 | "gemini-2.0-flash": { 164 | "name": "Gemini 2.0 Flash", 165 | "provider": "google", 166 | "supportsMultimodal": true, 167 | "isReasoning": false, 168 | "version": "latest", 169 | "description": "Google更快速的Gemini 2.0 Flash模型,支持图像理解,有免费配额" 170 | }, 171 | "gemini-3-pro": { 172 | "name": "Gemini 3 Pro", 173 | "provider": "google", 174 | "supportsMultimodal": true, 175 | "isReasoning": true, 176 | "version": "latest", 177 | "description": "Google Gemini 3 Pro 顶级推理模型,面向复杂多模态任务" 178 | }, 179 | "doubao-seed-1-6-250615": { 180 | "name": "Doubao-Seed-1.6", 181 | "provider": "doubao", 182 | "supportsMultimodal": true, 183 | "isReasoning": true, 184 | "version": "latest", 185 | "description": "支持auto/thinking/non-thinking三种思考模式、支持多模态、256K长上下文" 186 | } 187 | } 188 | } 189 | -------------------------------------------------------------------------------- /docs/beginner-tutorial.md: -------------------------------------------------------------------------------- 1 | # Snap-Solver 零基础上手教程 2 | 3 | 这篇教程面向第一次接触编程或 Python 的朋友,手把手带你从安装环境开始,直到在电脑和手机上顺利使用 Snap-Solver 完成题目分析。如果你在任何步骤遇到困难,建议按章节逐步检查,或对照文末的常见问题排查。 4 | 5 | --- 6 | 7 | ## 1. Snap-Solver 是什么? 8 | 9 | Snap-Solver 是一个本地运行的截屏解题工具,主要功能包括: 10 | - 一键截取电脑屏幕的题目图片; 11 | - 自动调用 OCR(文字识别)和多种大模型,给出详细解析; 12 | - 支持在手机、平板等局域网设备上实时查看结果; 13 | - 可以按需配置代理、中转 API、自定义提示词等高级选项。 14 | 15 | 整个应用基于 Python + Flask,只要能启动一个 Python 程序,就可以完全离线地掌握它的运行方式。 16 | 17 | --- 18 | 19 | ## 2. 准备清单 20 | 21 | - 一台可以联网的 Windows、macOS 或 Linux 电脑; 22 | - 至少一个可用的模型 API Key(推荐准备 2~3 个,方便切换): 23 | - OpenAI、Anthropic、DeepSeek、阿里灵积(Qwen)、Google、Mathpix 等任一即可; 24 | - 约 2 GB 可用硬盘空间; 25 | - 基本的文本编辑器(Windows 自带记事本即可,推荐使用 VS Code / Notepad++ 等更易读的工具)。 26 | 27 | > **提示**:Snap-Solver 不依赖显卡或 GPU,普通轻薄本即可顺利运行。 28 | 29 | --- 30 | 31 | ## 3. 第一次打开命令行 32 | 33 | Snap-Solver 需要在命令行里执行几条简单的指令。命令行是一个黑色(或白色)窗口,通过输入文字来让电脑完成任务。不同系统打开方式略有区别: 34 | 35 | ### 3.1 Windows 36 | 1. 
同时按下键盘 `Win` 键(左下角带 Windows 徽标的键)+ `S`,输入 `cmd` 或 `terminal`。 37 | 2. 选择 **命令提示符(Command Prompt)** 或 **Windows Terminal**,回车打开。 38 | 3. 复制命令时,可在窗口上点击右键 → 「粘贴」,或使用快捷键 `Ctrl + V`。 39 | 4. 想切换到某个文件夹(例如 `D:\Snap-Solver`),输入: 40 | ```powershell 41 | cd /d D:\Snap-Solver 42 | ``` 43 | 5. 查看当前文件夹内的内容: 44 | ```powershell 45 | dir 46 | ``` 47 | 48 | ### 3.2 macOS 49 | 1. 同时按下 `Command + Space` 呼出 Spotlight,输入 `Terminal` 并回车。 50 | 2. 在终端中,复制粘贴使用常规快捷键 `Command + C` / `Command + V`。 51 | 3. 切换到下载好的项目目录(例如在「下载」文件夹内): 52 | ```bash 53 | cd ~/Downloads/Snap-Solver 54 | ``` 55 | 4. 查看当前文件夹内容: 56 | ```bash 57 | ls 58 | ``` 59 | 60 | ### 3.3 Linux(Ubuntu 示例) 61 | 1. 同时按 `Ctrl + Alt + T` 打开终端。 62 | 2. 切换到项目目录: 63 | ```bash 64 | cd ~/Snap-Solver 65 | ``` 66 | 3. 查看内容: 67 | ```bash 68 | ls 69 | ``` 70 | 71 | > **常用命令速记** 72 | > - `cd 路径`:进入某个文件夹(路径中有空格请用双引号包住,例如 `cd "C:\My Folder"`)。 73 | > - `dir`(Windows)/`ls`(macOS、Linux):查看当前文件夹下的文件。 74 | > - 键盘方向键 ↑ 可以快速调出上一条命令,避免重复输入。 75 | 76 | --- 77 | 78 | ## 4. 安装 Python 3 79 | 80 | Snap-Solver 基于 Python 3.9+,推荐使用 3.10 或 3.11 版本。 81 | 82 | ### 4.1 Windows 83 | 1. 打开浏览器访问:https://www.python.org/downloads/ 84 | 2. 点击最新的稳定版(例如 `Python 3.11.x`)的 **Download Windows installer (64-bit)**。 85 | 3. 双击下载的安装包,记得在第一步勾选 **Add Python to PATH**。 86 | 4. 按提示完成安装。 87 | 5. 打开命令行窗口,输入: 88 | ```powershell 89 | python --version 90 | pip --version 91 | ``` 92 | 若能看到版本号(如 `Python 3.11.7`),说明安装成功。 93 | 94 | ### 4.2 macOS 95 | 1. 访问 https://www.python.org/downloads/mac-osx/ 下载 `macOS 64-bit universal2 installer`。 96 | 2. 双击 `.pkg` 文件按提示安装。 97 | 3. 打开终端输入: 98 | ```bash 99 | python3 --version 100 | pip3 --version 101 | ``` 102 | 如果输出版本号,表示安装完成。后续命令中的 `python`、`pip` 均可替换为 `python3`、`pip3`。 103 | 104 | ### 4.3 Linux(Ubuntu 示例) 105 | ```bash 106 | sudo apt update 107 | sudo apt install python3 python3-venv python3-pip -y 108 | python3 --version 109 | pip3 --version 110 | ``` 111 | 112 | --- 113 | 114 | ## 5. 
(可选)安装 Git 115 | 116 | Git 方便后续更新项目,也可以用来下载代码。 117 | - Windows:https://git-scm.com/download/win 118 | - macOS:在终端输入 `xcode-select --install` 或从 https://git-scm.com/download/mac 获取 119 | - Linux:`sudo apt install git -y` 120 | 121 | 如果暂时不想安装 Git,也可以稍后直接下载压缩包。 122 | 123 | --- 124 | 125 | ## 6. 获取 Snap-Solver 项目代码 126 | 127 | 任选其一: 128 | 1. **使用 Git 克隆(推荐)** 129 | ```bash 130 | git clone https://github.com/Zippland/Snap-Solver.git 131 | cd Snap-Solver 132 | ``` 133 | 2. **下载压缩包** 134 | - 打开项目主页:https://github.com/Zippland/Snap-Solver 135 | - 点击右侧 `Release` → `Source code (zip)` 136 | - 解压缩后,将文件夹重命名为 `Snap-Solver` 并记住路径 137 | 138 | 后续步骤默认你已经位于项目根目录(包含 `app.py`、`requirements.txt` 的那个文件夹)。如果忘记位置,可再次查看文件夹并使用 `cd` 进入。 139 | 140 | --- 141 | 142 | ## 7. 创建虚拟环境并安装依赖 143 | 144 | 虚拟环境可以把项目依赖和系统环境隔离,避免冲突。 145 | 146 | ### 7.1 创建虚拟环境 147 | 148 | - **Windows PowerShell** 149 | ```powershell 150 | python -m venv .venv 151 | .\.venv\Scripts\Activate 152 | ``` 153 | - **macOS / Linux** 154 | ```bash 155 | python3 -m venv .venv 156 | source .venv/bin/activate 157 | ``` 158 | 159 | 激活成功后,命令行前面会出现 `(.venv)` 前缀。若你关闭了命令行窗口,需要重新进入项目目录并再次执行激活命令。 160 | 161 | ### 7.2 安装依赖 162 | 163 | ```bash 164 | pip install --upgrade pip 165 | pip install -r requirements.txt 166 | ``` 167 | 168 | 常见依赖(Flask、PyAutoGUI、Pillow 等)都会自动安装。首次安装可能用时 1~5 分钟,请耐心等待。 169 | 170 | > **如果安装失败**:请检查网络、切换镜像源或参考文末常见问题。 171 | 172 | --- 173 | 174 | ## 8. 首次启动与访问 175 | 176 | 1. 保证虚拟环境处于激活状态。 177 | 2. 在项目根目录执行: 178 | ```bash 179 | python app.py 180 | ``` 181 | 3. 终端中会看到 Flask/SocketIO 的日志,最后出现 `Running on http://127.0.0.1:5000` 表示启动成功。 182 | 4. 若需要在手机/平板访问,请在**同一局域网下**输入 `http://<电脑IP>:5000`。电脑 IP 可在终端日志中看到,例如 `http://192.168.1.8:5000`(可能是别的,每次打开都会刷新)。 183 | 184 | > **暂停服务**:在终端按 `Ctrl + C` 即可停止运行。再次启动时,只需重新激活虚拟环境并执行 `python app.py`。 185 | 186 | --- 187 | 188 | ## 9. 
配置 API 密钥与基础设置 189 | 190 | 启动网页后,点击右上角的齿轮图标进入「设置」面板,建议先完成以下几项: 191 | 192 | ### 9.1 填写模型 API Key 193 | 194 | - 根据你手上的 Key,将对应值填入设置页面的输入框中; 195 | - 常用字段: 196 | - `OpenaiApiKey`:OpenAI 模型(如 GPT-4o、o3-mini) 197 | - `AnthropicApiKey`:Claude 系列 198 | - `DeepseekApiKey`:DeepSeek 199 | - `AlibabaApiKey`:通义千问 / Qwen / QVQ 200 | - `GoogleApiKey`:Gemini 系列 201 | - `MathpixAppId` & `MathpixAppKey`:用于高精度公式识别 202 | - 点击保存后,信息会写入 `config/api_keys.json` 方便下次启动直接读取。 203 | 204 | ### 9.2 设置代理与中转(可选) 205 | 206 | - 若你需要走代理或企业中转通道,可在设置面板中开启代理选项; 207 | - 对应的 JSON 文件是 `config/proxy_api.json`,可直接编辑来指定各模型的自定义 `base_url`; 208 | - 修改后需重启应用才能生效。 209 | 210 | ### 9.3 如何确认 VPN/代理端口 211 | 212 | 很多加速器或 VPN 客户端会在本地启动一个「系统代理」服务(常见端口如 `7890`、`1080` 等)。具体端口位置通常可以通过以下途径找到: 213 | - 打开 VPN 客户端的设置页面,寻找「本地监听端口」「HTTP(S) 代理」「SOCKS 代理」等字样; 214 | - Windows 用户也可以在「设置 → 网络和 Internet → 代理」里查看「使用代理服务器」的地址和端口; 215 | - macOS 用户可在「系统设置 → 网络 → Wi-Fi(或以太网)→ 详情 → 代理」里查看勾选的服务和端口; 216 | - 高级用户可以在命令行里运行 `netstat -ano | findstr 127.0.0.1`(Windows)或 `lsof -iTCP -sTCP:LISTEN | grep 127.0.0.1`(macOS/Linux)确认本地监听端口。 217 | 218 | 拿到端口后,在 Snap-Solver 的代理设置中填入对应的地址(通常是 `127.0.0.1:<端口>`),就能让模型请求走 VPN。不同工具的界面名称可能略有差异,重点是找出「本地监听地址 + 端口号」这一对信息。 219 | 220 | --- 221 | 222 | ## 10. 获取常用 API Key(详细教程) 223 | 224 | API Key 相当于你在各大模型平台上的「门票」。不同平台的获取流程不同,以下列出了最常用的几个来源。申请过程中务必保护好个人隐私与账号安全,切勿向他人泄露密钥。 225 | 226 | ### 10.1 OpenAI(GPT-4o / o3-mini 等) 227 | 1. 打开 https://platform.openai.com/ 并使用邮箱或第三方账号注册 / 登录。 228 | 2. 首次使用需完成实名和支付方式绑定(可选择信用卡或预付费余额)。 229 | 3. 登录后点击右上角头像 → `View API keys`。 230 | 4. 点击 `Create new secret key`,复制生成的密钥(形如 `sk-...`)。 231 | 5. 将该密钥粘贴到 Snap-Solver 的 `OpenaiApiKey` 输入框,并妥善保存。 232 | 233 | ### 10.2 Anthropic(Claude 系列) 234 | 1. 打开 https://console.anthropic.com/ 并注册账号。 235 | 2. 按提示完成手机号验证和支付方式绑定(部分国家需排队开通)。 236 | 3. 登录后进入 `API Keys` 页面,点击 `Create Key`。 237 | 4. 复制生成的密钥(形如 `sk-ant-...`),粘贴到 Snap-Solver 的 `AnthropicApiKey`。 238 | 239 | ### 10.3 DeepSeek 240 | 1. 访问 https://platform.deepseek.com/ 并注册登录。 241 | 2. 
如果需要人民币支付,可在「账号设置」绑定支付宝;海外用户可使用信用卡。 242 | 3. 进入 `API Keys`,点击 `新建密钥`。 243 | 4. 复制生成的密钥(形如 `sk-xxx`),填入 `DeepseekApiKey`。 244 | 245 | ### 10.4 阿里云通义千问 / Qwen / QVQ 246 | 1. 打开 https://dashscope.console.aliyun.com/ 并使用阿里云账号登录。 247 | 2. 进入「API Key 管理」页面,点击 `创建 API Key`。 248 | 3. 复制密钥(形如 `sk-yourkey`)填入 `AlibabaApiKey`。 249 | 4. 如需开通收费模型,请在「计费与配额」中先完成实名认证并开通付费策略。 250 | 251 | ### 10.5 Google Gemini 252 | 1. 前往 https://ai.google.dev/ 并登录 Google 账号。 253 | 2. 点击右上角 `Get API key`。 254 | 3. 选择或创建项目,生成新的 API Key。 255 | 4. 将密钥填入 `GoogleApiKey`。 256 | 257 | ### 10.6 Mathpix(高精度公式识别) 258 | 1. 访问 https://dashboard.mathpix.com/ 注册账号。 259 | 2. 完成邮箱验证后,在侧边栏找到 `API Keys`。 260 | 3. 创建新的 App,复制 `App ID` 和 `App Key`。 261 | 4. 分别填入 Snap-Solver 的 `MathpixAppId` 与 `MathpixAppKey` 字段。 262 | 263 | > **安全小贴士** 264 | > - API Key 和密码一样重要,泄露后他人可能代你调用接口、消耗额度。 265 | > - 建议为不同用途创建多个密钥,定期检查和撤销不用的密钥。 266 | > - 如果平台支持额度上限、IP 白名单等功能,可以酌情启用以降低风险。 267 | 268 | --- 269 | 270 | ## 11. 完成第一次题目解析 271 | 272 | 1. 确认右上角的「连接状态」显示为绿色的「已连接」。 273 | 2. 点击顶部的「开始截图」,按提示框拖拽需要识别的题目区域。 274 | 3. 截图完成后,预览区会显示图片,并出现「发送至 AI」或「提取文本」按钮: 275 | - **发送至 AI**:直接让所选模型解析图像; 276 | - **提取文本**:先做 OCR,把文字复制出来,再发送给模型。 277 | 4. 在右侧的「分析结果」面板可以查看: 278 | - AI 的思考过程(可折叠); 279 | - 最终解答、代码或步骤; 280 | - 中间日志与计时。 281 | 5. 若需要改用其他模型,重新打开设置面板即可实时切换。 282 | 283 | > **小技巧**:长按或双击分析结果中的文本,可快速复制粘贴;终端会实时输出请求日志,方便排查问题。 284 | 285 | --- 286 | 287 | ## 12. 常见问题速查 288 | 289 | - **`python` 命令找不到**:在 Windows 上打开新的终端后请重启电脑,或使用 `py` 命令;macOS/Linux 请尝试 `python3`。 290 | - **`pip install` 超时**:可以临时使用清华源 `pip install -i https://pypi.tuna.tsinghua.edu.cn/simple -r requirements.txt`。 291 | - **启动后网页打不开**:确认终端没有报错;检查防火墙、端口占用,或尝试 `http://127.0.0.1:5000`。 292 | - **截图没反应**:Windows/macOS 需要授权「辅助功能 / 截屏」权限给 Python;macOS 在「系统设置 - 隐私与安全」中勾选 `python` 或终端应用。 293 | - **模型报 401/403**:检查 API Key 是否正确、账号余额是否充足,必要时在设置里更换模型或填入自定义域名。 294 | - **手机访问失败**:确保手机和电脑在同一个 Wi-Fi 下,且电脑未开启 VPN 导致局域网隔离。 295 | 296 | --- 297 | 298 | ## 13. 
进一步探索 299 | 300 | - `config/models.json`:自定义展示在下拉框的模型列表,包含模型名称、供应商、能力标签等,可按需添加。 301 | - `config/prompts.json`:定义默认 prompt,可根据学科优化。 302 | - 更新项目:如果是 Git 克隆,执行 `git pull`;压缩包用户可重新下载覆盖。 303 | 304 | 完成以上步骤后,你已经具备运行和日常使用 Snap-Solver 的全部基础。如果你有新的需求或遇到无法解决的问题,可以先查看 README 或在 Issues 中搜索 / 提问。祝你学习顺利,刷题提效! 305 | -------------------------------------------------------------------------------- /models/openai.py: -------------------------------------------------------------------------------- 1 | import os 2 | from typing import Generator, Dict, Optional 3 | from openai import OpenAI 4 | from .base import BaseModel 5 | 6 | class OpenAIModel(BaseModel): 7 | def __init__(self, api_key, temperature=0.7, system_prompt=None, language=None, api_base_url=None, model_identifier=None): 8 | super().__init__(api_key, temperature, system_prompt, language) 9 | # 设置API基础URL,默认为OpenAI官方API 10 | self.api_base_url = api_base_url 11 | # 允许从外部配置显式指定模型标识符 12 | self.model_identifier = model_identifier or "gpt-4o-2024-11-20" 13 | 14 | def get_default_system_prompt(self) -> str: 15 | return """You are an expert at analyzing questions and providing detailed solutions. When presented with an image of a question: 16 | 1. First read and understand the question carefully 17 | 2. Break down the key components of the question 18 | 3. Provide a clear, step-by-step solution 19 | 4. If relevant, explain any concepts or theories involved 20 | 5. 
If there are multiple approaches, explain the most efficient one first""" 21 | 22 | def get_model_identifier(self) -> str: 23 | return self.model_identifier 24 | 25 | def analyze_text(self, text: str, proxies: dict = None) -> Generator[dict, None, None]: 26 | """Stream GPT-4o's response for text analysis""" 27 | try: 28 | # Initial status 29 | yield {"status": "started", "content": ""} 30 | 31 | # Save original environment state 32 | original_env = { 33 | 'http_proxy': os.environ.get('http_proxy'), 34 | 'https_proxy': os.environ.get('https_proxy') 35 | } 36 | 37 | try: 38 | # Set proxy environment variables if provided 39 | if proxies: 40 | if 'http' in proxies: 41 | os.environ['http_proxy'] = proxies['http'] 42 | if 'https' in proxies: 43 | os.environ['https_proxy'] = proxies['https'] 44 | 45 | # Initialize OpenAI client with base_url if provided 46 | if self.api_base_url: 47 | client = OpenAI(api_key=self.api_key, base_url=self.api_base_url) 48 | else: 49 | client = OpenAI(api_key=self.api_key) 50 | 51 | # Prepare messages 52 | messages = [ 53 | { 54 | "role": "system", 55 | "content": self.system_prompt 56 | }, 57 | { 58 | "role": "user", 59 | "content": text 60 | } 61 | ] 62 | 63 | response = client.chat.completions.create( 64 | model=self.get_model_identifier(), 65 | messages=messages, 66 | temperature=self.temperature, 67 | stream=True, 68 | max_tokens=4000 69 | ) 70 | 71 | # 使用累积缓冲区 72 | response_buffer = "" 73 | 74 | for chunk in response: 75 | if hasattr(chunk.choices[0].delta, 'content'): 76 | content = chunk.choices[0].delta.content 77 | if content: 78 | # 累积内容 79 | response_buffer += content 80 | 81 | # 只在累积一定数量的字符或遇到句子结束标记时才发送 82 | if len(content) >= 10 or content.endswith(('.', '!', '?', '。', '!', '?', '\n')): 83 | yield { 84 | "status": "streaming", 85 | "content": response_buffer 86 | } 87 | 88 | # 确保发送最终完整内容 89 | if response_buffer: 90 | yield { 91 | "status": "streaming", 92 | "content": response_buffer 93 | } 94 | 95 | # Send completion status 96 
| yield { 97 | "status": "completed", 98 | "content": response_buffer 99 | } 100 | 101 | finally: 102 | # Restore original environment state 103 | for key, value in original_env.items(): 104 | if value is None: 105 | if key in os.environ: 106 | del os.environ[key] 107 | else: 108 | os.environ[key] = value 109 | 110 | except Exception as e: 111 | yield { 112 | "status": "error", 113 | "error": str(e) 114 | } 115 | 116 | def analyze_image(self, image_data: str, proxies: dict = None) -> Generator[dict, None, None]: 117 | """Stream GPT-4o's response for image analysis""" 118 | try: 119 | # Initial status 120 | yield {"status": "started", "content": ""} 121 | 122 | # Save original environment state 123 | original_env = { 124 | 'http_proxy': os.environ.get('http_proxy'), 125 | 'https_proxy': os.environ.get('https_proxy') 126 | } 127 | 128 | try: 129 | # Set proxy environment variables if provided 130 | if proxies: 131 | if 'http' in proxies: 132 | os.environ['http_proxy'] = proxies['http'] 133 | if 'https' in proxies: 134 | os.environ['https_proxy'] = proxies['https'] 135 | 136 | # Initialize OpenAI client with base_url if provided 137 | if self.api_base_url: 138 | client = OpenAI(api_key=self.api_key, base_url=self.api_base_url) 139 | else: 140 | client = OpenAI(api_key=self.api_key) 141 | 142 | # 使用系统提供的系统提示词,不再自动添加语言指令 143 | system_prompt = self.system_prompt 144 | 145 | # Prepare messages with image 146 | messages = [ 147 | { 148 | "role": "system", 149 | "content": system_prompt 150 | }, 151 | { 152 | "role": "user", 153 | "content": [ 154 | { 155 | "type": "image_url", 156 | "image_url": { 157 | "url": f"data:image/jpeg;base64,{image_data}" 158 | } 159 | }, 160 | { 161 | "type": "text", 162 | "text": "Please analyze this image and provide a detailed solution." 
163 | } 164 | ] 165 | } 166 | ] 167 | 168 | response = client.chat.completions.create( 169 | model=self.get_model_identifier(), 170 | messages=messages, 171 | temperature=self.temperature, 172 | stream=True, 173 | max_tokens=4000 174 | ) 175 | 176 | # 使用累积缓冲区 177 | response_buffer = "" 178 | 179 | for chunk in response: 180 | if hasattr(chunk.choices[0].delta, 'content'): 181 | content = chunk.choices[0].delta.content 182 | if content: 183 | # 累积内容 184 | response_buffer += content 185 | 186 | # 只在累积一定数量的字符或遇到句子结束标记时才发送 187 | if len(content) >= 10 or content.endswith(('.', '!', '?', '。', '!', '?', '\n')): 188 | yield { 189 | "status": "streaming", 190 | "content": response_buffer 191 | } 192 | 193 | # 确保发送最终完整内容 194 | if response_buffer: 195 | yield { 196 | "status": "streaming", 197 | "content": response_buffer 198 | } 199 | 200 | # Send completion status 201 | yield { 202 | "status": "completed", 203 | "content": response_buffer 204 | } 205 | 206 | finally: 207 | # Restore original environment state 208 | for key, value in original_env.items(): 209 | if value is None: 210 | if key in os.environ: 211 | del os.environ[key] 212 | else: 213 | os.environ[key] = value 214 | 215 | except Exception as e: 216 | yield { 217 | "status": "error", 218 | "error": str(e) 219 | } 220 | -------------------------------------------------------------------------------- /models/google.py: -------------------------------------------------------------------------------- 1 | import json 2 | import os 3 | import base64 4 | from typing import Generator, Dict, Any, Optional, List 5 | import google.generativeai as genai 6 | from .base import BaseModel 7 | 8 | class GoogleModel(BaseModel): 9 | """ 10 | Google Gemini API模型实现类 11 | 支持Gemini 2.5 Pro等模型,可处理文本和图像输入 12 | """ 13 | 14 | def __init__(self, api_key: str, temperature: float = 0.7, system_prompt: str = None, language: str = None, model_name: str = None, api_base_url: str = None): 15 | """ 16 | 初始化Google模型 17 | 18 | Args: 19 | api_key: 
Google API密钥 20 | temperature: 生成温度 21 | system_prompt: 系统提示词 22 | language: 首选语言 23 | model_name: 指定具体模型名称,如不指定则使用默认值 24 | api_base_url: API基础URL,用于设置自定义API端点 25 | """ 26 | super().__init__(api_key, temperature, system_prompt, language) 27 | self.model_name = model_name or self.get_model_identifier() 28 | self.max_tokens = 8192 # 默认最大输出token数 29 | self.api_base_url = api_base_url 30 | 31 | # 配置Google API 32 | if api_base_url: 33 | # 配置中转API - 使用环境变量方式 34 | # 移除末尾的斜杠以避免重复路径问题 35 | clean_base_url = api_base_url.rstrip('/') 36 | # 设置环境变量来指定API端点 37 | os.environ['GOOGLE_AI_API_ENDPOINT'] = clean_base_url 38 | genai.configure(api_key=api_key) 39 | else: 40 | # 使用默认API端点 41 | # 清除可能存在的自定义端点环境变量 42 | if 'GOOGLE_AI_API_ENDPOINT' in os.environ: 43 | del os.environ['GOOGLE_AI_API_ENDPOINT'] 44 | genai.configure(api_key=api_key) 45 | 46 | def get_default_system_prompt(self) -> str: 47 | return """You are an expert at analyzing questions and providing detailed solutions. When presented with an image of a question: 48 | 1. First read and understand the question carefully 49 | 2. Break down the key components of the question 50 | 3. Provide a clear, step-by-step solution 51 | 4. If relevant, explain any concepts or theories involved 52 | 5. 
If there are multiple approaches, explain the most efficient one first""" 53 | 54 | def get_model_identifier(self) -> str: 55 | """返回默认的模型标识符""" 56 | return "gemini-2.0-flash" # 使用有免费配额的模型作为默认值 57 | 58 | def analyze_text(self, text: str, proxies: dict = None) -> Generator[dict, None, None]: 59 | """流式生成文本响应""" 60 | try: 61 | yield {"status": "started"} 62 | 63 | # 设置环境变量代理(如果提供) 64 | original_proxies = None 65 | if proxies: 66 | original_proxies = { 67 | 'http_proxy': os.environ.get('http_proxy'), 68 | 'https_proxy': os.environ.get('https_proxy') 69 | } 70 | if 'http' in proxies: 71 | os.environ['http_proxy'] = proxies['http'] 72 | if 'https' in proxies: 73 | os.environ['https_proxy'] = proxies['https'] 74 | 75 | try: 76 | # 初始化模型 77 | model = genai.GenerativeModel(self.model_name) 78 | 79 | # 获取最大输出Token设置 80 | max_tokens = self.max_tokens if hasattr(self, 'max_tokens') else 8192 81 | 82 | # 创建配置参数 83 | generation_config = { 84 | 'temperature': self.temperature, 85 | 'max_output_tokens': max_tokens, 86 | 'top_p': 0.95, 87 | 'top_k': 64, 88 | } 89 | 90 | # 构建提示 91 | prompt_parts = [] 92 | 93 | # 添加系统提示词 94 | if self.system_prompt: 95 | prompt_parts.append(self.system_prompt) 96 | 97 | # 添加用户查询 98 | if self.language and self.language != 'auto': 99 | prompt_parts.append(f"请使用{self.language}回答以下问题: {text}") 100 | else: 101 | prompt_parts.append(text) 102 | 103 | # 初始化响应缓冲区 104 | response_buffer = "" 105 | 106 | # 流式生成响应 107 | response = model.generate_content( 108 | prompt_parts, 109 | generation_config=generation_config, 110 | stream=True 111 | ) 112 | 113 | for chunk in response: 114 | if not chunk.text: 115 | continue 116 | 117 | # 累积响应文本 118 | response_buffer += chunk.text 119 | 120 | # 发送响应进度 121 | if len(chunk.text) >= 10 or chunk.text.endswith(('.', '!', '?', '。', '!', '?', '\n')): 122 | yield { 123 | "status": "streaming", 124 | "content": response_buffer 125 | } 126 | 127 | # 确保发送完整的最终内容 128 | yield { 129 | "status": "completed", 130 | "content": 
response_buffer 131 | } 132 | 133 | finally: 134 | # 恢复原始代理设置 135 | if original_proxies: 136 | for key, value in original_proxies.items(): 137 | if value is None: 138 | if key in os.environ: 139 | del os.environ[key] 140 | else: 141 | os.environ[key] = value 142 | 143 | except Exception as e: 144 | yield { 145 | "status": "error", 146 | "error": f"Gemini API错误: {str(e)}" 147 | } 148 | 149 | def analyze_image(self, image_data: str, proxies: dict = None) -> Generator[dict, None, None]: 150 | """分析图像并流式生成响应""" 151 | try: 152 | yield {"status": "started"} 153 | 154 | # 设置环境变量代理(如果提供) 155 | original_proxies = None 156 | if proxies: 157 | original_proxies = { 158 | 'http_proxy': os.environ.get('http_proxy'), 159 | 'https_proxy': os.environ.get('https_proxy') 160 | } 161 | if 'http' in proxies: 162 | os.environ['http_proxy'] = proxies['http'] 163 | if 'https' in proxies: 164 | os.environ['https_proxy'] = proxies['https'] 165 | 166 | try: 167 | # 初始化模型 168 | model = genai.GenerativeModel(self.model_name) 169 | 170 | # 获取最大输出Token设置 171 | max_tokens = self.max_tokens if hasattr(self, 'max_tokens') else 8192 172 | 173 | # 创建配置参数 174 | generation_config = { 175 | 'temperature': self.temperature, 176 | 'max_output_tokens': max_tokens, 177 | 'top_p': 0.95, 178 | 'top_k': 64, 179 | } 180 | 181 | # 构建提示词 182 | prompt_parts = [] 183 | 184 | # 添加系统提示词 185 | if self.system_prompt: 186 | prompt_parts.append(self.system_prompt) 187 | 188 | # 添加默认图像分析指令 189 | if self.language and self.language != 'auto': 190 | prompt_parts.append(f"请使用{self.language}分析这张图片并提供详细解答。") 191 | else: 192 | prompt_parts.append("请分析这张图片并提供详细解答。") 193 | 194 | # 处理图像数据 195 | if image_data.startswith('data:image'): 196 | # 如果是data URI,提取base64部分 197 | image_data = image_data.split(',', 1)[1] 198 | 199 | # 使用genai的特定方法处理图像 200 | image_part = { 201 | "mime_type": "image/jpeg", 202 | "data": base64.b64decode(image_data) 203 | } 204 | prompt_parts.append(image_part) 205 | 206 | # 初始化响应缓冲区 207 | response_buffer = "" 
208 | 209 | # 流式生成响应 210 | response = model.generate_content( 211 | prompt_parts, 212 | generation_config=generation_config, 213 | stream=True 214 | ) 215 | 216 | for chunk in response: 217 | if not chunk.text: 218 | continue 219 | 220 | # 累积响应文本 221 | response_buffer += chunk.text 222 | 223 | # 发送响应进度 224 | if len(chunk.text) >= 10 or chunk.text.endswith(('.', '!', '?', '。', '!', '?', '\n')): 225 | yield { 226 | "status": "streaming", 227 | "content": response_buffer 228 | } 229 | 230 | # 确保发送完整的最终内容 231 | yield { 232 | "status": "completed", 233 | "content": response_buffer 234 | } 235 | 236 | finally: 237 | # 恢复原始代理设置 238 | if original_proxies: 239 | for key, value in original_proxies.items(): 240 | if value is None: 241 | if key in os.environ: 242 | del os.environ[key] 243 | else: 244 | os.environ[key] = value 245 | 246 | except Exception as e: 247 | yield { 248 | "status": "error", 249 | "error": f"Gemini图像分析错误: {str(e)}" 250 | } -------------------------------------------------------------------------------- /static/js/ui.js: -------------------------------------------------------------------------------- 1 | class UIManager { 2 | constructor() { 3 | // 延迟初始化,确保DOM已加载 4 | if (document.readyState === 'loading') { 5 | document.addEventListener('DOMContentLoaded', () => this.init()); 6 | } else { 7 | // 如果DOM已经加载完成,则立即初始化 8 | this.init(); 9 | } 10 | } 11 | 12 | init() { 13 | console.log('初始化UI管理器...'); 14 | // UI elements 15 | this.settingsPanel = document.getElementById('settingsPanel'); 16 | this.settingsToggle = document.getElementById('settingsToggle'); 17 | this.closeSettings = document.getElementById('closeSettings'); 18 | this.themeToggle = document.getElementById('themeToggle'); 19 | this.toastContainer = document.getElementById('toastContainer'); 20 | 21 | // 验证关键元素是否存在 22 | if (!this.themeToggle) { 23 | console.error('主题切换按钮未找到!'); 24 | return; 25 | } 26 | 27 | if (!this.toastContainer) { 28 | console.error('Toast容器未找到!'); 29 | // 尝试创建Toast容器 30 | 
this.toastContainer = this.createToastContainer(); 31 | } 32 | 33 | // Check for preferred color scheme 34 | this.checkPreferredColorScheme(); 35 | 36 | // Initialize event listeners 37 | this.setupEventListeners(); 38 | 39 | console.log('UI管理器初始化完成'); 40 | } 41 | 42 | createToastContainer() { 43 | console.log('创建Toast容器'); 44 | const container = document.createElement('div'); 45 | container.id = 'toastContainer'; 46 | container.className = 'toast-container'; 47 | document.body.appendChild(container); 48 | return container; 49 | } 50 | 51 | checkPreferredColorScheme() { 52 | const savedTheme = localStorage.getItem('theme'); 53 | const prefersDark = window.matchMedia('(prefers-color-scheme: dark)'); 54 | 55 | if (savedTheme) { 56 | this.setTheme(savedTheme === 'dark'); 57 | } else { 58 | this.setTheme(prefersDark.matches); 59 | } 60 | 61 | prefersDark.addEventListener('change', (e) => this.setTheme(e.matches)); 62 | } 63 | 64 | setTheme(isDark) { 65 | try { 66 | document.documentElement.setAttribute('data-theme', isDark ? 'dark' : 'light'); 67 | if (this.themeToggle) { 68 | this.themeToggle.innerHTML = ``; 69 | } 70 | localStorage.setItem('theme', isDark ? 'dark' : 'light'); 71 | console.log(`主题已切换为: ${isDark ? 
    /**
     * Show a toast notification.
     * @param {string} message  Text to display.
     * @param {string} type     One of 'success', 'error', 'info', 'warning'.
     * @param {number} displayTime  Milliseconds to show; -1 keeps the toast
     *                              until manually closed; undefined picks a
     *                              default based on type and message length.
     * @returns {HTMLElement|null} The created (or reused) toast element.
     */
    showToast(message, type = 'success', displayTime) {
        try {
            if (!message) {
                console.warn('尝试显示空消息');
                message = '';
            }

            // Recreate the container if it disappeared (e.g. DOM was rebuilt).
            if (!this.toastContainer) {
                console.error('Toast容器不存在,正在创建新容器');
                this.toastContainer = this.createToastContainer();
                if (!this.toastContainer) {
                    console.error('无法创建Toast容器,放弃显示消息');
                    return null;
                }
            }

            // De-duplicate: if a toast with identical text is already visible,
            // return it instead of stacking another copy.
            try {
                const existingToasts = this.toastContainer.querySelectorAll('.toast');
                for (const existingToast of existingToasts) {
                    try {
                        const spanElement = existingToast.querySelector('span');
                        if (spanElement && spanElement.textContent === message) {
                            return existingToast;
                        }
                    } catch (e) {
                        console.warn('检查现有toast时出错:', e);
                        // Keep scanning the remaining toasts.
                    }
                }
            } catch (e) {
                console.warn('查询现有toast时出错:', e);
                // Fall through and create a new toast anyway.
            }

            const toast = document.createElement('div');
            toast.className = `toast ${type}`;

            // Pick an icon name by message type.
            let icon = 'check-circle';
            if (type === 'error') icon = 'exclamation-circle';
            else if (type === 'warning') icon = 'exclamation-triangle';
            else if (type === 'info') icon = 'info-circle';

            // NOTE(review): the icon markup inside this template appears to
            // have been stripped during extraction — confirm against the
            // original file before relying on this literal.
            toast.innerHTML = `
                
                ${message}
            `;

            // Persistent toasts (-1) get an explicit close button.
            if (displayTime === -1) {
                const closeButton = document.createElement('button');
                closeButton.className = 'toast-close';
                closeButton.innerHTML = '';
                closeButton.addEventListener('click', (e) => {
                    this.hideToast(toast);
                });
                toast.appendChild(closeButton);
                toast.classList.add('persistent');
            }

            this.toastContainer.appendChild(toast);

            // Auto-hide unless persistent; default duration depends on type
            // and message length.
            if (displayTime !== -1) {
                if (displayTime === undefined) {
                    displayTime = message === '截图成功' ? 1500 :
                                 type === 'error' ? 5000 :
                                 message.length > 50 ? 4000 : 3000;
                }

                setTimeout(() => {
                    this.hideToast(toast);
                }, displayTime);
            }

            return toast;
        } catch (error) {
            console.error('显示Toast消息时出错:', error);
            return null;
        }
    }

    /**
     * Fade out and remove a toast element.
     * @param {HTMLElement} toast  The toast to hide.
     */
    hideToast(toast) {
        if (!toast || !toast.parentNode) return;

        toast.style.opacity = '0';
        // Wait for the CSS fade transition before removing from the DOM.
        setTimeout(() => {
            if (toast.parentNode) {
                toast.remove();
            }
        }, 300);
    }

    // Close every floating panel (currently only the settings panel).
    closeAllPanels() {
        if (this.settingsPanel) {
            this.settingsPanel.classList.remove('active');
        }
    }

    // Hide the settings panel.
    hideSettingsPanel() {
        if (this.settingsPanel) {
            this.settingsPanel.classList.remove('active');
        }
    }

    // Toggle the settings panel open/closed.
    toggleSettingsPanel() {
        if (this.settingsPanel) {
            this.settingsPanel.classList.toggle('active');
        }
    }

    // Close the settings panel (alias of hideSettingsPanel, kept for callers).
    closeSettingsPanel() {
        if (this.settingsPanel) {
            this.settingsPanel.classList.remove('active');
        }
    }

    // Close the settings panel when a click lands outside both the panel
    // and its toggle button.
    checkClickOutsideSettings(e) {
        if (this.settingsPanel &&
            !this.settingsPanel.contains(e.target) &&
            !e.target.closest('#settingsToggle')) {
            this.settingsPanel.classList.remove('active');
        }
    }

    setupEventListeners() {
        // Abort if required controls are missing (defensive: partial DOM).
        if (!this.settingsToggle || !this.closeSettings || !this.themeToggle) {
            console.error('无法设置事件监听器:一些UI元素未找到');
            return;
        }

        // Settings panel open/close.
        this.settingsToggle.addEventListener('click', () => {
            this.closeAllPanels();
            this.settingsPanel.classList.toggle('active');
        });

        this.closeSettings.addEventListener('click', () => {
            this.settingsPanel.classList.remove('active');
        });

        // Theme toggle flips between light and dark.
        this.themeToggle.addEventListener('click', () => {
            try {
                const currentTheme = document.documentElement.getAttribute('data-theme');
                console.log('当前主题:', currentTheme);
                this.setTheme(currentTheme !== 'dark');
            } catch (error) {
                console.error('切换主题时出错:', error);
            }
        });

        // Clicking anywhere outside the settings panel closes it.
        document.addEventListener('click', (e) => {
            this.checkClickOutsideSettings(e);
        });
    }
}

// Expose the class globally so other scripts can instantiate it.
window.UIManager = UIManager;

// Instantiate the singleton once the DOM is ready (or immediately if it
// already is).
if (document.readyState === 'loading') {
    document.addEventListener('DOMContentLoaded', () => {
        window.uiManager = new UIManager();
    });
} else {
    window.uiManager = new UIManager();
}

// Global helper: show a toast via the singleton.
window.showToast = (message, type) => {
    if (window.uiManager) {
        return window.uiManager.showToast(message, type);
    } else {
        console.error('UI管理器未初始化,无法显示Toast');
        return null;
    }
};

// Global helper: close all panels via the singleton.
window.closeAllPanels = () => {
    if (window.uiManager) {
        window.uiManager.closeAllPanels();
    } else {
        console.error('UI管理器未初始化,无法关闭面板');
    }
};
class ModelFactory:
    """Factory that creates and registers AI model backends.

    Model metadata (provider, capabilities, display name) is loaded from
    ``config/models.json`` by :meth:`initialize`.  Two OCR-only tools
    (Mathpix and Baidu OCR) are always registered on top of the configured
    models and are hidden from the regular model listings.
    """

    # model_id -> {'class', 'provider_id', 'is_multimodal', 'is_reasoning',
    #              'display_name', 'description', optional 'is_ocr_only'}
    _models: Dict[str, Dict[str, Any]] = {}
    # provider_id -> model class dynamically imported from this package
    _class_map: Dict[str, "Type[BaseModel]"] = {}

    @classmethod
    def initialize(cls):
        """Load model definitions from ``config/models.json``.

        Falls back to :meth:`_initialize_defaults` (OCR tools only) when
        the configuration cannot be read or parsed.
        """
        try:
            config_path = os.path.join(os.path.dirname(__file__), '..', 'config', 'models.json')
            with open(config_path, 'r', encoding='utf-8') as f:
                config = json.load(f)

            # Resolve each provider's implementation class by importing the
            # sibling module named after the provider id (e.g. ".openai").
            for provider_id, provider_info in config.get('providers', {}).items():
                class_name = provider_info.get('class_name')
                if class_name:
                    module = importlib.import_module(f'.{provider_id.lower()}', package=__package__)
                    cls._class_map[provider_id] = getattr(module, class_name)

            # Register every configured model whose provider resolved.
            for model_id, model_info in config.get('models', {}).items():
                provider_id = model_info.get('provider')
                if provider_id and provider_id in cls._class_map:
                    cls._models[model_id] = {
                        'class': cls._class_map[provider_id],
                        'provider_id': provider_id,
                        'is_multimodal': model_info.get('supportsMultimodal', False),
                        'is_reasoning': model_info.get('isReasoning', False),
                        'display_name': model_info.get('name', model_id),
                        'description': model_info.get('description', ''),
                    }

            # The OCR tools are not defined in the config file.
            cls._register_ocr_tools()

            print(f"已从配置加载 {len(cls._models)} 个模型")
        except Exception as e:
            print(f"加载模型配置失败: {str(e)}")
            cls._initialize_defaults()

    @classmethod
    def _register_ocr_tools(cls):
        """Register the built-in OCR-only tools (Mathpix and Baidu OCR).

        Each tool is registered independently so a failure with one does
        not prevent the other from loading.
        """
        try:
            cls._models['mathpix'] = {
                'class': MathpixModel,
                'is_multimodal': True,
                'is_reasoning': False,
                'display_name': 'Mathpix OCR',
                'description': '数学公式识别工具,适用于复杂数学内容',
                'is_ocr_only': True,
            }
        except Exception as e:
            print(f"无法加载Mathpix OCR工具: {str(e)}")

        try:
            cls._models['baidu-ocr'] = {
                'class': BaiduOCRModel,
                'is_multimodal': True,
                'is_reasoning': False,
                'display_name': '百度OCR',
                'description': '通用文字识别工具,支持中文识别',
                'is_ocr_only': True,
            }
        except Exception as e:
            print(f"无法加载百度OCR工具: {str(e)}")

    @classmethod
    def _initialize_defaults(cls):
        """Fallback used when the configuration cannot be loaded.

        No provider models are hard-coded; only the built-in OCR tools
        remain available.
        """
        print("配置加载失败,使用空模型列表")
        cls._models = {}
        cls._register_ocr_tools()

    @classmethod
    def create_model(cls, model_name: str, api_key: str, temperature: float = 0.7,
                     system_prompt: Optional[str] = None, language: Optional[str] = None,
                     api_base_url: Optional[str] = None) -> "BaseModel":
        """
        Create a model instance based on the model name.

        Args:
            model_name: The identifier for the model
            api_key: The API key for the model service
            temperature: The temperature to use for generation
            system_prompt: The system prompt to use
            language: The preferred language for responses
            api_base_url: The base URL for API requests

        Returns:
            A model instance

        Raises:
            ValueError: If ``model_name`` is not a registered model.
        """
        if model_name not in cls._models:
            raise ValueError(f"Unknown model: {model_name}")

        model_info = cls._models[model_name]
        kwargs = cls._build_constructor_kwargs(
            model_name, model_info.get('provider_id'), api_key,
            temperature, system_prompt, language, api_base_url)
        return model_info['class'](**kwargs)

    @classmethod
    def _build_constructor_kwargs(cls, model_name: str, provider_id: Optional[str],
                                  api_key: str, temperature: float,
                                  system_prompt: Optional[str], language: Optional[str],
                                  api_base_url: Optional[str]) -> Dict[str, Any]:
        """Build the constructor arguments expected by each backend.

        The branch order mirrors the historical dispatch exactly: the
        provider id is checked for OpenAI first, then the model name is
        matched by substring for the remaining providers.
        """
        kwargs: Dict[str, Any] = {
            'api_key': api_key,
            'temperature': temperature,
            'system_prompt': system_prompt,
        }
        name = model_name.lower()

        if provider_id == 'openai':
            # OpenAI backends take the concrete model id as model_identifier.
            kwargs.update(language=language, api_base_url=api_base_url,
                          model_identifier=model_name)
        elif 'deepseek' in name:
            # DeepSeek needs the concrete model name forwarded.
            kwargs.update(language=language, model_name=model_name,
                          api_base_url=api_base_url)
        elif 'qwen' in name or 'qvq' in name or 'alibaba' in name:
            # Alibaba/DashScope models take a model name but no base URL.
            kwargs.update(language=language, model_name=model_name)
        elif 'gemini' in name or 'google' in name:
            kwargs.update(language=language, model_name=model_name,
                          api_base_url=api_base_url)
        elif 'doubao' in name:
            kwargs.update(language=language, model_name=model_name,
                          api_base_url=api_base_url)
        elif model_name in ('mathpix', 'baidu-ocr'):
            # OCR tools take neither a language nor a base URL.  For Baidu
            # OCR the api_key may carry the "API_KEY:SECRET_KEY" format.
            pass
        elif 'claude' in name or 'anthropic' in name:
            # Anthropic backends take the concrete model id as model_identifier.
            kwargs.update(language=language, api_base_url=api_base_url,
                          model_identifier=model_name)
        else:
            # Any other backend gets only the standard parameters.
            kwargs.update(language=language, api_base_url=api_base_url)
        return kwargs

    @classmethod
    def get_available_models(cls) -> list[Dict[str, Any]]:
        """Return metadata for all selectable models (OCR-only tools excluded)."""
        return [
            {
                'id': model_id,
                'display_name': info.get('display_name', model_id),
                'description': info.get('description', ''),
                'is_multimodal': info.get('is_multimodal', False),
                'is_reasoning': info.get('is_reasoning', False),
            }
            for model_id, info in cls._models.items()
            if not info.get('is_ocr_only', False)
        ]

    @classmethod
    def get_model_ids(cls) -> list[str]:
        """Return the identifiers of all selectable models (OCR tools excluded)."""
        return [model_id for model_id, info in cls._models.items()
                if not info.get('is_ocr_only', False)]

    @classmethod
    def is_multimodal(cls, model_name: str) -> bool:
        """Return True if the model accepts image input; False for unknown models."""
        return cls._models.get(model_name, {}).get('is_multimodal', False)

    @classmethod
    def is_reasoning(cls, model_name: str) -> bool:
        """Return True if the model exposes a reasoning stream; False for unknown models."""
        return cls._models.get(model_name, {}).get('is_reasoning', False)

    @classmethod
    def get_model_display_name(cls, model_name: str) -> str:
        """Return the human-readable model name, falling back to the raw id."""
        return cls._models.get(model_name, {}).get('display_name', model_name)

    @classmethod
    def register_model(cls, model_name: str, model_class: "Type[BaseModel]",
                       is_multimodal: bool = False, is_reasoning: bool = False,
                       display_name: Optional[str] = None, description: Optional[str] = None) -> None:
        """
        Register a new model type with the factory.

        Args:
            model_name: The identifier for the model
            model_class: The model class to register
            is_multimodal: Whether the model supports image input
            is_reasoning: Whether the model provides reasoning process
            display_name: Human-readable name for the model
            description: Description of the model
        """
        cls._models[model_name] = {
            'class': model_class,
            'is_multimodal': is_multimodal,
            'is_reasoning': is_reasoning,
            'display_name': display_name or model_name,
            'description': description or '',
        }
14 | 15 | "Legal Entity" shall mean the union of the acting entity and all 16 | other entities that control, are controlled by, or are under common 17 | control with that entity. For the purposes of this definition, 18 | "control" means (i) the power, direct or indirect, to cause the 19 | direction or management of such entity, whether by contract or 20 | otherwise, or (ii) ownership of fifty percent (50%) or more of the 21 | outstanding shares, or (iii) beneficial ownership of such entity. 22 | 23 | "You" (or "Your") shall mean an individual or Legal Entity 24 | exercising permissions granted by this License. 25 | 26 | "Source" form shall mean the preferred form for making modifications, 27 | including but not limited to software source code, documentation 28 | source, and configuration files. 29 | 30 | "Object" form shall mean any form resulting from mechanical 31 | transformation or translation of a Source form, including but 32 | not limited to compiled object code, generated documentation, 33 | and conversions to other media types. 34 | 35 | "Work" shall mean the work of authorship, whether in Source or 36 | Object form, made available under the License, as indicated by a 37 | copyright notice that is included in or attached to the work 38 | (an example is provided in the Appendix below). 39 | 40 | "Derivative Works" shall mean any work, whether in Source or Object 41 | form, that is based on (or derived from) the Work and for which the 42 | editorial revisions, annotations, elaborations, or other modifications 43 | represent, as a whole, an original work of authorship. For the purposes 44 | of this License, Derivative Works shall not include works that remain 45 | separable from, or merely link (or bind by name) to the interfaces of, 46 | the Work and Derivative Works thereof. 
47 | 48 | "Contribution" shall mean any work of authorship, including 49 | the original version of the Work and any modifications or additions 50 | to that Work or Derivative Works thereof, that is intentionally 51 | submitted to Licensor for inclusion in the Work by the copyright owner 52 | or by an individual or Legal Entity authorized to submit on behalf of 53 | the copyright owner. For the purposes of this definition, "submitted" 54 | means any form of electronic, verbal, or written communication sent 55 | to the Licensor or its representatives, including but not limited to 56 | communication on electronic mailing lists, source code control systems, 57 | and issue tracking systems that are managed by, or on behalf of, the 58 | Licensor for the purpose of discussing and improving the Work, but 59 | excluding communication that is conspicuously marked or otherwise 60 | designated in writing by the copyright owner as "Not a Contribution." 61 | 62 | "Contributor" shall mean Licensor and any individual or Legal Entity 63 | on behalf of whom a Contribution has been received by Licensor and 64 | subsequently incorporated within the Work. 65 | 66 | 2. Grant of Copyright License. Subject to the terms and conditions of 67 | this License, each Contributor hereby grants to You a perpetual, 68 | worldwide, non-exclusive, no-charge, royalty-free, irrevocable 69 | copyright license to reproduce, prepare Derivative Works of, 70 | publicly display, publicly perform, sublicense, and distribute the 71 | Work and such Derivative Works in Source or Object form. 72 | 73 | 3. Grant of Patent License. 
Subject to the terms and conditions of 74 | this License, each Contributor hereby grants to You a perpetual, 75 | worldwide, non-exclusive, no-charge, royalty-free, irrevocable 76 | (except as stated in this section) patent license to make, have made, 77 | use, offer to sell, sell, import, and otherwise transfer the Work, 78 | where such license applies only to those patent claims licensable 79 | by such Contributor that are necessarily infringed by their 80 | Contribution(s) alone or by combination of their Contribution(s) 81 | with the Work to which such Contribution(s) was submitted. If You 82 | institute patent litigation against any entity (including a 83 | cross-claim or counterclaim in a lawsuit) alleging that the Work 84 | or a Contribution incorporated within the Work constitutes direct 85 | or contributory patent infringement, then any patent licenses 86 | granted to You under this License for that Work shall terminate 87 | as of the date such litigation is filed. 88 | 89 | 4. Redistribution. 
You may reproduce and distribute copies of the 90 | Work or Derivative Works thereof in any medium, with or without 91 | modifications, and in Source or Object form, provided that You 92 | meet the following conditions: 93 | 94 | (a) You must give any other recipients of the Work or 95 | Derivative Works a copy of this License; and 96 | 97 | (b) You must cause any modified files to carry prominent notices 98 | stating that You changed the files; and 99 | 100 | (c) You must retain, in the Source form of any Derivative Works 101 | that You distribute, all copyright, patent, trademark, and 102 | attribution notices from the Source form of the Work, 103 | excluding those notices that do not pertain to any part of 104 | the Derivative Works; and 105 | 106 | (d) If the Work includes a "NOTICE" text file as part of its 107 | distribution, then any Derivative Works that You distribute must 108 | include a readable copy of the attribution notices contained 109 | within such NOTICE file, excluding those notices that do not 110 | pertain to any part of the Derivative Works, in at least one 111 | of the following places: within a NOTICE text file distributed 112 | as part of the Derivative Works; within the Source form or 113 | documentation, if provided along with the Derivative Works; or, 114 | within a display generated by the Derivative Works, if and 115 | wherever such third-party notices normally appear. The contents 116 | of the NOTICE file are for informational purposes only and 117 | do not modify the License. You may add Your own attribution 118 | notices within Derivative Works that You distribute, alongside 119 | or as an addendum to the NOTICE text from the Work, provided 120 | that such additional attribution notices cannot be construed 121 | as modifying the License. 
122 | 123 | You may add Your own copyright statement to Your modifications and 124 | may provide additional or different license terms and conditions 125 | for use, reproduction, or distribution of Your modifications, or 126 | for any such Derivative Works as a whole, provided Your use, 127 | reproduction, and distribution of the Work otherwise complies with 128 | the conditions stated in this License. 129 | 130 | 5. Submission of Contributions. Unless You explicitly state otherwise, 131 | any Contribution intentionally submitted for inclusion in the Work 132 | by You to the Licensor shall be under the terms and conditions of 133 | this License, without any additional terms or conditions. 134 | Notwithstanding the above, nothing herein shall supersede or modify 135 | the terms of any separate license agreement you may have executed 136 | with Licensor regarding such Contributions. 137 | 138 | 6. Trademarks. This License does not grant permission to use the trade 139 | names, trademarks, service marks, or product names of the Licensor, 140 | except as required for reasonable and customary use in describing the 141 | origin of the Work and reproducing the content of the NOTICE file. 142 | 143 | 7. Disclaimer of Warranty. Unless required by applicable law or 144 | agreed to in writing, Licensor provides the Work (and each 145 | Contributor provides its Contributions) on an "AS IS" BASIS, 146 | WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or 147 | implied, including, without limitation, any warranties or conditions 148 | of TITLE, NON-INFRINGEMENT, MERCHANTABILITY, or FITNESS FOR A 149 | PARTICULAR PURPOSE. You are solely responsible for determining the 150 | appropriateness of using or redistributing the Work and assume any 151 | risks associated with Your exercise of permissions under this License. 152 | 153 | 8. Limitation of Liability. 
In no event and under no legal theory, 154 | whether in tort (including negligence), contract, or otherwise, 155 | unless required by applicable law (such as deliberate and grossly 156 | negligent acts) or agreed to in writing, shall any Contributor be 157 | liable to You for damages, including any direct, indirect, special, 158 | incidental, or consequential damages of any character arising as a 159 | result of this License or out of the use or inability to use the 160 | Work (including but not limited to damages for loss of goodwill, 161 | work stoppage, computer failure or malfunction, or any and all 162 | other commercial damages or losses), even if such Contributor 163 | has been advised of the possibility of such damages. 164 | 165 | 9. Accepting Warranty or Additional Liability. While redistributing 166 | the Work or Derivative Works thereof, You may choose to offer, 167 | and charge a fee for, acceptance of support, warranty, indemnity, 168 | or other liability obligations and/or rights consistent with this 169 | License. However, in accepting such obligations, You may act only 170 | on Your own behalf and on Your sole responsibility, not on behalf 171 | of any other Contributor, and only if You agree to indemnify, 172 | defend, and hold each Contributor harmless for any liability 173 | incurred by, or claims asserted against, such Contributor by reason 174 | of your accepting any such warranty or additional liability. 175 | 176 | END OF TERMS AND CONDITIONS 177 | 178 | APPENDIX: How to apply the Apache License to your work. 179 | 180 | To apply the Apache License to your work, attach the following 181 | boilerplate notice, with the fields enclosed by brackets "[]" 182 | replaced with your own identifying information. (Don't include 183 | the brackets!) The text should be enclosed in the appropriate 184 | comment syntax for the file format. 
import os
from typing import Generator, Dict, Optional, Any

from openai import OpenAI

from .base import BaseModel


class AlibabaModel(BaseModel):
    """Alibaba DashScope backend for QVQ (reasoning) and Qwen-VL (vision) models.

    Talks to DashScope's OpenAI-compatible endpoint and streams results as
    a sequence of status dicts (``started`` / ``reasoning`` /
    ``reasoning_complete`` / ``streaming`` / ``completed`` / ``error``)
    consumed by the rest of the application.
    """

    # DashScope's OpenAI-compatible endpoint.  This is fixed; the
    # api_base_url constructor argument is kept only for signature parity
    # with the other backends and is not used for requests.
    _BASE_URL = "https://dashscope.aliyuncs.com/compatible-mode/v1"

    def __init__(self, api_key: str, temperature: float = 0.7, system_prompt: str = None,
                 language: str = None, model_name: str = None, api_base_url: str = None):
        # model_name must be set before super().__init__ runs, because the
        # base constructor calls get_default_system_prompt(), which keys
        # off the model name.
        self.model_name = model_name if model_name else "QVQ-Max-2025-03-25"
        print(f"初始化阿里巴巴模型: {self.model_name}")
        super().__init__(api_key, temperature, system_prompt, language)
        self.api_base_url = api_base_url  # stored but unused; see _BASE_URL

    def get_default_system_prompt(self) -> str:
        """Return a model-appropriate default system prompt (Qwen-VL vs QVQ)."""
        if self.model_name and "qwen-vl" in self.model_name:
            return """你是通义千问VL视觉语言助手,擅长图像理解、文字识别、内容分析和创作。请根据用户提供的图像:
1. 仔细阅读并理解问题
2. 分析问题的关键组成部分
3. 提供清晰的、逐步的解决方案
4. 如果相关,解释涉及的概念或理论
5. 如果有多种解决方法,先解释最高效的方法"""
        # QVQ models use the original prompt.
        return """你是一位专业的问题分析与解答助手。当看到一个问题图片时,请:
1. 仔细阅读并理解问题
2. 分析问题的关键组成部分
3. 提供清晰的、逐步的解决方案
4. 如果相关,解释涉及的概念或理论
5. 如果有多种解决方法,先解释最高效的方法"""

    def get_model_identifier(self) -> str:
        """Map the configured model name to the DashScope API identifier."""
        # Exact mapping from model ids to DashScope API identifiers.
        model_mapping = {
            "QVQ-Max-2025-03-25": "qvq-max",
            "qwen-vl-max-latest": "qwen-vl-max",  # corrected API identifier
        }

        print(f"模型名称: {self.model_name}")

        model_id = model_mapping.get(self.model_name)
        if model_id:
            print(f"从映射表中获取到模型标识符: {model_id}")
            return model_id

        name = (self.model_name or "").lower()

        # No exact match: fall back to prefix/substring heuristics.
        if self.model_name and "qwen-vl" in name:
            if "max" in name:
                print(f"识别为qwen-vl-max模型")
                return "qwen-vl-max"
            elif "plus" in name:
                print(f"识别为qwen-vl-plus模型")
                return "qwen-vl-plus"
            elif "lite" in name:
                print(f"识别为qwen-vl-lite模型")
                return "qwen-vl-lite"
            print(f"默认使用qwen-vl-max模型")
            return "qwen-vl-max"  # default to the strongest variant

        if self.model_name and ("qvq" in name or "alibaba" in name):
            print(f"识别为QVQ模型,使用qvq-max")
            return "qvq-max"

        # Last-resort default.
        print(f"警告:无法识别的模型名称 {self.model_name},默认使用qvq-max")
        return "qvq-max"

    def analyze_text(self, text: str, proxies: dict = None) -> Generator[dict, None, None]:
        """Stream the model's response for a text-only prompt."""
        messages = [
            {
                "role": "system",
                "content": [{"type": "text", "text": self.system_prompt}]
            },
            {
                "role": "user",
                "content": [{"type": "text", "text": text}]
            }
        ]
        yield from self._stream_chat(messages, proxies, log_label="分析文本")

    def analyze_image(self, image_data: str, proxies: dict = None) -> Generator[dict, None, None]:
        """Stream the model's response for a base64-encoded JPEG image."""
        messages = [
            {
                "role": "system",
                "content": [{"type": "text", "text": self.system_prompt}]
            },
            {
                "role": "user",
                "content": [
                    {
                        "type": "image_url",
                        "image_url": {
                            "url": f"data:image/jpeg;base64,{image_data}"
                        }
                    },
                    {
                        "type": "text",
                        "text": "请分析这个图片并提供详细的解答。"
                    }
                ]
            }
        ]
        yield from self._stream_chat(messages, proxies, log_label="分析图像")

    def _stream_chat(self, messages: list, proxies: Optional[dict],
                     log_label: str) -> Generator[dict, None, None]:
        """Shared streaming loop for text and image analysis.

        Yields status dicts: ``started``, incremental ``reasoning`` chunks
        (QVQ only), ``reasoning_complete`` once the answer begins, cumulative
        ``streaming`` chunks, a final ``completed`` dict, or ``error`` on
        any failure.  Proxy environment variables are set for the duration
        of the request and always restored afterwards.
        """
        try:
            yield {"status": "started", "content": ""}

            # Snapshot the proxy environment so it can be restored exactly.
            original_env = {
                'http_proxy': os.environ.get('http_proxy'),
                'https_proxy': os.environ.get('https_proxy')
            }

            try:
                if proxies:
                    if 'http' in proxies:
                        os.environ['http_proxy'] = proxies['http']
                    if 'https' in proxies:
                        os.environ['https_proxy'] = proxies['https']

                # OpenAI-compatible client pointed at DashScope.
                client = OpenAI(
                    api_key=self.api_key,
                    base_url=self._BASE_URL
                )

                response = client.chat.completions.create(
                    model=self.get_model_identifier(),
                    messages=messages,
                    temperature=self.temperature,
                    stream=True,
                    max_tokens=self._get_max_tokens()
                )

                reasoning_content = ""
                answer_content = ""
                is_answering = False

                # Qwen-VL models do not emit a reasoning_content stream.
                is_qwen_vl = "qwen-vl" in self.get_model_identifier().lower()
                print(f"{log_label}使用模型标识符: {self.get_model_identifier()}, 是否为千问VL模型: {is_qwen_vl}")

                for chunk in response:
                    if not chunk.choices:
                        continue

                    delta = chunk.choices[0].delta

                    # Reasoning stream (QVQ models only).
                    if not is_qwen_vl and hasattr(delta, 'reasoning_content') and delta.reasoning_content is not None:
                        reasoning_content += delta.reasoning_content
                        yield {
                            "status": "reasoning",
                            "content": reasoning_content,
                            "is_reasoning": True
                        }
                    elif delta.content != "":
                        # First answer token: flush the full reasoning once.
                        if not is_answering and not is_qwen_vl:
                            is_answering = True
                            if reasoning_content:
                                yield {
                                    "status": "reasoning_complete",
                                    "content": reasoning_content,
                                    "is_reasoning": True
                                }

                        answer_content += delta.content
                        yield {
                            "status": "streaming",
                            "content": answer_content
                        }

                # Emit the final accumulated answer.
                if answer_content:
                    yield {
                        "status": "completed",
                        "content": answer_content
                    }

            finally:
                # Restore the original proxy environment exactly.
                for key, value in original_env.items():
                    if value is None:
                        os.environ.pop(key, None)
                    else:
                        os.environ[key] = value

        except Exception as e:
            yield {
                "status": "error",
                "error": str(e)
            }

    def _get_max_tokens(self) -> int:
        """Return an appropriate max_tokens value for the active model."""
        if "qwen-vl" in self.get_model_identifier():
            return 2000  # Qwen-VL caps at 2048; leave some headroom
        # QVQ or other models; a max_tokens attribute is optional.
        return self.max_tokens if hasattr(self, 'max_tokens') and self.max_tokens else 4000
如果有多种方法,优先解释最有效的方法""" 39 | 40 | def get_model_identifier(self) -> str: 41 | """返回默认的模型标识符""" 42 | return "doubao-seed-1-6-250615" # Doubao-Seed-1.6 43 | 44 | def get_actual_model_name(self) -> str: 45 | """根据配置的模型名称返回实际的API调用标识符""" 46 | # 豆包API的实际模型名称映射 47 | model_mapping = { 48 | "doubao-seed-1-6-250615": "doubao-seed-1-6-250615" 49 | } 50 | 51 | return model_mapping.get(self.model_name, "doubao-seed-1-6-250615") 52 | 53 | def analyze_text(self, text: str, proxies: dict = None) -> Generator[dict, None, None]: 54 | """流式生成文本响应""" 55 | try: 56 | yield {"status": "started"} 57 | 58 | # 设置环境变量代理(如果提供) 59 | original_proxies = None 60 | if proxies: 61 | original_proxies = { 62 | 'http_proxy': os.environ.get('http_proxy'), 63 | 'https_proxy': os.environ.get('https_proxy') 64 | } 65 | if 'http' in proxies: 66 | os.environ['http_proxy'] = proxies['http'] 67 | if 'https' in proxies: 68 | os.environ['https_proxy'] = proxies['https'] 69 | 70 | try: 71 | # 构建请求头 72 | headers = { 73 | "Authorization": f"Bearer {self.api_key}", 74 | "Content-Type": "application/json" 75 | } 76 | 77 | # 构建消息 - 添加系统提示词 78 | messages = [] 79 | 80 | # 添加系统提示词 81 | if self.system_prompt: 82 | messages.append({ 83 | "role": "system", 84 | "content": self.system_prompt 85 | }) 86 | 87 | # 添加用户查询 88 | user_content = text 89 | if self.language and self.language != 'auto': 90 | user_content = f"请使用{self.language}回答以下问题: {text}" 91 | 92 | messages.append({ 93 | "role": "user", 94 | "content": user_content 95 | }) 96 | 97 | # 处理推理配置 98 | thinking = { 99 | "type": "auto" # 默认值 100 | } 101 | 102 | if hasattr(self, 'reasoning_config') and self.reasoning_config: 103 | # 从reasoning_config中获取thinking_mode 104 | thinking_mode = self.reasoning_config.get('thinking_mode', "auto") 105 | thinking = { 106 | "type": thinking_mode 107 | } 108 | 109 | # 构建请求数据 110 | data = { 111 | "model": self.get_actual_model_name(), 112 | "messages": messages, 113 | "thinking": thinking, 114 | "temperature": self.temperature, 115 | 
"max_tokens": self.max_tokens, 116 | "stream": True 117 | } 118 | 119 | # 发送流式请求 120 | response = requests.post( 121 | f"{self.base_url}/chat/completions", 122 | headers=headers, 123 | json=data, 124 | stream=True, 125 | proxies=proxies if proxies else None, 126 | timeout=60 127 | ) 128 | 129 | if response.status_code != 200: 130 | error_text = response.text 131 | raise Exception(f"HTTP {response.status_code}: {error_text}") 132 | 133 | response.raise_for_status() 134 | 135 | # 初始化响应缓冲区 136 | response_buffer = "" 137 | 138 | # 处理流式响应 139 | for line in response.iter_lines(): 140 | if not line: 141 | continue 142 | 143 | line = line.decode('utf-8') 144 | if not line.startswith('data: '): 145 | continue 146 | 147 | line = line[6:] # 移除 'data: ' 前缀 148 | 149 | if line == '[DONE]': 150 | break 151 | 152 | try: 153 | chunk_data = json.loads(line) 154 | choices = chunk_data.get('choices', []) 155 | 156 | if choices and len(choices) > 0: 157 | delta = choices[0].get('delta', {}) 158 | content = delta.get('content', '') 159 | 160 | if content: 161 | response_buffer += content 162 | 163 | # 发送响应进度 164 | yield { 165 | "status": "streaming", 166 | "content": response_buffer 167 | } 168 | 169 | except json.JSONDecodeError: 170 | continue 171 | 172 | # 确保发送完整的最终内容 173 | yield { 174 | "status": "completed", 175 | "content": response_buffer 176 | } 177 | 178 | finally: 179 | # 恢复原始代理设置 180 | if original_proxies: 181 | for key, value in original_proxies.items(): 182 | if value is None: 183 | if key in os.environ: 184 | del os.environ[key] 185 | else: 186 | os.environ[key] = value 187 | 188 | except Exception as e: 189 | yield { 190 | "status": "error", 191 | "error": f"豆包API错误: {str(e)}" 192 | } 193 | 194 | def analyze_image(self, image_data: str, proxies: dict = None) -> Generator[dict, None, None]: 195 | """分析图像并流式生成响应""" 196 | try: 197 | yield {"status": "started"} 198 | 199 | # 设置环境变量代理(如果提供) 200 | original_proxies = None 201 | if proxies: 202 | original_proxies = { 203 | 
'http_proxy': os.environ.get('http_proxy'), 204 | 'https_proxy': os.environ.get('https_proxy') 205 | } 206 | if 'http' in proxies: 207 | os.environ['http_proxy'] = proxies['http'] 208 | if 'https' in proxies: 209 | os.environ['https_proxy'] = proxies['https'] 210 | 211 | try: 212 | # 构建请求头 213 | headers = { 214 | "Authorization": f"Bearer {self.api_key}", 215 | "Content-Type": "application/json" 216 | } 217 | 218 | # 处理图像数据 219 | if image_data.startswith('data:image'): 220 | # 如果是data URI,提取base64部分 221 | image_data = image_data.split(',', 1)[1] 222 | 223 | # 构建用户消息 - 使用豆包API官方示例格式 224 | # 首先检查图像数据的格式,确保是有效的图像 225 | image_format = "jpeg" # 默认使用jpeg 226 | if image_data.startswith('/9j/'): # JPEG magic number in base64 227 | image_format = "jpeg" 228 | elif image_data.startswith('iVBORw0KGgo'): # PNG magic number in base64 229 | image_format = "png" 230 | 231 | # 构建消息 232 | messages = [] 233 | 234 | # 添加系统提示词 235 | if self.system_prompt: 236 | messages.append({ 237 | "role": "system", 238 | "content": self.system_prompt 239 | }) 240 | 241 | user_content = [ 242 | { 243 | "type": "text", 244 | "text": f"请使用{self.language}分析这张图片并提供详细解答。" if self.language and self.language != 'auto' else "请分析这张图片并提供详细解答?" 
245 | }, 246 | { 247 | "type": "image_url", 248 | "image_url": { 249 | "url": f"data:image/{image_format};base64,{image_data}" 250 | } 251 | } 252 | ] 253 | 254 | messages.append({ 255 | "role": "user", 256 | "content": user_content 257 | }) 258 | 259 | # 处理推理配置 260 | thinking = { 261 | "type": "auto" # 默认值 262 | } 263 | 264 | if hasattr(self, 'reasoning_config') and self.reasoning_config: 265 | # 从reasoning_config中获取thinking_mode 266 | thinking_mode = self.reasoning_config.get('thinking_mode', "auto") 267 | thinking = { 268 | "type": thinking_mode 269 | } 270 | 271 | # 构建请求数据 272 | data = { 273 | "model": self.get_actual_model_name(), 274 | "messages": messages, 275 | "thinking": thinking, 276 | "temperature": self.temperature, 277 | "max_tokens": self.max_tokens, 278 | "stream": True 279 | } 280 | 281 | # 发送流式请求 282 | response = requests.post( 283 | f"{self.base_url}/chat/completions", 284 | headers=headers, 285 | json=data, 286 | stream=True, 287 | proxies=proxies if proxies else None, 288 | timeout=60 289 | ) 290 | 291 | if response.status_code != 200: 292 | error_text = response.text 293 | raise Exception(f"HTTP {response.status_code}: {error_text}") 294 | 295 | response.raise_for_status() 296 | 297 | # 初始化响应缓冲区 298 | response_buffer = "" 299 | 300 | # 处理流式响应 301 | for line in response.iter_lines(): 302 | if not line: 303 | continue 304 | 305 | line = line.decode('utf-8') 306 | if not line.startswith('data: '): 307 | continue 308 | 309 | line = line[6:] # 移除 'data: ' 前缀 310 | 311 | if line == '[DONE]': 312 | break 313 | 314 | try: 315 | chunk_data = json.loads(line) 316 | choices = chunk_data.get('choices', []) 317 | 318 | if choices and len(choices) > 0: 319 | delta = choices[0].get('delta', {}) 320 | content = delta.get('content', '') 321 | 322 | if content: 323 | response_buffer += content 324 | 325 | # 发送响应进度 326 | yield { 327 | "status": "streaming", 328 | "content": response_buffer 329 | } 330 | 331 | except json.JSONDecodeError: 332 | continue 333 | 334 
| # 确保发送完整的最终内容 335 | yield { 336 | "status": "completed", 337 | "content": response_buffer 338 | } 339 | 340 | finally: 341 | # 恢复原始代理设置 342 | if original_proxies: 343 | for key, value in original_proxies.items(): 344 | if value is None: 345 | if key in os.environ: 346 | del os.environ[key] 347 | else: 348 | os.environ[key] = value 349 | 350 | except Exception as e: 351 | yield { 352 | "status": "error", 353 | "error": f"豆包图像分析错误: {str(e)}" 354 | } 355 | -------------------------------------------------------------------------------- /models/mathpix.py: -------------------------------------------------------------------------------- 1 | from typing import Generator, Dict, Any 2 | import json 3 | import requests 4 | from .base import BaseModel 5 | 6 | class MathpixModel(BaseModel): 7 | """ 8 | Mathpix OCR model for processing images containing mathematical formulas, 9 | text, and tables. 10 | """ 11 | 12 | def __init__(self, api_key: str, temperature: float = 0.7, system_prompt: str = None): 13 | """ 14 | Initialize the Mathpix model. 
15 | 16 | Args: 17 | api_key: Mathpix API key in format "app_id:app_key" 18 | temperature: Not used for Mathpix but kept for BaseModel compatibility 19 | system_prompt: Not used for Mathpix but kept for BaseModel compatibility 20 | 21 | Raises: 22 | ValueError: If the API key format is invalid 23 | """ 24 | # 只传递必需的参数,不传递language参数 25 | super().__init__(api_key, temperature, system_prompt) 26 | try: 27 | self.app_id, self.app_key = api_key.split(':') 28 | except ValueError: 29 | raise ValueError("Mathpix API key must be in format 'app_id:app_key'") 30 | 31 | self.api_url = "https://api.mathpix.com/v3/text" 32 | self.headers = { 33 | "app_id": self.app_id, 34 | "app_key": self.app_key, 35 | "Content-Type": "application/json" 36 | } 37 | 38 | # Content type presets 39 | self.presets = { 40 | "math": { 41 | "formats": ["latex_normal", "latex_styled", "asciimath"], 42 | "data_options": { 43 | "include_asciimath": True, 44 | "include_latex": True, 45 | "include_mathml": True 46 | }, 47 | "ocr_options": { 48 | "detect_formulas": True, 49 | "enable_math_ocr": True, 50 | "enable_handwritten": True, 51 | "rm_spaces": True 52 | } 53 | }, 54 | "text": { 55 | "formats": ["text"], 56 | "data_options": { 57 | "include_latex": False, 58 | "include_asciimath": False 59 | }, 60 | "ocr_options": { 61 | "enable_spell_check": True, 62 | "enable_handwritten": True, 63 | "rm_spaces": False 64 | } 65 | }, 66 | "table": { 67 | "formats": ["text", "data"], 68 | "data_options": { 69 | "include_latex": True 70 | }, 71 | "ocr_options": { 72 | "detect_tables": True, 73 | "enable_spell_check": True, 74 | "rm_spaces": True 75 | } 76 | }, 77 | "full_text": { 78 | "formats": ["text"], 79 | "data_options": { 80 | "include_latex": False, 81 | "include_asciimath": False 82 | }, 83 | "ocr_options": { 84 | "enable_spell_check": True, 85 | "enable_handwritten": True, 86 | "rm_spaces": False, 87 | "detect_paragraphs": True, 88 | "enable_tables": False, 89 | "enable_math_ocr": False 90 | } 91 | } 92 | } 
93 | 94 | # Default to math preset 95 | self.current_preset = "math" 96 | 97 | def analyze_image(self, image_data: str, proxies: dict = None, content_type: str = None, 98 | confidence_threshold: float = 0.8, max_retries: int = 3) -> Generator[dict, None, None]: 99 | """ 100 | Analyze an image using Mathpix OCR API. 101 | 102 | Args: 103 | image_data: Base64 encoded image data 104 | proxies: Optional proxy configuration 105 | content_type: Type of content to analyze ('math', 'text', or 'table') 106 | confidence_threshold: Minimum confidence score to accept (0.0 to 1.0) 107 | max_retries: Maximum number of retry attempts for failed requests 108 | 109 | Yields: 110 | dict: Response chunks with status and content 111 | """ 112 | if content_type and content_type in self.presets: 113 | self.current_preset = content_type 114 | 115 | preset = self.presets[self.current_preset] 116 | 117 | try: 118 | # Prepare request payload 119 | payload = { 120 | "src": f"data:image/jpeg;base64,{image_data}", 121 | "formats": preset["formats"], 122 | "data_options": preset["data_options"], 123 | "ocr_options": preset["ocr_options"] 124 | } 125 | 126 | # Initialize retry counter 127 | retry_count = 0 128 | 129 | while retry_count < max_retries: 130 | try: 131 | # Send request to Mathpix API with timeout 132 | response = requests.post( 133 | self.api_url, 134 | headers=self.headers, 135 | json=payload, 136 | proxies=proxies, 137 | timeout=25 # 25 second timeout 138 | ) 139 | 140 | # Handle specific API error codes 141 | if response.status_code == 429: # Rate limit exceeded 142 | if retry_count < max_retries - 1: 143 | retry_count += 1 144 | continue 145 | else: 146 | raise requests.exceptions.RequestException("Rate limit exceeded") 147 | 148 | response.raise_for_status() 149 | result = response.json() 150 | 151 | # Check confidence threshold 152 | if 'confidence' in result and result['confidence'] < confidence_threshold: 153 | yield { 154 | "status": "warning", 155 | "content": f"Low 
confidence score: {result['confidence']:.2%}" 156 | } 157 | 158 | break # Success, exit retry loop 159 | 160 | except (requests.exceptions.Timeout, requests.exceptions.ConnectionError): 161 | if retry_count < max_retries - 1: 162 | retry_count += 1 163 | continue 164 | raise 165 | 166 | # Format the response 167 | formatted_response = self._format_response(result) 168 | 169 | # Yield initial status 170 | yield { 171 | "status": "started", 172 | "content": "" 173 | } 174 | 175 | # Yield the formatted response 176 | yield { 177 | "status": "completed", 178 | "content": formatted_response, 179 | "model": self.get_model_identifier() 180 | } 181 | 182 | except requests.exceptions.RequestException as e: 183 | yield { 184 | "status": "error", 185 | "error": f"Mathpix API error: {str(e)}" 186 | } 187 | except Exception as e: 188 | yield { 189 | "status": "error", 190 | "error": f"Error processing image: {str(e)}" 191 | } 192 | 193 | def analyze_text(self, text: str, proxies: dict = None) -> Generator[dict, None, None]: 194 | """ 195 | Not implemented for Mathpix model as it only processes images. 196 | """ 197 | yield { 198 | "status": "error", 199 | "error": "Text analysis is not supported by Mathpix model" 200 | } 201 | 202 | def get_default_system_prompt(self) -> str: 203 | """ 204 | Not used for Mathpix model. 205 | """ 206 | return "" 207 | 208 | def get_model_identifier(self) -> str: 209 | """ 210 | Return the model identifier. 211 | """ 212 | return "mathpix" 213 | 214 | def _format_response(self, result: Dict[str, Any]) -> str: 215 | """ 216 | Format the Mathpix API response into a readable string. 
217 | 218 | Args: 219 | result: Raw API response from Mathpix 220 | 221 | Returns: 222 | str: Formatted response string with all available formats 223 | """ 224 | formatted_parts = [] 225 | 226 | # Add confidence score if available 227 | if 'confidence' in result: 228 | formatted_parts.append(f"Confidence: {result['confidence']:.2%}\n") 229 | 230 | # Add text content 231 | if 'text' in result: 232 | formatted_parts.append("Text Content:") 233 | formatted_parts.append(result['text']) 234 | formatted_parts.append("") 235 | 236 | # Add LaTeX content 237 | if 'latex_normal' in result: 238 | formatted_parts.append("LaTeX (Normal):") 239 | formatted_parts.append(result['latex_normal']) 240 | formatted_parts.append("") 241 | 242 | if 'latex_styled' in result: 243 | formatted_parts.append("LaTeX (Styled):") 244 | formatted_parts.append(result['latex_styled']) 245 | formatted_parts.append("") 246 | 247 | # Add data formats (ASCII math, MathML) 248 | if 'data' in result and isinstance(result['data'], list): 249 | for item in result['data']: 250 | item_type = item.get('type', '') 251 | if item_type and 'value' in item: 252 | formatted_parts.append(f"{item_type.upper()}:") 253 | formatted_parts.append(item['value']) 254 | formatted_parts.append("") 255 | 256 | # Add table data if present 257 | if 'tables' in result and result['tables']: 258 | formatted_parts.append("Tables Detected:") 259 | for i, table in enumerate(result['tables'], 1): 260 | formatted_parts.append(f"Table {i}:") 261 | if 'cells' in table: 262 | # Format table as a grid 263 | cells = table['cells'] 264 | if cells: 265 | max_col = max(cell.get('col', 0) for cell in cells) + 1 266 | max_row = max(cell.get('row', 0) for cell in cells) + 1 267 | grid = [['' for _ in range(max_col)] for _ in range(max_row)] 268 | 269 | for cell in cells: 270 | row = cell.get('row', 0) 271 | col = cell.get('col', 0) 272 | text = cell.get('text', '') 273 | grid[row][col] = text 274 | 275 | # Format grid as table 276 | col_widths = 
[max(len(str(grid[r][c])) for r in range(max_row)) for c in range(max_col)] 277 | for row in grid: 278 | row_str = ' | '.join(f"{str(cell):<{width}}" for cell, width in zip(row, col_widths)) 279 | formatted_parts.append(f"| {row_str} |") 280 | formatted_parts.append("") 281 | 282 | # Add error message if present 283 | if 'error' in result: 284 | error_msg = result['error'] 285 | if isinstance(error_msg, dict): 286 | error_msg = error_msg.get('message', str(error_msg)) 287 | formatted_parts.append(f"Error: {error_msg}") 288 | 289 | return "\n".join(formatted_parts).strip() 290 | 291 | def extract_full_text(self, image_data: str, proxies: dict = None, max_retries: int = 3) -> str: 292 | """ 293 | 专门用于提取图像中的全部文本内容,忽略数学公式和表格等其他元素。 294 | 295 | Args: 296 | image_data: Base64编码的图像数据 297 | proxies: 可选的代理配置 298 | max_retries: 请求失败时的最大重试次数 299 | 300 | Returns: 301 | str: 图像中提取的完整文本内容 302 | """ 303 | try: 304 | # 准备请求负载,使用专为全文提取配置的参数 305 | payload = { 306 | "src": f"data:image/jpeg;base64,{image_data}", 307 | "formats": ["text"], 308 | "data_options": { 309 | "include_latex": False, 310 | "include_asciimath": False 311 | }, 312 | "ocr_options": { 313 | "enable_spell_check": True, 314 | "enable_handwritten": True, 315 | "rm_spaces": False, 316 | "detect_paragraphs": True, 317 | "enable_tables": False, 318 | "enable_math_ocr": False 319 | } 320 | } 321 | 322 | # 初始化重试计数器 323 | retry_count = 0 324 | 325 | while retry_count < max_retries: 326 | try: 327 | # 发送请求到Mathpix API 328 | response = requests.post( 329 | self.api_url, 330 | headers=self.headers, 331 | json=payload, 332 | proxies=proxies, 333 | timeout=30 # 30秒超时 334 | ) 335 | 336 | # 处理特定API错误代码 337 | if response.status_code == 429: # 超出速率限制 338 | if retry_count < max_retries - 1: 339 | retry_count += 1 340 | continue 341 | else: 342 | raise requests.exceptions.RequestException("超出API速率限制") 343 | 344 | response.raise_for_status() 345 | result = response.json() 346 | 347 | # 直接返回文本内容 348 | if 'text' in result: 349 | return 
result['text'] 350 | else: 351 | return "未能提取到文本内容" 352 | 353 | except (requests.exceptions.Timeout, requests.exceptions.ConnectionError): 354 | if retry_count < max_retries - 1: 355 | retry_count += 1 356 | continue 357 | raise 358 | 359 | except requests.exceptions.RequestException as e: 360 | return f"Mathpix API错误: {str(e)}" 361 | except Exception as e: 362 | return f"处理图像时出错: {str(e)}" 363 | -------------------------------------------------------------------------------- /models/deepseek.py: -------------------------------------------------------------------------------- 1 | import json 2 | import requests 3 | import os 4 | from typing import Generator 5 | from openai import OpenAI 6 | from .base import BaseModel 7 | 8 | class DeepSeekModel(BaseModel): 9 | def __init__(self, api_key: str, temperature: float = 0.7, system_prompt: str = None, language: str = None, model_name: str = "deepseek-reasoner", api_base_url: str = None): 10 | super().__init__(api_key, temperature, system_prompt, language) 11 | self.model_name = model_name 12 | self.api_base_url = api_base_url # 存储API基础URL 13 | 14 | def get_default_system_prompt(self) -> str: 15 | return """You are an expert at analyzing questions and providing detailed solutions. When presented with an image of a question: 16 | 1. First read and understand the question carefully 17 | 2. Break down the key components of the question 18 | 3. Provide a clear, step-by-step solution 19 | 4. If relevant, explain any concepts or theories involved 20 | 5. 
If there are multiple approaches, explain the most efficient one first""" 21 | 22 | def get_model_identifier(self) -> str: 23 | """根据模型名称返回正确的API标识符""" 24 | # 通过模型名称来确定实际的API调用标识符 25 | if self.model_name == "deepseek-chat": 26 | return "deepseek-chat" 27 | # 如果是deepseek-reasoner或包含reasoner的模型名称,返回推理模型标识符 28 | if "reasoner" in self.model_name.lower(): 29 | return "deepseek-reasoner" 30 | # 对于deepseek-chat也返回对应的模型名称 31 | if "chat" in self.model_name.lower() or self.model_name == "deepseek-chat": 32 | return "deepseek-chat" 33 | 34 | # 根据配置中的模型ID来确定实际的模型类型 35 | if self.model_name == "deepseek-reasoner": 36 | return "deepseek-reasoner" 37 | elif self.model_name == "deepseek-chat": 38 | return "deepseek-chat" 39 | 40 | # 默认使用deepseek-chat作为API标识符 41 | print(f"未知的DeepSeek模型名称: {self.model_name},使用deepseek-chat作为默认值") 42 | return "deepseek-chat" 43 | 44 | def analyze_text(self, text: str, proxies: dict = None) -> Generator[dict, None, None]: 45 | """Stream DeepSeek's response for text analysis""" 46 | try: 47 | # Initial status 48 | yield {"status": "started", "content": ""} 49 | 50 | # 保存原始环境变量 51 | original_env = { 52 | 'http_proxy': os.environ.get('http_proxy'), 53 | 'https_proxy': os.environ.get('https_proxy') 54 | } 55 | 56 | try: 57 | # 如果提供了代理设置,通过环境变量设置 58 | if proxies: 59 | if 'http' in proxies: 60 | os.environ['http_proxy'] = proxies['http'] 61 | if 'https' in proxies: 62 | os.environ['https_proxy'] = proxies['https'] 63 | 64 | # 初始化DeepSeek客户端,不再使用session对象 65 | client = OpenAI( 66 | api_key=self.api_key, 67 | base_url="https://api.deepseek.com" 68 | ) 69 | 70 | # 使用系统提供的系统提示词,不再自动添加语言指令 71 | system_prompt = self.system_prompt 72 | 73 | # 构建请求参数 74 | params = { 75 | "model": self.get_model_identifier(), 76 | "messages": [ 77 | { 78 | 'role': 'system', 79 | 'content': system_prompt 80 | }, 81 | { 82 | 'role': 'user', 83 | 'content': text 84 | } 85 | ], 86 | "stream": True 87 | } 88 | 89 | # 只有非推理模型才设置temperature参数 90 | if not 
self.get_model_identifier().endswith('reasoner') and self.temperature is not None: 91 | params["temperature"] = self.temperature 92 | 93 | print(f"调用DeepSeek API: {self.get_model_identifier()}, 是否设置温度: {not self.get_model_identifier().endswith('reasoner')}, 温度值: {self.temperature if not self.get_model_identifier().endswith('reasoner') else 'N/A'}") 94 | 95 | response = client.chat.completions.create(**params) 96 | 97 | # 使用两个缓冲区,分别用于常规内容和思考内容 98 | response_buffer = "" 99 | thinking_buffer = "" 100 | 101 | for chunk in response: 102 | # 打印chunk以调试 103 | try: 104 | print(f"DeepSeek API返回chunk: {chunk}") 105 | except: 106 | print("无法打印chunk") 107 | 108 | try: 109 | # 同时处理两种不同的内容,确保正确区分思考内容和最终内容 110 | delta = chunk.choices[0].delta 111 | 112 | # 处理推理模型的思考内容 113 | if hasattr(delta, 'reasoning_content') and delta.reasoning_content: 114 | content = delta.reasoning_content 115 | thinking_buffer += content 116 | 117 | # 发送思考内容更新 118 | if len(content) >= 20 or content.endswith(('.', '!', '?', '。', '!', '?', '\n')): 119 | yield { 120 | "status": "thinking", 121 | "content": thinking_buffer 122 | } 123 | 124 | # 处理最终结果内容 - 即使在推理模型中也会有content字段 125 | if hasattr(delta, 'content') and delta.content: 126 | content = delta.content 127 | response_buffer += content 128 | print(f"累积响应内容: '{content}', 当前buffer: '{response_buffer}'") 129 | 130 | # 发送结果内容更新 131 | if len(content) >= 10 or content.endswith(('.', '!', '?', '。', '!', '?', '\n')): 132 | yield { 133 | "status": "streaming", 134 | "content": response_buffer 135 | } 136 | 137 | # 处理消息结束 138 | if hasattr(chunk.choices[0], 'finish_reason') and chunk.choices[0].finish_reason: 139 | print(f"生成结束,原因: {chunk.choices[0].finish_reason}") 140 | # 注意:不要在这里把思考内容作为正文,因为这可能导致重复内容 141 | except Exception as e: 142 | print(f"解析响应chunk时出错: {str(e)}") 143 | continue 144 | 145 | # 确保发送最终的缓冲内容 146 | if thinking_buffer: 147 | yield { 148 | "status": "thinking_complete", 149 | "content": thinking_buffer 150 | } 151 | 152 | # 发送最终响应内容 153 | if 
response_buffer: 154 | yield { 155 | "status": "completed", 156 | "content": response_buffer 157 | } 158 | 159 | # 如果没有正常的响应内容,但有思考内容,则将思考内容作为最终结果 160 | elif thinking_buffer: 161 | yield { 162 | "status": "completed", 163 | "content": thinking_buffer 164 | } 165 | else: 166 | # 如果两者都没有,返回一个空结果 167 | yield { 168 | "status": "completed", 169 | "content": "没有获取到内容" 170 | } 171 | 172 | except Exception as e: 173 | error_msg = str(e) 174 | print(f"DeepSeek API调用出错: {error_msg}") 175 | 176 | # 提供具体的错误信息 177 | if "invalid_api_key" in error_msg.lower(): 178 | error_msg = "DeepSeek API密钥无效,请检查您的API密钥" 179 | elif "rate_limit" in error_msg.lower(): 180 | error_msg = "DeepSeek API请求频率超限,请稍后再试" 181 | elif "quota_exceeded" in error_msg.lower(): 182 | error_msg = "DeepSeek API配额已用完,请续费或等待下个计费周期" 183 | 184 | yield { 185 | "status": "error", 186 | "error": f"DeepSeek API错误: {error_msg}" 187 | } 188 | finally: 189 | # 恢复原始环境变量 190 | for key, value in original_env.items(): 191 | if value is None: 192 | if key in os.environ: 193 | del os.environ[key] 194 | else: 195 | os.environ[key] = value 196 | 197 | except Exception as e: 198 | error_msg = str(e) 199 | print(f"调用DeepSeek模型时发生错误: {error_msg}") 200 | 201 | if "invalid_api_key" in error_msg.lower(): 202 | error_msg = "API密钥无效,请检查设置" 203 | elif "rate_limit" in error_msg.lower(): 204 | error_msg = "API请求频率超限,请稍后再试" 205 | 206 | yield { 207 | "status": "error", 208 | "error": f"DeepSeek API错误: {error_msg}" 209 | } 210 | 211 | def analyze_image(self, image_data: str, proxies: dict = None) -> Generator[dict, None, None]: 212 | """Stream DeepSeek's response for image analysis""" 213 | try: 214 | # 检查我们是否有支持图像的模型 215 | if self.model_name == "deepseek-chat" or self.model_name == "deepseek-reasoner": 216 | yield { 217 | "status": "error", 218 | "error": "当前DeepSeek模型不支持图像分析,请使用Anthropic或OpenAI的多模态模型" 219 | } 220 | return 221 | 222 | # Initial status 223 | yield {"status": "started", "content": ""} 224 | 225 | # 保存原始环境变量 226 | original_env = { 
227 | 'http_proxy': os.environ.get('http_proxy'), 228 | 'https_proxy': os.environ.get('https_proxy') 229 | } 230 | 231 | try: 232 | # 如果提供了代理设置,通过环境变量设置 233 | if proxies: 234 | if 'http' in proxies: 235 | os.environ['http_proxy'] = proxies['http'] 236 | if 'https' in proxies: 237 | os.environ['https_proxy'] = proxies['https'] 238 | 239 | # 初始化DeepSeek客户端,不再使用session对象 240 | client = OpenAI( 241 | api_key=self.api_key, 242 | base_url="https://api.deepseek.com" 243 | ) 244 | 245 | # 使用系统提供的系统提示词,不再自动添加语言指令 246 | system_prompt = self.system_prompt 247 | 248 | # 构建请求参数 249 | params = { 250 | "model": self.get_model_identifier(), 251 | "messages": [ 252 | { 253 | 'role': 'system', 254 | 'content': system_prompt 255 | }, 256 | { 257 | 'role': 'user', 258 | 'content': f"Here's an image of a question to analyze: data:image/png;base64,{image_data}" 259 | } 260 | ], 261 | "stream": True 262 | } 263 | 264 | # 只有非推理模型才设置temperature参数 265 | if not self.get_model_identifier().endswith('reasoner') and self.temperature is not None: 266 | params["temperature"] = self.temperature 267 | 268 | response = client.chat.completions.create(**params) 269 | 270 | # 使用两个缓冲区,分别用于常规内容和思考内容 271 | response_buffer = "" 272 | thinking_buffer = "" 273 | 274 | for chunk in response: 275 | # 打印chunk以调试 276 | try: 277 | print(f"DeepSeek图像API返回chunk: {chunk}") 278 | except: 279 | print("无法打印chunk") 280 | 281 | try: 282 | # 同时处理两种不同的内容,确保正确区分思考内容和最终内容 283 | delta = chunk.choices[0].delta 284 | 285 | # 处理推理模型的思考内容 286 | if hasattr(delta, 'reasoning_content') and delta.reasoning_content: 287 | content = delta.reasoning_content 288 | thinking_buffer += content 289 | 290 | # 发送思考内容更新 291 | if len(content) >= 20 or content.endswith(('.', '!', '?', '。', '!', '?', '\n')): 292 | yield { 293 | "status": "thinking", 294 | "content": thinking_buffer 295 | } 296 | 297 | # 处理最终结果内容 - 即使在推理模型中也会有content字段 298 | if hasattr(delta, 'content') and delta.content: 299 | content = delta.content 300 | response_buffer += 
content 301 | print(f"累积图像响应内容: '{content}', 当前buffer: '{response_buffer}'") 302 | 303 | # 发送结果内容更新 304 | if len(content) >= 10 or content.endswith(('.', '!', '?', '。', '!', '?', '\n')): 305 | yield { 306 | "status": "streaming", 307 | "content": response_buffer 308 | } 309 | 310 | # 处理消息结束 311 | if hasattr(chunk.choices[0], 'finish_reason') and chunk.choices[0].finish_reason: 312 | print(f"图像生成结束,原因: {chunk.choices[0].finish_reason}") 313 | except Exception as e: 314 | print(f"解析图像响应chunk时出错: {str(e)}") 315 | continue 316 | 317 | # 确保发送最终的缓冲内容 318 | if thinking_buffer: 319 | yield { 320 | "status": "thinking_complete", 321 | "content": thinking_buffer 322 | } 323 | 324 | # 发送最终响应内容 325 | if response_buffer: 326 | yield { 327 | "status": "completed", 328 | "content": response_buffer 329 | } 330 | 331 | except Exception as e: 332 | error_msg = str(e) 333 | print(f"DeepSeek API调用出错: {error_msg}") 334 | 335 | # 提供具体的错误信息 336 | if "invalid_api_key" in error_msg.lower(): 337 | error_msg = "DeepSeek API密钥无效,请检查您的API密钥" 338 | elif "rate_limit" in error_msg.lower(): 339 | error_msg = "DeepSeek API请求频率超限,请稍后再试" 340 | 341 | yield { 342 | "status": "error", 343 | "error": f"DeepSeek API错误: {error_msg}" 344 | } 345 | finally: 346 | # 恢复原始环境变量 347 | for key, value in original_env.items(): 348 | if value is None: 349 | if key in os.environ: 350 | del os.environ[key] 351 | else: 352 | os.environ[key] = value 353 | 354 | except Exception as e: 355 | error_msg = str(e) 356 | if "invalid_api_key" in error_msg.lower(): 357 | error_msg = "API密钥无效,请检查设置" 358 | elif "rate_limit" in error_msg.lower(): 359 | error_msg = "API请求频率超限,请稍后再试" 360 | 361 | yield { 362 | "status": "error", 363 | "error": f"DeepSeek API错误: {error_msg}" 364 | } 365 | -------------------------------------------------------------------------------- /models/anthropic.py: -------------------------------------------------------------------------------- 1 | import json 2 | import requests 3 | from typing import 
Generator, Optional 4 | from .base import BaseModel 5 | 6 | class AnthropicModel(BaseModel): 7 | def __init__(self, api_key, temperature=0.7, system_prompt=None, language=None, api_base_url=None, model_identifier=None): 8 | super().__init__(api_key, temperature, system_prompt or self.get_default_system_prompt(), language or "en") 9 | # 设置API基础URL,默认为Anthropic官方API 10 | self.api_base_url = api_base_url or "https://api.anthropic.com/v1" 11 | # 设置模型标识符,支持动态选择 12 | self.model_identifier = model_identifier or "claude-3-7-sonnet-20250219" 13 | # 初始化推理配置 14 | self.reasoning_config = None 15 | # 初始化最大Token数 16 | self.max_tokens = None 17 | 18 | def get_default_system_prompt(self) -> str: 19 | return """You are an expert at analyzing questions and providing detailed solutions. When presented with an image of a question: 20 | 1. First read and understand the question carefully 21 | 2. Break down the key components of the question 22 | 3. Provide a clear, step-by-step solution 23 | 4. If relevant, explain any concepts or theories involved 24 | 5. 
If there are multiple approaches, explain the most efficient one first""" 25 | 26 | def get_model_identifier(self) -> str: 27 | return self.model_identifier 28 | 29 | def analyze_text(self, text: str, proxies: Optional[dict] = None) -> Generator[dict, None, None]: 30 | """Stream Claude's response for text analysis""" 31 | try: 32 | yield {"status": "started"} 33 | 34 | api_key = self.api_key 35 | if api_key.startswith('Bearer '): 36 | api_key = api_key[7:] 37 | 38 | headers = { 39 | 'x-api-key': api_key, 40 | 'anthropic-version': '2023-06-01', 41 | 'content-type': 'application/json', 42 | 'accept': 'application/json', 43 | } 44 | 45 | # 获取最大输出Token设置 46 | max_tokens = 8192 # 默认值 47 | if hasattr(self, 'max_tokens') and self.max_tokens: 48 | max_tokens = self.max_tokens 49 | 50 | payload = { 51 | 'model': self.get_model_identifier(), 52 | 'stream': True, 53 | 'max_tokens': max_tokens, 54 | 'temperature': 1, 55 | 'system': self.system_prompt, 56 | 'messages': [{ 57 | 'role': 'user', 58 | 'content': [ 59 | { 60 | 'type': 'text', 61 | 'text': text 62 | } 63 | ] 64 | }] 65 | } 66 | 67 | # 处理推理配置 68 | if hasattr(self, 'reasoning_config') and self.reasoning_config: 69 | # 如果设置了extended reasoning 70 | if self.reasoning_config.get('reasoning_depth') == 'extended': 71 | think_budget = self.reasoning_config.get('think_budget', max_tokens // 2) 72 | payload['thinking'] = { 73 | 'type': 'enabled', 74 | 'budget_tokens': think_budget 75 | } 76 | # 如果设置了instant模式 77 | elif self.reasoning_config.get('speed_mode') == 'instant': 78 | # 确保当使用speed_mode时不包含thinking参数 79 | if 'thinking' in payload: 80 | del payload['thinking'] 81 | # 默认启用思考但使用较小的预算 82 | else: 83 | payload['thinking'] = { 84 | 'type': 'enabled', 85 | 'budget_tokens': min(4096, max_tokens // 4) 86 | } 87 | # 默认设置 88 | else: 89 | payload['thinking'] = { 90 | 'type': 'enabled', 91 | 'budget_tokens': min(4096, max_tokens // 4) 92 | } 93 | 94 | print(f"Debug - 推理配置: max_tokens={max_tokens}, thinking={payload.get('thinking', 
payload.get('speed_mode', 'default'))}") 95 | 96 | # 使用配置的API基础URL 97 | api_endpoint = f"{self.api_base_url}/messages" 98 | 99 | response = requests.post( 100 | api_endpoint, 101 | headers=headers, 102 | json=payload, 103 | stream=True, 104 | proxies=proxies, 105 | timeout=60 106 | ) 107 | 108 | if response.status_code != 200: 109 | error_msg = f'API error: {response.status_code}' 110 | try: 111 | error_data = response.json() 112 | if 'error' in error_data: 113 | error_msg += f" - {error_data['error']['message']}" 114 | except: 115 | error_msg += f" - {response.text}" 116 | yield {"status": "error", "error": error_msg} 117 | return 118 | 119 | thinking_content = "" 120 | response_buffer = "" 121 | 122 | for chunk in response.iter_lines(): 123 | if not chunk: 124 | continue 125 | 126 | try: 127 | chunk_str = chunk.decode('utf-8') 128 | if not chunk_str.startswith('data: '): 129 | continue 130 | 131 | chunk_str = chunk_str[6:] 132 | data = json.loads(chunk_str) 133 | 134 | if data.get('type') == 'content_block_delta': 135 | if 'delta' in data: 136 | if 'text' in data['delta']: 137 | text_chunk = data['delta']['text'] 138 | response_buffer += text_chunk 139 | # 只在每累积一定数量的字符后才发送,减少UI跳变 140 | if len(text_chunk) >= 10 or text_chunk.endswith(('.', '!', '?', '。', '!', '?', '\n')): 141 | yield { 142 | "status": "streaming", 143 | "content": response_buffer 144 | } 145 | 146 | elif 'thinking' in data['delta']: 147 | thinking_chunk = data['delta']['thinking'] 148 | thinking_content += thinking_chunk 149 | # 只在每累积一定数量的字符后才发送,减少UI跳变 150 | if len(thinking_chunk) >= 20 or thinking_chunk.endswith(('.', '!', '?', '。', '!', '?', '\n')): 151 | yield { 152 | "status": "thinking", 153 | "content": thinking_content 154 | } 155 | 156 | # 处理新的extended_thinking格式 157 | elif data.get('type') == 'extended_thinking_delta': 158 | if 'delta' in data and 'text' in data['delta']: 159 | thinking_chunk = data['delta']['text'] 160 | thinking_content += thinking_chunk 161 | # 只在每累积一定数量的字符后才发送,减少UI跳变 
162 | if len(thinking_chunk) >= 20 or thinking_chunk.endswith(('.', '!', '?', '。', '!', '?', '\n')): 163 | yield { 164 | "status": "thinking", 165 | "content": thinking_content 166 | } 167 | 168 | elif data.get('type') == 'message_stop': 169 | # 确保发送完整的思考内容 170 | if thinking_content: 171 | yield { 172 | "status": "thinking_complete", 173 | "content": thinking_content 174 | } 175 | # 确保发送完整的响应内容 176 | yield { 177 | "status": "completed", 178 | "content": response_buffer 179 | } 180 | 181 | elif data.get('type') == 'error': 182 | error_msg = data.get('error', {}).get('message', 'Unknown error') 183 | yield { 184 | "status": "error", 185 | "error": error_msg 186 | } 187 | break 188 | 189 | except json.JSONDecodeError as e: 190 | print(f"JSON decode error: {str(e)}") 191 | continue 192 | 193 | except Exception as e: 194 | yield { 195 | "status": "error", 196 | "error": f"Streaming error: {str(e)}" 197 | } 198 | 199 | def analyze_image(self, image_data, proxies: Optional[dict] = None): 200 | yield {"status": "started"} 201 | 202 | api_key = self.api_key 203 | if api_key.startswith('Bearer '): 204 | api_key = api_key[7:] 205 | 206 | headers = { 207 | 'x-api-key': api_key, 208 | 'anthropic-version': '2023-06-01', 209 | 'content-type': 'application/json' 210 | } 211 | 212 | # 使用系统提供的系统提示词,不再自动添加语言指令 213 | system_prompt = self.system_prompt 214 | 215 | # 获取最大输出Token设置 216 | max_tokens = 8192 # 默认值 217 | if hasattr(self, 'max_tokens') and self.max_tokens: 218 | max_tokens = self.max_tokens 219 | 220 | payload = { 221 | 'model': self.get_model_identifier(), 222 | 'stream': True, 223 | 'max_tokens': max_tokens, 224 | 'temperature': 1, 225 | 'system': system_prompt, 226 | 'messages': [{ 227 | 'role': 'user', 228 | 'content': [ 229 | { 230 | 'type': 'image', 231 | 'source': { 232 | 'type': 'base64', 233 | 'media_type': 'image/png', 234 | 'data': image_data 235 | } 236 | }, 237 | { 238 | 'type': 'text', 239 | 'text': "请分析这个问题并提供详细的解决方案。如果你看到多个问题,请逐一解决。" 240 | } 241 | ] 242 | }] 
243 | } 244 | 245 | # 处理推理配置 246 | if hasattr(self, 'reasoning_config') and self.reasoning_config: 247 | # 如果设置了extended reasoning 248 | if self.reasoning_config.get('reasoning_depth') == 'extended': 249 | think_budget = self.reasoning_config.get('think_budget', max_tokens // 2) 250 | payload['thinking'] = { 251 | 'type': 'enabled', 252 | 'budget_tokens': think_budget 253 | } 254 | # 如果设置了instant模式 255 | elif self.reasoning_config.get('speed_mode') == 'instant': 256 | # 只需确保不包含thinking参数,不添加speed_mode参数 257 | if 'thinking' in payload: 258 | del payload['thinking'] 259 | # 默认启用思考但使用较小的预算 260 | else: 261 | payload['thinking'] = { 262 | 'type': 'enabled', 263 | 'budget_tokens': min(4096, max_tokens // 4) 264 | } 265 | # 默认设置 266 | else: 267 | payload['thinking'] = { 268 | 'type': 'enabled', 269 | 'budget_tokens': min(4096, max_tokens // 4) 270 | } 271 | 272 | print(f"Debug - 图像分析推理配置: max_tokens={max_tokens}, thinking={payload.get('thinking', payload.get('speed_mode', 'default'))}") 273 | 274 | # 使用配置的API基础URL 275 | api_endpoint = f"{self.api_base_url}/messages" 276 | 277 | response = requests.post( 278 | api_endpoint, 279 | headers=headers, 280 | json=payload, 281 | stream=True, 282 | proxies=proxies, 283 | timeout=60 284 | ) 285 | 286 | if response.status_code != 200: 287 | error_msg = f'API error: {response.status_code}' 288 | try: 289 | error_data = response.json() 290 | if 'error' in error_data: 291 | error_msg += f" - {error_data['error']['message']}" 292 | except: 293 | error_msg += f" - {response.text}" 294 | yield {"status": "error", "error": error_msg} 295 | return 296 | 297 | thinking_content = "" 298 | response_buffer = "" 299 | 300 | for chunk in response.iter_lines(): 301 | if not chunk: 302 | continue 303 | 304 | try: 305 | chunk_str = chunk.decode('utf-8') 306 | if not chunk_str.startswith('data: '): 307 | continue 308 | 309 | chunk_str = chunk_str[6:] 310 | data = json.loads(chunk_str) 311 | 312 | if data.get('type') == 'content_block_delta': 313 | if 
'delta' in data: 314 | if 'text' in data['delta']: 315 | text_chunk = data['delta']['text'] 316 | response_buffer += text_chunk 317 | # 只在每累积一定数量的字符后才发送,减少UI跳变 318 | if len(text_chunk) >= 10 or text_chunk.endswith(('.', '!', '?', '。', '!', '?', '\n')): 319 | yield { 320 | "status": "streaming", 321 | "content": response_buffer 322 | } 323 | 324 | elif 'thinking' in data['delta']: 325 | thinking_chunk = data['delta']['thinking'] 326 | thinking_content += thinking_chunk 327 | # 只在每累积一定数量的字符后才发送,减少UI跳变 328 | if len(thinking_chunk) >= 20 or thinking_chunk.endswith(('.', '!', '?', '。', '!', '?', '\n')): 329 | yield { 330 | "status": "thinking", 331 | "content": thinking_content 332 | } 333 | 334 | # 处理新的extended_thinking格式 335 | elif data.get('type') == 'extended_thinking_delta': 336 | if 'delta' in data and 'text' in data['delta']: 337 | thinking_chunk = data['delta']['text'] 338 | thinking_content += thinking_chunk 339 | # 只在每累积一定数量的字符后才发送,减少UI跳变 340 | if len(thinking_chunk) >= 20 or thinking_chunk.endswith(('.', '!', '?', '。', '!', '?', '\n')): 341 | yield { 342 | "status": "thinking", 343 | "content": thinking_content 344 | } 345 | 346 | elif data.get('type') == 'message_stop': 347 | # 确保发送完整的思考内容 348 | if thinking_content: 349 | yield { 350 | "status": "thinking_complete", 351 | "content": thinking_content 352 | } 353 | # 确保发送完整的响应内容 354 | yield { 355 | "status": "completed", 356 | "content": response_buffer 357 | } 358 | 359 | elif data.get('type') == 'error': 360 | error_message = data.get('error', {}).get('message', 'Unknown error') 361 | yield { 362 | "status": "error", 363 | "error": error_message 364 | } 365 | 366 | except Exception as e: 367 | yield { 368 | "status": "error", 369 | "error": f"Error processing response: {str(e)}" 370 | } 371 | break 372 | -------------------------------------------------------------------------------- /app.py: -------------------------------------------------------------------------------- 1 | from flask import Flask, 
jsonify, render_template, request, send_from_directory 2 | from flask_socketio import SocketIO 3 | import pyautogui 4 | import base64 5 | from io import BytesIO 6 | import socket 7 | from threading import Thread, Event 8 | import threading 9 | from PIL import Image 10 | import pyperclip 11 | from models import ModelFactory 12 | import time 13 | import os 14 | import json 15 | import traceback 16 | import requests 17 | from datetime import datetime 18 | import sys 19 | 20 | app = Flask(__name__) 21 | socketio = SocketIO( 22 | app, 23 | cors_allowed_origins="*", 24 | ping_timeout=30, 25 | ping_interval=5, 26 | max_http_buffer_size=50 * 1024 * 1024, 27 | async_mode='threading', # 使用threading模式提高兼容性 28 | engineio_logger=True, # 启用引擎日志,便于调试 29 | logger=True # 启用Socket.IO日志 30 | ) 31 | 32 | # 常量定义 33 | CURRENT_DIR = os.path.dirname(os.path.abspath(__file__)) 34 | CONFIG_DIR = os.path.join(CURRENT_DIR, 'config') 35 | STATIC_DIR = os.path.join(CURRENT_DIR, 'static') 36 | # 确保配置目录存在 37 | os.makedirs(CONFIG_DIR, exist_ok=True) 38 | 39 | # 密钥和其他配置文件路径 40 | API_KEYS_FILE = os.path.join(CONFIG_DIR, 'api_keys.json') 41 | API_BASE_URLS_FILE = os.path.join(CONFIG_DIR, 'api_base_urls.json') 42 | VERSION_FILE = os.path.join(CONFIG_DIR, 'version.json') 43 | UPDATE_INFO_FILE = os.path.join(CONFIG_DIR, 'update_info.json') 44 | PROMPT_FILE = os.path.join(CONFIG_DIR, 'prompts.json') # 新增提示词配置文件路径 45 | PROXY_API_FILE = os.path.join(CONFIG_DIR, 'proxy_api.json') # 新增中转API配置文件路径 46 | 47 | DEFAULT_API_BASE_URLS = { 48 | "AnthropicApiBaseUrl": "", 49 | "OpenaiApiBaseUrl": "", 50 | "DeepseekApiBaseUrl": "", 51 | "AlibabaApiBaseUrl": "", 52 | "GoogleApiBaseUrl": "", 53 | "DoubaoApiBaseUrl": "" 54 | } 55 | 56 | def ensure_api_base_urls_file(): 57 | """确保 API 基础 URL 配置文件存在并包含所有占位符""" 58 | try: 59 | file_exists = os.path.exists(API_BASE_URLS_FILE) 60 | base_urls = {} 61 | if file_exists: 62 | try: 63 | with open(API_BASE_URLS_FILE, 'r', encoding='utf-8') as f: 64 | loaded = json.load(f) 65 | if 
isinstance(loaded, dict): 66 | base_urls = loaded 67 | else: 68 | file_exists = False 69 | except json.JSONDecodeError: 70 | file_exists = False 71 | 72 | missing_key_added = False 73 | for key, default_value in DEFAULT_API_BASE_URLS.items(): 74 | if key not in base_urls: 75 | base_urls[key] = default_value 76 | missing_key_added = True 77 | 78 | if not file_exists or missing_key_added or not base_urls: 79 | with open(API_BASE_URLS_FILE, 'w', encoding='utf-8') as f: 80 | json.dump(base_urls or DEFAULT_API_BASE_URLS, f, ensure_ascii=False, indent=2) 81 | except Exception as e: 82 | print(f"初始化API基础URL配置失败: {e}") 83 | 84 | # 确保API基础URL文件已经生成 85 | ensure_api_base_urls_file() 86 | 87 | # 跟踪用户生成任务的字典 88 | generation_tasks = {} 89 | 90 | # 初始化模型工厂 91 | ModelFactory.initialize() 92 | 93 | def get_local_ip(): 94 | try: 95 | # Get local IP address 96 | s = socket.socket(socket.AF_INET, socket.SOCK_DGRAM) 97 | s.connect(("8.8.8.8", 80)) 98 | ip = s.getsockname()[0] 99 | s.close() 100 | return ip 101 | except Exception: 102 | return "127.0.0.1" 103 | 104 | @app.route('/') 105 | def index(): 106 | local_ip = get_local_ip() 107 | 108 | # 检查更新 109 | try: 110 | update_info = check_for_updates() 111 | except: 112 | update_info = {'has_update': False} 113 | 114 | return render_template('index.html', local_ip=local_ip, update_info=update_info) 115 | 116 | @socketio.on('connect') 117 | def handle_connect(): 118 | print('Client connected') 119 | 120 | @socketio.on('disconnect') 121 | def handle_disconnect(): 122 | print('Client disconnected') 123 | 124 | def create_model_instance(model_id, settings, is_reasoning=False): 125 | """创建模型实例""" 126 | # 提取API密钥 127 | api_keys = settings.get('apiKeys', {}) 128 | 129 | # 确定需要哪个API密钥 130 | api_key_id = None 131 | # 特殊情况:o3-mini使用OpenAI API密钥 132 | if model_id.lower() == "o3-mini": 133 | api_key_id = "OpenaiApiKey" 134 | # 其他Anthropic/Claude模型 135 | elif "claude" in model_id.lower() or "anthropic" in model_id.lower(): 136 | api_key_id = 
"AnthropicApiKey" 137 | elif any(keyword in model_id.lower() for keyword in ["gpt", "openai"]): 138 | api_key_id = "OpenaiApiKey" 139 | elif "deepseek" in model_id.lower(): 140 | api_key_id = "DeepseekApiKey" 141 | elif "qvq" in model_id.lower() or "alibaba" in model_id.lower() or "qwen" in model_id.lower(): 142 | api_key_id = "AlibabaApiKey" 143 | elif "gemini" in model_id.lower() or "google" in model_id.lower(): 144 | api_key_id = "GoogleApiKey" 145 | elif "doubao" in model_id.lower(): 146 | api_key_id = "DoubaoApiKey" 147 | 148 | # 首先尝试从本地配置获取API密钥 149 | api_key = get_api_key(api_key_id) 150 | 151 | # 如果本地没有配置,尝试使用前端传递的密钥(向后兼容) 152 | if not api_key: 153 | api_key = api_keys.get(api_key_id) 154 | 155 | if not api_key: 156 | raise ValueError(f"API key is required for the selected model (keyId: {api_key_id})") 157 | 158 | # 获取maxTokens参数,默认为8192 159 | max_tokens = int(settings.get('maxTokens', 8192)) 160 | 161 | # 检查是否启用中转API 162 | proxy_api_config = load_proxy_api() 163 | base_url = None 164 | 165 | if proxy_api_config.get('enabled', False): 166 | # 根据模型类型选择对应的中转API 167 | if "claude" in model_id.lower() or "anthropic" in model_id.lower(): 168 | base_url = proxy_api_config.get('apis', {}).get('anthropic', '') 169 | elif any(keyword in model_id.lower() for keyword in ["gpt", "openai"]): 170 | base_url = proxy_api_config.get('apis', {}).get('openai', '') 171 | elif "deepseek" in model_id.lower(): 172 | base_url = proxy_api_config.get('apis', {}).get('deepseek', '') 173 | elif "qvq" in model_id.lower() or "alibaba" in model_id.lower() or "qwen" in model_id.lower(): 174 | base_url = proxy_api_config.get('apis', {}).get('alibaba', '') 175 | elif "gemini" in model_id.lower() or "google" in model_id.lower(): 176 | base_url = proxy_api_config.get('apis', {}).get('google', '') 177 | 178 | # 从前端设置获取自定义API基础URL (apiBaseUrls) 179 | api_base_urls = settings.get('apiBaseUrls', {}) 180 | if api_base_urls: 181 | # 根据模型类型选择对应的自定义API基础URL 182 | if "claude" in model_id.lower() or 
"anthropic" in model_id.lower(): 183 | custom_base_url = api_base_urls.get('anthropic') 184 | if custom_base_url: 185 | base_url = custom_base_url 186 | elif any(keyword in model_id.lower() for keyword in ["gpt", "openai"]): 187 | custom_base_url = api_base_urls.get('openai') 188 | if custom_base_url: 189 | base_url = custom_base_url 190 | elif "deepseek" in model_id.lower(): 191 | custom_base_url = api_base_urls.get('deepseek') 192 | if custom_base_url: 193 | base_url = custom_base_url 194 | elif "qvq" in model_id.lower() or "alibaba" in model_id.lower() or "qwen" in model_id.lower(): 195 | custom_base_url = api_base_urls.get('alibaba') 196 | if custom_base_url: 197 | base_url = custom_base_url 198 | elif "gemini" in model_id.lower() or "google" in model_id.lower(): 199 | custom_base_url = api_base_urls.get('google') 200 | if custom_base_url: 201 | base_url = custom_base_url 202 | elif "doubao" in model_id.lower(): 203 | custom_base_url = api_base_urls.get('doubao') 204 | if custom_base_url: 205 | base_url = custom_base_url 206 | 207 | # 创建模型实例 208 | model_instance = ModelFactory.create_model( 209 | model_name=model_id, 210 | api_key=api_key, 211 | temperature=None if is_reasoning else float(settings.get('temperature', 0.7)), 212 | system_prompt=settings.get('systemPrompt'), 213 | language=settings.get('language', '中文'), 214 | api_base_url=base_url # 现在BaseModel支持api_base_url参数 215 | ) 216 | 217 | # 设置最大输出Token,但不为阿里巴巴模型设置(它们有自己内部的处理逻辑) 218 | is_alibaba_model = "qvq" in model_id.lower() or "alibaba" in model_id.lower() or "qwen" in model_id.lower() 219 | if not is_alibaba_model: 220 | model_instance.max_tokens = max_tokens 221 | 222 | return model_instance 223 | 224 | def stream_model_response(response_generator, sid, model_name=None): 225 | """Stream model responses to the client""" 226 | try: 227 | print("Starting response streaming...") 228 | 229 | # 判断模型是否为推理模型 230 | is_reasoning = model_name and ModelFactory.is_reasoning(model_name) 231 | if is_reasoning: 232 
| print(f"使用推理模型 {model_name},将显示思考过程") 233 | 234 | # 初始化:发送开始状态 235 | socketio.emit('ai_response', { 236 | 'status': 'started', 237 | 'content': '', 238 | 'is_reasoning': is_reasoning 239 | }, room=sid) 240 | print("Sent initial status to client") 241 | 242 | # 维护服务端缓冲区以累积完整内容 243 | response_buffer = "" 244 | thinking_buffer = "" 245 | 246 | # 上次发送的时间戳,用于控制发送频率 247 | last_emit_time = time.time() 248 | 249 | # 流式处理响应 250 | for response in response_generator: 251 | # 处理Mathpix响应 252 | if isinstance(response.get('content', ''), str) and 'mathpix' in response.get('model', ''): 253 | if current_time - last_emit_time >= 0.3: 254 | socketio.emit('ai_response', { 255 | 'status': 'thinking', 256 | 'content': thinking_buffer, 257 | 'is_reasoning': True 258 | }, room=sid) 259 | last_emit_time = current_time 260 | 261 | elif status == 'thinking_complete': 262 | # 仅对推理模型处理思考过程 263 | if is_reasoning: 264 | # 直接使用完整的思考内容 265 | thinking_buffer = content 266 | 267 | print(f"Thinking complete, total length: {len(thinking_buffer)} chars") 268 | socketio.emit('ai_response', { 269 | 'status': 'thinking_complete', 270 | 'content': thinking_buffer, 271 | 'is_reasoning': True 272 | }, room=sid) 273 | 274 | elif status == 'streaming': 275 | # 直接使用模型提供的完整内容 276 | response_buffer = content 277 | 278 | # 控制发送频率,至少间隔0.3秒 279 | current_time = time.time() 280 | if current_time - last_emit_time >= 0.3: 281 | socketio.emit('ai_response', { 282 | 'status': 'streaming', 283 | 'content': response_buffer, 284 | 'is_reasoning': is_reasoning 285 | }, room=sid) 286 | last_emit_time = current_time 287 | 288 | elif status == 'completed': 289 | # 确保发送最终完整内容 290 | socketio.emit('ai_response', { 291 | 'status': 'completed', 292 | 'content': content or response_buffer, 293 | 'is_reasoning': is_reasoning 294 | }, room=sid) 295 | print("Response completed") 296 | 297 | elif status == 'error': 298 | # 错误状态直接转发 299 | response['is_reasoning'] = is_reasoning 300 | socketio.emit('ai_response', response, room=sid) 
301 | print(f"Error: {response.get('error', 'Unknown error')}") 302 | 303 | # 其他状态直接转发 304 | else: 305 | response['is_reasoning'] = is_reasoning 306 | socketio.emit('ai_response', response, room=sid) 307 | 308 | except Exception as e: 309 | error_msg = f"Streaming error: {str(e)}" 310 | print(error_msg) 311 | socketio.emit('ai_response', { 312 | 'status': 'error', 313 | 'error': error_msg, 314 | 'is_reasoning': model_name and ModelFactory.is_reasoning(model_name) 315 | }, room=sid) 316 | 317 | @socketio.on('request_screenshot') 318 | def handle_screenshot_request(): 319 | try: 320 | # 添加调试信息 321 | print("DEBUG: 执行request_screenshot截图") 322 | 323 | # Capture the screen 324 | screenshot = pyautogui.screenshot() 325 | 326 | # Convert the image to base64 string 327 | buffered = BytesIO() 328 | screenshot.save(buffered, format="PNG") 329 | img_str = base64.b64encode(buffered.getvalue()).decode() 330 | 331 | # Emit the screenshot back to the client,不打印base64数据 332 | print("DEBUG: 完成request_screenshot截图,图片大小: {} KB".format(len(img_str) // 1024)) 333 | socketio.emit('screenshot_response', { 334 | 'success': True, 335 | 'image': img_str 336 | }) 337 | except Exception as e: 338 | socketio.emit('screenshot_response', { 339 | 'success': False, 340 | 'error': str(e) 341 | }) 342 | 343 | @socketio.on('extract_text') 344 | def handle_text_extraction(data): 345 | try: 346 | print("Starting text extraction...") 347 | 348 | # Validate input data 349 | if not data or not isinstance(data, dict): 350 | raise ValueError("Invalid request data") 351 | 352 | if 'image' not in data: 353 | raise ValueError("No image data provided") 354 | 355 | image_data = data['image'] 356 | if not isinstance(image_data, str): 357 | raise ValueError("Invalid image data format") 358 | 359 | # 检查图像大小,避免处理过大的图像导致断开连接 360 | image_size_bytes = len(image_data) * 3 / 4 # 估算base64的实际大小 361 | if image_size_bytes > 10 * 1024 * 1024: # 10MB 362 | raise ValueError("Image too large, please crop to a smaller area") 363 
| 364 | settings = data.get('settings', {}) 365 | if not isinstance(settings, dict): 366 | raise ValueError("Invalid settings format") 367 | 368 | # 优先使用百度OCR,如果没有配置则使用Mathpix 369 | # 首先尝试获取百度OCR API密钥 370 | baidu_api_key = get_api_key('BaiduApiKey') 371 | baidu_secret_key = get_api_key('BaiduSecretKey') 372 | 373 | # 构建百度OCR API密钥(格式:api_key:secret_key) 374 | ocr_key = None 375 | ocr_model = None 376 | 377 | if baidu_api_key and baidu_secret_key: 378 | ocr_key = f"{baidu_api_key}:{baidu_secret_key}" 379 | ocr_model = 'baidu-ocr' 380 | print("Using Baidu OCR for text extraction...") 381 | else: 382 | # 回退到Mathpix 383 | mathpix_app_id = get_api_key('MathpixAppId') 384 | mathpix_app_key = get_api_key('MathpixAppKey') 385 | 386 | # 构建完整的Mathpix API密钥(格式:app_id:app_key) 387 | mathpix_key = f"{mathpix_app_id}:{mathpix_app_key}" if mathpix_app_id and mathpix_app_key else None 388 | 389 | # 如果本地没有配置,尝试使用前端传递的密钥(向后兼容) 390 | if not mathpix_key: 391 | mathpix_key = settings.get('mathpixApiKey') 392 | 393 | if mathpix_key: 394 | ocr_key = mathpix_key 395 | ocr_model = 'mathpix' 396 | print("Using Mathpix OCR for text extraction...") 397 | 398 | if not ocr_key: 399 | raise ValueError("OCR API key is required. 
Please configure Baidu OCR (API Key + Secret Key) or Mathpix (App ID + App Key)") 400 | 401 | # 先回复客户端,确认已收到请求,防止超时断开 402 | # 注意:这里不能使用return,否则后续代码不会执行 403 | socketio.emit('request_acknowledged', { 404 | 'status': 'received', 405 | 'message': f'Image received, text extraction in progress using {ocr_model}' 406 | }, room=request.sid) 407 | 408 | try: 409 | if ocr_model == 'baidu-ocr': 410 | api_key, secret_key = ocr_key.split(':') 411 | if not api_key.strip() or not secret_key.strip(): 412 | raise ValueError() 413 | elif ocr_model == 'mathpix': 414 | app_id, app_key = ocr_key.split(':') 415 | if not app_id.strip() or not app_key.strip(): 416 | raise ValueError() 417 | except ValueError: 418 | if ocr_model == 'baidu-ocr': 419 | raise ValueError("Invalid Baidu OCR API key format. Expected format: 'API_KEY:SECRET_KEY'") 420 | else: 421 | raise ValueError("Invalid Mathpix API key format. Expected format: 'app_id:app_key'") 422 | 423 | print(f"Creating {ocr_model} model instance...") 424 | # ModelFactory.create_model会处理不同模型类型 425 | model = ModelFactory.create_model( 426 | model_name=ocr_model, 427 | api_key=ocr_key 428 | ) 429 | 430 | print("Starting text extraction...") 431 | # 使用新的extract_full_text方法直接提取完整文本 432 | extracted_text = model.extract_full_text(image_data) 433 | 434 | # 直接返回文本结果 435 | socketio.emit('text_extracted', { 436 | 'content': extracted_text 437 | }, room=request.sid) 438 | 439 | except ValueError as e: 440 | error_msg = str(e) 441 | print(f"Validation error: {error_msg}") 442 | socketio.emit('text_extracted', { 443 | 'error': error_msg 444 | }, room=request.sid) 445 | except Exception as e: 446 | error_msg = f"Text extraction error: {str(e)}" 447 | print(f"Unexpected error: {error_msg}") 448 | print(f"Error details: {type(e).__name__}") 449 | socketio.emit('text_extracted', { 450 | 'error': error_msg 451 | }, room=request.sid) 452 | 453 | @socketio.on('stop_generation') 454 | def handle_stop_generation(): 455 | """处理停止生成请求""" 456 | sid = request.sid 
457 | print(f"接收到停止生成请求: {sid}") 458 | 459 | if sid in generation_tasks: 460 | # 设置停止标志 461 | stop_event = generation_tasks[sid] 462 | stop_event.set() 463 | 464 | # 发送已停止状态 465 | socketio.emit('ai_response', { 466 | 'status': 'stopped', 467 | 'content': '生成已停止' 468 | }, room=sid) 469 | 470 | print(f"已停止用户 {sid} 的生成任务") 471 | else: 472 | print(f"未找到用户 {sid} 的生成任务") 473 | 474 | @socketio.on('analyze_text') 475 | def handle_analyze_text(data): 476 | try: 477 | text = data.get('text', '') 478 | settings = data.get('settings', {}) 479 | 480 | # 获取推理配置 481 | reasoning_config = settings.get('reasoningConfig', {}) 482 | 483 | # 获取maxTokens 484 | max_tokens = int(settings.get('maxTokens', 8192)) 485 | 486 | print(f"Debug - 文本分析请求: {text[:50]}...") 487 | print(f"Debug - 最大Token: {max_tokens}, 推理配置: {reasoning_config}") 488 | 489 | # 获取模型和API密钥 490 | model_id = settings.get('model', 'claude-3-7-sonnet-20250219') 491 | 492 | if not text: 493 | socketio.emit('error', {'message': '文本内容不能为空'}) 494 | return 495 | 496 | # 获取模型信息,判断是否为推理模型 497 | model_info = settings.get('modelInfo', {}) 498 | is_reasoning = model_info.get('isReasoning', False) 499 | 500 | model_instance = create_model_instance(model_id, settings, is_reasoning) 501 | 502 | # 将推理配置传递给模型 503 | if reasoning_config: 504 | model_instance.reasoning_config = reasoning_config 505 | 506 | # 如果启用代理,配置代理设置 507 | proxies = None 508 | if settings.get('proxyEnabled'): 509 | proxies = { 510 | 'http': f"http://{settings.get('proxyHost')}:{settings.get('proxyPort')}", 511 | 'https': f"http://{settings.get('proxyHost')}:{settings.get('proxyPort')}" 512 | } 513 | 514 | # 创建用于停止生成的事件 515 | sid = request.sid 516 | stop_event = Event() 517 | generation_tasks[sid] = stop_event 518 | 519 | try: 520 | for response in model_instance.analyze_text(text, proxies=proxies): 521 | # 检查是否收到停止信号 522 | if stop_event.is_set(): 523 | print(f"分析文本生成被用户 {sid} 停止") 524 | break 525 | 526 | socketio.emit('ai_response', response, room=sid) 527 | finally: 
528 | # 清理任务 529 | if sid in generation_tasks: 530 | del generation_tasks[sid] 531 | 532 | except Exception as e: 533 | print(f"Error in analyze_text: {str(e)}") 534 | traceback.print_exc() 535 | socketio.emit('error', {'message': f'分析文本时出错: {str(e)}'}) 536 | 537 | @socketio.on('analyze_image') 538 | def handle_analyze_image(data): 539 | try: 540 | image_data = data.get('image') 541 | settings = data.get('settings', {}) 542 | 543 | # 获取推理配置 544 | reasoning_config = settings.get('reasoningConfig', {}) 545 | 546 | # 获取maxTokens 547 | max_tokens = int(settings.get('maxTokens', 8192)) 548 | 549 | print(f"Debug - 图像分析请求") 550 | print(f"Debug - 最大Token: {max_tokens}, 推理配置: {reasoning_config}") 551 | 552 | # 获取模型和API密钥 553 | model_id = settings.get('model', 'claude-3-7-sonnet-20250219') 554 | 555 | if not image_data: 556 | socketio.emit('error', {'message': '图像数据不能为空'}) 557 | return 558 | 559 | # 获取模型信息,判断是否为推理模型 560 | model_info = settings.get('modelInfo', {}) 561 | is_reasoning = model_info.get('isReasoning', False) 562 | 563 | model_instance = create_model_instance(model_id, settings, is_reasoning) 564 | 565 | # 将推理配置传递给模型 566 | if reasoning_config: 567 | model_instance.reasoning_config = reasoning_config 568 | 569 | # 如果启用代理,配置代理设置 570 | proxies = None 571 | if settings.get('proxyEnabled'): 572 | proxies = { 573 | 'http': f"http://{settings.get('proxyHost')}:{settings.get('proxyPort')}", 574 | 'https': f"http://{settings.get('proxyHost')}:{settings.get('proxyPort')}" 575 | } 576 | 577 | # 创建用于停止生成的事件 578 | sid = request.sid 579 | stop_event = Event() 580 | generation_tasks[sid] = stop_event 581 | 582 | try: 583 | for response in model_instance.analyze_image(image_data, proxies=proxies): 584 | # 检查是否收到停止信号 585 | if stop_event.is_set(): 586 | print(f"分析图像生成被用户 {sid} 停止") 587 | break 588 | 589 | socketio.emit('ai_response', response, room=sid) 590 | finally: 591 | # 清理任务 592 | if sid in generation_tasks: 593 | del generation_tasks[sid] 594 | 595 | except Exception as e: 
596 | print(f"Error in analyze_image: {str(e)}") 597 | traceback.print_exc() 598 | socketio.emit('error', {'message': f'分析图像时出错: {str(e)}'}) 599 | 600 | @socketio.on('capture_screenshot') 601 | def handle_capture_screenshot(data): 602 | try: 603 | # 添加调试信息 604 | print("DEBUG: 执行capture_screenshot截图") 605 | 606 | # Capture the screen 607 | screenshot = pyautogui.screenshot() 608 | 609 | # Convert the image to base64 string 610 | buffered = BytesIO() 611 | screenshot.save(buffered, format="PNG") 612 | img_str = base64.b64encode(buffered.getvalue()).decode() 613 | 614 | # Emit the screenshot back to the client,不打印base64数据 615 | print("DEBUG: 完成capture_screenshot截图,图片大小: {} KB".format(len(img_str) // 1024)) 616 | socketio.emit('screenshot_complete', { 617 | 'success': True, 618 | 'image': img_str 619 | }, room=request.sid) 620 | except Exception as e: 621 | error_msg = f"Screenshot error: {str(e)}" 622 | print(f"Error capturing screenshot: {error_msg}") 623 | socketio.emit('screenshot_complete', { 624 | 'success': False, 625 | 'error': error_msg 626 | }, room=request.sid) 627 | 628 | def load_model_config(): 629 | """加载模型配置信息""" 630 | try: 631 | config_path = os.path.join(CONFIG_DIR, 'models.json') 632 | with open(config_path, 'r', encoding='utf-8') as f: 633 | config = json.load(f) 634 | return config 635 | except Exception as e: 636 | print(f"加载模型配置失败: {e}") 637 | return { 638 | "providers": {}, 639 | "models": {} 640 | } 641 | 642 | def load_prompts(): 643 | """加载系统提示词配置""" 644 | try: 645 | if os.path.exists(PROMPT_FILE): 646 | with open(PROMPT_FILE, 'r', encoding='utf-8') as f: 647 | return json.load(f) 648 | else: 649 | # 如果文件不存在,创建默认提示词配置 650 | default_prompts = { 651 | "default": { 652 | "name": "默认提示词", 653 | "content": "您是一位专业的问题解决专家。请逐步分析问题,找出问题所在,并提供详细的解决方案。始终使用用户偏好的语言回答。", 654 | "description": "通用问题解决提示词" 655 | } 656 | } 657 | with open(PROMPT_FILE, 'w', encoding='utf-8') as f: 658 | json.dump(default_prompts, f, ensure_ascii=False, indent=4) 659 | return 
default_prompts 660 | except Exception as e: 661 | print(f"加载提示词配置失败: {e}") 662 | return { 663 | "default": { 664 | "name": "默认提示词", 665 | "content": "您是一位专业的问题解决专家。请逐步分析问题,找出问题所在,并提供详细的解决方案。始终使用用户偏好的语言回答。", 666 | "description": "通用问题解决提示词" 667 | } 668 | } 669 | 670 | def save_prompt(prompt_id, prompt_data): 671 | """保存单个提示词到配置文件""" 672 | try: 673 | prompts = load_prompts() 674 | prompts[prompt_id] = prompt_data 675 | with open(PROMPT_FILE, 'w', encoding='utf-8') as f: 676 | json.dump(prompts, f, ensure_ascii=False, indent=4) 677 | return True 678 | except Exception as e: 679 | print(f"保存提示词配置失败: {e}") 680 | return False 681 | 682 | def delete_prompt(prompt_id): 683 | """从配置文件中删除一个提示词""" 684 | try: 685 | prompts = load_prompts() 686 | if prompt_id in prompts: 687 | del prompts[prompt_id] 688 | with open(PROMPT_FILE, 'w', encoding='utf-8') as f: 689 | json.dump(prompts, f, ensure_ascii=False, indent=4) 690 | return True 691 | return False 692 | except Exception as e: 693 | print(f"删除提示词配置失败: {e}") 694 | return False 695 | 696 | # 替换 before_first_request 装饰器 697 | def init_model_config(): 698 | """初始化模型配置""" 699 | try: 700 | model_config = load_model_config() 701 | # 更新ModelFactory的模型信息 702 | if hasattr(ModelFactory, 'update_model_capabilities'): 703 | ModelFactory.update_model_capabilities(model_config) 704 | print("已加载模型配置") 705 | except Exception as e: 706 | print(f"初始化模型配置失败: {e}") 707 | 708 | # 在请求处理前注册初始化函数 709 | @app.before_request 710 | def before_request_handler(): 711 | # 使用全局变量跟踪是否已初始化 712 | if not getattr(app, '_model_config_initialized', False): 713 | init_model_config() 714 | app._model_config_initialized = True 715 | 716 | # 版本检查函数 717 | def check_for_updates(): 718 | """检查GitHub上是否有新版本""" 719 | try: 720 | # 读取当前版本信息 721 | version_file = os.path.join(CONFIG_DIR, 'version.json') 722 | with open(version_file, 'r', encoding='utf-8') as f: 723 | version_info = json.load(f) 724 | 725 | current_version = version_info.get('version', '0.0.0') 726 | repo = 
version_info.get('github_repo', 'Zippland/Snap-Solver') 727 | 728 | # 请求GitHub API获取最新发布版本 729 | api_url = f"https://api.github.com/repos/{repo}/releases/latest" 730 | 731 | # 添加User-Agent以符合GitHub API要求 732 | headers = {'User-Agent': 'Snap-Solver-Update-Checker'} 733 | 734 | response = requests.get(api_url, headers=headers, timeout=5) 735 | if response.status_code == 200: 736 | latest_release = response.json() 737 | latest_version = latest_release.get('tag_name', '').lstrip('v') 738 | 739 | # 如果版本号为空,尝试从名称中提取 740 | if not latest_version and 'name' in latest_release: 741 | import re 742 | version_match = re.search(r'v?(\d+\.\d+\.\d+)', latest_release['name']) 743 | if version_match: 744 | latest_version = version_match.group(1) 745 | 746 | # 比较版本号(简单比较,可以改进为更复杂的语义版本比较) 747 | has_update = compare_versions(latest_version, current_version) 748 | 749 | update_info = { 750 | 'has_update': has_update, 751 | 'current_version': current_version, 752 | 'latest_version': latest_version, 753 | 'release_url': latest_release.get('html_url', f"https://github.com/{repo}/releases/latest"), 754 | 'release_date': latest_release.get('published_at', ''), 755 | 'release_notes': latest_release.get('body', ''), 756 | } 757 | 758 | # 缓存更新信息 759 | update_info_file = os.path.join(CONFIG_DIR, 'update_info.json') 760 | with open(update_info_file, 'w', encoding='utf-8') as f: 761 | json.dump(update_info, f, ensure_ascii=False, indent=2) 762 | 763 | return update_info 764 | 765 | # 如果无法连接GitHub,尝试读取缓存的更新信息 766 | update_info_file = os.path.join(CONFIG_DIR, 'update_info.json') 767 | if os.path.exists(update_info_file): 768 | with open(update_info_file, 'r', encoding='utf-8') as f: 769 | return json.load(f) 770 | 771 | return {'has_update': False, 'current_version': current_version} 772 | 773 | except Exception as e: 774 | print(f"检查更新失败: {str(e)}") 775 | # 出错时返回一个默认的值 776 | return {'has_update': False, 'error': str(e)} 777 | 778 | def compare_versions(version1, version2): 779 | 
"""比较两个版本号,如果version1比version2更新,则返回True""" 780 | try: 781 | v1_parts = [int(x) for x in version1.split('.')] 782 | v2_parts = [int(x) for x in version2.split('.')] 783 | 784 | # 确保两个版本号的组成部分长度相同 785 | while len(v1_parts) < len(v2_parts): 786 | v1_parts.append(0) 787 | while len(v2_parts) < len(v1_parts): 788 | v2_parts.append(0) 789 | 790 | # 逐部分比较 791 | for i in range(len(v1_parts)): 792 | if v1_parts[i] > v2_parts[i]: 793 | return True 794 | elif v1_parts[i] < v2_parts[i]: 795 | return False 796 | 797 | # 完全相同的版本 798 | return False 799 | except: 800 | # 如果解析出错,默认不更新 801 | return False 802 | 803 | @app.route('/api/check-update', methods=['GET']) 804 | def api_check_update(): 805 | """检查更新的API端点""" 806 | update_info = check_for_updates() 807 | return jsonify(update_info) 808 | 809 | # 添加配置文件路由 810 | @app.route('/config/') 811 | def serve_config(filename): 812 | return send_from_directory(CONFIG_DIR, filename) 813 | 814 | # 添加用于获取所有模型信息的API 815 | @app.route('/api/models', methods=['GET']) 816 | def get_models(): 817 | """返回可用的模型列表""" 818 | models = ModelFactory.get_available_models() 819 | return jsonify(models) 820 | 821 | # 获取所有API密钥 822 | @app.route('/api/keys', methods=['GET']) 823 | def get_api_keys(): 824 | """获取所有API密钥""" 825 | api_keys = load_api_keys() 826 | return jsonify(api_keys) 827 | 828 | # 保存API密钥 829 | @app.route('/api/keys', methods=['POST']) 830 | def update_api_keys(): 831 | """更新API密钥配置""" 832 | try: 833 | new_keys = request.json 834 | if not isinstance(new_keys, dict): 835 | return jsonify({"success": False, "message": "无效的API密钥格式"}), 400 836 | 837 | # 加载当前密钥 838 | current_keys = load_api_keys() 839 | 840 | # 更新密钥 841 | for key, value in new_keys.items(): 842 | current_keys[key] = value 843 | 844 | # 保存回文件 845 | if save_api_keys(current_keys): 846 | return jsonify({"success": True, "message": "API密钥已保存"}) 847 | else: 848 | return jsonify({"success": False, "message": "保存API密钥失败"}), 500 849 | 850 | except Exception as e: 851 | return 
jsonify({"success": False, "message": f"更新API密钥错误: {str(e)}"}), 500 852 | 853 | # 加载API密钥配置 854 | def load_api_keys(): 855 | """从配置文件加载API密钥""" 856 | try: 857 | default_keys = { 858 | "AnthropicApiKey": "", 859 | "OpenaiApiKey": "", 860 | "DeepseekApiKey": "", 861 | "AlibabaApiKey": "", 862 | "MathpixAppId": "", 863 | "MathpixAppKey": "", 864 | "GoogleApiKey": "", 865 | "DoubaoApiKey": "", 866 | "BaiduApiKey": "", 867 | "BaiduSecretKey": "" 868 | } 869 | if os.path.exists(API_KEYS_FILE): 870 | with open(API_KEYS_FILE, 'r', encoding='utf-8') as f: 871 | api_keys = json.load(f) 872 | 873 | # 确保新增的密钥占位符能自动补充 874 | missing_key_added = False 875 | for key, default_value in default_keys.items(): 876 | if key not in api_keys: 877 | api_keys[key] = default_value 878 | missing_key_added = True 879 | 880 | if missing_key_added: 881 | save_api_keys(api_keys) 882 | 883 | return api_keys 884 | else: 885 | # 如果文件不存在,创建默认配置 886 | save_api_keys(default_keys) 887 | return default_keys 888 | except Exception as e: 889 | print(f"加载API密钥配置失败: {e}") 890 | return {} 891 | 892 | # 加载中转API配置 893 | def load_proxy_api(): 894 | """从配置文件加载中转API配置""" 895 | try: 896 | if os.path.exists(PROXY_API_FILE): 897 | with open(PROXY_API_FILE, 'r', encoding='utf-8') as f: 898 | return json.load(f) 899 | else: 900 | # 如果文件不存在,创建默认配置 901 | default_proxy_apis = { 902 | "enabled": False, 903 | "apis": { 904 | "anthropic": "", 905 | "openai": "", 906 | "deepseek": "", 907 | "alibaba": "", 908 | "google": "" 909 | } 910 | } 911 | save_proxy_api(default_proxy_apis) 912 | return default_proxy_apis 913 | except Exception as e: 914 | print(f"加载中转API配置失败: {e}") 915 | return {"enabled": False, "apis": {}} 916 | 917 | # 保存中转API配置 918 | def save_proxy_api(proxy_api_config): 919 | """保存中转API配置到文件""" 920 | try: 921 | # 确保配置目录存在 922 | os.makedirs(os.path.dirname(PROXY_API_FILE), exist_ok=True) 923 | 924 | with open(PROXY_API_FILE, 'w', encoding='utf-8') as f: 925 | json.dump(proxy_api_config, f, ensure_ascii=False, 
indent=2) 926 | return True 927 | except Exception as e: 928 | print(f"保存中转API配置失败: {e}") 929 | return False 930 | 931 | # 保存API密钥配置 932 | def save_api_keys(api_keys): 933 | try: 934 | # 确保配置目录存在 935 | os.makedirs(os.path.dirname(API_KEYS_FILE), exist_ok=True) 936 | 937 | with open(API_KEYS_FILE, 'w', encoding='utf-8') as f: 938 | json.dump(api_keys, f, ensure_ascii=False, indent=2) 939 | return True 940 | except Exception as e: 941 | print(f"保存API密钥配置失败: {e}") 942 | return False 943 | 944 | # 获取特定API密钥 945 | def get_api_key(key_name): 946 | """获取指定的API密钥""" 947 | api_keys = load_api_keys() 948 | return api_keys.get(key_name, "") 949 | 950 | @app.route('/api/models') 951 | def api_models(): 952 | """API端点:获取可用模型列表""" 953 | try: 954 | # 加载模型配置 955 | config = load_model_config() 956 | 957 | # 转换为前端需要的格式 958 | models = [] 959 | for model_id, model_info in config['models'].items(): 960 | models.append({ 961 | 'id': model_id, 962 | 'display_name': model_info.get('name', model_id), 963 | 'is_multimodal': model_info.get('supportsMultimodal', False), 964 | 'is_reasoning': model_info.get('isReasoning', False), 965 | 'description': model_info.get('description', ''), 966 | 'version': model_info.get('version', 'latest') 967 | }) 968 | 969 | # 返回模型列表 970 | return jsonify(models) 971 | except Exception as e: 972 | print(f"获取模型列表时出错: {e}") 973 | return jsonify([]), 500 974 | 975 | @app.route('/api/prompts', methods=['GET']) 976 | def get_prompts(): 977 | """API端点:获取所有系统提示词""" 978 | try: 979 | prompts = load_prompts() 980 | return jsonify(prompts) 981 | except Exception as e: 982 | print(f"获取提示词列表时出错: {e}") 983 | return jsonify({"error": str(e)}), 500 984 | 985 | @app.route('/api/prompts/', methods=['GET']) 986 | def get_prompt(prompt_id): 987 | """API端点:获取单个系统提示词""" 988 | try: 989 | prompts = load_prompts() 990 | if prompt_id in prompts: 991 | return jsonify(prompts[prompt_id]) 992 | else: 993 | return jsonify({"error": "提示词不存在"}), 404 994 | except Exception as e: 995 | 
print(f"获取提示词时出错: {e}") 996 | return jsonify({"error": str(e)}), 500 997 | 998 | @app.route('/api/prompts', methods=['POST']) 999 | def add_prompt(): 1000 | """API端点:添加或更新系统提示词""" 1001 | try: 1002 | data = request.json 1003 | if not data or not isinstance(data, dict): 1004 | return jsonify({"error": "无效的请求数据"}), 400 1005 | 1006 | prompt_id = data.get('id') 1007 | if not prompt_id: 1008 | return jsonify({"error": "提示词ID不能为空"}), 400 1009 | 1010 | prompt_data = { 1011 | "name": data.get('name', f"提示词{prompt_id}"), 1012 | "content": data.get('content', ""), 1013 | "description": data.get('description', "") 1014 | } 1015 | 1016 | save_prompt(prompt_id, prompt_data) 1017 | return jsonify({"success": True, "id": prompt_id}) 1018 | except Exception as e: 1019 | print(f"保存提示词时出错: {e}") 1020 | return jsonify({"error": str(e)}), 500 1021 | 1022 | @app.route('/api/prompts/', methods=['DELETE']) 1023 | def remove_prompt(prompt_id): 1024 | """API端点:删除系统提示词""" 1025 | try: 1026 | success = delete_prompt(prompt_id) 1027 | if success: 1028 | return jsonify({"success": True}) 1029 | else: 1030 | return jsonify({"error": "提示词不存在或删除失败"}), 404 1031 | except Exception as e: 1032 | print(f"删除提示词时出错: {e}") 1033 | return jsonify({"error": str(e)}), 500 1034 | 1035 | @app.route('/api/proxy-api', methods=['GET']) 1036 | def get_proxy_api(): 1037 | """API端点:获取中转API配置""" 1038 | try: 1039 | proxy_api_config = load_proxy_api() 1040 | return jsonify(proxy_api_config) 1041 | except Exception as e: 1042 | print(f"获取中转API配置时出错: {e}") 1043 | return jsonify({"error": str(e)}), 500 1044 | 1045 | @app.route('/api/proxy-api', methods=['POST']) 1046 | def update_proxy_api(): 1047 | """API端点:更新中转API配置""" 1048 | try: 1049 | new_config = request.json 1050 | if not isinstance(new_config, dict): 1051 | return jsonify({"success": False, "message": "无效的中转API配置格式"}), 400 1052 | 1053 | # 保存回文件 1054 | if save_proxy_api(new_config): 1055 | return jsonify({"success": True, "message": "中转API配置已保存"}) 1056 | else: 1057 | 
return jsonify({"success": False, "message": "保存中转API配置失败"}), 500 1058 | 1059 | except Exception as e: 1060 | return jsonify({"success": False, "message": f"更新中转API配置错误: {str(e)}"}), 500 1061 | 1062 | @app.route('/api/clipboard', methods=['POST']) 1063 | def update_clipboard(): 1064 | """将文本复制到服务器剪贴板""" 1065 | try: 1066 | data = request.get_json(silent=True) or {} 1067 | text = data.get('text', '') 1068 | 1069 | if not isinstance(text, str) or not text.strip(): 1070 | return jsonify({"success": False, "message": "剪贴板内容不能为空"}), 400 1071 | 1072 | # 直接尝试复制,不使用is_available()检查 1073 | try: 1074 | pyperclip.copy(text) 1075 | return jsonify({"success": True}) 1076 | except Exception as e: 1077 | return jsonify({"success": False, "message": f"复制到剪贴板失败: {str(e)}"}), 500 1078 | except Exception as e: 1079 | app.logger.exception("更新剪贴板时发生异常") 1080 | return jsonify({"success": False, "message": f"服务器内部错误: {str(e)}"}), 500 1081 | 1082 | @app.route('/api/clipboard', methods=['GET']) 1083 | def get_clipboard(): 1084 | """从服务器剪贴板读取文本""" 1085 | try: 1086 | # 直接尝试读取,不使用is_available()检查 1087 | try: 1088 | text = pyperclip.paste() 1089 | if text is None: 1090 | text = "" 1091 | 1092 | return jsonify({ 1093 | "success": True, 1094 | "text": text, 1095 | "message": "成功读取剪贴板内容" 1096 | }) 1097 | except Exception as e: 1098 | return jsonify({"success": False, "message": f"读取剪贴板失败: {str(e)}"}), 500 1099 | except Exception as e: 1100 | app.logger.exception("读取剪贴板时发生异常") 1101 | return jsonify({"success": False, "message": f"服务器内部错误: {str(e)}"}), 500 1102 | 1103 | if __name__ == '__main__': 1104 | # 尝试使用5000端口,如果被占用则使用5001 1105 | port = 5000 1106 | import socket 1107 | try: 1108 | s = socket.socket(socket.AF_INET, socket.SOCK_STREAM) 1109 | s.bind(('0.0.0.0', port)) 1110 | s.close() 1111 | except OSError: 1112 | port = 5001 1113 | print(f"端口5000被占用,将使用端口{port}") 1114 | 1115 | local_ip = get_local_ip() 1116 | print(f"Local IP Address: {local_ip}") 1117 | print(f"Connect from your mobile device 
using: {local_ip}:{port}") 1118 | 1119 | # 加载模型配置 1120 | model_config = load_model_config() 1121 | if hasattr(ModelFactory, 'update_model_capabilities'): 1122 | ModelFactory.update_model_capabilities(model_config) 1123 | print("已加载模型配置信息") 1124 | 1125 | # Run Flask in the main thread without debug mode 1126 | socketio.run(app, host='0.0.0.0', port=port, allow_unsafe_werkzeug=True) 1127 | --------------------------------------------------------------------------------