├── app.ico
├── config
│   ├── version.json
│   ├── proxy_api.json
│   ├── prompts.json
│   └── models.json
├── requirements.txt
├── models
│   ├── __init__.py
│   ├── base.py
│   ├── baidu_ocr.py
│   ├── openai.py
│   ├── google.py
│   ├── factory.py
│   ├── alibaba.py
│   ├── doubao.py
│   ├── mathpix.py
│   ├── deepseek.py
│   └── anthropic.py
├── .gitignore
├── AGENTS.md
├── README.md
├── docs
│   └── beginner-tutorial.md
├── static
│   └── js
│       └── ui.js
├── LICENSE
└── app.py
/app.ico:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/Zippland/Snap-Solver/HEAD/app.ico
--------------------------------------------------------------------------------
/config/version.json:
--------------------------------------------------------------------------------
1 | {
2 | "version": "1.5.1",
3 | "build_date": "2025-04-11",
4 | "github_repo": "Zippland/Snap-Solver"
5 | }
--------------------------------------------------------------------------------
/config/proxy_api.json:
--------------------------------------------------------------------------------
1 | {
2 | "apis": {
3 | "alibaba": "",
4 | "anthropic": "",
5 | "deepseek": "",
6 | "doubao": "",
7 | "google": "",
8 | "openai": ""
9 | },
10 | "enabled": true
11 | }
--------------------------------------------------------------------------------
/requirements.txt:
--------------------------------------------------------------------------------
1 | flask==3.1.0
2 | pyautogui==0.9.54
3 | pyperclip==1.8.2
4 | Pillow==11.1.0
5 | flask-socketio==5.5.1
6 | python-engineio==4.11.2
7 | python-socketio==5.12.1
8 | requests==2.32.3
9 | openai==1.61.0
10 | google-generativeai==0.7.0
11 |
--------------------------------------------------------------------------------
/models/__init__.py:
--------------------------------------------------------------------------------
"""Aggregate exports for the models package.

Re-exports every provider adapter plus the factory so callers can write
``from models import ModelFactory`` instead of importing submodules directly.
"""
from .base import BaseModel
from .anthropic import AnthropicModel
from .openai import OpenAIModel
from .deepseek import DeepSeekModel
from .alibaba import AlibabaModel
from .google import GoogleModel
from .doubao import DoubaoModel
from .factory import ModelFactory

# Public API of the package (``from models import *``).
# NOTE(review): baidu_ocr.py and mathpix.py exist in this package but are not
# re-exported here — confirm whether that is intentional.
__all__ = [
    'BaseModel',
    'AnthropicModel',
    'OpenAIModel',
    'DeepSeekModel',
    'AlibabaModel',
    'GoogleModel',
    'DoubaoModel',
    'ModelFactory'
]
--------------------------------------------------------------------------------
/models/base.py:
--------------------------------------------------------------------------------
1 | from abc import ABC, abstractmethod
2 | from typing import Generator, Any
3 |
class BaseModel(ABC):
    """Common contract for all provider model adapters.

    Stores the shared configuration (API key, sampling temperature, system
    prompt, preferred language, optional base URL) and declares the streaming
    analysis interface every concrete provider class must implement.
    """

    def __init__(self, api_key: str, temperature: float = 0.7, system_prompt: str = None, language: str = None, api_base_url: str = None):
        self.api_key = api_key
        self.temperature = temperature
        self.language = language
        self.api_base_url = api_base_url
        # An explicit prompt wins; otherwise fall back to the default one.
        self.system_prompt = system_prompt if system_prompt else self.get_default_system_prompt()

    @abstractmethod
    def analyze_image(self, image_data: str, proxies: dict = None) -> Generator[dict, None, None]:
        """Stream analysis chunks for a base64-encoded image.

        Args:
            image_data: Base64 encoded image data
            proxies: Optional proxy configuration

        Yields:
            dict: Response chunks with status and content
        """
        pass

    @abstractmethod
    def analyze_text(self, text: str, proxies: dict = None) -> Generator[dict, None, None]:
        """Stream analysis chunks for a plain-text query.

        Args:
            text: Text to analyze
            proxies: Optional proxy configuration

        Yields:
            dict: Response chunks with status and content
        """
        pass

    def get_default_system_prompt(self) -> str:
        """Return the default system prompt; subclasses may override (no longer required to)."""
        return "您是一位专业的问题解决专家。请逐步分析问题,找出问题所在,并提供详细的解决方案。始终使用用户偏好的语言回答。"

    @abstractmethod
    def get_model_identifier(self) -> str:
        """Return the model identifier used in API calls"""
        pass
48 |
--------------------------------------------------------------------------------
/config/prompts.json:
--------------------------------------------------------------------------------
1 | { "ACM_hard": {
2 | "name": "ACM编程题(困难)",
3 | "content":"你是一个顶尖的算法竞赛选手 + 程序员。你的任务是接收一道 ACM / 编程题目(包含题目描述、输入输出格式、约束)并输出一份完整可运行的解法。请严格按照以下步骤:\n1. 题目复述;\n2. 复杂度与限制分析;\n3. 思路与算法设计;\n4. 伪代码 / 算法框架;\n5. 最终可运行python代码(带注释);\n6. 时间复杂度 / 空间复杂度总结 + 边界 / 特殊输入测试。输出格式必须包含这些部分,不得省略分析或直接跳到代码。",
4 | "description": "专为ACM编程竞赛题设计的提示词"
5 | },
6 | "a_default": {
7 | "name": "默认提示词",
8 | "content": "如果给的是图片,请先识别图片上面的题目,并输出完整题干;如果给的不是图片,直接诠释一下题目。然后解决该问题,如果是编程题,请输出最终可运行代码(带注释)。",
9 | "description": "通用问题解决提示词"
10 | },
11 | "single_choice": {
12 | "name": "单选题提示词",
13 | "content": "您是一位专业的单选题解析专家。当看到一个单选题时,请:\n1. 仔细阅读题目要求和选项\n2. 分析每个选项的正确性\n3. 明确指出正确选项\n4. 解释为什么该选项正确\n5. 简要说明其他选项错误的原因\n6. 总结相关知识点",
14 | "description": "专为单选题分析设计的提示词"
15 | },
16 | "multiple_choice": {
17 | "name": "多选题提示词",
18 | "content": "您是一位专业的多选题解析专家。当看到一个多选题时,请:\n1. 仔细阅读题目要求和所有选项\n2. 逐一分析每个选项的正确性\n3. 明确列出所有正确选项\n4. 详细解释每个正确选项的理由\n5. 说明错误选项的问题所在\n6. 归纳总结相关知识点",
19 | "description": "专为多选题分析设计的提示词"
20 | },
21 | "programming": {
22 | "name": "ACM编程题提示词",
23 | "content": "您是一位专业的ACM编程竞赛解题专家。当看到一个编程题时,请:\n1. 分析题目要求、输入输出格式和约束条件\n2. 确定解题思路和算法策略\n3. 分析算法复杂度\n4. 提供完整、可运行的代码实现\n5. 解释代码中的关键部分\n6. 提供一些测试用例及其输出\n7. 讨论可能的优化方向",
24 | "description": "专为ACM编程竞赛题设计的提示词"
25 | },
26 | "pattern_reasoning": {
27 | "name": "图形推理题提示词",
28 | "content": "您是一位专业的图形推理题解析专家。当看到一个图形推理题时,请:\n1. 观察并描述题目给出的图形序列\n2. 分析图形之间的变化规律\n3. 归纳可能的变化模式(如旋转、翻转、数量变化等)\n4. 应用发现的规律预测下一个图形\n5. 在多个选项中确定符合规律的答案\n6. 详细解释推理过程",
29 | "description": "专为图形推理题设计的提示词"
30 | },
31 | "chart_calculation": {
32 | "name": "图表计算题提示词",
33 | "content": "您是一位专业的图表数据分析专家。当看到一个包含图表的计算题时,请:\n1. 仔细阅读并描述图表包含的信息(表格、柱状图、折线图等)\n2. 确定题目要求计算的具体内容\n3. 从图表中提取相关数据\n4. 设计合适的计算方法\n5. 进行准确的计算过程\n6. 清晰呈现计算结果\n7. 必要时解释数据的含义和趋势",
34 | "description": "专为图表数据分析和计算题设计的提示词"
35 | }
36 | }
--------------------------------------------------------------------------------
/.gitignore:
--------------------------------------------------------------------------------
1 | # Dependencies
2 | node_modules/
3 | /.pnp
4 | .pnp.js
5 | yarn.lock
6 | package-lock.json
7 | .npm
8 | .yarn-integrity
9 |
10 | # Python
11 | __pycache__/
12 | *.py[cod]
13 | *$py.class
14 | *.so
15 | .Python
16 | env/
17 | build/
18 | develop-eggs/
19 | dist/
20 | downloads/
21 | eggs/
22 | .eggs/
23 | lib/
24 | lib64/
25 | parts/
26 | sdist/
27 | var/
28 | wheels/
29 | *.egg-info/
30 | .installed.cfg
31 | *.egg
32 |
33 | # Testing & Coverage
34 | /coverage
35 | .nyc_output
36 |
37 | # Production & Build
38 | /build
39 | /dist
40 | /out
41 | .next/
42 | out/
43 |
44 | # Development & Environment
45 | .env
46 | .env.local
47 | .env.development.local
48 | .env.test.local
49 | .env.production.local
50 | config.local.js
51 | config.dev.js
52 |
53 | # Logs
54 | logs/
55 | *.log
56 | npm-debug.log*
57 | yarn-debug.log*
58 | yarn-error.log*
59 |
60 | # IDEs and Editors
61 | /.idea/
62 | .project
63 | .classpath
64 | .c9/
65 | *.launch
66 | .settings/
67 | *.sublime-workspace
68 | .vscode/*
69 | !.vscode/settings.json
70 | !.vscode/tasks.json
71 | !.vscode/launch.json
72 | !.vscode/extensions.json
73 |
74 | # Java
75 | *.class
76 | *.war
77 | *.ear
78 | *.jar
79 | target/
80 |
81 | # Gradle
82 | .gradle
83 | /build/
84 |
85 | # Maven
86 | target/
87 | pom.xml.tag
88 | pom.xml.releaseBackup
89 | pom.xml.versionsBackup
90 | pom.xml.next
91 | release.properties
92 | dependency-reduced-pom.xml
93 |
94 | # TypeScript
95 | *.tsbuildinfo
96 |
97 | # OS Generated Files
98 | .DS_Store
99 | .DS_Store?
100 | ._*
101 | .Spotlight-V100
102 | .Trashes
103 | ehthumbs.db
104 | Thumbs.db
105 |
106 | # Backup Files
107 | *.bak
108 | *.swp
109 | *.swo
110 | *~
111 |
112 | # Optional REPL history
113 | .node_repl_history
114 |
115 | # Media & Large Files
116 | *.mp4
117 | *.tiff
118 | *.avi
119 | *.flv
120 | *.mov
121 | *.wmv
122 | *.tgz
123 |
124 | # Optional eslint cache
125 | .eslintcache
126 |
127 | # Project specific
128 | config/update_info.json
129 | config/api_keys.json
130 | config/api_base_urls.json
131 | .venv/
132 | venv/
133 |
134 | # uv
135 | .python-version
136 | pyproject.toml
137 | uv.lock
138 |
--------------------------------------------------------------------------------
/AGENTS.md:
--------------------------------------------------------------------------------
1 | # Repository Guidelines
2 |
3 | ## Project Structure & Module Organization
4 | Snap-Solver is a Flask web app served from `app.py`, which wires Socket.IO streaming, screenshot capture, and model dispatch. Model adapters live in `models/`, with `factory.py` loading provider metadata from `config/models.json` and creating the appropriate client (OpenAI, Anthropic, DeepSeek, Qwen, etc.). User-facing templates live under `templates/`, with shared assets in `static/`. Runtime configuration and secrets are JSON files in `config/`; treat these as local-only overrides even if sample values exist in the repo. Python dependencies are listed in `requirements.txt` (lockfile: `uv.lock`).
5 |
6 | ## Build, Test, and Development Commands
7 | - `python -m venv .venv && source .venv/bin/activate` sets up an isolated environment.
8 | - `pip install -r requirements.txt` or `uv sync` installs Flask, provider SDKs, and Socket.IO.
9 | - `python app.py` boots the development server at `http://localhost:5000` with verbose engine logs.
10 | - `FLASK_ENV=development python app.py` enables auto-reload during active development.
11 |
12 | ## Coding Style & Naming Conventions
13 | Follow PEP 8: 4-space indentation, `snake_case` for Python functions, and descriptive class names that match provider roles (see `models/openai.py`). JSON configs use lowerCamelCase keys so the web client can consume them directly; keep that convention when adding settings. Client scripts in `static/js/` should stay modular and avoid sprawling event handlers.
14 |
15 | ## Testing Guidelines
16 | There is no automated test suite yet; whenever you add features, verify end-to-end by launching `python app.py`, triggering a screenshot from the UI, and confirming Socket.IO events stream without tracebacks. When integrating a new model, seed a temporary key in `config/api_keys.json`, exercise one request, and capture console logs before reverting secrets. If you introduce automated tests, place them in `tests/` and gate external calls behind mocks so the suite can run offline.
17 |
18 | ## Commit & Pull Request Guidelines
19 | The history favors concise, imperative commit subjects in Chinese (e.g., `修复发送按钮保存裁剪框数据`). Keep messages under 70 characters, enumerate multi-part changes in the body, and reference related issues with `#123` when applicable. Pull requests should outline the user-visible impact, note any config updates or new dependencies, attach UI screenshots for front-end tweaks, and list manual verification steps so reviewers can reproduce them quickly.
20 |
21 | ## Configuration & Security Tips
22 | Never commit real API keys—`.gitignore` already excludes `config/api_keys.json` and other volatile files, so create local copies (`config/api_keys.local.json`) for experimentation. When sharing deployment instructions, direct operators to set API credentials via environment variables or secure vaults and only populate JSON stubs during runtime startup logic.
23 |
--------------------------------------------------------------------------------
/README.md:
--------------------------------------------------------------------------------
1 |
Snap-Solver 
2 |
3 |
4 |
5 | 🔍 一键截屏,自动解题 - 线上考试,从未如此简单
6 |
7 |
8 |
9 |
10 |
11 |
12 |
13 |
14 |
15 |
16 |
17 | 核心特性 •
18 | 快速开始 •
19 | 新手教程 •
20 | 使用指南 •
21 | 技术架构 •
22 | 高级配置 •
23 | 常见问题 •
24 | 获取帮助
25 |
26 |
27 |
40 |
43 |
44 | ## 💫 项目简介
45 |
46 | **Snap-Solver** 是一个革命性的AI笔试测评工具,专为学生、考生和自学者设计。只需**按下快捷键**,即可自动截取屏幕上的任何题目,通过AI进行分析并提供详细解答。
47 |
48 | 无论是复杂的数学公式、物理难题、编程问题,还是其他学科的挑战,Snap-Solver都能提供清晰、准确、有条理的解决方案,帮助您更好地理解和掌握知识点。
49 |
50 | ## 📚 新手教程
51 |
52 | 第一次使用?按照我们的 [《新手教程》](docs/beginner-tutorial.md) 完成环境准备、模型配置和首次解题演练,全程图文指引,几分钟即可上手。
53 |
54 | ## 🔧 技术架构
55 |
56 | ```mermaid
57 | graph TD
58 | A[用户界面] --> B[Flask Web服务]
59 | B --> C{API路由}
60 | C --> D[截图服务]
61 | C --> E[OCR识别]
62 | C --> F[AI分析]
63 | E --> |Mathpix API| G[文本提取]
64 | F --> |模型选择| H1[OpenAI]
65 | F --> |模型选择| H2[Anthropic]
66 | F --> |模型选择| H3[DeepSeek]
67 | F --> |模型选择| H4[Alibaba]
68 | F --> |模型选择| H5[Google]
69 | F --> |模型选择| H6[Doubao]
70 | D --> I[Socket.IO实时通信]
71 | I --> A
72 | ```
73 |
74 | ## ✨ 核心特性
75 |
76 |
77 |
78 |
79 | 📱 跨设备协同
80 |
81 | - 一键截图:按下快捷键,即可在移动设备上查看和分析电脑屏幕
82 | - 局域网共享:一处部署,多设备访问,提升学习效率
83 |
84 | |
85 |
86 | 🧠 多模型AI支持
87 |
88 | - GPT 家族:OpenAI强大的推理能力
89 | - Claude 家族:Anthropic的高级理解与解释
90 | - DeepSeek 家族:专为中文场景优化的模型
91 | - QVQ 和 Qwen 家族:以视觉推理闻名的国产AI
92 | - Gemini 家族:智商130的非推理AI
93 |
94 | |
95 |
96 |
97 |
98 | 🔍 精准识别
99 |
100 | - OCR文字识别:准确捕捉图片中的文本
101 | - 数学公式支持:通过Mathpix精确识别复杂数学符号
102 |
103 | |
104 |
105 | 🌐 全球无障碍
106 |
107 | - VPN代理支持:自定义代理设置,解决网络访问限制
108 | - 多语言响应:支持定制AI回复语言
109 |
110 | |
111 |
112 |
113 |
114 | 💻 全平台兼容
115 |
116 | - 桌面支持:Windows、MacOS、Linux
117 | - 移动访问:手机、平板通过浏览器直接使用
118 |
119 | |
120 |
121 | ⚙️ 高度可定制
122 |
123 | - 思考深度控制:调整AI的分析深度
124 | - 自定义提示词:针对特定学科优化提示
125 |
126 | |
127 |
128 |
129 |
130 | ## 🚀 快速开始
131 |
132 | ### 📋 前置要求
133 |
134 | - Python 3.x
135 | - 至少以下一个API Key:
136 | - OpenAI API Key
137 | - Anthropic API Key (推荐✅)
138 | - DeepSeek API Key
139 | - Alibaba API Key (国内用户首选)
140 | - Google API Key
141 | - Mathpix API Key (推荐OCR识别✅)
142 |
143 | ### 📥 开始使用
144 |
145 | ```bash
146 | # 启动应用
147 | python app.py
148 | ```
149 |
150 | ### 📱 访问方式
151 |
152 | - **本机访问**:打开浏览器,访问 http://localhost:5000
153 | - **局域网设备访问**:在同一网络的任何设备上访问 `http://[电脑IP]:5000`
154 |
155 | ### 🎯 使用场景示例
156 |
157 | - **课后习题**:截取教材或作业中的难题,获取步骤详解
158 | - **编程调试**:截取代码错误信息,获取修复建议
159 | - **考试复习**:分析错题并理解解题思路
160 | - **文献研究**:截取复杂论文段落,获取简化解释
161 |
162 | ### 🧩 组件详情
163 |
164 | - **前端**:响应式HTML/CSS/JS界面,支持移动设备
165 | - **后端**:Flask + SocketIO,提供RESTful API和WebSocket
166 | - **AI接口**:多模型支持,统一接口标准
167 | - **图像处理**:高效的截图和裁剪功能
168 |
169 | ## ⚙️ 高级可调参数
170 |
171 | - **温度**:调整回答的创造性与确定性(0.1-1.0)
172 | - **最大输出Token**:控制回答长度
173 | - **推理深度**:标准模式(快速)或深度思考(详细)
174 | - **思考预算占比**:平衡思考过程与最终答案的详细程度
175 | - **系统提示词**:自定义AI的基础行为与专业领域
176 |
177 | ## ❓ 常见问题
178 |
179 |
180 | 如何获得最佳识别效果?
181 |
182 | 确保截图清晰,包含完整题目和必要上下文。对于数学公式,建议使用Mathpix OCR以获得更准确的识别结果。
183 |
184 |
185 |
186 |
187 | 无法连接到服务怎么办?
188 |
189 | 1. 检查防火墙设置是否允许5000端口
190 | 2. 确认设备在同一局域网内
191 | 3. 尝试重启应用程序
192 | 4. 查看控制台日志获取错误信息
193 |
194 |
195 |
196 |
197 | API调用失败的原因?
198 |
199 | 1. API密钥可能无效或余额不足
200 | 2. 网络连接问题,特别是国际API
201 | 3. 代理设置不正确
202 | 4. API服务可能临时不可用
203 |
204 |
205 |
206 |
207 | 如何优化AI回答质量?
208 |
209 | 1. 调整系统提示词,添加特定学科的指导
210 | 2. 根据问题复杂度选择合适的模型
211 | 3. 对于复杂题目,使用"深度思考"模式
212 | 4. 确保截取的题目包含完整信息
213 |
214 |
215 |
216 | ## 🤝 获取帮助
217 |
218 | - **代部署服务**:如果您不擅长编程,需要代部署服务,请联系 [zylanjian@outlook.com](mailto:zylanjian@outlook.com)
219 | - **问题报告**:在GitHub仓库提交Issue
220 | - **功能建议**:欢迎通过Issue或邮件提供改进建议
221 |
222 | ## 📜 开源协议
223 |
224 | 本项目采用 [Apache 2.0](LICENSE) 协议。
225 |
--------------------------------------------------------------------------------
/models/baidu_ocr.py:
--------------------------------------------------------------------------------
1 | import base64
2 | import json
3 | import time
4 | import urllib.request
5 | import urllib.parse
6 | from typing import Generator, Dict, Any
7 | from .base import BaseModel
8 |
class BaiduOCRModel(BaseModel):
    """
    Baidu OCR model adapter for image text recognition.

    Implements the BaseModel streaming interface on top of Baidu's
    ``accurate_basic`` OCR REST API. Only image analysis is supported;
    text analysis always yields an error response.
    """

    def __init__(self, api_key: str, secret_key: str = None, temperature: float = 0.7, system_prompt: str = None):
        """
        Initialize the Baidu OCR model.

        Args:
            api_key: Baidu API Key, or the combined 'API_KEY:SECRET_KEY' form.
            secret_key: Baidu Secret Key (may instead be embedded in api_key,
                separated by a colon).
            temperature: Unused for OCR; kept for BaseModel compatibility.
            system_prompt: Unused for OCR; kept for BaseModel compatibility.

        Raises:
            ValueError: If the API key format is invalid.
        """
        super().__init__(api_key, temperature, system_prompt)

        # Two accepted formats: a separate secret_key argument, or a single
        # 'API_KEY:SECRET_KEY' string passed as api_key.
        if secret_key:
            self.api_key = api_key
            self.secret_key = secret_key
        else:
            try:
                # maxsplit=1 keeps any further colons inside the secret part.
                self.api_key, self.secret_key = api_key.split(':', 1)
            except ValueError:
                raise ValueError("百度OCR API密钥必须是 'API_KEY:SECRET_KEY' 格式或单独传递secret_key参数")

        # Baidu API endpoints.
        self.token_url = "https://aip.baidubce.com/oauth/2.0/token"
        self.ocr_url = "https://aip.baidubce.com/rest/2.0/ocr/v1/accurate_basic"

        # Cached access_token and its absolute expiry timestamp (epoch secs).
        self._access_token = None
        self._token_expires = 0

    def get_access_token(self) -> str:
        """Return a valid Baidu API access_token, reusing the cache when possible.

        Raises:
            Exception: If the token request fails or the response contains
                no access_token.
        """
        # Reuse the cached token, refreshing 5 minutes before expiry.
        if self._access_token and time.time() < self._token_expires - 300:
            return self._access_token

        # Request a fresh access_token.
        params = {
            'grant_type': 'client_credentials',
            'client_id': self.api_key,
            'client_secret': self.secret_key
        }

        data = urllib.parse.urlencode(params).encode('utf-8')
        request = urllib.request.Request(self.token_url, data=data)
        request.add_header('Content-Type', 'application/x-www-form-urlencoded')

        # Keep the try body limited to the network call so the API-level error
        # below is not re-wrapped into a second "请求access_token失败" prefix.
        try:
            with urllib.request.urlopen(request, timeout=30) as response:
                result = json.loads(response.read().decode('utf-8'))
        except Exception as e:
            raise Exception(f"请求access_token失败: {str(e)}") from e

        if 'access_token' not in result:
            raise Exception(f"获取access_token失败: {result.get('error_description', '未知错误')}")

        self._access_token = result['access_token']
        # Baidu tokens default to 30-day validity; we refresh early regardless.
        self._token_expires = time.time() + result.get('expires_in', 2592000)
        return self._access_token

    def ocr_image(self, image_data: str) -> str:
        """
        Run OCR on an image.

        Args:
            image_data: Base64 encoded image data.

        Returns:
            str: Recognized text, one line per detected text block.

        Raises:
            Exception: If the HTTP request fails or the OCR API reports an error.
        """
        access_token = self.get_access_token()

        # Request payload for the accurate_basic endpoint.
        params = {
            'image': image_data,
            'language_type': 'auto_detect',  # auto-detect the text language
            'detect_direction': 'true',      # detect image orientation
            'probability': 'false'           # skip confidences to shrink the response
        }

        data = urllib.parse.urlencode(params).encode('utf-8')
        url = f"{self.ocr_url}?access_token={access_token}"

        request = urllib.request.Request(url, data=data)
        request.add_header('Content-Type', 'application/x-www-form-urlencoded')

        # Narrow try: an API error_code must surface with its own message
        # instead of being wrapped into a second "OCR识别失败" prefix.
        try:
            with urllib.request.urlopen(request, timeout=30) as response:
                result = json.loads(response.read().decode('utf-8'))
        except Exception as e:
            raise Exception(f"OCR识别失败: {str(e)}") from e

        if 'error_code' in result:
            raise Exception(f"百度OCR API错误: {result.get('error_msg', '未知错误')}")

        # Join the per-line recognition results into a single text blob.
        words_result = result.get('words_result', [])
        return '\n'.join(item['words'] for item in words_result)

    def extract_full_text(self, image_data: str) -> str:
        """
        Extract the full text from an image (Mathpix-compatible interface).

        Args:
            image_data: Base64 encoded image data.

        Returns:
            str: Extracted text content.
        """
        return self.ocr_image(image_data)

    def analyze_image(self, image_data: str, proxies: dict = None) -> Generator[Dict[str, Any], None, None]:
        """
        Analyze an image and yield the OCR result (streamed for interface parity).

        Args:
            image_data: Base64 encoded image data.
            proxies: Proxy configuration (unused).

        Yields:
            dict: Response containing the OCR result or an error.
        """
        try:
            text = self.ocr_image(image_data)
            yield {
                'status': 'completed',
                'content': text,
                'model': 'baidu-ocr'
            }
        except Exception as e:
            yield {
                'status': 'error',
                'content': f'OCR识别失败: {str(e)}',
                'model': 'baidu-ocr'
            }

    def analyze_text(self, text: str, proxies: dict = None) -> Generator[Dict[str, Any], None, None]:
        """
        Analyze text (unsupported for the OCR model).

        Args:
            text: Input text.
            proxies: Proxy configuration (unused).

        Yields:
            dict: Error response.
        """
        yield {
            'status': 'error',
            'content': 'OCR模型不支持文本分析功能',
            'model': 'baidu-ocr'
        }

    def get_model_identifier(self) -> str:
        """Return the model identifier string."""
        return "baidu-ocr"
178 |
--------------------------------------------------------------------------------
/config/models.json:
--------------------------------------------------------------------------------
1 | {
2 | "providers": {
3 | "anthropic": {
4 | "name": "Anthropic",
5 | "api_key_id": "AnthropicApiKey",
6 | "class_name": "AnthropicModel"
7 | },
8 | "openai": {
9 | "name": "OpenAI",
10 | "api_key_id": "OpenaiApiKey",
11 | "class_name": "OpenAIModel"
12 | },
13 | "deepseek": {
14 | "name": "DeepSeek",
15 | "api_key_id": "DeepseekApiKey",
16 | "class_name": "DeepSeekModel"
17 | },
18 | "alibaba": {
19 | "name": "Alibaba",
20 | "api_key_id": "AlibabaApiKey",
21 | "class_name": "AlibabaModel"
22 | },
23 | "google": {
24 | "name": "Google",
25 | "api_key_id": "GoogleApiKey",
26 | "class_name": "GoogleModel"
27 | },
28 | "doubao": {
29 | "name": "Doubao",
30 | "api_key_id": "DoubaoApiKey",
31 | "class_name": "DoubaoModel"
32 | }
33 | },
34 | "models": {
35 | "claude-opus-4-20250514": {
36 | "name": "Claude 4 Opus",
37 | "provider": "anthropic",
38 | "supportsMultimodal": true,
39 | "isReasoning": true,
40 | "version": "20250514",
41 | "description": "最强大的Claude 4 Opus模型,支持图像理解和深度思考过程"
42 | },
43 | "claude-opus-4-1-20250805": {
44 | "name": "Claude 4.1 Opus",
45 | "provider": "anthropic",
46 | "supportsMultimodal": true,
47 | "isReasoning": false,
48 | "version": "20250805",
49 | "description": "Claude Opus 4.1 最新标准模式,快速响应并支持多模态输入"
50 | },
51 | "claude-opus-4-1-20250805-thinking": {
52 | "name": "Claude 4.1 Opus (Thinking)",
53 | "provider": "anthropic",
54 | "supportsMultimodal": true,
55 | "isReasoning": true,
56 | "version": "20250805",
57 | "description": "Claude Opus 4.1 思考模式,启用更长思考过程以提升推理质量"
58 | },
59 | "claude-sonnet-4-20250514": {
60 | "name": "Claude 4 Sonnet",
61 | "provider": "anthropic",
62 | "supportsMultimodal": true,
63 | "isReasoning": true,
64 | "version": "20250514",
65 | "description": "高性能的Claude 4 Sonnet模型,支持图像理解和思考过程"
66 | },
67 | "claude-sonnet-4-5-20250929": {
68 | "name": "Claude 4.5 Sonnet",
69 | "provider": "anthropic",
70 | "supportsMultimodal": true,
71 | "isReasoning": true,
72 | "version": "20250929",
73 | "description": "Claude Sonnet 4.5 版,兼具多模态理解与最新推理能力"
74 | },
75 | "gpt-4o-2024-11-20": {
76 | "name": "GPT-4o",
77 | "provider": "openai",
78 | "supportsMultimodal": true,
79 | "isReasoning": false,
80 | "version": "2024-11-20",
81 | "description": "OpenAI的GPT-4o模型,支持图像理解"
82 | },
83 | "gpt-5-2025-08-07": {
84 | "name": "GPT-5",
85 | "provider": "openai",
86 | "supportsMultimodal": true,
87 | "isReasoning": true,
88 | "version": "2025-08-07",
89 | "description": "OpenAI旗舰级GPT-5模型,支持多模态输入与高级推理"
90 | },
91 | "gpt-5-1": {
92 | "name": "GPT-5.1",
93 | "provider": "openai",
94 | "supportsMultimodal": true,
95 | "isReasoning": true,
96 | "version": "latest",
97 | "description": "GPT-5.1 新版旗舰模型,强化长上下文与推理表现"
98 | },
99 | "gpt-5-codex-high": {
100 | "name": "GPT Codex High",
101 | "provider": "openai",
102 | "supportsMultimodal": false,
103 | "isReasoning": true,
104 | "version": "latest",
105 | "description": "OpenAI高性能代码模型Codex High,侧重复杂代码生成与重构"
106 | },
107 | "o3-mini": {
108 | "name": "o3-mini",
109 | "provider": "openai",
110 | "supportsMultimodal": false,
111 | "isReasoning": true,
112 | "version": "latest",
113 | "description": "OpenAI的o3-mini模型,支持图像理解和思考过程"
114 | },
115 | "deepseek-chat": {
116 | "name": "DeepSeek-V3",
117 | "provider": "deepseek",
118 | "supportsMultimodal": false,
119 | "isReasoning": false,
120 | "version": "latest",
121 | "description": "DeepSeek最新大模型,671B MoE模型,支持60 tokens/秒的高速生成"
122 | },
123 | "deepseek-reasoner": {
124 | "name": "DeepSeek-R1",
125 | "provider": "deepseek",
126 | "supportsMultimodal": false,
127 | "isReasoning": true,
128 | "version": "latest",
129 | "description": "DeepSeek推理模型,提供详细思考过程(仅支持文本)"
130 | },
131 | "QVQ-Max-2025-03-25": {
132 | "name": "QVQ-Max",
133 | "provider": "alibaba",
134 | "supportsMultimodal": true,
135 | "isReasoning": true,
136 | "version": "2025-03-25",
137 | "description": "阿里巴巴通义千问-QVQ-Max版本,支持图像理解和思考过程"
138 | },
139 | "qwen-vl-max-latest": {
140 | "name": "Qwen-VL-MAX",
141 | "provider": "alibaba",
142 | "supportsMultimodal": true,
143 | "isReasoning": false,
144 | "version": "latest",
145 | "description": "阿里通义千问VL-MAX模型,视觉理解能力最强,支持图像理解和复杂任务"
146 | },
147 | "gemini-2.5-pro": {
148 | "name": "Gemini 2.5 Pro",
149 | "provider": "google",
150 | "supportsMultimodal": true,
151 | "isReasoning": true,
152 | "version": "latest",
153 | "description": "Google最强大的Gemini 2.5 Pro模型,支持图像理解(需要付费API密钥)"
154 | },
155 | "gemini-2.5-flash": {
156 | "name": "Gemini 2.5 Flash",
157 | "provider": "google",
158 | "supportsMultimodal": true,
159 | "isReasoning": false,
160 | "version": "latest",
161 | "description": "Google最新的Gemini 2.5 Flash模型,支持图像理解,速度更快,性能更好"
162 | },
163 | "gemini-2.0-flash": {
164 | "name": "Gemini 2.0 Flash",
165 | "provider": "google",
166 | "supportsMultimodal": true,
167 | "isReasoning": false,
168 | "version": "latest",
169 | "description": "Google更快速的Gemini 2.0 Flash模型,支持图像理解,有免费配额"
170 | },
171 | "gemini-3-pro": {
172 | "name": "Gemini 3 Pro",
173 | "provider": "google",
174 | "supportsMultimodal": true,
175 | "isReasoning": true,
176 | "version": "latest",
177 | "description": "Google Gemini 3 Pro 顶级推理模型,面向复杂多模态任务"
178 | },
179 | "doubao-seed-1-6-250615": {
180 | "name": "Doubao-Seed-1.6",
181 | "provider": "doubao",
182 | "supportsMultimodal": true,
183 | "isReasoning": true,
184 | "version": "latest",
185 | "description": "支持auto/thinking/non-thinking三种思考模式、支持多模态、256K长上下文"
186 | }
187 | }
188 | }
189 |
--------------------------------------------------------------------------------
/docs/beginner-tutorial.md:
--------------------------------------------------------------------------------
1 | # Snap-Solver 零基础上手教程
2 |
3 | 这篇教程面向第一次接触编程或 Python 的朋友,手把手带你从安装环境开始,直到在电脑和手机上顺利使用 Snap-Solver 完成题目分析。如果你在任何步骤遇到困难,建议按章节逐步检查,或对照文末的常见问题排查。
4 |
5 | ---
6 |
7 | ## 1. Snap-Solver 是什么?
8 |
9 | Snap-Solver 是一个本地运行的截屏解题工具,主要功能包括:
10 | - 一键截取电脑屏幕的题目图片;
11 | - 自动调用 OCR(文字识别)和多种大模型,给出详细解析;
12 | - 支持在手机、平板等局域网设备上实时查看结果;
13 | - 可以按需配置代理、中转 API、自定义提示词等高级选项。
14 |
15 | 整个应用基于 Python + Flask,只要能启动一个 Python 程序,就可以完全离线地掌握它的运行方式。
16 |
17 | ---
18 |
19 | ## 2. 准备清单
20 |
21 | - 一台可以联网的 Windows、macOS 或 Linux 电脑;
22 | - 至少一个可用的模型 API Key(推荐准备 2~3 个,方便切换):
23 | - OpenAI、Anthropic、DeepSeek、阿里灵积(Qwen)、Google、Mathpix 等任一即可;
24 | - 约 2 GB 可用硬盘空间;
25 | - 基本的文本编辑器(Windows 自带记事本即可,推荐使用 VS Code / Notepad++ 等更易读的工具)。
26 |
27 | > **提示**:Snap-Solver 不依赖显卡或 GPU,普通轻薄本即可顺利运行。
28 |
29 | ---
30 |
31 | ## 3. 第一次打开命令行
32 |
33 | Snap-Solver 需要在命令行里执行几条简单的指令。命令行是一个黑色(或白色)窗口,通过输入文字来让电脑完成任务。不同系统打开方式略有区别:
34 |
35 | ### 3.1 Windows
36 | 1. 同时按下键盘 `Win` 键(左下角带 Windows 徽标的键)+ `S`,输入 `cmd` 或 `terminal`。
37 | 2. 选择 **命令提示符(Command Prompt)** 或 **Windows Terminal**,回车打开。
38 | 3. 复制命令时,可在窗口上点击右键 → 「粘贴」,或使用快捷键 `Ctrl + V`。
39 | 4. 想切换到某个文件夹(例如 `D:\Snap-Solver`),输入:
40 | ```powershell
41 | cd /d D:\Snap-Solver
42 | ```
43 | 5. 查看当前文件夹内的内容:
44 | ```powershell
45 | dir
46 | ```
47 |
48 | ### 3.2 macOS
49 | 1. 同时按下 `Command + Space` 呼出 Spotlight,输入 `Terminal` 并回车。
50 | 2. 在终端中,复制粘贴使用常规快捷键 `Command + C` / `Command + V`。
51 | 3. 切换到下载好的项目目录(例如在「下载」文件夹内):
52 | ```bash
53 | cd ~/Downloads/Snap-Solver
54 | ```
55 | 4. 查看当前文件夹内容:
56 | ```bash
57 | ls
58 | ```
59 |
60 | ### 3.3 Linux(Ubuntu 示例)
61 | 1. 同时按 `Ctrl + Alt + T` 打开终端。
62 | 2. 切换到项目目录:
63 | ```bash
64 | cd ~/Snap-Solver
65 | ```
66 | 3. 查看内容:
67 | ```bash
68 | ls
69 | ```
70 |
71 | > **常用命令速记**
72 | > - `cd 路径`:进入某个文件夹(路径中有空格请用双引号包住,例如 `cd "C:\My Folder"`)。
73 | > - `dir`(Windows)/`ls`(macOS、Linux):查看当前文件夹下的文件。
74 | > - 键盘方向键 ↑ 可以快速调出上一条命令,避免重复输入。
75 |
76 | ---
77 |
78 | ## 4. 安装 Python 3
79 |
80 | Snap-Solver 基于 Python 3.9+,推荐使用 3.10 或 3.11 版本。
81 |
82 | ### 4.1 Windows
83 | 1. 打开浏览器访问:https://www.python.org/downloads/
84 | 2. 点击最新的稳定版(例如 `Python 3.11.x`)的 **Download Windows installer (64-bit)**。
85 | 3. 双击下载的安装包,记得在第一步勾选 **Add Python to PATH**。
86 | 4. 按提示完成安装。
87 | 5. 打开命令行窗口,输入:
88 | ```powershell
89 | python --version
90 | pip --version
91 | ```
92 | 若能看到版本号(如 `Python 3.11.7`),说明安装成功。
93 |
94 | ### 4.2 macOS
95 | 1. 访问 https://www.python.org/downloads/mac-osx/ 下载 `macOS 64-bit universal2 installer`。
96 | 2. 双击 `.pkg` 文件按提示安装。
97 | 3. 打开终端输入:
98 | ```bash
99 | python3 --version
100 | pip3 --version
101 | ```
102 | 如果输出版本号,表示安装完成。后续命令中的 `python`、`pip` 均可替换为 `python3`、`pip3`。
103 |
104 | ### 4.3 Linux(Ubuntu 示例)
105 | ```bash
106 | sudo apt update
107 | sudo apt install python3 python3-venv python3-pip -y
108 | python3 --version
109 | pip3 --version
110 | ```
111 |
112 | ---
113 |
114 | ## 5. (可选)安装 Git
115 |
116 | Git 方便后续更新项目,也可以用来下载代码。
117 | - Windows:https://git-scm.com/download/win
118 | - macOS:在终端输入 `xcode-select --install` 或从 https://git-scm.com/download/mac 获取
119 | - Linux:`sudo apt install git -y`
120 |
121 | 如果暂时不想安装 Git,也可以稍后直接下载压缩包。
122 |
123 | ---
124 |
125 | ## 6. 获取 Snap-Solver 项目代码
126 |
127 | 任选其一:
128 | 1. **使用 Git 克隆(推荐)**
129 | ```bash
130 | git clone https://github.com/Zippland/Snap-Solver.git
131 | cd Snap-Solver
132 | ```
133 | 2. **下载压缩包**
134 | - 打开项目主页:https://github.com/Zippland/Snap-Solver
135 | - 点击右侧 `Release` → `Source code (zip)`
136 | - 解压缩后,将文件夹重命名为 `Snap-Solver` 并记住路径
137 |
138 | 后续步骤默认你已经位于项目根目录(包含 `app.py`、`requirements.txt` 的那个文件夹)。如果忘记位置,可再次查看文件夹并使用 `cd` 进入。
139 |
140 | ---
141 |
142 | ## 7. 创建虚拟环境并安装依赖
143 |
144 | 虚拟环境可以把项目依赖和系统环境隔离,避免冲突。
145 |
146 | ### 7.1 创建虚拟环境
147 |
148 | - **Windows PowerShell**
149 | ```powershell
150 | python -m venv .venv
151 | .\.venv\Scripts\Activate
152 | ```
153 | - **macOS / Linux**
154 | ```bash
155 | python3 -m venv .venv
156 | source .venv/bin/activate
157 | ```
158 |
159 | 激活成功后,命令行前面会出现 `(.venv)` 前缀。若你关闭了命令行窗口,需要重新进入项目目录并再次执行激活命令。
160 |
161 | ### 7.2 安装依赖
162 |
163 | ```bash
164 | pip install --upgrade pip
165 | pip install -r requirements.txt
166 | ```
167 |
168 | 常见依赖(Flask、PyAutoGUI、Pillow 等)都会自动安装。首次安装可能用时 1~5 分钟,请耐心等待。
169 |
170 | > **如果安装失败**:请检查网络、切换镜像源或参考文末常见问题。
171 |
172 | ---
173 |
174 | ## 8. 首次启动与访问
175 |
176 | 1. 保证虚拟环境处于激活状态。
177 | 2. 在项目根目录执行:
178 | ```bash
179 | python app.py
180 | ```
181 | 3. 终端中会看到 Flask/SocketIO 的日志,最后出现 `Running on http://127.0.0.1:5000` 表示启动成功。
4 | 若需要在手机/平板访问,请在**同一局域网下**输入 `http://<电脑IP>:5000`。电脑 IP 可在终端日志中看到,例如 `http://192.168.1.8:5000`(具体地址以启动日志为准,网络环境或重启后可能变化)。
183 |
184 | > **暂停服务**:在终端按 `Ctrl + C` 即可停止运行。再次启动时,只需重新激活虚拟环境并执行 `python app.py`。
185 |
186 | ---
187 |
188 | ## 9. 配置 API 密钥与基础设置
189 |
190 | 启动网页后,点击右上角的齿轮图标进入「设置」面板,建议先完成以下几项:
191 |
192 | ### 9.1 填写模型 API Key
193 |
194 | - 根据你手上的 Key,将对应值填入设置页面的输入框中;
195 | - 常用字段:
196 | - `OpenaiApiKey`:OpenAI 模型(如 GPT-4o、o3-mini)
197 | - `AnthropicApiKey`:Claude 系列
198 | - `DeepseekApiKey`:DeepSeek
199 | - `AlibabaApiKey`:通义千问 / Qwen / QVQ
200 | - `GoogleApiKey`:Gemini 系列
201 | - `MathpixAppId` & `MathpixAppKey`:用于高精度公式识别
202 | - 点击保存后,信息会写入 `config/api_keys.json` 方便下次启动直接读取。
203 |
204 | ### 9.2 设置代理与中转(可选)
205 |
206 | - 若你需要走代理或企业中转通道,可在设置面板中开启代理选项;
207 | - 对应的 JSON 文件是 `config/proxy_api.json`,可直接编辑来指定各模型的自定义 `base_url`;
208 | - 修改后需重启应用才能生效。
209 |
210 | ### 9.3 如何确认 VPN/代理端口
211 |
212 | 很多加速器或 VPN 客户端会在本地启动一个「系统代理」服务(常见端口如 `7890`、`1080` 等)。具体端口位置通常可以通过以下途径找到:
213 | - 打开 VPN 客户端的设置页面,寻找「本地监听端口」「HTTP(S) 代理」「SOCKS 代理」等字样;
214 | - Windows 用户也可以在「设置 → 网络和 Internet → 代理」里查看「使用代理服务器」的地址和端口;
215 | - macOS 用户可在「系统设置 → 网络 → Wi-Fi(或以太网)→ 详情 → 代理」里查看勾选的服务和端口;
216 | - 高级用户可以在命令行里运行 `netstat -ano | findstr 127.0.0.1`(Windows)或 `lsof -iTCP -sTCP:LISTEN | grep 127.0.0.1`(macOS/Linux)确认本地监听端口。
217 |
218 | 拿到端口后,在 Snap-Solver 的代理设置中填入对应的地址(通常是 `127.0.0.1:<端口>`),就能让模型请求走 VPN。不同工具的界面名称可能略有差异,重点是找出「本地监听地址 + 端口号」这一对信息。
219 |
220 | ---
221 |
222 | ## 10. 获取常用 API Key(详细教程)
223 |
224 | API Key 相当于你在各大模型平台上的「门票」。不同平台的获取流程不同,以下列出了最常用的几个来源。申请过程中务必保护好个人隐私与账号安全,切勿向他人泄露密钥。
225 |
226 | ### 10.1 OpenAI(GPT-4o / o3-mini 等)
227 | 1. 打开 https://platform.openai.com/ 并使用邮箱或第三方账号注册 / 登录。
228 | 2. 首次使用需完成实名和支付方式绑定(可选择信用卡或预付费余额)。
229 | 3. 登录后点击右上角头像 → `View API keys`。
230 | 4. 点击 `Create new secret key`,复制生成的密钥(形如 `sk-...`)。
231 | 5. 将该密钥粘贴到 Snap-Solver 的 `OpenaiApiKey` 输入框,并妥善保存。
232 |
233 | ### 10.2 Anthropic(Claude 系列)
234 | 1. 打开 https://console.anthropic.com/ 并注册账号。
235 | 2. 按提示完成手机号验证和支付方式绑定(部分国家需排队开通)。
236 | 3. 登录后进入 `API Keys` 页面,点击 `Create Key`。
237 | 4. 复制生成的密钥(形如 `sk-ant-...`),粘贴到 Snap-Solver 的 `AnthropicApiKey`。
238 |
239 | ### 10.3 DeepSeek
240 | 1. 访问 https://platform.deepseek.com/ 并注册登录。
241 | 2. 如果需要人民币支付,可在「账号设置」绑定支付宝;海外用户可使用信用卡。
242 | 3. 进入 `API Keys`,点击 `新建密钥`。
243 | 4. 复制生成的密钥(形如 `sk-xxx`),填入 `DeepseekApiKey`。
244 |
245 | ### 10.4 阿里云通义千问 / Qwen / QVQ
246 | 1. 打开 https://dashscope.console.aliyun.com/ 并使用阿里云账号登录。
247 | 2. 进入「API Key 管理」页面,点击 `创建 API Key`。
248 | 3. 复制密钥(形如 `sk-yourkey`)填入 `AlibabaApiKey`。
249 | 4. 如需开通收费模型,请在「计费与配额」中先完成实名认证并开通付费策略。
250 |
251 | ### 10.5 Google Gemini
252 | 1. 前往 https://ai.google.dev/ 并登录 Google 账号。
253 | 2. 点击右上角 `Get API key`。
254 | 3. 选择或创建项目,生成新的 API Key。
255 | 4. 将密钥填入 `GoogleApiKey`。
256 |
257 | ### 10.6 Mathpix(高精度公式识别)
258 | 1. 访问 https://dashboard.mathpix.com/ 注册账号。
259 | 2. 完成邮箱验证后,在侧边栏找到 `API Keys`。
260 | 3. 创建新的 App,复制 `App ID` 和 `App Key`。
261 | 4. 分别填入 Snap-Solver 的 `MathpixAppId` 与 `MathpixAppKey` 字段。
262 |
263 | > **安全小贴士**
264 | > - API Key 和密码一样重要,泄露后他人可能代你调用接口、消耗额度。
265 | > - 建议为不同用途创建多个密钥,定期检查和撤销不用的密钥。
266 | > - 如果平台支持额度上限、IP 白名单等功能,可以酌情启用以降低风险。
267 |
268 | ---
269 |
270 | ## 11. 完成第一次题目解析
271 |
272 | 1. 确认右上角的「连接状态」显示为绿色的「已连接」。
273 | 2. 点击顶部的「开始截图」,按提示框拖拽需要识别的题目区域。
274 | 3. 截图完成后,预览区会显示图片,并出现「发送至 AI」或「提取文本」按钮:
275 | - **发送至 AI**:直接让所选模型解析图像;
276 | - **提取文本**:先做 OCR,把文字复制出来,再发送给模型。
277 | 4. 在右侧的「分析结果」面板可以查看:
278 | - AI 的思考过程(可折叠);
279 | - 最终解答、代码或步骤;
280 | - 中间日志与计时。
281 | 5. 若需要改用其他模型,重新打开设置面板即可实时切换。
282 |
283 | > **小技巧**:长按或双击分析结果中的文本,可快速复制粘贴;终端会实时输出请求日志,方便排查问题。
284 |
285 | ---
286 |
287 | ## 12. 常见问题速查
288 |
289 | - **`python` 命令找不到**:Windows 上请确认安装 Python 时勾选了“Add python.exe to PATH”,然后重新打开终端(必要时重启电脑)再试,或改用 `py` 命令;macOS/Linux 请尝试 `python3`。
290 | - **`pip install` 超时**:可以临时使用清华源 `pip install -i https://pypi.tuna.tsinghua.edu.cn/simple -r requirements.txt`。
291 | - **启动后网页打不开**:确认终端没有报错;检查防火墙、端口占用,或尝试 `http://127.0.0.1:5000`。
292 | - **截图没反应**:Windows/macOS 需要授权「辅助功能 / 截屏」权限给 Python;macOS 在「系统设置 - 隐私与安全」中勾选 `python` 或终端应用。
293 | - **模型报 401/403**:检查 API Key 是否正确、账号余额是否充足,必要时在设置里更换模型或填入自定义域名。
294 | - **手机访问失败**:确保手机和电脑在同一个 Wi-Fi 下,且电脑未开启 VPN 导致局域网隔离。
295 |
296 | ---
297 |
298 | ## 13. 进一步探索
299 |
300 | - `config/models.json`:自定义展示在下拉框的模型列表,包含模型名称、供应商、能力标签等,可按需添加。
301 | - `config/prompts.json`:定义默认 prompt,可根据学科优化。
302 | - 更新项目:如果是 Git 克隆,执行 `git pull`;压缩包用户可重新下载覆盖。
303 |
304 | 完成以上步骤后,你已经具备运行和日常使用 Snap-Solver 的全部基础。如果你有新的需求或遇到无法解决的问题,可以先查看 README 或在 Issues 中搜索 / 提问。祝你学习顺利,刷题提效!
305 |
--------------------------------------------------------------------------------
/models/openai.py:
--------------------------------------------------------------------------------
1 | import os
2 | from typing import Generator, Dict, Optional
3 | from openai import OpenAI
4 | from .base import BaseModel
5 |
class OpenAIModel(BaseModel):
    """OpenAI Chat Completions wrapper with streaming text and image analysis.

    Both public entry points (`analyze_text`, `analyze_image`) delegate to a
    single private streaming helper so proxy handling, client construction,
    and stream buffering live in one place instead of being duplicated.
    """

    # Sentence-ending characters that trigger a streaming flush to the UI.
    _SENTENCE_ENDINGS = ('.', '!', '?', '。', '!', '?', '\n')

    def __init__(self, api_key, temperature=0.7, system_prompt=None, language=None, api_base_url=None, model_identifier=None):
        super().__init__(api_key, temperature, system_prompt, language)
        # Custom API base URL; None means the official OpenAI endpoint.
        self.api_base_url = api_base_url
        # Allow the caller (factory/config) to pin an explicit model id.
        self.model_identifier = model_identifier or "gpt-4o-2024-11-20"

    def get_default_system_prompt(self) -> str:
        """Return the fallback system prompt used when none is configured."""
        return """You are an expert at analyzing questions and providing detailed solutions. When presented with an image of a question:
1. First read and understand the question carefully
2. Break down the key components of the question
3. Provide a clear, step-by-step solution
4. If relevant, explain any concepts or theories involved
5. If there are multiple approaches, explain the most efficient one first"""

    def get_model_identifier(self) -> str:
        """Return the configured model identifier."""
        return self.model_identifier

    def _create_client(self) -> OpenAI:
        """Build an OpenAI client, honoring the custom base URL when set."""
        if self.api_base_url:
            return OpenAI(api_key=self.api_key, base_url=self.api_base_url)
        return OpenAI(api_key=self.api_key)

    def _stream_chat(self, messages: list, proxies: dict = None) -> Generator[dict, None, None]:
        """Run one streaming chat completion and yield progress dicts.

        Args:
            messages: Chat messages in OpenAI format.
            proxies: Optional {'http': ..., 'https': ...} proxy mapping,
                applied via environment variables for the duration of the
                call and always restored afterwards.

        Yields:
            {"status": "streaming", "content": <accumulated text>} as the
            stream progresses, then {"status": "completed", "content": ...}.
        """
        # Save the current proxy environment so it can be restored.
        original_env = {
            'http_proxy': os.environ.get('http_proxy'),
            'https_proxy': os.environ.get('https_proxy')
        }

        try:
            if proxies:
                if 'http' in proxies:
                    os.environ['http_proxy'] = proxies['http']
                if 'https' in proxies:
                    os.environ['https_proxy'] = proxies['https']

            client = self._create_client()

            response = client.chat.completions.create(
                model=self.get_model_identifier(),
                messages=messages,
                temperature=self.temperature,
                stream=True,
                max_tokens=4000
            )

            # Accumulate deltas into one buffer; the UI always receives the
            # full text so far rather than individual fragments.
            response_buffer = ""

            for chunk in response:
                if hasattr(chunk.choices[0].delta, 'content'):
                    content = chunk.choices[0].delta.content
                    if content:
                        response_buffer += content
                        # Throttle: only emit on long chunks or when the chunk
                        # ends at a sentence boundary.
                        if len(content) >= 10 or content.endswith(self._SENTENCE_ENDINGS):
                            yield {
                                "status": "streaming",
                                "content": response_buffer
                            }

            # Make sure the final, complete text is delivered.
            if response_buffer:
                yield {
                    "status": "streaming",
                    "content": response_buffer
                }

            yield {
                "status": "completed",
                "content": response_buffer
            }

        finally:
            # Restore the original proxy environment state.
            for key, value in original_env.items():
                if value is None:
                    os.environ.pop(key, None)
                else:
                    os.environ[key] = value

    def analyze_text(self, text: str, proxies: dict = None) -> Generator[dict, None, None]:
        """Stream the model's response for plain-text analysis."""
        try:
            yield {"status": "started", "content": ""}

            messages = [
                {
                    "role": "system",
                    "content": self.system_prompt
                },
                {
                    "role": "user",
                    "content": text
                }
            ]

            yield from self._stream_chat(messages, proxies)

        except Exception as e:
            yield {
                "status": "error",
                "error": str(e)
            }

    def analyze_image(self, image_data: str, proxies: dict = None) -> Generator[dict, None, None]:
        """Stream the model's response for image analysis.

        Args:
            image_data: Base64-encoded JPEG image payload (without data-URI
                header); it is embedded as a data URI in the request.
            proxies: Optional proxy mapping, see `_stream_chat`.
        """
        try:
            yield {"status": "started", "content": ""}

            messages = [
                {
                    "role": "system",
                    "content": self.system_prompt
                },
                {
                    "role": "user",
                    "content": [
                        {
                            "type": "image_url",
                            "image_url": {
                                "url": f"data:image/jpeg;base64,{image_data}"
                            }
                        },
                        {
                            "type": "text",
                            "text": "Please analyze this image and provide a detailed solution."
                        }
                    ]
                }
            ]

            yield from self._stream_chat(messages, proxies)

        except Exception as e:
            yield {
                "status": "error",
                "error": str(e)
            }
220 |
--------------------------------------------------------------------------------
/models/google.py:
--------------------------------------------------------------------------------
1 | import json
2 | import os
3 | import base64
4 | from typing import Generator, Dict, Any, Optional, List
5 | import google.generativeai as genai
6 | from .base import BaseModel
7 |
class GoogleModel(BaseModel):
    """
    Google Gemini API model implementation.

    Supports Gemini models for both text and image input, streaming the
    response back as progress dicts. Text and image analysis share one
    private streaming helper so proxy handling, generation config, and
    buffering are defined in a single place.
    """

    # Sentence-ending characters that trigger a streaming flush to the UI.
    _SENTENCE_ENDINGS = ('.', '!', '?', '。', '!', '?', '\n')

    def __init__(self, api_key: str, temperature: float = 0.7, system_prompt: str = None, language: str = None, model_name: str = None, api_base_url: str = None):
        """
        Initialize the Google model.

        Args:
            api_key: Google API key
            temperature: Sampling temperature
            system_prompt: System prompt text
            language: Preferred response language
            model_name: Explicit model name; defaults to get_model_identifier()
            api_base_url: Custom API endpoint base URL (relay/proxy), optional
        """
        super().__init__(api_key, temperature, system_prompt, language)
        self.model_name = model_name or self.get_model_identifier()
        self.max_tokens = 8192  # default maximum output tokens
        self.api_base_url = api_base_url

        # Configure the Google API client.
        if api_base_url:
            # Custom relay endpoint: strip the trailing slash to avoid
            # duplicated path segments, then expose it via an env var.
            # NOTE(review): google-generativeai does not document a
            # GOOGLE_AI_API_ENDPOINT variable — confirm the relay setup
            # actually honors it.
            clean_base_url = api_base_url.rstrip('/')
            os.environ['GOOGLE_AI_API_ENDPOINT'] = clean_base_url
        else:
            # Default endpoint: drop any stale custom-endpoint env var.
            os.environ.pop('GOOGLE_AI_API_ENDPOINT', None)
        genai.configure(api_key=api_key)

    def get_default_system_prompt(self) -> str:
        """Return the fallback system prompt used when none is configured."""
        return """You are an expert at analyzing questions and providing detailed solutions. When presented with an image of a question:
1. First read and understand the question carefully
2. Break down the key components of the question
3. Provide a clear, step-by-step solution
4. If relevant, explain any concepts or theories involved
5. If there are multiple approaches, explain the most efficient one first"""

    def get_model_identifier(self) -> str:
        """Return the default model identifier."""
        return "gemini-2.0-flash"  # default: a model with a free quota

    def _stream_generate(self, prompt_parts: list, proxies: dict = None) -> Generator[dict, None, None]:
        """Run one streaming generate_content call and yield progress dicts.

        Args:
            prompt_parts: Prompt parts (strings and/or image-part dicts).
            proxies: Optional {'http': ..., 'https': ...} proxy mapping,
                applied via environment variables and restored afterwards.

        Yields:
            {"status": "streaming", "content": <accumulated text>} as the
            stream progresses, then {"status": "completed", "content": ...}.
        """
        # Save/override proxy environment only when proxies are supplied.
        original_proxies = None
        if proxies:
            original_proxies = {
                'http_proxy': os.environ.get('http_proxy'),
                'https_proxy': os.environ.get('https_proxy')
            }
            if 'http' in proxies:
                os.environ['http_proxy'] = proxies['http']
            if 'https' in proxies:
                os.environ['https_proxy'] = proxies['https']

        try:
            model = genai.GenerativeModel(self.model_name)

            generation_config = {
                'temperature': self.temperature,
                # getattr keeps the historical fallback if max_tokens is unset.
                'max_output_tokens': getattr(self, 'max_tokens', 8192),
                'top_p': 0.95,
                'top_k': 64,
            }

            response_buffer = ""

            response = model.generate_content(
                prompt_parts,
                generation_config=generation_config,
                stream=True
            )

            for chunk in response:
                if not chunk.text:
                    continue

                response_buffer += chunk.text

                # Throttle UI updates: flush on long chunks or sentence ends.
                if len(chunk.text) >= 10 or chunk.text.endswith(self._SENTENCE_ENDINGS):
                    yield {
                        "status": "streaming",
                        "content": response_buffer
                    }

            # Always deliver the final, complete text.
            yield {
                "status": "completed",
                "content": response_buffer
            }

        finally:
            # Restore the original proxy settings, if we changed them.
            if original_proxies:
                for key, value in original_proxies.items():
                    if value is None:
                        os.environ.pop(key, None)
                    else:
                        os.environ[key] = value

    def analyze_text(self, text: str, proxies: dict = None) -> Generator[dict, None, None]:
        """Stream a text-analysis response."""
        try:
            yield {"status": "started"}

            prompt_parts = []

            # System prompt first, if configured.
            if self.system_prompt:
                prompt_parts.append(self.system_prompt)

            # User query, optionally wrapped with a language instruction.
            if self.language and self.language != 'auto':
                prompt_parts.append(f"请使用{self.language}回答以下问题: {text}")
            else:
                prompt_parts.append(text)

            yield from self._stream_generate(prompt_parts, proxies)

        except Exception as e:
            yield {
                "status": "error",
                "error": f"Gemini API错误: {str(e)}"
            }

    def analyze_image(self, image_data: str, proxies: dict = None) -> Generator[dict, None, None]:
        """Analyze an image and stream the response.

        Args:
            image_data: Base64-encoded JPEG payload, with or without a
                data-URI header.
            proxies: Optional proxy mapping, see `_stream_generate`.
        """
        try:
            yield {"status": "started"}

            prompt_parts = []

            # System prompt first, if configured.
            if self.system_prompt:
                prompt_parts.append(self.system_prompt)

            # Default analysis instruction, with optional language wrapper.
            if self.language and self.language != 'auto':
                prompt_parts.append(f"请使用{self.language}分析这张图片并提供详细解答。")
            else:
                prompt_parts.append("请分析这张图片并提供详细解答。")

            # Strip a data-URI header, if present, to get the raw base64.
            if image_data.startswith('data:image'):
                image_data = image_data.split(',', 1)[1]

            # genai expects an inline-data part for images.
            prompt_parts.append({
                "mime_type": "image/jpeg",
                "data": base64.b64decode(image_data)
            })

            yield from self._stream_generate(prompt_parts, proxies)

        except Exception as e:
            yield {
                "status": "error",
                "error": f"Gemini图像分析错误: {str(e)}"
            }
--------------------------------------------------------------------------------
/static/js/ui.js:
--------------------------------------------------------------------------------
/**
 * Manages global UI chrome: the settings panel, the dark/light theme,
 * and transient toast notifications.
 */
class UIManager {
    constructor() {
        // Defer initialization until the DOM has loaded.
        if (document.readyState === 'loading') {
            document.addEventListener('DOMContentLoaded', () => this.init());
        } else {
            // DOM is already loaded — initialize immediately.
            this.init();
        }
    }

    // Look up the UI elements, verify the critical ones exist, then wire
    // up theming and event listeners.
    init() {
        console.log('初始化UI管理器...');
        // UI elements
        this.settingsPanel = document.getElementById('settingsPanel');
        this.settingsToggle = document.getElementById('settingsToggle');
        this.closeSettings = document.getElementById('closeSettings');
        this.themeToggle = document.getElementById('themeToggle');
        this.toastContainer = document.getElementById('toastContainer');

        // Verify that the critical elements exist.
        if (!this.themeToggle) {
            console.error('主题切换按钮未找到!');
            return;
        }

        if (!this.toastContainer) {
            console.error('Toast容器未找到!');
            // Try to create the toast container ourselves.
            this.toastContainer = this.createToastContainer();
        }

        // Check for preferred color scheme
        this.checkPreferredColorScheme();

        // Initialize event listeners
        this.setupEventListeners();

        console.log('UI管理器初始化完成');
    }

    // Create (and attach to <body>) a toast container when the page lacks one.
    createToastContainer() {
        console.log('创建Toast容器');
        const container = document.createElement('div');
        container.id = 'toastContainer';
        container.className = 'toast-container';
        document.body.appendChild(container);
        return container;
    }

    // Apply the saved theme from localStorage, falling back to the OS
    // color-scheme preference, and keep tracking OS-level changes.
    checkPreferredColorScheme() {
        const savedTheme = localStorage.getItem('theme');
        const prefersDark = window.matchMedia('(prefers-color-scheme: dark)');

        if (savedTheme) {
            this.setTheme(savedTheme === 'dark');
        } else {
            this.setTheme(prefersDark.matches);
        }

        prefersDark.addEventListener('change', (e) => this.setTheme(e.matches));
    }

    // Switch between dark and light theme and persist the choice.
    setTheme(isDark) {
        try {
            document.documentElement.setAttribute('data-theme', isDark ? 'dark' : 'light');
            if (this.themeToggle) {
                // NOTE(review): the toggle's icon markup appears to have been
                // stripped from this copy (empty template literal) — confirm
                // against the original asset.
                this.themeToggle.innerHTML = ``;
            }
            localStorage.setItem('theme', isDark ? 'dark' : 'light');
            console.log(`主题已切换为: ${isDark ? '深色' : '浅色'}`);
        } catch (error) {
            console.error('设置主题时出错:', error);
        }
    }

    /**
     * Show a toast message.
     * @param {string} message The message text to display
     * @param {string} type One of 'success', 'error', 'info', 'warning'
     * @param {number} displayTime Display duration in ms; -1 keeps the toast
     *                             visible until manually closed
     * @returns {HTMLElement} The created toast element (for later removal)
     */
    showToast(message, type = 'success', displayTime) {
        try {
            if (!message) {
                console.warn('尝试显示空消息');
                message = '';
            }

            if (!this.toastContainer) {
                console.error('Toast容器不存在,正在创建新容器');
                this.toastContainer = this.createToastContainer();
                if (!this.toastContainer) {
                    console.error('无法创建Toast容器,放弃显示消息');
                    return null;
                }
            }

            // Deduplicate: if a toast with identical text is already visible,
            // return it instead of stacking a new one.
            try {
                const existingToasts = this.toastContainer.querySelectorAll('.toast');
                for (const existingToast of existingToasts) {
                    try {
                        const spanElement = existingToast.querySelector('span');
                        if (spanElement && spanElement.textContent === message) {
                            // Same message already shown — do not create another.
                            return existingToast;
                        }
                    } catch (e) {
                        console.warn('检查现有toast时出错:', e);
                        // Keep checking the remaining toast elements.
                    }
                }
            } catch (e) {
                console.warn('查询现有toast时出错:', e);
                // Fall through and create a new toast anyway.
            }

            const toast = document.createElement('div');
            toast.className = `toast ${type}`;

            // Pick an icon name based on the message type.
            let icon = 'check-circle';
            if (type === 'error') icon = 'exclamation-circle';
            else if (type === 'warning') icon = 'exclamation-triangle';
            else if (type === 'info') icon = 'info-circle';

            // NOTE(review): the icon and <span> wrapper markup appears to be
            // stripped from this copy — the dedup check above queries a
            // <span>; confirm against the original template.
            toast.innerHTML = `
                
                ${message}
            `;

            // A persistent toast (displayTime === -1) gets a close button.
            if (displayTime === -1) {
                const closeButton = document.createElement('button');
                closeButton.className = 'toast-close';
                closeButton.innerHTML = '';
                closeButton.addEventListener('click', (e) => {
                    this.hideToast(toast);
                });
                toast.appendChild(closeButton);
                toast.classList.add('persistent');
            }

            this.toastContainer.appendChild(toast);

            // Auto-hide timing varies with the toast type and message length.
            if (displayTime !== -1) {
                // No explicit duration: derive a default from type/content.
                if (displayTime === undefined) {
                    displayTime = message === '截图成功' ? 1500 : 
                                 type === 'error' ? 5000 : 
                                 message.length > 50 ? 4000 : 3000;
                }

                setTimeout(() => {
                    this.hideToast(toast);
                }, displayTime);
            }

            return toast;
        } catch (error) {
            console.error('显示Toast消息时出错:', error);
            return null;
        }
    }

    /**
     * Hide a toast message (fade out, then remove from the DOM).
     * @param {HTMLElement} toast The toast element to hide
     */
    hideToast(toast) {
        if (!toast || !toast.parentNode) return;

        toast.style.opacity = '0';
        setTimeout(() => {
            if (toast.parentNode) {
                toast.remove();
            }
        }, 300);
    }

    // Close every floating panel (currently just the settings panel).
    closeAllPanels() {
        if (this.settingsPanel) {
            this.settingsPanel.classList.remove('active');
        }
    }

    // Hide the settings panel.
    hideSettingsPanel() {
        if (this.settingsPanel) {
            this.settingsPanel.classList.remove('active');
        }
    }

    // Toggle the settings panel's visibility.
    toggleSettingsPanel() {
        if (this.settingsPanel) {
            this.settingsPanel.classList.toggle('active');
        }
    }

    // Close the settings panel.
    closeSettingsPanel() {
        if (this.settingsPanel) {
            this.settingsPanel.classList.remove('active');
        }
    }

    // Close the settings panel when a click lands outside of it
    // (and not on its toggle button).
    checkClickOutsideSettings(e) {
        if (this.settingsPanel && 
            !this.settingsPanel.contains(e.target) && 
            !e.target.closest('#settingsToggle')) {
            this.settingsPanel.classList.remove('active');
        }
    }

    setupEventListeners() {
        // Make sure every required element exists before wiring listeners.
        if (!this.settingsToggle || !this.closeSettings || !this.themeToggle) {
            console.error('无法设置事件监听器:一些UI元素未找到');
            return;
        }

        // Settings panel
        this.settingsToggle.addEventListener('click', () => {
            this.closeAllPanels();
            this.settingsPanel.classList.toggle('active');
        });

        this.closeSettings.addEventListener('click', () => {
            this.settingsPanel.classList.remove('active');
        });

        // Theme toggle
        this.themeToggle.addEventListener('click', () => {
            try {
                const currentTheme = document.documentElement.getAttribute('data-theme');
                console.log('当前主题:', currentTheme);
                this.setTheme(currentTheme !== 'dark');
            } catch (error) {
                console.error('切换主题时出错:', error);
            }
        });

        // Close panels when clicking outside
        document.addEventListener('click', (e) => {
            this.checkClickOutsideSettings(e);
        });
    }
}
251 |
// Expose the class globally so other scripts can reference it.
window.UIManager = UIManager;

// Instantiate the manager exactly once, as soon as the DOM is ready.
const bootstrapUiManager = () => {
    window.uiManager = new UIManager();
};

if (document.readyState === 'loading') {
    document.addEventListener('DOMContentLoaded', bootstrapUiManager);
} else {
    bootstrapUiManager();
}

// Global helper: show a toast via the shared manager instance.
window.showToast = (message, type) => {
    if (!window.uiManager) {
        console.error('UI管理器未初始化,无法显示Toast');
        return null;
    }
    return window.uiManager.showToast(message, type);
};

// Global helper: close all floating panels via the shared manager instance.
window.closeAllPanels = () => {
    if (!window.uiManager) {
        console.error('UI管理器未初始化,无法关闭面板');
        return;
    }
    window.uiManager.closeAllPanels();
};
281 |
--------------------------------------------------------------------------------
/models/factory.py:
--------------------------------------------------------------------------------
1 | from typing import Dict, Type, Any, Optional
2 | import json
3 | import os
4 | import importlib
5 | from .base import BaseModel
6 | from .mathpix import MathpixModel # MathpixModel需要直接导入,因为它是特殊OCR工具
7 | from .baidu_ocr import BaiduOCRModel # 百度OCR也是特殊OCR工具,直接导入
8 |
class ModelFactory:
    """Registry/factory that maps model identifiers to model classes.

    Model and provider metadata is loaded from ``config/models.json``;
    two OCR-only tools (Mathpix, Baidu OCR) are always registered
    directly, independent of the config file.
    """
    # Model metadata keyed by model id: class, provider, capability flags,
    # and display info.
    _models: Dict[str, Dict[str, Any]] = {}
    # Provider id -> model class, built from the config's provider section.
    _class_map: Dict[str, Type[BaseModel]] = {}

    @classmethod
    def initialize(cls):
        """Load model information from the config file.

        Falls back to ``_initialize_defaults`` if the config cannot be
        loaded or parsed.
        """
        try:
            config_path = os.path.join(os.path.dirname(__file__), '..', 'config', 'models.json')
            with open(config_path, 'r', encoding='utf-8') as f:
                config = json.load(f)

            # Load provider info and build the provider -> class map.
            providers = config.get('providers', {})
            for provider_id, provider_info in providers.items():
                class_name = provider_info.get('class_name')
                if class_name:
                    # Dynamically import the model class from this package;
                    # the module name is the lowercased provider id.
                    module = importlib.import_module(f'.{provider_id.lower()}', package=__package__)
                    cls._class_map[provider_id] = getattr(module, class_name)

            # Load per-model metadata; models whose provider failed to map
            # are silently skipped.
            for model_id, model_info in config.get('models', {}).items():
                provider_id = model_info.get('provider')
                if provider_id and provider_id in cls._class_map:
                    cls._models[model_id] = {
                        'class': cls._class_map[provider_id],
                        'provider_id': provider_id,
                        'is_multimodal': model_info.get('supportsMultimodal', False),
                        'is_reasoning': model_info.get('isReasoning', False),
                        'display_name': model_info.get('name', model_id),
                        'description': model_info.get('description', '')
                    }

            # Register the special OCR-only tools (not defined in the config).

            # Mathpix OCR tool
            cls._models['mathpix'] = {
                'class': MathpixModel,
                'is_multimodal': True,
                'is_reasoning': False,
                'display_name': 'Mathpix OCR',
                'description': '数学公式识别工具,适用于复杂数学内容',
                'is_ocr_only': True
            }

            # Baidu OCR tool
            cls._models['baidu-ocr'] = {
                'class': BaiduOCRModel,
                'is_multimodal': True,
                'is_reasoning': False,
                'display_name': '百度OCR',
                'description': '通用文字识别工具,支持中文识别',
                'is_ocr_only': True
            }

            print(f"已从配置加载 {len(cls._models)} 个模型")
        except Exception as e:
            print(f"加载模型配置失败: {str(e)}")
            cls._initialize_defaults()

    @classmethod
    def _initialize_defaults(cls):
        """Initialize fallback models (used when config loading fails)."""
        print("配置加载失败,使用空模型列表")

        # No hard-coded model definitions — start from an empty registry.
        cls._models = {}

        # Register the special OCR tools as a fallback when config fails.
        try:
            # Import and register the Mathpix OCR tool.
            from .mathpix import MathpixModel

            cls._models['mathpix'] = {
                'class': MathpixModel,
                'is_multimodal': True,
                'is_reasoning': False,
                'display_name': 'Mathpix OCR',
                'description': '数学公式识别工具,适用于复杂数学内容',
                'is_ocr_only': True
            }
        except Exception as e:
            print(f"无法加载Mathpix OCR工具: {str(e)}")

        # Register the Baidu OCR tool.
        try:
            from .baidu_ocr import BaiduOCRModel

            cls._models['baidu-ocr'] = {
                'class': BaiduOCRModel,
                'is_multimodal': True,
                'is_reasoning': False,
                'display_name': '百度OCR',
                'description': '通用文字识别工具,支持中文识别',
                'is_ocr_only': True
            }
        except Exception as e:
            print(f"无法加载百度OCR工具: {str(e)}")

    @classmethod
    def create_model(cls, model_name: str, api_key: str, temperature: float = 0.7,
                    system_prompt: Optional[str] = None, language: Optional[str] = None, api_base_url: Optional[str] = None) -> BaseModel:
        """
        Create a model instance based on the model name.

        Args:
            model_name: The identifier for the model
            api_key: The API key for the model service
            temperature: The temperature to use for generation
            system_prompt: The system prompt to use
            language: The preferred language for responses
            api_base_url: The base URL for API requests

        Returns:
            A model instance

        Raises:
            ValueError: If ``model_name`` is not registered.
        """
        if model_name not in cls._models:
            raise ValueError(f"Unknown model: {model_name}")

        model_info = cls._models[model_name]
        model_class = model_info['class']
        provider_id = model_info.get('provider_id')

        # NOTE: branch order matters — the explicit provider_id check runs
        # before the name-substring heuristics below.
        if provider_id == 'openai':
            return model_class(
                api_key=api_key,
                temperature=temperature,
                system_prompt=system_prompt,
                language=language,
                api_base_url=api_base_url,
                model_identifier=model_name
            )

        # DeepSeek models need the actual model name passed through.
        if 'deepseek' in model_name.lower():
            return model_class(
                api_key=api_key,
                temperature=temperature,
                system_prompt=system_prompt,
                language=language,
                model_name=model_name,
                api_base_url=api_base_url
            )
        # Alibaba models also need the model name (no custom base URL).
        elif 'qwen' in model_name.lower() or 'qvq' in model_name.lower() or 'alibaba' in model_name.lower():
            return model_class(
                api_key=api_key,
                temperature=temperature,
                system_prompt=system_prompt,
                language=language,
                model_name=model_name
            )
        # Google models also need the model name.
        elif 'gemini' in model_name.lower() or 'google' in model_name.lower():
            return model_class(
                api_key=api_key,
                temperature=temperature,
                system_prompt=system_prompt,
                language=language,
                model_name=model_name,
                api_base_url=api_base_url
            )
        # Doubao models also need the model name.
        elif 'doubao' in model_name.lower():
            return model_class(
                api_key=api_key,
                temperature=temperature,
                system_prompt=system_prompt,
                language=language,
                model_name=model_name,
                api_base_url=api_base_url
            )
        # Mathpix does not take a language parameter.
        elif model_name == 'mathpix':
            return model_class(
                api_key=api_key,
                temperature=temperature,
                system_prompt=system_prompt
            )
        # Baidu OCR takes api_key (supports the API_KEY:SECRET_KEY format).
        elif model_name == 'baidu-ocr':
            return model_class(
                api_key=api_key,
                temperature=temperature,
                system_prompt=system_prompt
            )
        # Anthropic models need the model_identifier parameter.
        elif 'claude' in model_name.lower() or 'anthropic' in model_name.lower():
            return model_class(
                api_key=api_key,
                temperature=temperature,
                system_prompt=system_prompt,
                language=language,
                api_base_url=api_base_url,
                model_identifier=model_name
            )
        else:
            # All other models receive only the standard parameters.
            return model_class(
                api_key=api_key,
                temperature=temperature,
                system_prompt=system_prompt,
                language=language,
                api_base_url=api_base_url
            )

    @classmethod
    def get_available_models(cls) -> list[Dict[str, Any]]:
        """Return a list of available models with their information."""
        models_info = []
        for model_id, info in cls._models.items():
            # Skip OCR-only tool models.
            if info.get('is_ocr_only', False):
                continue
                
            models_info.append({
                'id': model_id,
                'display_name': info.get('display_name', model_id),
                'description': info.get('description', ''),
                'is_multimodal': info.get('is_multimodal', False),
                'is_reasoning': info.get('is_reasoning', False)
            })
        return models_info

    @classmethod
    def get_model_ids(cls) -> list[str]:
        """Return a list of available model identifiers"""
        return [model_id for model_id in cls._models.keys() 
                if not cls._models[model_id].get('is_ocr_only', False)]

    @classmethod
    def is_multimodal(cls, model_name: str) -> bool:
        """Return True if the model accepts multimodal (image) input."""
        return cls._models.get(model_name, {}).get('is_multimodal', False)

    @classmethod
    def is_reasoning(cls, model_name: str) -> bool:
        """Return True if the model exposes a reasoning process."""
        return cls._models.get(model_name, {}).get('is_reasoning', False)

    @classmethod
    def get_model_display_name(cls, model_name: str) -> str:
        """Return the model's human-readable display name."""
        return cls._models.get(model_name, {}).get('display_name', model_name)

    @classmethod
    def register_model(cls, model_name: str, model_class: Type[BaseModel], 
                       is_multimodal: bool = False, is_reasoning: bool = False,
                       display_name: Optional[str] = None, description: Optional[str] = None) -> None:
        """
        Register a new model type with the factory.

        Args:
            model_name: The identifier for the model
            model_class: The model class to register
            is_multimodal: Whether the model supports image input
            is_reasoning: Whether the model provides reasoning process
            display_name: Human-readable name for the model
            description: Description of the model
        """
        cls._models[model_name] = {
            'class': model_class,
            'is_multimodal': is_multimodal,
            'is_reasoning': is_reasoning,
            'display_name': display_name or model_name,
            'description': description or ''
        }
278 |
--------------------------------------------------------------------------------
/LICENSE:
--------------------------------------------------------------------------------
1 | Apache License
2 | Version 2.0, January 2004
3 | http://www.apache.org/licenses/
4 |
5 | TERMS AND CONDITIONS FOR USE, REPRODUCTION, AND DISTRIBUTION
6 |
7 | 1. Definitions.
8 |
9 | "License" shall mean the terms and conditions for use, reproduction,
10 | and distribution as defined by Sections 1 through 9 of this document.
11 |
12 | "Licensor" shall mean the copyright owner or entity authorized by
13 | the copyright owner that is granting the License.
14 |
15 | "Legal Entity" shall mean the union of the acting entity and all
16 | other entities that control, are controlled by, or are under common
17 | control with that entity. For the purposes of this definition,
18 | "control" means (i) the power, direct or indirect, to cause the
19 | direction or management of such entity, whether by contract or
20 | otherwise, or (ii) ownership of fifty percent (50%) or more of the
21 | outstanding shares, or (iii) beneficial ownership of such entity.
22 |
23 | "You" (or "Your") shall mean an individual or Legal Entity
24 | exercising permissions granted by this License.
25 |
26 | "Source" form shall mean the preferred form for making modifications,
27 | including but not limited to software source code, documentation
28 | source, and configuration files.
29 |
30 | "Object" form shall mean any form resulting from mechanical
31 | transformation or translation of a Source form, including but
32 | not limited to compiled object code, generated documentation,
33 | and conversions to other media types.
34 |
35 | "Work" shall mean the work of authorship, whether in Source or
36 | Object form, made available under the License, as indicated by a
37 | copyright notice that is included in or attached to the work
38 | (an example is provided in the Appendix below).
39 |
40 | "Derivative Works" shall mean any work, whether in Source or Object
41 | form, that is based on (or derived from) the Work and for which the
42 | editorial revisions, annotations, elaborations, or other modifications
43 | represent, as a whole, an original work of authorship. For the purposes
44 | of this License, Derivative Works shall not include works that remain
45 | separable from, or merely link (or bind by name) to the interfaces of,
46 | the Work and Derivative Works thereof.
47 |
48 | "Contribution" shall mean any work of authorship, including
49 | the original version of the Work and any modifications or additions
50 | to that Work or Derivative Works thereof, that is intentionally
51 | submitted to Licensor for inclusion in the Work by the copyright owner
52 | or by an individual or Legal Entity authorized to submit on behalf of
53 | the copyright owner. For the purposes of this definition, "submitted"
54 | means any form of electronic, verbal, or written communication sent
55 | to the Licensor or its representatives, including but not limited to
56 | communication on electronic mailing lists, source code control systems,
57 | and issue tracking systems that are managed by, or on behalf of, the
58 | Licensor for the purpose of discussing and improving the Work, but
59 | excluding communication that is conspicuously marked or otherwise
60 | designated in writing by the copyright owner as "Not a Contribution."
61 |
62 | "Contributor" shall mean Licensor and any individual or Legal Entity
63 | on behalf of whom a Contribution has been received by Licensor and
64 | subsequently incorporated within the Work.
65 |
66 | 2. Grant of Copyright License. Subject to the terms and conditions of
67 | this License, each Contributor hereby grants to You a perpetual,
68 | worldwide, non-exclusive, no-charge, royalty-free, irrevocable
69 | copyright license to reproduce, prepare Derivative Works of,
70 | publicly display, publicly perform, sublicense, and distribute the
71 | Work and such Derivative Works in Source or Object form.
72 |
73 | 3. Grant of Patent License. Subject to the terms and conditions of
74 | this License, each Contributor hereby grants to You a perpetual,
75 | worldwide, non-exclusive, no-charge, royalty-free, irrevocable
76 | (except as stated in this section) patent license to make, have made,
77 | use, offer to sell, sell, import, and otherwise transfer the Work,
78 | where such license applies only to those patent claims licensable
79 | by such Contributor that are necessarily infringed by their
80 | Contribution(s) alone or by combination of their Contribution(s)
81 | with the Work to which such Contribution(s) was submitted. If You
82 | institute patent litigation against any entity (including a
83 | cross-claim or counterclaim in a lawsuit) alleging that the Work
84 | or a Contribution incorporated within the Work constitutes direct
85 | or contributory patent infringement, then any patent licenses
86 | granted to You under this License for that Work shall terminate
87 | as of the date such litigation is filed.
88 |
89 | 4. Redistribution. You may reproduce and distribute copies of the
90 | Work or Derivative Works thereof in any medium, with or without
91 | modifications, and in Source or Object form, provided that You
92 | meet the following conditions:
93 |
94 | (a) You must give any other recipients of the Work or
95 | Derivative Works a copy of this License; and
96 |
97 | (b) You must cause any modified files to carry prominent notices
98 | stating that You changed the files; and
99 |
100 | (c) You must retain, in the Source form of any Derivative Works
101 | that You distribute, all copyright, patent, trademark, and
102 | attribution notices from the Source form of the Work,
103 | excluding those notices that do not pertain to any part of
104 | the Derivative Works; and
105 |
106 | (d) If the Work includes a "NOTICE" text file as part of its
107 | distribution, then any Derivative Works that You distribute must
108 | include a readable copy of the attribution notices contained
109 | within such NOTICE file, excluding those notices that do not
110 | pertain to any part of the Derivative Works, in at least one
111 | of the following places: within a NOTICE text file distributed
112 | as part of the Derivative Works; within the Source form or
113 | documentation, if provided along with the Derivative Works; or,
114 | within a display generated by the Derivative Works, if and
115 | wherever such third-party notices normally appear. The contents
116 | of the NOTICE file are for informational purposes only and
117 | do not modify the License. You may add Your own attribution
118 | notices within Derivative Works that You distribute, alongside
119 | or as an addendum to the NOTICE text from the Work, provided
120 | that such additional attribution notices cannot be construed
121 | as modifying the License.
122 |
123 | You may add Your own copyright statement to Your modifications and
124 | may provide additional or different license terms and conditions
125 | for use, reproduction, or distribution of Your modifications, or
126 | for any such Derivative Works as a whole, provided Your use,
127 | reproduction, and distribution of the Work otherwise complies with
128 | the conditions stated in this License.
129 |
130 | 5. Submission of Contributions. Unless You explicitly state otherwise,
131 | any Contribution intentionally submitted for inclusion in the Work
132 | by You to the Licensor shall be under the terms and conditions of
133 | this License, without any additional terms or conditions.
134 | Notwithstanding the above, nothing herein shall supersede or modify
135 | the terms of any separate license agreement you may have executed
136 | with Licensor regarding such Contributions.
137 |
138 | 6. Trademarks. This License does not grant permission to use the trade
139 | names, trademarks, service marks, or product names of the Licensor,
140 | except as required for reasonable and customary use in describing the
141 | origin of the Work and reproducing the content of the NOTICE file.
142 |
143 | 7. Disclaimer of Warranty. Unless required by applicable law or
144 | agreed to in writing, Licensor provides the Work (and each
145 | Contributor provides its Contributions) on an "AS IS" BASIS,
146 | WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or
147 | implied, including, without limitation, any warranties or conditions
148 | of TITLE, NON-INFRINGEMENT, MERCHANTABILITY, or FITNESS FOR A
149 | PARTICULAR PURPOSE. You are solely responsible for determining the
150 | appropriateness of using or redistributing the Work and assume any
151 | risks associated with Your exercise of permissions under this License.
152 |
153 | 8. Limitation of Liability. In no event and under no legal theory,
154 | whether in tort (including negligence), contract, or otherwise,
155 | unless required by applicable law (such as deliberate and grossly
156 | negligent acts) or agreed to in writing, shall any Contributor be
157 | liable to You for damages, including any direct, indirect, special,
158 | incidental, or consequential damages of any character arising as a
159 | result of this License or out of the use or inability to use the
160 | Work (including but not limited to damages for loss of goodwill,
161 | work stoppage, computer failure or malfunction, or any and all
162 | other commercial damages or losses), even if such Contributor
163 | has been advised of the possibility of such damages.
164 |
165 | 9. Accepting Warranty or Additional Liability. While redistributing
166 | the Work or Derivative Works thereof, You may choose to offer,
167 | and charge a fee for, acceptance of support, warranty, indemnity,
168 | or other liability obligations and/or rights consistent with this
169 | License. However, in accepting such obligations, You may act only
170 | on Your own behalf and on Your sole responsibility, not on behalf
171 | of any other Contributor, and only if You agree to indemnify,
172 | defend, and hold each Contributor harmless for any liability
173 | incurred by, or claims asserted against, such Contributor by reason
174 | of your accepting any such warranty or additional liability.
175 |
176 | END OF TERMS AND CONDITIONS
177 |
178 | APPENDIX: How to apply the Apache License to your work.
179 |
180 | To apply the Apache License to your work, attach the following
181 | boilerplate notice, with the fields enclosed by brackets "[]"
182 | replaced with your own identifying information. (Don't include
183 | the brackets!) The text should be enclosed in the appropriate
184 | comment syntax for the file format. We also recommend that a
185 | file or class name and description of purpose be included on the
186 | same "printed page" as the copyright notice for easier
187 | identification within third-party archives.
188 |
189 |    Copyright 2025 Zippland
190 |
191 | Licensed under the Apache License, Version 2.0 (the "License");
192 | you may not use this file except in compliance with the License.
193 | You may obtain a copy of the License at
194 |
195 | http://www.apache.org/licenses/LICENSE-2.0
196 |
197 | Unless required by applicable law or agreed to in writing, software
198 | distributed under the License is distributed on an "AS IS" BASIS,
199 | WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
200 | See the License for the specific language governing permissions and
201 | limitations under the License.
202 |
--------------------------------------------------------------------------------
/models/alibaba.py:
--------------------------------------------------------------------------------
1 | import os
2 | from typing import Generator, Dict, Optional, Any
3 | from openai import OpenAI
4 | from .base import BaseModel
5 |
class AlibabaModel(BaseModel):
    """Alibaba DashScope model wrapper (QVQ and Qwen-VL families).

    Talks to DashScope through its OpenAI-compatible endpoint and streams
    both the reasoning process (QVQ models only) and the final answer.
    """

    # Default OpenAI-compatible DashScope endpoint, used unless the caller
    # supplies a custom api_base_url.
    _DEFAULT_BASE_URL = "https://dashscope.aliyuncs.com/compatible-mode/v1"

    def __init__(self, api_key: str, temperature: float = 0.7, system_prompt: str = None, language: str = None, model_name: str = None, api_base_url: str = None):
        """Create the model.

        Args:
            api_key: DashScope API key.
            temperature: Sampling temperature.
            system_prompt: Optional system prompt override.
            language: Preferred answer language.
            model_name: Concrete model name; defaults to QVQ-Max.
            api_base_url: Optional custom API endpoint.
        """
        # Fall back to the default model only when no name was provided.
        self.model_name = model_name if model_name else "QVQ-Max-2025-03-25"
        print(f"初始化阿里巴巴模型: {self.model_name}")
        # model_name must be set before super().__init__ so that
        # get_default_system_prompt (invoked by the base class) can use it.
        super().__init__(api_key, temperature, system_prompt, language)
        self.api_base_url = api_base_url  # optional custom API base URL

    def get_default_system_prompt(self) -> str:
        """Return a default system prompt appropriate for the model family."""
        # Qwen-VL models get a vision-assistant prompt.
        if self.model_name and "qwen-vl" in self.model_name:
            return """你是通义千问VL视觉语言助手,擅长图像理解、文字识别、内容分析和创作。请根据用户提供的图像:
1. 仔细阅读并理解问题
2. 分析问题的关键组成部分
3. 提供清晰的、逐步的解决方案
4. 如果相关,解释涉及的概念或理论
5. 如果有多种解决方法,先解释最高效的方法"""
        else:
            # QVQ models keep the generic problem-solving prompt.
            return """你是一位专业的问题分析与解答助手。当看到一个问题图片时,请:
1. 仔细阅读并理解问题
2. 分析问题的关键组成部分
3. 提供清晰的、逐步的解决方案
4. 如果相关,解释涉及的概念或理论
5. 如果有多种解决方法,先解释最高效的方法"""

    def get_model_identifier(self) -> str:
        """Map the configured model name to the DashScope API identifier."""
        # Exact mapping from configured IDs to DashScope identifiers.
        model_mapping = {
            "QVQ-Max-2025-03-25": "qvq-max",
            "qwen-vl-max-latest": "qwen-vl-max",  # corrected API identifier
        }

        print(f"模型名称: {self.model_name}")

        # Exact match first.
        model_id = model_mapping.get(self.model_name)
        if model_id:
            print(f"从映射表中获取到模型标识符: {model_id}")
            return model_id

        # No exact match: infer from well-known name fragments.
        if self.model_name and "qwen-vl" in self.model_name.lower():
            if "max" in self.model_name.lower():
                print(f"识别为qwen-vl-max模型")
                return "qwen-vl-max"
            elif "plus" in self.model_name.lower():
                print(f"识别为qwen-vl-plus模型")
                return "qwen-vl-plus"
            elif "lite" in self.model_name.lower():
                print(f"识别为qwen-vl-lite模型")
                return "qwen-vl-lite"
            print(f"默认使用qwen-vl-max模型")
            return "qwen-vl-max"  # strongest variant by default

        # QVQ / alibaba keywords fall back to qvq-max.
        if self.model_name and ("qvq" in self.model_name.lower() or "alibaba" in self.model_name.lower()):
            print(f"识别为QVQ模型,使用qvq-max")
            return "qvq-max"

        # Last-resort default.
        print(f"警告:无法识别的模型名称 {self.model_name},默认使用qvq-max")
        return "qvq-max"

    def _stream_chat(self, messages: list, proxies: dict, context: str) -> Generator[dict, None, None]:
        """Shared streaming core for text and image analysis.

        Temporarily applies proxy environment variables, streams the chat
        completion, and yields reasoning/streaming/completed status dicts.
        `context` only prefixes log output so both callers keep their
        original log lines.
        """
        # Save the original proxy environment so it can be restored.
        original_env = {
            'http_proxy': os.environ.get('http_proxy'),
            'https_proxy': os.environ.get('https_proxy')
        }

        try:
            if proxies:
                if 'http' in proxies:
                    os.environ['http_proxy'] = proxies['http']
                if 'https' in proxies:
                    os.environ['https_proxy'] = proxies['https']

            # OpenAI-compatible client for DashScope. Bug fix: honor a
            # caller-supplied api_base_url (it was stored but never used).
            client = OpenAI(
                api_key=self.api_key,
                base_url=self.api_base_url or self._DEFAULT_BASE_URL
            )

            response = client.chat.completions.create(
                model=self.get_model_identifier(),
                messages=messages,
                temperature=self.temperature,
                stream=True,
                max_tokens=self._get_max_tokens()
            )

            reasoning_content = ""
            answer_content = ""
            is_answering = False

            # Qwen-VL models do not emit reasoning_content.
            is_qwen_vl = "qwen-vl" in self.get_model_identifier().lower()
            print(f"{context}使用模型标识符: {self.get_model_identifier()}, 是否为千问VL模型: {is_qwen_vl}")

            for chunk in response:
                if not chunk.choices:
                    continue

                delta = chunk.choices[0].delta

                # Reasoning stream (QVQ models only).
                if not is_qwen_vl and hasattr(delta, 'reasoning_content') and delta.reasoning_content is not None:
                    reasoning_content += delta.reasoning_content
                    yield {
                        "status": "reasoning",
                        "content": reasoning_content,
                        "is_reasoning": True
                    }
                # Bug fix: truthiness check also guards against None content
                # (the original `delta.content != ""` crashed on None chunks).
                elif delta.content:
                    # Detect the switch from reasoning to answering.
                    if not is_answering and not is_qwen_vl:
                        is_answering = True
                        # Emit the complete reasoning once answering starts.
                        if reasoning_content:
                            yield {
                                "status": "reasoning_complete",
                                "content": reasoning_content,
                                "is_reasoning": True
                            }

                    answer_content += delta.content
                    yield {
                        "status": "streaming",
                        "content": answer_content
                    }

            # Always finish with the full accumulated answer.
            if answer_content:
                yield {
                    "status": "completed",
                    "content": answer_content
                }

        finally:
            # Restore the original proxy environment.
            for key, value in original_env.items():
                if value is None:
                    if key in os.environ:
                        del os.environ[key]
                else:
                    os.environ[key] = value

    def analyze_text(self, text: str, proxies: dict = None) -> Generator[dict, None, None]:
        """Stream the model's response for a plain-text question."""
        try:
            yield {"status": "started", "content": ""}

            messages = [
                {
                    "role": "system",
                    "content": [{"type": "text", "text": self.system_prompt}]
                },
                {
                    "role": "user",
                    "content": [{"type": "text", "text": text}]
                }
            ]

            yield from self._stream_chat(messages, proxies, "分析文本")

        except Exception as e:
            yield {
                "status": "error",
                "error": str(e)
            }

    def analyze_image(self, image_data: str, proxies: dict = None) -> Generator[dict, None, None]:
        """Stream the model's response for a base64-encoded image."""
        try:
            yield {"status": "started", "content": ""}

            # Use the configured system prompt as-is (no language directive
            # is appended automatically).
            messages = [
                {
                    "role": "system",
                    "content": [{"type": "text", "text": self.system_prompt}]
                },
                {
                    "role": "user",
                    "content": [
                        {
                            "type": "image_url",
                            "image_url": {
                                "url": f"data:image/jpeg;base64,{image_data}"
                            }
                        },
                        {
                            "type": "text",
                            "text": "请分析这个图片并提供详细的解答。"
                        }
                    ]
                }
            ]

            yield from self._stream_chat(messages, proxies, "分析图像")

        except Exception as e:
            yield {
                "status": "error",
                "error": str(e)
            }

    def _get_max_tokens(self) -> int:
        """Return an appropriate max_tokens value for the current model."""
        # Qwen-VL caps out at 2048; leave some headroom.
        if "qwen-vl" in self.get_model_identifier():
            return 2000
        # QVQ and other models.
        return self.max_tokens if hasattr(self, 'max_tokens') and self.max_tokens else 4000
--------------------------------------------------------------------------------
/models/doubao.py:
--------------------------------------------------------------------------------
1 | import json
2 | import os
3 | import base64
4 | from typing import Generator, Dict, Any, Optional
5 | import requests
6 | from .base import BaseModel
7 |
class DoubaoModel(BaseModel):
    """
    ByteDance Doubao (Ark) API model implementation.

    Streams chat completions for both text and image input through the
    OpenAI-style /chat/completions endpoint.
    """

    def __init__(self, api_key: str, temperature: float = 0.7, system_prompt: str = None, language: str = None, model_name: str = None, api_base_url: str = None):
        """
        Initialize the Doubao model.

        Args:
            api_key: Doubao API key
            temperature: Sampling temperature
            system_prompt: System prompt
            language: Preferred answer language
            model_name: Concrete model name; defaults to get_model_identifier()
            api_base_url: Custom API endpoint; defaults to the Ark endpoint
        """
        super().__init__(api_key, temperature, system_prompt, language)
        self.model_name = model_name or self.get_model_identifier()
        self.base_url = api_base_url or "https://ark.cn-beijing.volces.com/api/v3"
        self.max_tokens = 4096  # default maximum output tokens
        self.reasoning_config = None  # optional thinking-mode config (cf. AnthropicModel)

    def get_default_system_prompt(self) -> str:
        """Default system prompt used when none is supplied."""
        return """你是一个专业的问题分析专家。当看到问题图片时:
1. 仔细阅读并理解问题
2. 分解问题的关键组成部分
3. 提供清晰的分步解决方案
4. 如果相关,解释涉及的概念或理论
5. 如果有多种方法,优先解释最有效的方法"""

    def get_model_identifier(self) -> str:
        """Return the default model identifier."""
        return "doubao-seed-1-6-250615"  # Doubao-Seed-1.6

    def get_actual_model_name(self) -> str:
        """Map the configured model name to the identifier used by the API."""
        model_mapping = {
            "doubao-seed-1-6-250615": "doubao-seed-1-6-250615"
        }
        return model_mapping.get(self.model_name, "doubao-seed-1-6-250615")

    @staticmethod
    def _push_proxy_env(proxies: dict):
        """Apply proxy env vars; return the previous values for restoration.

        Returns None when no proxies were given, so the caller can skip
        restoration entirely.
        """
        if not proxies:
            return None
        original = {
            'http_proxy': os.environ.get('http_proxy'),
            'https_proxy': os.environ.get('https_proxy')
        }
        if 'http' in proxies:
            os.environ['http_proxy'] = proxies['http']
        if 'https' in proxies:
            os.environ['https_proxy'] = proxies['https']
        return original

    @staticmethod
    def _pop_proxy_env(original) -> None:
        """Restore proxy env vars saved by _push_proxy_env."""
        if not original:
            return
        for key, value in original.items():
            if value is None:
                os.environ.pop(key, None)
            else:
                os.environ[key] = value

    def _thinking_config(self) -> dict:
        """Build the 'thinking' request field from reasoning_config."""
        if getattr(self, 'reasoning_config', None):
            return {"type": self.reasoning_config.get('thinking_mode', "auto")}
        return {"type": "auto"}  # API default

    def _stream_completion(self, messages: list, proxies: dict) -> Generator[dict, None, None]:
        """POST a streaming chat completion; yield streaming/completed dicts.

        Raises:
            Exception: On any non-200 HTTP response, carrying the body text.
        """
        headers = {
            "Authorization": f"Bearer {self.api_key}",
            "Content-Type": "application/json"
        }
        data = {
            "model": self.get_actual_model_name(),
            "messages": messages,
            "thinking": self._thinking_config(),
            "temperature": self.temperature,
            "max_tokens": self.max_tokens,
            "stream": True
        }

        response = requests.post(
            f"{self.base_url}/chat/completions",
            headers=headers,
            json=data,
            stream=True,
            proxies=proxies if proxies else None,
            timeout=60
        )
        # Explicit check replaces the dead raise_for_status() that followed it.
        if response.status_code != 200:
            raise Exception(f"HTTP {response.status_code}: {response.text}")

        response_buffer = ""

        # Parse the SSE stream: each payload line is 'data: {json}'.
        for raw_line in response.iter_lines():
            if not raw_line:
                continue
            line = raw_line.decode('utf-8')
            if not line.startswith('data: '):
                continue
            payload = line[6:]  # strip the 'data: ' prefix
            if payload == '[DONE]':
                break
            try:
                chunk = json.loads(payload)
            except json.JSONDecodeError:
                continue  # skip malformed fragments
            choices = chunk.get('choices', [])
            if choices:
                content = choices[0].get('delta', {}).get('content', '')
                if content:
                    response_buffer += content
                    yield {
                        "status": "streaming",
                        "content": response_buffer
                    }

        # Always emit the final, complete content.
        yield {
            "status": "completed",
            "content": response_buffer
        }

    def analyze_text(self, text: str, proxies: dict = None) -> Generator[dict, None, None]:
        """Stream the model's response for a text question."""
        try:
            yield {"status": "started"}

            original_proxies = self._push_proxy_env(proxies)
            try:
                messages = []

                # Optional system prompt.
                if self.system_prompt:
                    messages.append({
                        "role": "system",
                        "content": self.system_prompt
                    })

                # User query, optionally wrapped with a language directive.
                user_content = text
                if self.language and self.language != 'auto':
                    user_content = f"请使用{self.language}回答以下问题: {text}"
                messages.append({
                    "role": "user",
                    "content": user_content
                })

                yield from self._stream_completion(messages, proxies)
            finally:
                self._pop_proxy_env(original_proxies)

        except Exception as e:
            yield {
                "status": "error",
                "error": f"豆包API错误: {str(e)}"
            }

    def analyze_image(self, image_data: str, proxies: dict = None) -> Generator[dict, None, None]:
        """Analyze a base64-encoded image and stream the response."""
        try:
            yield {"status": "started"}

            original_proxies = self._push_proxy_env(proxies)
            try:
                # If a data URI was passed, keep only the base64 body.
                if image_data.startswith('data:image'):
                    image_data = image_data.split(',', 1)[1]

                # Sniff the image format from base64 magic numbers.
                image_format = "jpeg"  # default
                if image_data.startswith('/9j/'):  # JPEG magic number in base64
                    image_format = "jpeg"
                elif image_data.startswith('iVBORw0KGgo'):  # PNG magic number in base64
                    image_format = "png"

                messages = []

                # Optional system prompt.
                if self.system_prompt:
                    messages.append({
                        "role": "system",
                        "content": self.system_prompt
                    })

                # Image message in the official Doubao example format.
                user_content = [
                    {
                        "type": "text",
                        "text": f"请使用{self.language}分析这张图片并提供详细解答。" if self.language and self.language != 'auto' else "请分析这张图片并提供详细解答?"
                    },
                    {
                        "type": "image_url",
                        "image_url": {
                            "url": f"data:image/{image_format};base64,{image_data}"
                        }
                    }
                ]
                messages.append({
                    "role": "user",
                    "content": user_content
                })

                yield from self._stream_completion(messages, proxies)
            finally:
                self._pop_proxy_env(original_proxies)

        except Exception as e:
            yield {
                "status": "error",
                "error": f"豆包图像分析错误: {str(e)}"
            }
355 |
--------------------------------------------------------------------------------
/models/mathpix.py:
--------------------------------------------------------------------------------
1 | from typing import Generator, Dict, Any
2 | import json
3 | import requests
4 | from .base import BaseModel
5 |
class MathpixModel(BaseModel):
    """
    Mathpix OCR model for processing images containing mathematical formulas,
    text, and tables.

    Unlike the chat-style models in this package, this class only performs OCR:
    it posts the image to the Mathpix v3/text endpoint and formats the
    structured JSON response into a readable string.
    """

    def __init__(self, api_key: str, temperature: float = 0.7, system_prompt: str = None):
        """
        Initialize the Mathpix model.

        Args:
            api_key: Mathpix API key in format "app_id:app_key"
            temperature: Not used for Mathpix but kept for BaseModel compatibility
            system_prompt: Not used for Mathpix but kept for BaseModel compatibility

        Raises:
            ValueError: If the API key format is invalid
        """
        # Pass only the arguments BaseModel needs; Mathpix has no language option.
        super().__init__(api_key, temperature, system_prompt)
        try:
            self.app_id, self.app_key = api_key.split(':')
        except ValueError:
            raise ValueError("Mathpix API key must be in format 'app_id:app_key'")

        self.api_url = "https://api.mathpix.com/v3/text"
        self.headers = {
            "app_id": self.app_id,
            "app_key": self.app_key,
            "Content-Type": "application/json"
        }

        # Content type presets: each selects the output formats and OCR options
        # sent to the Mathpix API for a particular kind of source image.
        self.presets = {
            "math": {
                "formats": ["latex_normal", "latex_styled", "asciimath"],
                "data_options": {
                    "include_asciimath": True,
                    "include_latex": True,
                    "include_mathml": True
                },
                "ocr_options": {
                    "detect_formulas": True,
                    "enable_math_ocr": True,
                    "enable_handwritten": True,
                    "rm_spaces": True
                }
            },
            "text": {
                "formats": ["text"],
                "data_options": {
                    "include_latex": False,
                    "include_asciimath": False
                },
                "ocr_options": {
                    "enable_spell_check": True,
                    "enable_handwritten": True,
                    "rm_spaces": False
                }
            },
            "table": {
                "formats": ["text", "data"],
                "data_options": {
                    "include_latex": True
                },
                "ocr_options": {
                    "detect_tables": True,
                    "enable_spell_check": True,
                    "rm_spaces": True
                }
            },
            "full_text": {
                "formats": ["text"],
                "data_options": {
                    "include_latex": False,
                    "include_asciimath": False
                },
                "ocr_options": {
                    "enable_spell_check": True,
                    "enable_handwritten": True,
                    "rm_spaces": False,
                    "detect_paragraphs": True,
                    "enable_tables": False,
                    "enable_math_ocr": False
                }
            }
        }

        # Default to math preset
        self.current_preset = "math"

    def analyze_image(self, image_data: str, proxies: dict = None, content_type: str = None,
                     confidence_threshold: float = 0.8, max_retries: int = 3) -> Generator[dict, None, None]:
        """
        Analyze an image using Mathpix OCR API.

        Args:
            image_data: Base64 encoded image data
            proxies: Optional proxy configuration
            content_type: Type of content to analyze ('math', 'text', or 'table')
            confidence_threshold: Minimum confidence score to accept (0.0 to 1.0)
            max_retries: Maximum number of retry attempts for failed requests

        Yields:
            dict: Response chunks with status and content. "started" is always
                  the first event, optionally followed by a "warning" for a
                  low-confidence result, then "completed" or "error".
        """
        if content_type and content_type in self.presets:
            self.current_preset = content_type

        preset = self.presets[self.current_preset]

        try:
            # Bug fix: emit "started" before doing any network work so that
            # consumers always receive it as the first event (previously it was
            # yielded after the request finished, i.e. after any "warning").
            yield {
                "status": "started",
                "content": ""
            }

            # Prepare request payload
            payload = {
                "src": f"data:image/jpeg;base64,{image_data}",
                "formats": preset["formats"],
                "data_options": preset["data_options"],
                "ocr_options": preset["ocr_options"]
            }

            retry_count = 0
            result = None  # Guards against max_retries <= 0 leaving it unbound.

            while retry_count < max_retries:
                try:
                    # Send request to Mathpix API with timeout
                    response = requests.post(
                        self.api_url,
                        headers=self.headers,
                        json=payload,
                        proxies=proxies,
                        timeout=25  # 25 second timeout
                    )

                    # Rate limit exceeded: retry until attempts are exhausted.
                    if response.status_code == 429:
                        if retry_count < max_retries - 1:
                            retry_count += 1
                            continue
                        else:
                            raise requests.exceptions.RequestException("Rate limit exceeded")

                    response.raise_for_status()
                    result = response.json()

                    # Surface a low-confidence result as a warning rather than a failure.
                    if 'confidence' in result and result['confidence'] < confidence_threshold:
                        yield {
                            "status": "warning",
                            "content": f"Low confidence score: {result['confidence']:.2%}"
                        }

                    break  # Success, exit retry loop

                except (requests.exceptions.Timeout, requests.exceptions.ConnectionError):
                    if retry_count < max_retries - 1:
                        retry_count += 1
                        continue
                    raise

            if result is None:
                # Only reachable when max_retries <= 0 (the loop body never ran).
                raise requests.exceptions.RequestException("No response received from Mathpix API")

            # Yield the formatted response
            yield {
                "status": "completed",
                "content": self._format_response(result),
                "model": self.get_model_identifier()
            }

        except requests.exceptions.RequestException as e:
            yield {
                "status": "error",
                "error": f"Mathpix API error: {str(e)}"
            }
        except Exception as e:
            yield {
                "status": "error",
                "error": f"Error processing image: {str(e)}"
            }

    def analyze_text(self, text: str, proxies: dict = None) -> Generator[dict, None, None]:
        """
        Not implemented for Mathpix model as it only processes images.
        """
        yield {
            "status": "error",
            "error": "Text analysis is not supported by Mathpix model"
        }

    def get_default_system_prompt(self) -> str:
        """
        Not used for Mathpix model.
        """
        return ""

    def get_model_identifier(self) -> str:
        """
        Return the model identifier.
        """
        return "mathpix"

    def _format_response(self, result: Dict[str, Any]) -> str:
        """
        Format the Mathpix API response into a readable string.

        Args:
            result: Raw API response from Mathpix

        Returns:
            str: Formatted response string with all available formats
        """
        formatted_parts = []

        # Add confidence score if available
        if 'confidence' in result:
            formatted_parts.append(f"Confidence: {result['confidence']:.2%}\n")

        # Add text content
        if 'text' in result:
            formatted_parts.append("Text Content:")
            formatted_parts.append(result['text'])
            formatted_parts.append("")

        # Add LaTeX content
        if 'latex_normal' in result:
            formatted_parts.append("LaTeX (Normal):")
            formatted_parts.append(result['latex_normal'])
            formatted_parts.append("")

        if 'latex_styled' in result:
            formatted_parts.append("LaTeX (Styled):")
            formatted_parts.append(result['latex_styled'])
            formatted_parts.append("")

        # Add data formats (ASCII math, MathML)
        if 'data' in result and isinstance(result['data'], list):
            for item in result['data']:
                item_type = item.get('type', '')
                if item_type and 'value' in item:
                    formatted_parts.append(f"{item_type.upper()}:")
                    formatted_parts.append(item['value'])
                    formatted_parts.append("")

        # Add table data if present
        if 'tables' in result and result['tables']:
            formatted_parts.append("Tables Detected:")
            for i, table in enumerate(result['tables'], 1):
                formatted_parts.append(f"Table {i}:")
                if 'cells' in table:
                    # Format table as a grid
                    cells = table['cells']
                    if cells:
                        max_col = max(cell.get('col', 0) for cell in cells) + 1
                        max_row = max(cell.get('row', 0) for cell in cells) + 1
                        grid = [['' for _ in range(max_col)] for _ in range(max_row)]

                        for cell in cells:
                            row = cell.get('row', 0)
                            col = cell.get('col', 0)
                            text = cell.get('text', '')
                            grid[row][col] = text

                        # Format grid as table; pad each column to its widest entry.
                        col_widths = [max(len(str(grid[r][c])) for r in range(max_row)) for c in range(max_col)]
                        for row in grid:
                            row_str = ' | '.join(f"{str(cell):<{width}}" for cell, width in zip(row, col_widths))
                            formatted_parts.append(f"| {row_str} |")
                        formatted_parts.append("")

        # Add error message if present
        if 'error' in result:
            error_msg = result['error']
            if isinstance(error_msg, dict):
                error_msg = error_msg.get('message', str(error_msg))
            formatted_parts.append(f"Error: {error_msg}")

        return "\n".join(formatted_parts).strip()

    def extract_full_text(self, image_data: str, proxies: dict = None, max_retries: int = 3) -> str:
        """
        Extract all plain text from the image, ignoring formulas and tables.

        Args:
            image_data: Base64 encoded image data
            proxies: Optional proxy configuration
            max_retries: Maximum number of retry attempts for failed requests

        Returns:
            str: The full text extracted from the image, or an error message.
        """
        try:
            # Request payload tuned specifically for full-text extraction.
            payload = {
                "src": f"data:image/jpeg;base64,{image_data}",
                "formats": ["text"],
                "data_options": {
                    "include_latex": False,
                    "include_asciimath": False
                },
                "ocr_options": {
                    "enable_spell_check": True,
                    "enable_handwritten": True,
                    "rm_spaces": False,
                    "detect_paragraphs": True,
                    "enable_tables": False,
                    "enable_math_ocr": False
                }
            }

            # Initialize retry counter
            retry_count = 0

            while retry_count < max_retries:
                try:
                    # Send the request to the Mathpix API.
                    response = requests.post(
                        self.api_url,
                        headers=self.headers,
                        json=payload,
                        proxies=proxies,
                        timeout=30  # 30 second timeout
                    )

                    # Rate limit exceeded: retry until attempts are exhausted.
                    if response.status_code == 429:
                        if retry_count < max_retries - 1:
                            retry_count += 1
                            continue
                        else:
                            raise requests.exceptions.RequestException("超出API速率限制")

                    response.raise_for_status()
                    result = response.json()

                    # Return the text content directly.
                    if 'text' in result:
                        return result['text']
                    else:
                        return "未能提取到文本内容"

                except (requests.exceptions.Timeout, requests.exceptions.ConnectionError):
                    if retry_count < max_retries - 1:
                        retry_count += 1
                        continue
                    raise

            # Bug fix: previously fell through and returned None when
            # max_retries <= 0; always return a string as declared.
            return "未能提取到文本内容"

        except requests.exceptions.RequestException as e:
            return f"Mathpix API错误: {str(e)}"
        except Exception as e:
            return f"处理图像时出错: {str(e)}"
363 |
--------------------------------------------------------------------------------
/models/deepseek.py:
--------------------------------------------------------------------------------
1 | import json
2 | import requests
3 | import os
4 | from typing import Generator
5 | from openai import OpenAI
6 | from .base import BaseModel
7 |
class DeepSeekModel(BaseModel):
    """DeepSeek chat/reasoner model accessed through the OpenAI-compatible API.

    Streams responses; reasoning models additionally stream their chain of
    thought via the `reasoning_content` delta field.
    """

    def __init__(self, api_key: str, temperature: float = 0.7, system_prompt: str = None, language: str = None, model_name: str = "deepseek-reasoner", api_base_url: str = None):
        super().__init__(api_key, temperature, system_prompt, language)
        self.model_name = model_name
        # Optional API base URL override (e.g. a relay/proxy endpoint).
        self.api_base_url = api_base_url

    def get_default_system_prompt(self) -> str:
        return """You are an expert at analyzing questions and providing detailed solutions. When presented with an image of a question:
1. First read and understand the question carefully
2. Break down the key components of the question
3. Provide a clear, step-by-step solution
4. If relevant, explain any concepts or theories involved
5. If there are multiple approaches, explain the most efficient one first"""

    def get_model_identifier(self) -> str:
        """Map the configured model name onto the DeepSeek API identifier.

        Any name containing "reasoner" maps to the reasoning model, any name
        containing "chat" maps to the chat model, and unknown names fall back
        to deepseek-chat. (The original chain of redundant exact-match branches
        collapsed into two substring checks with identical behavior.)
        """
        name = self.model_name.lower()
        if "reasoner" in name:
            return "deepseek-reasoner"
        if "chat" in name:
            return "deepseek-chat"
        print(f"未知的DeepSeek模型名称: {self.model_name},使用deepseek-chat作为默认值")
        return "deepseek-chat"

    def _create_client(self) -> OpenAI:
        """Build the OpenAI-compatible client for the DeepSeek endpoint.

        Bug fix: the constructor accepted api_base_url but both analyze methods
        hard-coded the official endpoint; the override is now honored.
        """
        return OpenAI(
            api_key=self.api_key,
            base_url=self.api_base_url or "https://api.deepseek.com"
        )

    @staticmethod
    def _snapshot_proxy_env() -> dict:
        """Capture the current proxy environment variables for later restore."""
        return {
            'http_proxy': os.environ.get('http_proxy'),
            'https_proxy': os.environ.get('https_proxy')
        }

    @staticmethod
    def _apply_proxy_env(proxies: dict) -> None:
        """Export proxy settings as environment variables (picked up by the SDK)."""
        if proxies:
            if 'http' in proxies:
                os.environ['http_proxy'] = proxies['http']
            if 'https' in proxies:
                os.environ['https_proxy'] = proxies['https']

    @staticmethod
    def _restore_proxy_env(original_env: dict) -> None:
        """Restore the proxy environment captured by _snapshot_proxy_env."""
        for key, value in original_env.items():
            if value is None:
                os.environ.pop(key, None)
            else:
                os.environ[key] = value

    def analyze_text(self, text: str, proxies: dict = None) -> Generator[dict, None, None]:
        """Stream DeepSeek's response for text analysis.

        Yields dicts with status "started", "thinking"/"thinking_complete"
        (reasoner models), "streaming", then "completed" or "error".
        """
        try:
            # Initial status
            yield {"status": "started", "content": ""}

            # Save the original environment so proxy changes can be undone.
            original_env = self._snapshot_proxy_env()

            try:
                # Apply proxy settings (if any) through environment variables.
                self._apply_proxy_env(proxies)

                client = self._create_client()

                # Use the provided system prompt verbatim; no automatic
                # language instruction is appended.
                model_id = self.get_model_identifier()
                params = {
                    "model": model_id,
                    "messages": [
                        {
                            'role': 'system',
                            'content': self.system_prompt
                        },
                        {
                            'role': 'user',
                            'content': text
                        }
                    ],
                    "stream": True
                }

                # Reasoner models do not accept a temperature parameter.
                if not model_id.endswith('reasoner') and self.temperature is not None:
                    params["temperature"] = self.temperature

                print(f"调用DeepSeek API: {model_id}, 是否设置温度: {not model_id.endswith('reasoner')}, 温度值: {self.temperature if not model_id.endswith('reasoner') else 'N/A'}")

                response = client.chat.completions.create(**params)

                # Two buffers: one for the final answer, one for the chain of thought.
                response_buffer = ""
                thinking_buffer = ""

                for chunk in response:
                    # Debug: dump each raw chunk.
                    try:
                        print(f"DeepSeek API返回chunk: {chunk}")
                    except Exception:
                        print("无法打印chunk")

                    try:
                        # A chunk may carry thinking content and/or answer content.
                        delta = chunk.choices[0].delta

                        # Chain-of-thought content from reasoner models.
                        if hasattr(delta, 'reasoning_content') and delta.reasoning_content:
                            content = delta.reasoning_content
                            thinking_buffer += content

                            # Batch thinking updates to reduce UI churn.
                            if len(content) >= 20 or content.endswith(('.', '!', '?', '。', '!', '?', '\n')):
                                yield {
                                    "status": "thinking",
                                    "content": thinking_buffer
                                }

                        # Final answer content (present even for reasoner models).
                        if hasattr(delta, 'content') and delta.content:
                            content = delta.content
                            response_buffer += content
                            print(f"累积响应内容: '{content}', 当前buffer: '{response_buffer}'")

                            # Batch streaming updates to reduce UI churn.
                            if len(content) >= 10 or content.endswith(('.', '!', '?', '。', '!', '?', '\n')):
                                yield {
                                    "status": "streaming",
                                    "content": response_buffer
                                }

                        # End-of-message marker; do not promote thinking to the
                        # answer here, that would duplicate content.
                        if hasattr(chunk.choices[0], 'finish_reason') and chunk.choices[0].finish_reason:
                            print(f"生成结束,原因: {chunk.choices[0].finish_reason}")
                    except Exception as e:
                        print(f"解析响应chunk时出错: {str(e)}")
                        continue

                # Flush the final thinking buffer.
                if thinking_buffer:
                    yield {
                        "status": "thinking_complete",
                        "content": thinking_buffer
                    }

                # Emit the final answer; fall back to the thinking content, then
                # to a placeholder, if no answer content arrived.
                if response_buffer:
                    yield {
                        "status": "completed",
                        "content": response_buffer
                    }
                elif thinking_buffer:
                    yield {
                        "status": "completed",
                        "content": thinking_buffer
                    }
                else:
                    yield {
                        "status": "completed",
                        "content": "没有获取到内容"
                    }

            except Exception as e:
                error_msg = str(e)
                print(f"DeepSeek API调用出错: {error_msg}")

                # Translate common failures into actionable messages.
                if "invalid_api_key" in error_msg.lower():
                    error_msg = "DeepSeek API密钥无效,请检查您的API密钥"
                elif "rate_limit" in error_msg.lower():
                    error_msg = "DeepSeek API请求频率超限,请稍后再试"
                elif "quota_exceeded" in error_msg.lower():
                    error_msg = "DeepSeek API配额已用完,请续费或等待下个计费周期"

                yield {
                    "status": "error",
                    "error": f"DeepSeek API错误: {error_msg}"
                }
            finally:
                # Always restore the original proxy environment.
                self._restore_proxy_env(original_env)

        except Exception as e:
            error_msg = str(e)
            print(f"调用DeepSeek模型时发生错误: {error_msg}")

            if "invalid_api_key" in error_msg.lower():
                error_msg = "API密钥无效,请检查设置"
            elif "rate_limit" in error_msg.lower():
                error_msg = "API请求频率超限,请稍后再试"

            yield {
                "status": "error",
                "error": f"DeepSeek API错误: {error_msg}"
            }

    def analyze_image(self, image_data: str, proxies: dict = None) -> Generator[dict, None, None]:
        """Stream DeepSeek's response for image analysis.

        The standard DeepSeek text models cannot process images; those yield an
        error immediately.
        """
        try:
            # The plain text models have no vision capability.
            if self.model_name in ("deepseek-chat", "deepseek-reasoner"):
                yield {
                    "status": "error",
                    "error": "当前DeepSeek模型不支持图像分析,请使用Anthropic或OpenAI的多模态模型"
                }
                return

            # Initial status
            yield {"status": "started", "content": ""}

            # Save the original environment so proxy changes can be undone.
            original_env = self._snapshot_proxy_env()

            try:
                # Apply proxy settings (if any) through environment variables.
                self._apply_proxy_env(proxies)

                client = self._create_client()

                model_id = self.get_model_identifier()
                params = {
                    "model": model_id,
                    "messages": [
                        {
                            'role': 'system',
                            'content': self.system_prompt
                        },
                        {
                            'role': 'user',
                            'content': f"Here's an image of a question to analyze: data:image/png;base64,{image_data}"
                        }
                    ],
                    "stream": True
                }

                # Reasoner models do not accept a temperature parameter.
                if not model_id.endswith('reasoner') and self.temperature is not None:
                    params["temperature"] = self.temperature

                response = client.chat.completions.create(**params)

                # Two buffers: one for the final answer, one for the chain of thought.
                response_buffer = ""
                thinking_buffer = ""

                for chunk in response:
                    # Debug: dump each raw chunk.
                    try:
                        print(f"DeepSeek图像API返回chunk: {chunk}")
                    except Exception:
                        print("无法打印chunk")

                    try:
                        delta = chunk.choices[0].delta

                        # Chain-of-thought content from reasoner models.
                        if hasattr(delta, 'reasoning_content') and delta.reasoning_content:
                            content = delta.reasoning_content
                            thinking_buffer += content

                            # Batch thinking updates to reduce UI churn.
                            if len(content) >= 20 or content.endswith(('.', '!', '?', '。', '!', '?', '\n')):
                                yield {
                                    "status": "thinking",
                                    "content": thinking_buffer
                                }

                        # Final answer content (present even for reasoner models).
                        if hasattr(delta, 'content') and delta.content:
                            content = delta.content
                            response_buffer += content
                            print(f"累积图像响应内容: '{content}', 当前buffer: '{response_buffer}'")

                            # Batch streaming updates to reduce UI churn.
                            if len(content) >= 10 or content.endswith(('.', '!', '?', '。', '!', '?', '\n')):
                                yield {
                                    "status": "streaming",
                                    "content": response_buffer
                                }

                        # End-of-message marker.
                        if hasattr(chunk.choices[0], 'finish_reason') and chunk.choices[0].finish_reason:
                            print(f"图像生成结束,原因: {chunk.choices[0].finish_reason}")
                    except Exception as e:
                        print(f"解析图像响应chunk时出错: {str(e)}")
                        continue

                # Flush the final thinking buffer.
                if thinking_buffer:
                    yield {
                        "status": "thinking_complete",
                        "content": thinking_buffer
                    }

                # Emit the final answer if any content arrived.
                if response_buffer:
                    yield {
                        "status": "completed",
                        "content": response_buffer
                    }

            except Exception as e:
                error_msg = str(e)
                print(f"DeepSeek API调用出错: {error_msg}")

                # Translate common failures into actionable messages.
                if "invalid_api_key" in error_msg.lower():
                    error_msg = "DeepSeek API密钥无效,请检查您的API密钥"
                elif "rate_limit" in error_msg.lower():
                    error_msg = "DeepSeek API请求频率超限,请稍后再试"

                yield {
                    "status": "error",
                    "error": f"DeepSeek API错误: {error_msg}"
                }
            finally:
                # Always restore the original proxy environment.
                self._restore_proxy_env(original_env)

        except Exception as e:
            error_msg = str(e)
            if "invalid_api_key" in error_msg.lower():
                error_msg = "API密钥无效,请检查设置"
            elif "rate_limit" in error_msg.lower():
                error_msg = "API请求频率超限,请稍后再试"

            yield {
                "status": "error",
                "error": f"DeepSeek API错误: {error_msg}"
            }
365 |
--------------------------------------------------------------------------------
/models/anthropic.py:
--------------------------------------------------------------------------------
1 | import json
2 | import requests
3 | from typing import Generator, Optional
4 | from .base import BaseModel
5 |
class AnthropicModel(BaseModel):
    """Claude model accessed through the Anthropic Messages API (streaming SSE).

    Shared request/stream plumbing lives in private helpers so analyze_text and
    analyze_image stay in sync (previously the two copies had drifted:
    analyze_image aborted the whole stream on a single malformed SSE line,
    never stopped after an API 'error' event, and let network exceptions escape
    the generator instead of yielding an error dict).
    """

    def __init__(self, api_key, temperature=0.7, system_prompt=None, language=None, api_base_url=None, model_identifier=None):
        super().__init__(api_key, temperature, system_prompt or self.get_default_system_prompt(), language or "en")
        # API base URL; defaults to the official Anthropic endpoint.
        self.api_base_url = api_base_url or "https://api.anthropic.com/v1"
        # Model identifier, selectable at runtime.
        self.model_identifier = model_identifier or "claude-3-7-sonnet-20250219"
        # Reasoning ("thinking") configuration; assigned externally before use.
        self.reasoning_config = None
        # Maximum output tokens; assigned externally, falls back to 8192.
        self.max_tokens = None

    def get_default_system_prompt(self) -> str:
        return """You are an expert at analyzing questions and providing detailed solutions. When presented with an image of a question:
1. First read and understand the question carefully
2. Break down the key components of the question
3. Provide a clear, step-by-step solution
4. If relevant, explain any concepts or theories involved
5. If there are multiple approaches, explain the most efficient one first"""

    def get_model_identifier(self) -> str:
        return self.model_identifier

    def _request_headers(self, include_accept: bool = False) -> dict:
        """Build the Messages API request headers.

        Args:
            include_accept: add an explicit 'accept' header (used by text analysis).
        """
        api_key = self.api_key
        # Tolerate keys pasted with a leading "Bearer " prefix.
        if api_key.startswith('Bearer '):
            api_key = api_key[7:]
        headers = {
            'x-api-key': api_key,
            'anthropic-version': '2023-06-01',
            'content-type': 'application/json',
        }
        if include_accept:
            headers['accept'] = 'application/json'
        return headers

    def _resolved_max_tokens(self) -> int:
        """Return the externally configured max_tokens, or the 8192 default."""
        if getattr(self, 'max_tokens', None):
            return self.max_tokens
        return 8192

    def _apply_reasoning_config(self, payload: dict, max_tokens: int) -> None:
        """Attach (or strip) the 'thinking' parameter based on reasoning_config.

        - 'extended' reasoning depth: use the configured think_budget
          (default: half of max_tokens).
        - 'instant' speed mode: send no thinking parameter at all.
        - otherwise (including no config): enable thinking with a small budget.
        """
        default_thinking = {
            'type': 'enabled',
            'budget_tokens': min(4096, max_tokens // 4)
        }
        config = getattr(self, 'reasoning_config', None)
        if not config:
            payload['thinking'] = default_thinking
            return
        if config.get('reasoning_depth') == 'extended':
            payload['thinking'] = {
                'type': 'enabled',
                'budget_tokens': config.get('think_budget', max_tokens // 2)
            }
        elif config.get('speed_mode') == 'instant':
            # Instant mode must not carry a thinking parameter.
            payload.pop('thinking', None)
        else:
            payload['thinking'] = default_thinking

    @staticmethod
    def _http_error_message(response) -> str:
        """Format a non-200 HTTP response into a human-readable error string."""
        error_msg = f'API error: {response.status_code}'
        try:
            error_data = response.json()
            if 'error' in error_data:
                error_msg += f" - {error_data['error']['message']}"
        except Exception:
            # Body was not JSON; include the raw text instead.
            error_msg += f" - {response.text}"
        return error_msg

    def _stream_events(self, response) -> Generator[dict, None, None]:
        """Parse the SSE stream from the Messages API and yield status dicts.

        Malformed JSON lines are skipped; an API 'error' event terminates the
        stream. Any other exception propagates to the caller.
        """
        thinking_content = ""
        response_buffer = ""

        for chunk in response.iter_lines():
            if not chunk:
                continue

            try:
                chunk_str = chunk.decode('utf-8')
                if not chunk_str.startswith('data: '):
                    continue

                data = json.loads(chunk_str[6:])

                if data.get('type') == 'content_block_delta':
                    delta = data.get('delta', {})
                    if 'text' in delta:
                        text_chunk = delta['text']
                        response_buffer += text_chunk
                        # Batch updates to reduce UI churn.
                        if len(text_chunk) >= 10 or text_chunk.endswith(('.', '!', '?', '。', '!', '?', '\n')):
                            yield {
                                "status": "streaming",
                                "content": response_buffer
                            }
                    elif 'thinking' in delta:
                        thinking_chunk = delta['thinking']
                        thinking_content += thinking_chunk
                        # Batch updates to reduce UI churn.
                        if len(thinking_chunk) >= 20 or thinking_chunk.endswith(('.', '!', '?', '。', '!', '?', '\n')):
                            yield {
                                "status": "thinking",
                                "content": thinking_content
                            }

                # Newer extended_thinking streaming format.
                elif data.get('type') == 'extended_thinking_delta':
                    if 'delta' in data and 'text' in data['delta']:
                        thinking_chunk = data['delta']['text']
                        thinking_content += thinking_chunk
                        if len(thinking_chunk) >= 20 or thinking_chunk.endswith(('.', '!', '?', '。', '!', '?', '\n')):
                            yield {
                                "status": "thinking",
                                "content": thinking_content
                            }

                elif data.get('type') == 'message_stop':
                    # Flush the complete thinking content, then the full answer.
                    if thinking_content:
                        yield {
                            "status": "thinking_complete",
                            "content": thinking_content
                        }
                    yield {
                        "status": "completed",
                        "content": response_buffer
                    }

                elif data.get('type') == 'error':
                    yield {
                        "status": "error",
                        "error": data.get('error', {}).get('message', 'Unknown error')
                    }
                    break

            except json.JSONDecodeError as e:
                # Skip malformed SSE lines instead of aborting the stream.
                print(f"JSON decode error: {str(e)}")
                continue

    def analyze_text(self, text: str, proxies: Optional[dict] = None) -> Generator[dict, None, None]:
        """Stream Claude's response for text analysis."""
        try:
            yield {"status": "started"}

            max_tokens = self._resolved_max_tokens()

            payload = {
                'model': self.get_model_identifier(),
                'stream': True,
                'max_tokens': max_tokens,
                'temperature': 1,
                'system': self.system_prompt,
                'messages': [{
                    'role': 'user',
                    'content': [
                        {
                            'type': 'text',
                            'text': text
                        }
                    ]
                }]
            }
            self._apply_reasoning_config(payload, max_tokens)

            print(f"Debug - 推理配置: max_tokens={max_tokens}, thinking={payload.get('thinking', payload.get('speed_mode', 'default'))}")

            response = requests.post(
                f"{self.api_base_url}/messages",
                headers=self._request_headers(include_accept=True),
                json=payload,
                stream=True,
                proxies=proxies,
                timeout=60
            )

            if response.status_code != 200:
                yield {"status": "error", "error": self._http_error_message(response)}
                return

            yield from self._stream_events(response)

        except Exception as e:
            yield {
                "status": "error",
                "error": f"Streaming error: {str(e)}"
            }

    def analyze_image(self, image_data, proxies: Optional[dict] = None):
        """Stream Claude's response for image analysis.

        The base64 PNG is sent together with a fixed analysis instruction.
        """
        try:
            yield {"status": "started"}

            max_tokens = self._resolved_max_tokens()

            payload = {
                'model': self.get_model_identifier(),
                'stream': True,
                'max_tokens': max_tokens,
                'temperature': 1,
                'system': self.system_prompt,
                'messages': [{
                    'role': 'user',
                    'content': [
                        {
                            'type': 'image',
                            'source': {
                                'type': 'base64',
                                'media_type': 'image/png',
                                'data': image_data
                            }
                        },
                        {
                            'type': 'text',
                            'text': "请分析这个问题并提供详细的解决方案。如果你看到多个问题,请逐一解决。"
                        }
                    ]
                }]
            }
            self._apply_reasoning_config(payload, max_tokens)

            print(f"Debug - 图像分析推理配置: max_tokens={max_tokens}, thinking={payload.get('thinking', payload.get('speed_mode', 'default'))}")

            response = requests.post(
                f"{self.api_base_url}/messages",
                headers=self._request_headers(),
                json=payload,
                stream=True,
                proxies=proxies,
                timeout=60
            )

            if response.status_code != 200:
                yield {"status": "error", "error": self._http_error_message(response)}
                return

            yield from self._stream_events(response)

        except Exception as e:
            yield {
                "status": "error",
                "error": f"Error processing response: {str(e)}"
            }
372 |
--------------------------------------------------------------------------------
/app.py:
--------------------------------------------------------------------------------
1 | from flask import Flask, jsonify, render_template, request, send_from_directory
2 | from flask_socketio import SocketIO
3 | import pyautogui
4 | import base64
5 | from io import BytesIO
6 | import socket
7 | from threading import Thread, Event
8 | import threading
9 | from PIL import Image
10 | import pyperclip
11 | from models import ModelFactory
12 | import time
13 | import os
14 | import json
15 | import traceback
16 | import requests
17 | from datetime import datetime
18 | import sys
19 |
app = Flask(__name__)
socketio = SocketIO(
    app,
    cors_allowed_origins="*",
    ping_timeout=30,
    ping_interval=5,
    max_http_buffer_size=50 * 1024 * 1024,
    async_mode='threading',  # threading mode for broader compatibility
    engineio_logger=True,  # enable engine logs to ease debugging
    logger=True  # enable Socket.IO logs
)

# Directory layout constants.
CURRENT_DIR = os.path.dirname(os.path.abspath(__file__))
CONFIG_DIR = os.path.join(CURRENT_DIR, 'config')
STATIC_DIR = os.path.join(CURRENT_DIR, 'static')
# Make sure the config directory exists before any config file is touched.
os.makedirs(CONFIG_DIR, exist_ok=True)

# Paths of the API-key file and other configuration files.
API_KEYS_FILE = os.path.join(CONFIG_DIR, 'api_keys.json')
API_BASE_URLS_FILE = os.path.join(CONFIG_DIR, 'api_base_urls.json')
VERSION_FILE = os.path.join(CONFIG_DIR, 'version.json')
UPDATE_INFO_FILE = os.path.join(CONFIG_DIR, 'update_info.json')
PROMPT_FILE = os.path.join(CONFIG_DIR, 'prompts.json')  # prompt configuration file
PROXY_API_FILE = os.path.join(CONFIG_DIR, 'proxy_api.json')  # relay/proxy API configuration file

# Placeholder keys for per-provider API base URL overrides; an empty string
# means "no override" (providers then fall back to their official endpoints).
DEFAULT_API_BASE_URLS = {
    "AnthropicApiBaseUrl": "",
    "OpenaiApiBaseUrl": "",
    "DeepseekApiBaseUrl": "",
    "AlibabaApiBaseUrl": "",
    "GoogleApiBaseUrl": "",
    "DoubaoApiBaseUrl": ""
}
55 |
def ensure_api_base_urls_file():
    """Make sure the API base-URL config file exists with every placeholder key."""
    try:
        existing = {}
        valid_file = os.path.exists(API_BASE_URLS_FILE)
        if valid_file:
            try:
                with open(API_BASE_URLS_FILE, 'r', encoding='utf-8') as fh:
                    parsed = json.load(fh)
                if isinstance(parsed, dict):
                    existing = parsed
                else:
                    # A non-dict payload counts as an invalid file.
                    valid_file = False
            except json.JSONDecodeError:
                valid_file = False

        # Back-fill any placeholder that a newer release introduced.
        added_any = False
        for name, placeholder in DEFAULT_API_BASE_URLS.items():
            if name not in existing:
                existing[name] = placeholder
                added_any = True

        # Rewrite when the file was missing/invalid, grew new keys, or is empty.
        if not valid_file or added_any or not existing:
            with open(API_BASE_URLS_FILE, 'w', encoding='utf-8') as fh:
                json.dump(existing or DEFAULT_API_BASE_URLS, fh, ensure_ascii=False, indent=2)
    except Exception as e:
        print(f"初始化API基础URL配置失败: {e}")
83 |
# Ensure the API base-URL file has been generated before anything reads it
ensure_api_base_urls_file()

# Per-client map of sid -> threading.Event used to cancel running generations
generation_tasks = {}

# Initialize the model factory
ModelFactory.initialize()
92 |
def get_local_ip():
    """Best-effort detection of this machine's LAN IP address.

    connect() on a UDP socket sends no packets; it only selects the outgoing
    interface, whose address getsockname() then reports.

    Returns:
        str: the local IPv4 address, or "127.0.0.1" when detection fails.
    """
    try:
        # FIX: use the socket as a context manager so it is closed even when
        # connect() raises (the original leaked the fd on that path).
        with socket.socket(socket.AF_INET, socket.SOCK_DGRAM) as s:
            s.connect(("8.8.8.8", 80))
            return s.getsockname()[0]
    except Exception:
        return "127.0.0.1"
103 |
@app.route('/')
def index():
    """Render the main page, passing the LAN IP and update information."""
    local_ip = get_local_ip()

    # FIX: the original bare `except:` also swallowed SystemExit and
    # KeyboardInterrupt; narrow it to Exception.
    try:
        update_info = check_for_updates()
    except Exception:
        update_info = {'has_update': False}

    return render_template('index.html', local_ip=local_ip, update_info=update_info)
115 |
@socketio.on('connect')
def handle_connect():
    """Log each new Socket.IO client connection."""
    print('Client connected')
119 |
@socketio.on('disconnect')
def handle_disconnect():
    """Log each Socket.IO client disconnection."""
    print('Client disconnected')
123 |
# Keyword -> provider table shared by the key / proxy / base-URL lookups below.
# Order matters: the first matching entry wins (mirrors the original if/elif chain).
_PROVIDER_KEYWORDS = [
    ('anthropic', ('claude', 'anthropic')),
    ('openai', ('gpt', 'openai')),
    ('deepseek', ('deepseek',)),
    ('alibaba', ('qvq', 'alibaba', 'qwen')),
    ('google', ('gemini', 'google')),
    ('doubao', ('doubao',)),
]

# Provider -> local API-key config entry.
_PROVIDER_KEY_IDS = {
    'anthropic': 'AnthropicApiKey',
    'openai': 'OpenaiApiKey',
    'deepseek': 'DeepseekApiKey',
    'alibaba': 'AlibabaApiKey',
    'google': 'GoogleApiKey',
    'doubao': 'DoubaoApiKey',
}

# NOTE: the proxy-API lookup historically covered only these providers
# (doubao is reachable via custom base URLs only); preserved as-is.
_PROXY_PROVIDERS = ('anthropic', 'openai', 'deepseek', 'alibaba', 'google')


def _detect_provider(model_id):
    """Return the provider name for *model_id* (first keyword match), or None."""
    lowered = model_id.lower()
    for provider, keywords in _PROVIDER_KEYWORDS:
        if any(keyword in lowered for keyword in keywords):
            return provider
    return None


def create_model_instance(model_id, settings, is_reasoning=False):
    """Create a configured model instance for *model_id*.

    Refactor: the original repeated the same keyword if/elif chain three
    times (API key, proxy API, custom base URL); provider detection is now
    done once via _detect_provider().

    Args:
        model_id: model identifier selected by the client.
        settings: client settings dict (apiKeys, apiBaseUrls, temperature, ...).
        is_reasoning: True disables the temperature parameter.

    Raises:
        ValueError: when no API key is configured for the provider.
    """
    api_keys = settings.get('apiKeys', {})
    provider = _detect_provider(model_id)

    # Special case: o3-mini is billed through the OpenAI key even though its
    # id contains none of the openai keywords (so provider stays None and the
    # base-URL sections below do not apply, matching the original behavior).
    if model_id.lower() == "o3-mini":
        api_key_id = "OpenaiApiKey"
    else:
        api_key_id = _PROVIDER_KEY_IDS.get(provider)

    # Local config first, then the key passed by the frontend (backward compat).
    api_key = get_api_key(api_key_id)
    if not api_key:
        api_key = api_keys.get(api_key_id)
    if not api_key:
        raise ValueError(f"API key is required for the selected model (keyId: {api_key_id})")

    # Maximum output tokens, defaulting to 8192.
    max_tokens = int(settings.get('maxTokens', 8192))

    # Proxy ("中转") API base URL, when enabled for this provider.
    proxy_api_config = load_proxy_api()
    base_url = None
    if proxy_api_config.get('enabled', False) and provider in _PROXY_PROVIDERS:
        base_url = proxy_api_config.get('apis', {}).get(provider, '')

    # A per-provider custom base URL from the frontend overrides the proxy one.
    custom_base_url = settings.get('apiBaseUrls', {}).get(provider) if provider else None
    if custom_base_url:
        base_url = custom_base_url

    model_instance = ModelFactory.create_model(
        model_name=model_id,
        api_key=api_key,
        temperature=None if is_reasoning else float(settings.get('temperature', 0.7)),
        system_prompt=settings.get('systemPrompt'),
        language=settings.get('language', '中文'),
        api_base_url=base_url  # BaseModel supports api_base_url
    )

    # Alibaba models manage their own output-token limit internally.
    if provider != 'alibaba':
        model_instance.max_tokens = max_tokens

    return model_instance
223 |
def stream_model_response(response_generator, sid, model_name=None):
    """Relay a model's streaming responses to one Socket.IO client.

    Consumes dicts with 'status'/'content' keys from *response_generator* and
    re-emits them as 'ai_response' events to room *sid*, throttling the
    incremental 'thinking'/'streaming' updates to at most one every 0.3 s.

    BUG FIX: the original loop referenced `status`, `content` and
    `current_time` without ever assigning them (the per-response extraction
    had been lost, and a dangling mathpix fragment replaced the 'thinking'
    branch), so every streamed response raised NameError. The fields are now
    derived from each yielded response dict.
    """
    try:
        print("Starting response streaming...")

        # Reasoning models additionally stream a visible "thinking" phase.
        is_reasoning = model_name and ModelFactory.is_reasoning(model_name)
        if is_reasoning:
            print(f"使用推理模型 {model_name},将显示思考过程")

        # Tell the client streaming has begun.
        socketio.emit('ai_response', {
            'status': 'started',
            'content': '',
            'is_reasoning': is_reasoning
        }, room=sid)
        print("Sent initial status to client")

        # Server-side buffers accumulating the full content seen so far.
        response_buffer = ""
        thinking_buffer = ""

        # Timestamp of the last emit, used to throttle UI updates.
        last_emit_time = time.time()

        for response in response_generator:
            # FIX: extract the fields the dispatch below relies on.
            status = response.get('status')
            content = response.get('content', '')

            if status == 'thinking':
                # Only reasoning models surface the thinking phase.
                if is_reasoning:
                    thinking_buffer = content
                    # Throttle to one emit per 0.3 s to reduce UI churn.
                    current_time = time.time()
                    if current_time - last_emit_time >= 0.3:
                        socketio.emit('ai_response', {
                            'status': 'thinking',
                            'content': thinking_buffer,
                            'is_reasoning': True
                        }, room=sid)
                        last_emit_time = current_time

            elif status == 'thinking_complete':
                if is_reasoning:
                    # Use the complete thinking content as delivered.
                    thinking_buffer = content
                    print(f"Thinking complete, total length: {len(thinking_buffer)} chars")
                    socketio.emit('ai_response', {
                        'status': 'thinking_complete',
                        'content': thinking_buffer,
                        'is_reasoning': True
                    }, room=sid)

            elif status == 'streaming':
                # The model supplies the full accumulated content each time.
                response_buffer = content
                current_time = time.time()
                if current_time - last_emit_time >= 0.3:
                    socketio.emit('ai_response', {
                        'status': 'streaming',
                        'content': response_buffer,
                        'is_reasoning': is_reasoning
                    }, room=sid)
                    last_emit_time = current_time

            elif status == 'completed':
                # Always emit the final, complete content.
                socketio.emit('ai_response', {
                    'status': 'completed',
                    'content': content or response_buffer,
                    'is_reasoning': is_reasoning
                }, room=sid)
                print("Response completed")

            elif status == 'error':
                # Forward error payloads unchanged (plus the reasoning flag).
                response['is_reasoning'] = is_reasoning
                socketio.emit('ai_response', response, room=sid)
                print(f"Error: {response.get('error', 'Unknown error')}")

            else:
                # Forward any other status unchanged.
                response['is_reasoning'] = is_reasoning
                socketio.emit('ai_response', response, room=sid)

    except Exception as e:
        error_msg = f"Streaming error: {str(e)}"
        print(error_msg)
        socketio.emit('ai_response', {
            'status': 'error',
            'error': error_msg,
            'is_reasoning': model_name and ModelFactory.is_reasoning(model_name)
        }, room=sid)
316 |
@socketio.on('request_screenshot')
def handle_screenshot_request():
    """Capture the full screen and broadcast it as a base64 PNG."""
    try:
        print("DEBUG: 执行request_screenshot截图")

        grabbed = pyautogui.screenshot()

        # Encode the PNG into a base64 string for transport.
        png_buffer = BytesIO()
        grabbed.save(png_buffer, format="PNG")
        encoded = base64.b64encode(png_buffer.getvalue()).decode()

        # Log only the payload size, never the base64 data itself.
        print("DEBUG: 完成request_screenshot截图,图片大小: {} KB".format(len(encoded) // 1024))
        socketio.emit('screenshot_response', {
            'success': True,
            'image': encoded
        })
    except Exception as e:
        socketio.emit('screenshot_response', {
            'success': False,
            'error': str(e)
        })
342 |
@socketio.on('extract_text')
def handle_text_extraction(data):
    """Run OCR on a client-supplied base64 image and emit the text back.

    Prefers Baidu OCR when both Baidu keys are configured, otherwise falls
    back to Mathpix (local config first, then the key sent by the frontend).
    Emits 'request_acknowledged' once the request is accepted, then
    'text_extracted' with either {'content': ...} or {'error': ...} to the
    requesting client only.
    """
    try:
        print("Starting text extraction...")

        # Validate input data
        if not data or not isinstance(data, dict):
            raise ValueError("Invalid request data")

        if 'image' not in data:
            raise ValueError("No image data provided")

        image_data = data['image']
        if not isinstance(image_data, str):
            raise ValueError("Invalid image data format")

        # Reject oversized images: processing them can stall long enough to
        # drop the socket connection.
        image_size_bytes = len(image_data) * 3 / 4  # approximate decoded size of the base64 payload
        if image_size_bytes > 10 * 1024 * 1024:  # 10MB
            raise ValueError("Image too large, please crop to a smaller area")

        settings = data.get('settings', {})
        if not isinstance(settings, dict):
            raise ValueError("Invalid settings format")

        # Prefer Baidu OCR; fall back to Mathpix when Baidu is not configured.
        # First try to fetch the Baidu OCR credentials.
        baidu_api_key = get_api_key('BaiduApiKey')
        baidu_secret_key = get_api_key('BaiduSecretKey')

        # Combined Baidu OCR credential (format: api_key:secret_key)
        ocr_key = None
        ocr_model = None

        if baidu_api_key and baidu_secret_key:
            ocr_key = f"{baidu_api_key}:{baidu_secret_key}"
            ocr_model = 'baidu-ocr'
            print("Using Baidu OCR for text extraction...")
        else:
            # Fall back to Mathpix
            mathpix_app_id = get_api_key('MathpixAppId')
            mathpix_app_key = get_api_key('MathpixAppKey')

            # Combined Mathpix credential (format: app_id:app_key)
            mathpix_key = f"{mathpix_app_id}:{mathpix_app_key}" if mathpix_app_id and mathpix_app_key else None

            # Backward compatibility: accept a key passed by the frontend
            if not mathpix_key:
                mathpix_key = settings.get('mathpixApiKey')

            if mathpix_key:
                ocr_key = mathpix_key
                ocr_model = 'mathpix'
                print("Using Mathpix OCR for text extraction...")

        if not ocr_key:
            raise ValueError("OCR API key is required. Please configure Baidu OCR (API Key + Secret Key) or Mathpix (App ID + App Key)")

        # Acknowledge receipt immediately so the client does not time out.
        # NOTE: deliberately no return here — extraction continues below.
        socketio.emit('request_acknowledged', {
            'status': 'received',
            'message': f'Image received, text extraction in progress using {ocr_model}'
        }, room=request.sid)

        # Validate the combined credential shape; unpacking raises ValueError
        # when the colon-separated format is wrong.
        try:
            if ocr_model == 'baidu-ocr':
                api_key, secret_key = ocr_key.split(':')
                if not api_key.strip() or not secret_key.strip():
                    raise ValueError()
            elif ocr_model == 'mathpix':
                app_id, app_key = ocr_key.split(':')
                if not app_id.strip() or not app_key.strip():
                    raise ValueError()
        except ValueError:
            if ocr_model == 'baidu-ocr':
                raise ValueError("Invalid Baidu OCR API key format. Expected format: 'API_KEY:SECRET_KEY'")
            else:
                raise ValueError("Invalid Mathpix API key format. Expected format: 'app_id:app_key'")

        print(f"Creating {ocr_model} model instance...")
        # ModelFactory.create_model dispatches on the model name
        model = ModelFactory.create_model(
            model_name=ocr_model,
            api_key=ocr_key
        )

        print("Starting text extraction...")
        # extract_full_text returns the complete recognized text in one call
        extracted_text = model.extract_full_text(image_data)

        # Send the plain-text result back to the requesting client
        socketio.emit('text_extracted', {
            'content': extracted_text
        }, room=request.sid)

    except ValueError as e:
        error_msg = str(e)
        print(f"Validation error: {error_msg}")
        socketio.emit('text_extracted', {
            'error': error_msg
        }, room=request.sid)
    except Exception as e:
        error_msg = f"Text extraction error: {str(e)}"
        print(f"Unexpected error: {error_msg}")
        print(f"Error details: {type(e).__name__}")
        socketio.emit('text_extracted', {
            'error': error_msg
        }, room=request.sid)
452 |
@socketio.on('stop_generation')
def handle_stop_generation():
    """Signal the requesting client's running generation task to stop."""
    sid = request.sid
    print(f"接收到停止生成请求: {sid}")

    task_event = generation_tasks.get(sid)
    if task_event is None:
        print(f"未找到用户 {sid} 的生成任务")
        return

    # Flip the shared flag that the generation loop polls.
    task_event.set()

    # Confirm to the client that generation was stopped.
    socketio.emit('ai_response', {
        'status': 'stopped',
        'content': '生成已停止'
    }, room=sid)

    print(f"已停止用户 {sid} 的生成任务")
473 |
@socketio.on('analyze_text')
def handle_analyze_text(data):
    """Analyze plain text with the selected model, streaming results back.

    FIX: error events are now emitted to the requesting client only
    (room=sid); the original emitted them without a room, broadcasting one
    user's errors to every connected client.
    """
    sid = request.sid
    try:
        text = data.get('text', '')
        settings = data.get('settings', {})

        # Reasoning-related options forwarded to the model.
        reasoning_config = settings.get('reasoningConfig', {})
        max_tokens = int(settings.get('maxTokens', 8192))

        print(f"Debug - 文本分析请求: {text[:50]}...")
        print(f"Debug - 最大Token: {max_tokens}, 推理配置: {reasoning_config}")

        # Model selected by the client (with a default).
        model_id = settings.get('model', 'claude-3-7-sonnet-20250219')

        if not text:
            socketio.emit('error', {'message': '文本内容不能为空'}, room=sid)
            return

        # Whether the selected model exposes a reasoning/thinking phase.
        model_info = settings.get('modelInfo', {})
        is_reasoning = model_info.get('isReasoning', False)

        model_instance = create_model_instance(model_id, settings, is_reasoning)

        if reasoning_config:
            model_instance.reasoning_config = reasoning_config

        # Optional HTTP(S) proxy for the outbound model request.
        proxies = None
        if settings.get('proxyEnabled'):
            proxies = {
                'http': f"http://{settings.get('proxyHost')}:{settings.get('proxyPort')}",
                'https': f"http://{settings.get('proxyHost')}:{settings.get('proxyPort')}"
            }

        # Event that lets 'stop_generation' cancel this loop.
        stop_event = Event()
        generation_tasks[sid] = stop_event

        try:
            for response in model_instance.analyze_text(text, proxies=proxies):
                # Stop streaming as soon as the client asked us to.
                if stop_event.is_set():
                    print(f"分析文本生成被用户 {sid} 停止")
                    break
                socketio.emit('ai_response', response, room=sid)
        finally:
            # Always drop the cancellation handle.
            generation_tasks.pop(sid, None)

    except Exception as e:
        print(f"Error in analyze_text: {str(e)}")
        traceback.print_exc()
        socketio.emit('error', {'message': f'分析文本时出错: {str(e)}'}, room=sid)
536 |
@socketio.on('analyze_image')
def handle_analyze_image(data):
    """Analyze an image with the selected model, streaming results back.

    FIX: error events are now emitted to the requesting client only
    (room=sid); the original emitted them without a room, broadcasting one
    user's errors to every connected client.
    """
    sid = request.sid
    try:
        image_data = data.get('image')
        settings = data.get('settings', {})

        # Reasoning-related options forwarded to the model.
        reasoning_config = settings.get('reasoningConfig', {})
        max_tokens = int(settings.get('maxTokens', 8192))

        print(f"Debug - 图像分析请求")
        print(f"Debug - 最大Token: {max_tokens}, 推理配置: {reasoning_config}")

        # Model selected by the client (with a default).
        model_id = settings.get('model', 'claude-3-7-sonnet-20250219')

        if not image_data:
            socketio.emit('error', {'message': '图像数据不能为空'}, room=sid)
            return

        # Whether the selected model exposes a reasoning/thinking phase.
        model_info = settings.get('modelInfo', {})
        is_reasoning = model_info.get('isReasoning', False)

        model_instance = create_model_instance(model_id, settings, is_reasoning)

        if reasoning_config:
            model_instance.reasoning_config = reasoning_config

        # Optional HTTP(S) proxy for the outbound model request.
        proxies = None
        if settings.get('proxyEnabled'):
            proxies = {
                'http': f"http://{settings.get('proxyHost')}:{settings.get('proxyPort')}",
                'https': f"http://{settings.get('proxyHost')}:{settings.get('proxyPort')}"
            }

        # Event that lets 'stop_generation' cancel this loop.
        stop_event = Event()
        generation_tasks[sid] = stop_event

        try:
            for response in model_instance.analyze_image(image_data, proxies=proxies):
                # Stop streaming as soon as the client asked us to.
                if stop_event.is_set():
                    print(f"分析图像生成被用户 {sid} 停止")
                    break
                socketio.emit('ai_response', response, room=sid)
        finally:
            # Always drop the cancellation handle.
            generation_tasks.pop(sid, None)

    except Exception as e:
        print(f"Error in analyze_image: {str(e)}")
        traceback.print_exc()
        socketio.emit('error', {'message': f'分析图像时出错: {str(e)}'}, room=sid)
599 |
@socketio.on('capture_screenshot')
def handle_capture_screenshot(data):
    """Capture the screen and send it only to the requesting client."""
    try:
        print("DEBUG: 执行capture_screenshot截图")

        shot = pyautogui.screenshot()

        # Encode the PNG into a base64 string for transport.
        raw = BytesIO()
        shot.save(raw, format="PNG")
        encoded = base64.b64encode(raw.getvalue()).decode()

        # Log only the payload size, never the base64 data itself.
        print("DEBUG: 完成capture_screenshot截图,图片大小: {} KB".format(len(encoded) // 1024))
        socketio.emit('screenshot_complete', {
            'success': True,
            'image': encoded
        }, room=request.sid)
    except Exception as e:
        error_msg = f"Screenshot error: {str(e)}"
        print(f"Error capturing screenshot: {error_msg}")
        socketio.emit('screenshot_complete', {
            'success': False,
            'error': error_msg
        }, room=request.sid)
627 |
def load_model_config():
    """Load the model catalogue from config/models.json.

    Returns an empty {"providers": {}, "models": {}} structure on any failure.
    """
    try:
        with open(os.path.join(CONFIG_DIR, 'models.json'), 'r', encoding='utf-8') as fh:
            return json.load(fh)
    except Exception as e:
        print(f"加载模型配置失败: {e}")
        return {
            "providers": {},
            "models": {}
        }
641 |
def load_prompts():
    """Load the system-prompt presets, creating a default file when absent.

    Returns the fallback default preset when the file cannot be read/written.
    """
    fallback = {
        "default": {
            "name": "默认提示词",
            "content": "您是一位专业的问题解决专家。请逐步分析问题,找出问题所在,并提供详细的解决方案。始终使用用户偏好的语言回答。",
            "description": "通用问题解决提示词"
        }
    }
    try:
        if os.path.exists(PROMPT_FILE):
            with open(PROMPT_FILE, 'r', encoding='utf-8') as fh:
                return json.load(fh)
        # First run: persist the default preset so it can be edited later.
        with open(PROMPT_FILE, 'w', encoding='utf-8') as fh:
            json.dump(fallback, fh, ensure_ascii=False, indent=4)
        return fallback
    except Exception as e:
        print(f"加载提示词配置失败: {e}")
        return fallback
669 |
def save_prompt(prompt_id, prompt_data):
    """Persist one prompt preset into the config file; True on success."""
    try:
        all_prompts = load_prompts()
        all_prompts[prompt_id] = prompt_data
        with open(PROMPT_FILE, 'w', encoding='utf-8') as fh:
            json.dump(all_prompts, fh, ensure_ascii=False, indent=4)
    except Exception as e:
        print(f"保存提示词配置失败: {e}")
        return False
    return True
681 |
def delete_prompt(prompt_id):
    """Remove a prompt preset from the config file; True when it existed."""
    try:
        all_prompts = load_prompts()
        if prompt_id not in all_prompts:
            return False
        all_prompts.pop(prompt_id)
        with open(PROMPT_FILE, 'w', encoding='utf-8') as fh:
            json.dump(all_prompts, fh, ensure_ascii=False, indent=4)
        return True
    except Exception as e:
        print(f"删除提示词配置失败: {e}")
        return False
695 |
# Replacement for the removed Flask before_first_request decorator.
def init_model_config():
    """Push the models.json capability data into ModelFactory."""
    try:
        capabilities = load_model_config()
        # Only newer ModelFactory versions expose this hook.
        if hasattr(ModelFactory, 'update_model_capabilities'):
            ModelFactory.update_model_capabilities(capabilities)
        print("已加载模型配置")
    except Exception as e:
        print(f"初始化模型配置失败: {e}")
707 |
# Register a lazy, one-shot initialization hook before request handling.
@app.before_request
def before_request_handler():
    """Initialize the model configuration exactly once, on the first request."""
    if getattr(app, '_model_config_initialized', False):
        return
    init_model_config()
    app._model_config_initialized = True
715 |
# Version check
def check_for_updates():
    """Check GitHub for a newer release of this application.

    Returns a dict containing at least 'has_update'; on success it also holds
    version numbers, the release URL/date/notes, and the result is cached in
    UPDATE_INFO_FILE. Falls back to the cached info when GitHub is
    unreachable, and to {'has_update': False, 'error': ...} on any failure.

    FIX: uses the module-level VERSION_FILE / UPDATE_INFO_FILE constants
    instead of rebuilding the same paths from CONFIG_DIR three times.
    """
    try:
        # Read the locally installed version information.
        with open(VERSION_FILE, 'r', encoding='utf-8') as f:
            version_info = json.load(f)

        current_version = version_info.get('version', '0.0.0')
        repo = version_info.get('github_repo', 'Zippland/Snap-Solver')

        # Ask the GitHub API for the latest published release.
        api_url = f"https://api.github.com/repos/{repo}/releases/latest"

        # A User-Agent header is required by the GitHub API.
        headers = {'User-Agent': 'Snap-Solver-Update-Checker'}

        response = requests.get(api_url, headers=headers, timeout=5)
        if response.status_code == 200:
            latest_release = response.json()
            latest_version = latest_release.get('tag_name', '').lstrip('v')

            # Empty tag: try extracting x.y.z from the release name instead.
            if not latest_version and 'name' in latest_release:
                import re
                version_match = re.search(r'v?(\d+\.\d+\.\d+)', latest_release['name'])
                if version_match:
                    latest_version = version_match.group(1)

            # Simple dotted-number comparison.
            has_update = compare_versions(latest_version, current_version)

            update_info = {
                'has_update': has_update,
                'current_version': current_version,
                'latest_version': latest_version,
                'release_url': latest_release.get('html_url', f"https://github.com/{repo}/releases/latest"),
                'release_date': latest_release.get('published_at', ''),
                'release_notes': latest_release.get('body', ''),
            }

            # Cache the result for offline fallback.
            with open(UPDATE_INFO_FILE, 'w', encoding='utf-8') as f:
                json.dump(update_info, f, ensure_ascii=False, indent=2)

            return update_info

        # GitHub unreachable / non-200: fall back to the cached info.
        if os.path.exists(UPDATE_INFO_FILE):
            with open(UPDATE_INFO_FILE, 'r', encoding='utf-8') as f:
                return json.load(f)

        return {'has_update': False, 'current_version': current_version}

    except Exception as e:
        print(f"检查更新失败: {str(e)}")
        # Default to "no update" on any error.
        return {'has_update': False, 'error': str(e)}
777 |
def compare_versions(version1, version2):
    """Return True iff dotted *version1* is strictly newer than *version2*.

    Shorter versions are padded with zeros ('1.0' == '1.0.0'). Unparseable
    input conservatively reports False (no update).

    FIX: replaces the bare `except:` (which also swallowed SystemExit /
    KeyboardInterrupt) with the specific exceptions int()/split() can raise,
    and uses itertools.zip_longest instead of manual list padding.
    """
    from itertools import zip_longest
    try:
        v1_parts = [int(part) for part in version1.split('.')]
        v2_parts = [int(part) for part in version2.split('.')]
    except (ValueError, AttributeError):
        # Not a dotted-integer version string: default to "no update".
        return False

    # Compare component-wise, padding the shorter version with zeros.
    for a, b in zip_longest(v1_parts, v2_parts, fillvalue=0):
        if a != b:
            return a > b

    # Identical versions.
    return False
802 |
@app.route('/api/check-update', methods=['GET'])
def api_check_update():
    """HTTP endpoint wrapping check_for_updates()."""
    return jsonify(check_for_updates())
808 |
# Serve files from the config directory (e.g. /config/models.json).
# BUG FIX: the rule was '/config/' with no URL variable, so Flask could never
# supply the `filename` argument this view requires (TypeError on request).
@app.route('/config/<path:filename>')
def serve_config(filename):
    """Return the named file from CONFIG_DIR."""
    return send_from_directory(CONFIG_DIR, filename)
813 |
# API endpoint listing every model known to the factory.
@app.route('/api/models', methods=['GET'])
def get_models():
    """Return the available model list as JSON."""
    return jsonify(ModelFactory.get_available_models())
820 |
# Read the full API-key configuration.
@app.route('/api/keys', methods=['GET'])
def get_api_keys():
    """Return every stored API key as JSON."""
    return jsonify(load_api_keys())
827 |
# Save API keys
@app.route('/api/keys', methods=['POST'])
def update_api_keys():
    """Merge the posted API keys into the stored configuration.

    IDIOM: the original manual for-loop merge is replaced by dict.update().
    """
    try:
        new_keys = request.json
        if not isinstance(new_keys, dict):
            return jsonify({"success": False, "message": "无效的API密钥格式"}), 400

        # Overlay the posted keys onto the current configuration.
        current_keys = load_api_keys()
        current_keys.update(new_keys)

        if save_api_keys(current_keys):
            return jsonify({"success": True, "message": "API密钥已保存"})
        return jsonify({"success": False, "message": "保存API密钥失败"}), 500

    except Exception as e:
        return jsonify({"success": False, "message": f"更新API密钥错误: {str(e)}"}), 500
852 |
# Load the API-key configuration.
def load_api_keys():
    """Read api_keys.json, back-filling any newly introduced placeholder keys.

    Creates the file with empty placeholders on first run; returns {} when
    the file cannot be read or written.
    """
    placeholders = {
        "AnthropicApiKey": "",
        "OpenaiApiKey": "",
        "DeepseekApiKey": "",
        "AlibabaApiKey": "",
        "MathpixAppId": "",
        "MathpixAppKey": "",
        "GoogleApiKey": "",
        "DoubaoApiKey": "",
        "BaiduApiKey": "",
        "BaiduSecretKey": ""
    }
    try:
        if not os.path.exists(API_KEYS_FILE):
            # First run: persist the placeholder set.
            save_api_keys(placeholders)
            return placeholders

        with open(API_KEYS_FILE, 'r', encoding='utf-8') as fh:
            stored = json.load(fh)

        # Back-fill keys introduced after the file was first written.
        missing = [name for name in placeholders if name not in stored]
        for name in missing:
            stored[name] = placeholders[name]
        if missing:
            save_api_keys(stored)

        return stored
    except Exception as e:
        print(f"加载API密钥配置失败: {e}")
        return {}
891 |
# Load the proxy ("中转") API configuration.
def load_proxy_api():
    """Read the proxy-API config, creating a default file when absent.

    Returns {"enabled": False, "apis": {}} when the file cannot be
    read or written.

    FIX: the defaults now include "doubao", matching the shipped
    config/proxy_api.json and the DoubaoApiKey/DoubaoApiBaseUrl entries
    elsewhere in this file (it was previously omitted).
    """
    try:
        if os.path.exists(PROXY_API_FILE):
            with open(PROXY_API_FILE, 'r', encoding='utf-8') as f:
                return json.load(f)

        # File missing: write and return the default configuration.
        default_proxy_apis = {
            "enabled": False,
            "apis": {
                "anthropic": "",
                "openai": "",
                "deepseek": "",
                "alibaba": "",
                "google": "",
                "doubao": ""
            }
        }
        save_proxy_api(default_proxy_apis)
        return default_proxy_apis
    except Exception as e:
        print(f"加载中转API配置失败: {e}")
        return {"enabled": False, "apis": {}}
916 |
# Persist the proxy ("中转") API configuration.
def save_proxy_api(proxy_api_config):
    """Write the proxy-API config to disk; return True on success."""
    try:
        # Make sure the config directory exists.
        os.makedirs(os.path.dirname(PROXY_API_FILE), exist_ok=True)
        with open(PROXY_API_FILE, 'w', encoding='utf-8') as fh:
            json.dump(proxy_api_config, fh, ensure_ascii=False, indent=2)
    except Exception as e:
        print(f"保存中转API配置失败: {e}")
        return False
    return True
930 |
# Persist the API-key configuration.
def save_api_keys(api_keys):
    """Write the API-key dict to disk; return True on success."""
    try:
        # Make sure the config directory exists.
        os.makedirs(os.path.dirname(API_KEYS_FILE), exist_ok=True)
        with open(API_KEYS_FILE, 'w', encoding='utf-8') as fh:
            json.dump(api_keys, fh, ensure_ascii=False, indent=2)
    except Exception as e:
        print(f"保存API密钥配置失败: {e}")
        return False
    return True
943 |
# Look up one API key by name.
def get_api_key(key_name):
    """Return the stored value for *key_name*, or "" when unset."""
    return load_api_keys().get(key_name, "")
949 |
@app.route('/api/models')
def api_models():
    """Return the model list in the shape the frontend expects.

    NOTE(review): this rule duplicates the '/api/models' route registered by
    get_models() above; Werkzeug dispatches to the first-registered rule, so
    this view appears to be shadowed — confirm which endpoint is intended.

    FIX: uses config.get('models', {}) so a config without a 'models' key no
    longer raises KeyError (turning every request into a 500).
    """
    try:
        config = load_model_config()

        # Convert the stored catalogue into the frontend's list shape.
        models = [
            {
                'id': model_id,
                'display_name': model_info.get('name', model_id),
                'is_multimodal': model_info.get('supportsMultimodal', False),
                'is_reasoning': model_info.get('isReasoning', False),
                'description': model_info.get('description', ''),
                'version': model_info.get('version', 'latest')
            }
            for model_id, model_info in config.get('models', {}).items()
        ]

        return jsonify(models)
    except Exception as e:
        print(f"获取模型列表时出错: {e}")
        return jsonify([]), 500
974 |
@app.route('/api/prompts', methods=['GET'])
def get_prompts():
    """API endpoint: return every system-prompt preset."""
    try:
        return jsonify(load_prompts())
    except Exception as e:
        print(f"获取提示词列表时出错: {e}")
        return jsonify({"error": str(e)}), 500
984 |
# BUG FIX: the rule lacked the <prompt_id> URL variable, so Flask could never
# supply the argument this view requires (TypeError on request).
@app.route('/api/prompts/<prompt_id>', methods=['GET'])
def get_prompt(prompt_id):
    """API endpoint: return a single system prompt by id."""
    try:
        prompts = load_prompts()
        if prompt_id in prompts:
            return jsonify(prompts[prompt_id])
        return jsonify({"error": "提示词不存在"}), 404
    except Exception as e:
        print(f"获取提示词时出错: {e}")
        return jsonify({"error": str(e)}), 500
997 |
@app.route('/api/prompts', methods=['POST'])
def add_prompt():
    """API endpoint: add or update a system prompt.

    FIX: the return value of save_prompt() was ignored, so a failed write
    still reported success; it is now checked and surfaced as a 500.
    """
    try:
        data = request.json
        if not data or not isinstance(data, dict):
            return jsonify({"error": "无效的请求数据"}), 400

        prompt_id = data.get('id')
        if not prompt_id:
            return jsonify({"error": "提示词ID不能为空"}), 400

        prompt_data = {
            "name": data.get('name', f"提示词{prompt_id}"),
            "content": data.get('content', ""),
            "description": data.get('description', "")
        }

        if not save_prompt(prompt_id, prompt_data):
            return jsonify({"error": "保存提示词失败"}), 500
        return jsonify({"success": True, "id": prompt_id})
    except Exception as e:
        print(f"保存提示词时出错: {e}")
        return jsonify({"error": str(e)}), 500
1021 |
# BUG FIX: the rule lacked the <prompt_id> URL variable, so Flask could never
# supply the argument this view requires (TypeError on request).
@app.route('/api/prompts/<prompt_id>', methods=['DELETE'])
def remove_prompt(prompt_id):
    """API endpoint: delete a system prompt by id."""
    try:
        if delete_prompt(prompt_id):
            return jsonify({"success": True})
        return jsonify({"error": "提示词不存在或删除失败"}), 404
    except Exception as e:
        print(f"删除提示词时出错: {e}")
        return jsonify({"error": str(e)}), 500
1034 |
@app.route('/api/proxy-api', methods=['GET'])
def get_proxy_api():
    """API endpoint: return the proxy ("中转") API configuration."""
    try:
        return jsonify(load_proxy_api())
    except Exception as e:
        print(f"获取中转API配置时出错: {e}")
        return jsonify({"error": str(e)}), 500
1044 |
@app.route('/api/proxy-api', methods=['POST'])
def update_proxy_api():
    """API endpoint: replace the stored proxy ("中转") API configuration."""
    try:
        posted = request.json
        if not isinstance(posted, dict):
            return jsonify({"success": False, "message": "无效的中转API配置格式"}), 400

        # Persist the new configuration as-is.
        if save_proxy_api(posted):
            return jsonify({"success": True, "message": "中转API配置已保存"})
        return jsonify({"success": False, "message": "保存中转API配置失败"}), 500

    except Exception as e:
        return jsonify({"success": False, "message": f"更新中转API配置错误: {str(e)}"}), 500
1061 |
@app.route('/api/clipboard', methods=['POST'])
def update_clipboard():
    """Copy the posted ``text`` field onto the server's clipboard.

    Returns 400 for a missing/blank text, 500 if the copy fails.
    """
    try:
        payload = request.get_json(silent=True) or {}
        text = payload.get('text', '')

        # Reject non-string or whitespace-only content up front.
        if not isinstance(text, str) or not text.strip():
            return jsonify({"success": False, "message": "剪贴板内容不能为空"}), 400

        # Attempt the copy directly rather than pre-checking availability;
        # pyperclip's is_available() probe is unreliable.
        try:
            pyperclip.copy(text)
        except Exception as e:
            return jsonify({"success": False, "message": f"复制到剪贴板失败: {str(e)}"}), 500
        return jsonify({"success": True})
    except Exception as e:
        app.logger.exception("更新剪贴板时发生异常")
        return jsonify({"success": False, "message": f"服务器内部错误: {str(e)}"}), 500
1081 |
@app.route('/api/clipboard', methods=['GET'])
def get_clipboard():
    """Read the server's clipboard and return its text as JSON.

    Returns 500 with a message if the paste fails.
    """
    try:
        # Attempt the read directly rather than pre-checking availability;
        # pyperclip's is_available() probe is unreliable.
        try:
            # paste() may return None on some platforms — normalise to "".
            content = pyperclip.paste() or ""
        except Exception as e:
            return jsonify({"success": False, "message": f"读取剪贴板失败: {str(e)}"}), 500
        return jsonify({
            "success": True,
            "text": content,
            "message": "成功读取剪贴板内容"
        })
    except Exception as e:
        app.logger.exception("读取剪贴板时发生异常")
        return jsonify({"success": False, "message": f"服务器内部错误: {str(e)}"}), 500
1102 |
if __name__ == '__main__':
    # Prefer port 5000; fall back to 5001 if it is already in use.
    # NOTE(review): this probe-then-listen approach has an inherent race —
    # another process could grab the port between the probe and run().
    import socket
    port = 5000
    probe = socket.socket(socket.AF_INET, socket.SOCK_STREAM)
    try:
        probe.bind(('0.0.0.0', port))
    except OSError:
        port = 5001
        print(f"端口5000被占用,将使用端口{port}")
    finally:
        # Always release the probe socket — the original leaked it when
        # bind() raised, since close() sat inside the try body.
        probe.close()

    local_ip = get_local_ip()
    print(f"Local IP Address: {local_ip}")
    print(f"Connect from your mobile device using: {local_ip}:{port}")

    # Load the model configuration and, when the factory supports it,
    # propagate per-model capability info before serving requests.
    model_config = load_model_config()
    if hasattr(ModelFactory, 'update_model_capabilities'):
        ModelFactory.update_model_capabilities(model_config)
    print("已加载模型配置信息")

    # Run Flask in the main thread without debug mode
    socketio.run(app, host='0.0.0.0', port=port, allow_unsafe_werkzeug=True)
--------------------------------------------------------------------------------