├── app.ico ├── config ├── version.json ├── proxy_api.json ├── prompts.json └── models.json ├── requirements.txt ├── models ├── __init__.py ├── base.py ├── baidu_ocr.py ├── openai.py ├── google.py ├── factory.py ├── alibaba.py ├── doubao.py ├── mathpix.py ├── deepseek.py └── anthropic.py ├── .gitignore ├── AGENTS.md ├── README.md ├── docs └── beginner-tutorial.md ├── static └── js │ └── ui.js ├── LICENSE └── app.py /app.ico: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Zippland/Snap-Solver/HEAD/app.ico -------------------------------------------------------------------------------- /config/version.json: -------------------------------------------------------------------------------- 1 | { 2 | "version": "1.5.1", 3 | "build_date": "2025-04-11", 4 | "github_repo": "Zippland/Snap-Solver" 5 | } -------------------------------------------------------------------------------- /config/proxy_api.json: -------------------------------------------------------------------------------- 1 | { 2 | "apis": { 3 | "alibaba": "", 4 | "anthropic": "", 5 | "deepseek": "", 6 | "doubao": "", 7 | "google": "", 8 | "openai": "" 9 | }, 10 | "enabled": true 11 | } -------------------------------------------------------------------------------- /requirements.txt: -------------------------------------------------------------------------------- 1 | flask==3.1.0 2 | pyautogui==0.9.54 3 | pyperclip==1.8.2 4 | Pillow==11.1.0 5 | flask-socketio==5.5.1 6 | python-engineio==4.11.2 7 | python-socketio==5.12.1 8 | requests==2.32.3 9 | openai==1.61.0 10 | google-generativeai==0.7.0 11 | -------------------------------------------------------------------------------- /models/__init__.py: -------------------------------------------------------------------------------- 1 | from .base import BaseModel 2 | from .anthropic import AnthropicModel 3 | from .openai import OpenAIModel 4 | from .deepseek import DeepSeekModel 5 | from .alibaba 
import AlibabaModel 6 | from .google import GoogleModel 7 | from .doubao import DoubaoModel 8 | from .factory import ModelFactory 9 | 10 | __all__ = [ 11 | 'BaseModel', 12 | 'AnthropicModel', 13 | 'OpenAIModel', 14 | 'DeepSeekModel', 15 | 'AlibabaModel', 16 | 'GoogleModel', 17 | 'DoubaoModel', 18 | 'ModelFactory' 19 | ] 20 | -------------------------------------------------------------------------------- /models/base.py: -------------------------------------------------------------------------------- 1 | from abc import ABC, abstractmethod 2 | from typing import Generator, Any 3 | 4 | class BaseModel(ABC): 5 | def __init__(self, api_key: str, temperature: float = 0.7, system_prompt: str = None, language: str = None, api_base_url: str = None): 6 | self.api_key = api_key 7 | self.temperature = temperature 8 | self.language = language 9 | self.system_prompt = system_prompt or self.get_default_system_prompt() 10 | self.api_base_url = api_base_url 11 | 12 | @abstractmethod 13 | def analyze_image(self, image_data: str, proxies: dict = None) -> Generator[dict, None, None]: 14 | """ 15 | Analyze the given image and yield response chunks. 16 | 17 | Args: 18 | image_data: Base64 encoded image data 19 | proxies: Optional proxy configuration 20 | 21 | Yields: 22 | dict: Response chunks with status and content 23 | """ 24 | pass 25 | 26 | @abstractmethod 27 | def analyze_text(self, text: str, proxies: dict = None) -> Generator[dict, None, None]: 28 | """ 29 | Analyze the given text and yield response chunks. 
30 | 31 | Args: 32 | text: Text to analyze 33 | proxies: Optional proxy configuration 34 | 35 | Yields: 36 | dict: Response chunks with status and content 37 | """ 38 | pass 39 | 40 | def get_default_system_prompt(self) -> str: 41 | """返回默认的系统提示词,子类可覆盖但不再是必须实现的方法""" 42 | return "您是一位专业的问题解决专家。请逐步分析问题,找出问题所在,并提供详细的解决方案。始终使用用户偏好的语言回答。" 43 | 44 | @abstractmethod 45 | def get_model_identifier(self) -> str: 46 | """Return the model identifier used in API calls""" 47 | pass 48 | -------------------------------------------------------------------------------- /config/prompts.json: -------------------------------------------------------------------------------- 1 | { "ACM_hard": { 2 | "name": "ACM编程题(困难)", 3 | "content":"你是一个顶尖的算法竞赛选手 + 程序员。你的任务是接收一道 ACM / 编程题目(包含题目描述、输入输出格式、约束)并输出一份完整可运行的解法。请严格按照以下步骤:\n1. 题目复述;\n2. 复杂度与限制分析;\n3. 思路与算法设计;\n4. 伪代码 / 算法框架;\n5. 最终可运行python代码(带注释);\n6. 时间复杂度 / 空间复杂度总结 + 边界 / 特殊输入测试。输出格式必须包含这些部分,不得省略分析或直接跳到代码。", 4 | "description": "专为ACM编程竞赛题设计的提示词" 5 | }, 6 | "a_default": { 7 | "name": "默认提示词", 8 | "content": "如果给的是图片,请先识别图片上面的题目,并输出完整题干;如果给的不是图片,直接诠释一下题目。然后解决该问题,如果是编程题,请输出最终可运行代码(带注释)。", 9 | "description": "通用问题解决提示词" 10 | }, 11 | "single_choice": { 12 | "name": "单选题提示词", 13 | "content": "您是一位专业的单选题解析专家。当看到一个单选题时,请:\n1. 仔细阅读题目要求和选项\n2. 分析每个选项的正确性\n3. 明确指出正确选项\n4. 解释为什么该选项正确\n5. 简要说明其他选项错误的原因\n6. 总结相关知识点", 14 | "description": "专为单选题分析设计的提示词" 15 | }, 16 | "multiple_choice": { 17 | "name": "多选题提示词", 18 | "content": "您是一位专业的多选题解析专家。当看到一个多选题时,请:\n1. 仔细阅读题目要求和所有选项\n2. 逐一分析每个选项的正确性\n3. 明确列出所有正确选项\n4. 详细解释每个正确选项的理由\n5. 说明错误选项的问题所在\n6. 归纳总结相关知识点", 19 | "description": "专为多选题分析设计的提示词" 20 | }, 21 | "programming": { 22 | "name": "ACM编程题提示词", 23 | "content": "您是一位专业的ACM编程竞赛解题专家。当看到一个编程题时,请:\n1. 分析题目要求、输入输出格式和约束条件\n2. 确定解题思路和算法策略\n3. 分析算法复杂度\n4. 提供完整、可运行的代码实现\n5. 解释代码中的关键部分\n6. 提供一些测试用例及其输出\n7. 讨论可能的优化方向", 24 | "description": "专为ACM编程竞赛题设计的提示词" 25 | }, 26 | "pattern_reasoning": { 27 | "name": "图形推理题提示词", 28 | "content": "您是一位专业的图形推理题解析专家。当看到一个图形推理题时,请:\n1. 
观察并描述题目给出的图形序列\n2. 分析图形之间的变化规律\n3. 归纳可能的变化模式(如旋转、翻转、数量变化等)\n4. 应用发现的规律预测下一个图形\n5. 在多个选项中确定符合规律的答案\n6. 详细解释推理过程", 29 | "description": "专为图形推理题设计的提示词" 30 | }, 31 | "chart_calculation": { 32 | "name": "图表计算题提示词", 33 | "content": "您是一位专业的图表数据分析专家。当看到一个包含图表的计算题时,请:\n1. 仔细阅读并描述图表包含的信息(表格、柱状图、折线图等)\n2. 确定题目要求计算的具体内容\n3. 从图表中提取相关数据\n4. 设计合适的计算方法\n5. 进行准确的计算过程\n6. 清晰呈现计算结果\n7. 必要时解释数据的含义和趋势", 34 | "description": "专为图表数据分析和计算题设计的提示词" 35 | } 36 | } -------------------------------------------------------------------------------- /.gitignore: -------------------------------------------------------------------------------- 1 | # Dependencies 2 | node_modules/ 3 | /.pnp 4 | .pnp.js 5 | yarn.lock 6 | package-lock.json 7 | .npm 8 | .yarn-integrity 9 | 10 | # Python 11 | __pycache__/ 12 | *.py[cod] 13 | *$py.class 14 | *.so 15 | .Python 16 | env/ 17 | build/ 18 | develop-eggs/ 19 | dist/ 20 | downloads/ 21 | eggs/ 22 | .eggs/ 23 | lib/ 24 | lib64/ 25 | parts/ 26 | sdist/ 27 | var/ 28 | wheels/ 29 | *.egg-info/ 30 | .installed.cfg 31 | *.egg 32 | 33 | # Testing & Coverage 34 | /coverage 35 | .nyc_output 36 | 37 | # Production & Build 38 | /build 39 | /dist 40 | /out 41 | .next/ 42 | out/ 43 | 44 | # Development & Environment 45 | .env 46 | .env.local 47 | .env.development.local 48 | .env.test.local 49 | .env.production.local 50 | config.local.js 51 | config.dev.js 52 | 53 | # Logs 54 | logs/ 55 | *.log 56 | npm-debug.log* 57 | yarn-debug.log* 58 | yarn-error.log* 59 | 60 | # IDEs and Editors 61 | /.idea/ 62 | .project 63 | .classpath 64 | .c9/ 65 | *.launch 66 | .settings/ 67 | *.sublime-workspace 68 | .vscode/* 69 | !.vscode/settings.json 70 | !.vscode/tasks.json 71 | !.vscode/launch.json 72 | !.vscode/extensions.json 73 | 74 | # Java 75 | *.class 76 | *.war 77 | *.ear 78 | *.jar 79 | target/ 80 | 81 | # Gradle 82 | .gradle 83 | /build/ 84 | 85 | # Maven 86 | target/ 87 | pom.xml.tag 88 | pom.xml.releaseBackup 89 | pom.xml.versionsBackup 90 | pom.xml.next 91 | release.properties 92 
| dependency-reduced-pom.xml 93 | 94 | # TypeScript 95 | *.tsbuildinfo 96 | 97 | # OS Generated Files 98 | .DS_Store 99 | .DS_Store? 100 | ._* 101 | .Spotlight-V100 102 | .Trashes 103 | ehthumbs.db 104 | Thumbs.db 105 | 106 | # Backup Files 107 | *.bak 108 | *.swp 109 | *.swo 110 | *~ 111 | 112 | # Optional REPL history 113 | .node_repl_history 114 | 115 | # Media & Large Files 116 | *.mp4 117 | *.tiff 118 | *.avi 119 | *.flv 120 | *.mov 121 | *.wmv 122 | *.tgz 123 | 124 | # Optional eslint cache 125 | .eslintcache 126 | 127 | # Project specific 128 | config/update_info.json 129 | config/api_keys.json 130 | config/api_base_urls.json 131 | .venv/ 132 | venv/ 133 | 134 | # uv 135 | .python-version 136 | pyproject.toml 137 | uv.lock 138 | -------------------------------------------------------------------------------- /AGENTS.md: -------------------------------------------------------------------------------- 1 | # Repository Guidelines 2 | 3 | ## Project Structure & Module Organization 4 | Snap-Solver is a Flask web app served from `app.py`, which wires Socket.IO streaming, screenshot capture, and model dispatch. Model adapters live in `models/`, with `factory.py` loading provider metadata from `config/models.json` and creating the appropriate client (OpenAI, Anthropic, DeepSeek, Qwen, etc.). User-facing templates live under `templates/`, with shared assets in `static/`. Runtime configuration and secrets are JSON files in `config/`; treat these as local-only overrides even if sample values exist in the repo. Python dependencies are listed in `requirements.txt` (lockfile: `uv.lock`). 5 | 6 | ## Build, Test, and Development Commands 7 | - `python -m venv .venv && source .venv/bin/activate` sets up an isolated environment. 8 | - `pip install -r requirements.txt` or `uv sync` installs Flask, provider SDKs, and Socket.IO. 9 | - `python app.py` boots the development server at `http://localhost:5000` with verbose engine logs. 
10 | - `FLASK_ENV=development python app.py` enables auto-reload during active development. 11 | 12 | ## Coding Style & Naming Conventions 13 | Follow PEP 8: 4-space indentation, `snake_case` for Python functions, and descriptive class names that match provider roles (see `models/openai.py`). JSON configs use lowerCamelCase keys so the web client can consume them directly; keep that convention when adding settings. Client scripts in `static/js/` should stay modular and avoid sprawling event handlers. 14 | 15 | ## Testing Guidelines 16 | There is no automated test suite yet; whenever you add features, verify end-to-end by launching `python app.py`, triggering a screenshot from the UI, and confirming Socket.IO events stream without tracebacks. When integrating a new model, seed a temporary key in `config/api_keys.json`, exercise one request, and capture console logs before reverting secrets. If you introduce automated tests, place them in `tests/` and gate external calls behind mocks so the suite can run offline. 17 | 18 | ## Commit & Pull Request Guidelines 19 | The history favors concise, imperative commit subjects in Chinese (e.g., `修复发送按钮保存裁剪框数据`). Keep messages under 70 characters, enumerate multi-part changes in the body, and reference related issues with `#123` when applicable. Pull requests should outline the user-visible impact, note any config updates or new dependencies, attach UI screenshots for front-end tweaks, and list manual verification steps so reviewers can reproduce them quickly. 20 | 21 | ## Configuration & Security Tips 22 | Never commit real API keys—`.gitignore` already excludes `config/api_keys.json` and other volatile files, so create local copies (`config/api_keys.local.json`) for experimentation. When sharing deployment instructions, direct operators to set API credentials via environment variables or secure vaults and only populate JSON stubs during runtime startup logic. 
23 | -------------------------------------------------------------------------------- /README.md: -------------------------------------------------------------------------------- 1 |

Snap-Solver 版本

2 | 3 | 4 |

5 | 🔍 一键截屏,自动解题 - 线上考试,从未如此简单 6 |

7 | 8 |

9 | Python 10 | Flask 11 | AI 12 | License 13 |

14 | 15 | 16 |

17 | 核心特性 • 18 | 快速开始 • 19 | 新手教程 • 20 | 使用指南 • 21 | 技术架构 • 22 | 高级配置 • 23 | 常见问题 • 24 | 获取帮助 25 |

26 | 27 |
28 | 29 | 获取Release 30 | 31 |      32 | 33 | 阅读新手教程 34 | 35 |      36 | 37 | 联系我们 38 | 39 |
40 | 43 | 44 | ## 💫 项目简介 45 | 46 | **Snap-Solver** 是一个革命性的AI笔试测评工具,专为学生、考生和自学者设计。只需**按下快捷键**,即可自动截取屏幕上的任何题目,通过AI进行分析并提供详细解答。 47 | 48 | 无论是复杂的数学公式、物理难题、编程问题,还是其他学科的挑战,Snap-Solver都能提供清晰、准确、有条理的解决方案,帮助您更好地理解和掌握知识点。 49 | 50 | ## 📚 新手教程 51 | 52 | 第一次使用?按照我们的 [《新手教程》](docs/beginner-tutorial.md) 完成环境准备、模型配置和首次解题演练,全程图文指引,几分钟即可上手。 53 | 54 | ## 🔧 技术架构 55 | 56 | ```mermaid 57 | graph TD 58 | A[用户界面] --> B[Flask Web服务] 59 | B --> C{API路由} 60 | C --> D[截图服务] 61 | C --> E[OCR识别] 62 | C --> F[AI分析] 63 | E --> |Mathpix API| G[文本提取] 64 | F --> |模型选择| H1[OpenAI] 65 | F --> |模型选择| H2[Anthropic] 66 | F --> |模型选择| H3[DeepSeek] 67 | F --> |模型选择| H4[Alibaba] 68 | F --> |模型选择| H5[Google] 69 | F --> |模型选择| H6[Doubao] 70 | D --> I[Socket.IO实时通信] 71 | I --> A 72 | ``` 73 | 74 | ## ✨ 核心特性 75 | 76 | 77 | 78 | 85 | 95 | 96 | 97 | 104 | 111 | 112 | 113 | 120 | 127 | 128 |
79 |

📱 跨设备协同

80 |
    81 |
  • 一键截图:按下快捷键,即可在移动设备上查看和分析电脑屏幕
  • 82 |
  • 局域网共享:一处部署,多设备访问,提升学习效率
  • 83 |
84 |
86 |

🧠 多模型AI支持

87 |
    88 |
  • GPT 家族:OpenAI强大的推理能力
  • 89 |
  • Claude 家族:Anthropic的高级理解与解释
  • 90 |
  • DeepSeek 家族:专为中文场景优化的模型
  • 91 |
  • QVQ 和 Qwen 家族:以视觉推理闻名的国产AI
  • 92 |
  • Gemini 家族:智商130的非推理AI
  • 93 |
94 |
98 |

🔍 精准识别

99 |
    100 |
  • OCR文字识别:准确捕捉图片中的文本
  • 101 |
  • 数学公式支持:通过Mathpix精确识别复杂数学符号
  • 102 |
103 |
105 |

🌐 全球无障碍

106 |
    107 |
  • VPN代理支持:自定义代理设置,解决网络访问限制
  • 108 |
  • 多语言响应:支持定制AI回复语言
  • 109 |
110 |
114 |

💻 全平台兼容

115 |
    116 |
  • 桌面支持:Windows、macOS、Linux
  • 117 |
  • 移动访问:手机、平板通过浏览器直接使用
  • 118 |
119 |
121 |

⚙️ 高度可定制

122 |
    123 |
  • 思考深度控制:调整AI的分析深度
  • 124 |
  • 自定义提示词:针对特定学科优化提示
  • 125 |
126 |
129 | 130 | ## 🚀 快速开始 131 | 132 | ### 📋 前置要求 133 | 134 | - Python 3.x 135 | - 至少以下一个API Key: 136 | - OpenAI API Key 137 | - Anthropic API Key (推荐✅) 138 | - DeepSeek API Key 139 | - Alibaba API Key (国内用户首选) 140 | - Google API Key 141 | - Mathpix API Key (推荐OCR识别✅) 142 | 143 | ### 📥 开始使用 144 | 145 | ```bash 146 | # 启动应用 147 | python app.py 148 | ``` 149 | 150 | ### 📱 访问方式 151 | 152 | - **本机访问**:打开浏览器,访问 http://localhost:5000 153 | - **局域网设备访问**:在同一网络的任何设备上访问 `http://[电脑IP]:5000` 154 | 155 | ### 🎯 使用场景示例 156 | 157 | - **课后习题**:截取教材或作业中的难题,获取步骤详解 158 | - **编程调试**:截取代码错误信息,获取修复建议 159 | - **考试复习**:分析错题并理解解题思路 160 | - **文献研究**:截取复杂论文段落,获取简化解释 161 | 162 | ### 🧩 组件详情 163 | 164 | - **前端**:响应式HTML/CSS/JS界面,支持移动设备 165 | - **后端**:Flask + SocketIO,提供RESTful API和WebSocket 166 | - **AI接口**:多模型支持,统一接口标准 167 | - **图像处理**:高效的截图和裁剪功能 168 | 169 | ## ⚙️ 高级可调参数 170 | 171 | - **温度**:调整回答的创造性与确定性(0.1-1.0) 172 | - **最大输出Token**:控制回答长度 173 | - **推理深度**:标准模式(快速)或深度思考(详细) 174 | - **思考预算占比**:平衡思考过程与最终答案的详细程度 175 | - **系统提示词**:自定义AI的基础行为与专业领域 176 | 177 | ## ❓ 常见问题 178 | 179 |
180 | 如何获得最佳识别效果? 181 |

182 | 确保截图清晰,包含完整题目和必要上下文。对于数学公式,建议使用Mathpix OCR以获得更准确的识别结果。 183 |

184 |
185 | 186 |
187 | 无法连接到服务怎么办? 188 |

189 | 1. 检查防火墙设置是否允许5000端口
190 | 2. 确认设备在同一局域网内
191 | 3. 尝试重启应用程序
192 | 4. 查看控制台日志获取错误信息 193 |

194 |
195 | 196 |
197 | API调用失败的原因? 198 |

199 | 1. API密钥可能无效或余额不足
200 | 2. 网络连接问题,特别是国际API
201 | 3. 代理设置不正确
202 | 4. API服务可能临时不可用 203 |

204 |
205 | 206 |
207 | 如何优化AI回答质量? 208 |

209 | 1. 调整系统提示词,添加特定学科的指导
210 | 2. 根据问题复杂度选择合适的模型
211 | 3. 对于复杂题目,使用"深度思考"模式
212 | 4. 确保截取的题目包含完整信息 213 |

214 |
215 | 216 | ## 🤝 获取帮助 217 | 218 | - **代部署服务**:如果您不擅长编程,需要代部署服务,请联系 [zylanjian@outlook.com](mailto:zylanjian@outlook.com) 219 | - **问题报告**:在GitHub仓库提交Issue 220 | - **功能建议**:欢迎通过Issue或邮件提供改进建议 221 | 222 | ## 📜 开源协议 223 | 224 | 本项目采用 [Apache 2.0](LICENSE) 协议。 225 | -------------------------------------------------------------------------------- /models/baidu_ocr.py: -------------------------------------------------------------------------------- 1 | import base64 2 | import json 3 | import time 4 | import urllib.request 5 | import urllib.parse 6 | from typing import Generator, Dict, Any 7 | from .base import BaseModel 8 | 9 | class BaiduOCRModel(BaseModel): 10 | """ 11 | 百度OCR模型,用于图像文字识别 12 | """ 13 | 14 | def __init__(self, api_key: str, secret_key: str = None, temperature: float = 0.7, system_prompt: str = None): 15 | """ 16 | 初始化百度OCR模型 17 | 18 | Args: 19 | api_key: 百度API Key 20 | secret_key: 百度Secret Key(可以在api_key中用冒号分隔传入) 21 | temperature: 不用于OCR但保持BaseModel兼容性 22 | system_prompt: 不用于OCR但保持BaseModel兼容性 23 | 24 | Raises: 25 | ValueError: 如果API密钥格式无效 26 | """ 27 | super().__init__(api_key, temperature, system_prompt) 28 | 29 | # 支持两种格式:单独传递或在api_key中用冒号分隔 30 | if secret_key: 31 | self.api_key = api_key 32 | self.secret_key = secret_key 33 | else: 34 | try: 35 | self.api_key, self.secret_key = api_key.split(':') 36 | except ValueError: 37 | raise ValueError("百度OCR API密钥必须是 'API_KEY:SECRET_KEY' 格式或单独传递secret_key参数") 38 | 39 | # 百度API URLs 40 | self.token_url = "https://aip.baidubce.com/oauth/2.0/token" 41 | self.ocr_url = "https://aip.baidubce.com/rest/2.0/ocr/v1/accurate_basic" 42 | 43 | # 缓存access_token 44 | self._access_token = None 45 | self._token_expires = 0 46 | 47 | def get_access_token(self) -> str: 48 | """获取百度API的access_token""" 49 | # 检查是否需要刷新token(提前5分钟刷新) 50 | if self._access_token and time.time() < self._token_expires - 300: 51 | return self._access_token 52 | 53 | # 请求新的access_token 54 | params = { 55 | 'grant_type': 'client_credentials', 56 | 
'client_id': self.api_key, 57 | 'client_secret': self.secret_key 58 | } 59 | 60 | data = urllib.parse.urlencode(params).encode('utf-8') 61 | request = urllib.request.Request(self.token_url, data=data) 62 | request.add_header('Content-Type', 'application/x-www-form-urlencoded') 63 | 64 | try: 65 | with urllib.request.urlopen(request) as response: 66 | result = json.loads(response.read().decode('utf-8')) 67 | 68 | if 'access_token' in result: 69 | self._access_token = result['access_token'] 70 | # 设置过期时间(默认30天,但我们提前刷新) 71 | self._token_expires = time.time() + result.get('expires_in', 2592000) 72 | return self._access_token 73 | else: 74 | raise Exception(f"获取access_token失败: {result.get('error_description', '未知错误')}") 75 | 76 | except Exception as e: 77 | raise Exception(f"请求access_token失败: {str(e)}") 78 | 79 | def ocr_image(self, image_data: str) -> str: 80 | """ 81 | 对图像进行OCR识别 82 | 83 | Args: 84 | image_data: Base64编码的图像数据 85 | 86 | Returns: 87 | str: 识别出的文字内容 88 | """ 89 | access_token = self.get_access_token() 90 | 91 | # 准备请求数据 92 | params = { 93 | 'image': image_data, 94 | 'language_type': 'auto_detect', # 自动检测语言 95 | 'detect_direction': 'true', # 检测图像朝向 96 | 'probability': 'false' # 不返回置信度(减少响应大小) 97 | } 98 | 99 | data = urllib.parse.urlencode(params).encode('utf-8') 100 | url = f"{self.ocr_url}?access_token={access_token}" 101 | 102 | request = urllib.request.Request(url, data=data) 103 | request.add_header('Content-Type', 'application/x-www-form-urlencoded') 104 | 105 | try: 106 | with urllib.request.urlopen(request) as response: 107 | result = json.loads(response.read().decode('utf-8')) 108 | 109 | if 'error_code' in result: 110 | raise Exception(f"百度OCR API错误: {result.get('error_msg', '未知错误')}") 111 | 112 | # 提取识别的文字 113 | words_result = result.get('words_result', []) 114 | text_lines = [item['words'] for item in words_result] 115 | 116 | return '\n'.join(text_lines) 117 | 118 | except Exception as e: 119 | raise Exception(f"OCR识别失败: {str(e)}") 120 | 121 | 
def extract_full_text(self, image_data: str) -> str: 122 | """ 123 | 提取图像中的完整文本(与Mathpix兼容的接口) 124 | 125 | Args: 126 | image_data: Base64编码的图像数据 127 | 128 | Returns: 129 | str: 提取的文本内容 130 | """ 131 | return self.ocr_image(image_data) 132 | 133 | def analyze_image(self, image_data: str, proxies: dict = None) -> Generator[Dict[str, Any], None, None]: 134 | """ 135 | 分析图像并返回OCR结果(流式输出以保持接口一致性) 136 | 137 | Args: 138 | image_data: Base64编码的图像数据 139 | proxies: 代理配置(未使用) 140 | 141 | Yields: 142 | dict: 包含OCR结果的响应 143 | """ 144 | try: 145 | text = self.ocr_image(image_data) 146 | yield { 147 | 'status': 'completed', 148 | 'content': text, 149 | 'model': 'baidu-ocr' 150 | } 151 | except Exception as e: 152 | yield { 153 | 'status': 'error', 154 | 'content': f'OCR识别失败: {str(e)}', 155 | 'model': 'baidu-ocr' 156 | } 157 | 158 | def analyze_text(self, text: str, proxies: dict = None) -> Generator[Dict[str, Any], None, None]: 159 | """ 160 | 分析文本(OCR模型不支持文本分析) 161 | 162 | Args: 163 | text: 输入文本 164 | proxies: 代理配置(未使用) 165 | 166 | Yields: 167 | dict: 错误响应 168 | """ 169 | yield { 170 | 'status': 'error', 171 | 'content': 'OCR模型不支持文本分析功能', 172 | 'model': 'baidu-ocr' 173 | } 174 | 175 | def get_model_identifier(self) -> str: 176 | """返回模型标识符""" 177 | return "baidu-ocr" 178 | -------------------------------------------------------------------------------- /config/models.json: -------------------------------------------------------------------------------- 1 | { 2 | "providers": { 3 | "anthropic": { 4 | "name": "Anthropic", 5 | "api_key_id": "AnthropicApiKey", 6 | "class_name": "AnthropicModel" 7 | }, 8 | "openai": { 9 | "name": "OpenAI", 10 | "api_key_id": "OpenaiApiKey", 11 | "class_name": "OpenAIModel" 12 | }, 13 | "deepseek": { 14 | "name": "DeepSeek", 15 | "api_key_id": "DeepseekApiKey", 16 | "class_name": "DeepSeekModel" 17 | }, 18 | "alibaba": { 19 | "name": "Alibaba", 20 | "api_key_id": "AlibabaApiKey", 21 | "class_name": "AlibabaModel" 22 | }, 23 | "google": { 24 | "name": 
"Google", 25 | "api_key_id": "GoogleApiKey", 26 | "class_name": "GoogleModel" 27 | }, 28 | "doubao": { 29 | "name": "Doubao", 30 | "api_key_id": "DoubaoApiKey", 31 | "class_name": "DoubaoModel" 32 | } 33 | }, 34 | "models": { 35 | "claude-opus-4-20250514": { 36 | "name": "Claude 4 Opus", 37 | "provider": "anthropic", 38 | "supportsMultimodal": true, 39 | "isReasoning": true, 40 | "version": "20250514", 41 | "description": "最强大的Claude 4 Opus模型,支持图像理解和深度思考过程" 42 | }, 43 | "claude-opus-4-1-20250805": { 44 | "name": "Claude 4.1 Opus", 45 | "provider": "anthropic", 46 | "supportsMultimodal": true, 47 | "isReasoning": false, 48 | "version": "20250805", 49 | "description": "Claude Opus 4.1 最新标准模式,快速响应并支持多模态输入" 50 | }, 51 | "claude-opus-4-1-20250805-thinking": { 52 | "name": "Claude 4.1 Opus (Thinking)", 53 | "provider": "anthropic", 54 | "supportsMultimodal": true, 55 | "isReasoning": true, 56 | "version": "20250805", 57 | "description": "Claude Opus 4.1 思考模式,启用更长思考过程以提升推理质量" 58 | }, 59 | "claude-sonnet-4-20250514": { 60 | "name": "Claude 4 Sonnet", 61 | "provider": "anthropic", 62 | "supportsMultimodal": true, 63 | "isReasoning": true, 64 | "version": "20250514", 65 | "description": "高性能的Claude 4 Sonnet模型,支持图像理解和思考过程" 66 | }, 67 | "claude-sonnet-4-5-20250929": { 68 | "name": "Claude 4.5 Sonnet", 69 | "provider": "anthropic", 70 | "supportsMultimodal": true, 71 | "isReasoning": true, 72 | "version": "20250929", 73 | "description": "Claude Sonnet 4.5 版,兼具多模态理解与最新推理能力" 74 | }, 75 | "gpt-4o-2024-11-20": { 76 | "name": "GPT-4o", 77 | "provider": "openai", 78 | "supportsMultimodal": true, 79 | "isReasoning": false, 80 | "version": "2024-11-20", 81 | "description": "OpenAI的GPT-4o模型,支持图像理解" 82 | }, 83 | "gpt-5-2025-08-07": { 84 | "name": "GPT-5", 85 | "provider": "openai", 86 | "supportsMultimodal": true, 87 | "isReasoning": true, 88 | "version": "2025-08-07", 89 | "description": "OpenAI旗舰级GPT-5模型,支持多模态输入与高级推理" 90 | }, 91 | "gpt-5-1": { 92 | "name": "GPT-5.1", 93 | "provider": 
"openai", 94 | "supportsMultimodal": true, 95 | "isReasoning": true, 96 | "version": "latest", 97 | "description": "GPT-5.1 新版旗舰模型,强化长上下文与推理表现" 98 | }, 99 | "gpt-5-codex-high": { 100 | "name": "GPT Codex High", 101 | "provider": "openai", 102 | "supportsMultimodal": false, 103 | "isReasoning": true, 104 | "version": "latest", 105 | "description": "OpenAI高性能代码模型Codex High,侧重复杂代码生成与重构" 106 | }, 107 | "o3-mini": { 108 | "name": "o3-mini", 109 | "provider": "openai", 110 | "supportsMultimodal": false, 111 | "isReasoning": true, 112 | "version": "latest", 113 | "description": "OpenAI的o3-mini模型,支持图像理解和思考过程" 114 | }, 115 | "deepseek-chat": { 116 | "name": "DeepSeek-V3", 117 | "provider": "deepseek", 118 | "supportsMultimodal": false, 119 | "isReasoning": false, 120 | "version": "latest", 121 | "description": "DeepSeek最新大模型,671B MoE模型,支持60 tokens/秒的高速生成" 122 | }, 123 | "deepseek-reasoner": { 124 | "name": "DeepSeek-R1", 125 | "provider": "deepseek", 126 | "supportsMultimodal": false, 127 | "isReasoning": true, 128 | "version": "latest", 129 | "description": "DeepSeek推理模型,提供详细思考过程(仅支持文本)" 130 | }, 131 | "QVQ-Max-2025-03-25": { 132 | "name": "QVQ-Max", 133 | "provider": "alibaba", 134 | "supportsMultimodal": true, 135 | "isReasoning": true, 136 | "version": "2025-03-25", 137 | "description": "阿里巴巴通义千问-QVQ-Max版本,支持图像理解和思考过程" 138 | }, 139 | "qwen-vl-max-latest": { 140 | "name": "Qwen-VL-MAX", 141 | "provider": "alibaba", 142 | "supportsMultimodal": true, 143 | "isReasoning": false, 144 | "version": "latest", 145 | "description": "阿里通义千问VL-MAX模型,视觉理解能力最强,支持图像理解和复杂任务" 146 | }, 147 | "gemini-2.5-pro": { 148 | "name": "Gemini 2.5 Pro", 149 | "provider": "google", 150 | "supportsMultimodal": true, 151 | "isReasoning": true, 152 | "version": "latest", 153 | "description": "Google最强大的Gemini 2.5 Pro模型,支持图像理解(需要付费API密钥)" 154 | }, 155 | "gemini-2.5-flash": { 156 | "name": "Gemini 2.5 Flash", 157 | "provider": "google", 158 | "supportsMultimodal": true, 159 | "isReasoning": false, 160 | 
"version": "latest", 161 | "description": "Google最新的Gemini 2.5 Flash模型,支持图像理解,速度更快,性能更好" 162 | }, 163 | "gemini-2.0-flash": { 164 | "name": "Gemini 2.0 Flash", 165 | "provider": "google", 166 | "supportsMultimodal": true, 167 | "isReasoning": false, 168 | "version": "latest", 169 | "description": "Google更快速的Gemini 2.0 Flash模型,支持图像理解,有免费配额" 170 | }, 171 | "gemini-3-pro": { 172 | "name": "Gemini 3 Pro", 173 | "provider": "google", 174 | "supportsMultimodal": true, 175 | "isReasoning": true, 176 | "version": "latest", 177 | "description": "Google Gemini 3 Pro 顶级推理模型,面向复杂多模态任务" 178 | }, 179 | "doubao-seed-1-6-250615": { 180 | "name": "Doubao-Seed-1.6", 181 | "provider": "doubao", 182 | "supportsMultimodal": true, 183 | "isReasoning": true, 184 | "version": "latest", 185 | "description": "支持auto/thinking/non-thinking三种思考模式、支持多模态、256K长上下文" 186 | } 187 | } 188 | } 189 | -------------------------------------------------------------------------------- /docs/beginner-tutorial.md: -------------------------------------------------------------------------------- 1 | # Snap-Solver 零基础上手教程 2 | 3 | 这篇教程面向第一次接触编程或 Python 的朋友,手把手带你从安装环境开始,直到在电脑和手机上顺利使用 Snap-Solver 完成题目分析。如果你在任何步骤遇到困难,建议按章节逐步检查,或对照文末的常见问题排查。 4 | 5 | --- 6 | 7 | ## 1. Snap-Solver 是什么? 8 | 9 | Snap-Solver 是一个本地运行的截屏解题工具,主要功能包括: 10 | - 一键截取电脑屏幕的题目图片; 11 | - 自动调用 OCR(文字识别)和多种大模型,给出详细解析; 12 | - 支持在手机、平板等局域网设备上实时查看结果; 13 | - 可以按需配置代理、中转 API、自定义提示词等高级选项。 14 | 15 | 整个应用基于 Python + Flask,只要能启动一个 Python 程序,就可以完全离线地掌握它的运行方式。 16 | 17 | --- 18 | 19 | ## 2. 准备清单 20 | 21 | - 一台可以联网的 Windows、macOS 或 Linux 电脑; 22 | - 至少一个可用的模型 API Key(推荐准备 2~3 个,方便切换): 23 | - OpenAI、Anthropic、DeepSeek、阿里灵积(Qwen)、Google、Mathpix 等任一即可; 24 | - 约 2 GB 可用硬盘空间; 25 | - 基本的文本编辑器(Windows 自带记事本即可,推荐使用 VS Code / Notepad++ 等更易读的工具)。 26 | 27 | > **提示**:Snap-Solver 不依赖显卡或 GPU,普通轻薄本即可顺利运行。 28 | 29 | --- 30 | 31 | ## 3. 第一次打开命令行 32 | 33 | Snap-Solver 需要在命令行里执行几条简单的指令。命令行是一个黑色(或白色)窗口,通过输入文字来让电脑完成任务。不同系统打开方式略有区别: 34 | 35 | ### 3.1 Windows 36 | 1. 
同时按下键盘 `Win` 键(左下角带 Windows 徽标的键)+ `S`,输入 `cmd` 或 `terminal`。 37 | 2. 选择 **命令提示符(Command Prompt)** 或 **Windows Terminal**,回车打开。 38 | 3. 复制命令时,可在窗口上点击右键 → 「粘贴」,或使用快捷键 `Ctrl + V`。 39 | 4. 想切换到某个文件夹(例如 `D:\Snap-Solver`),输入: 40 | ```powershell 41 | cd /d D:\Snap-Solver 42 | ``` 43 | 5. 查看当前文件夹内的内容: 44 | ```powershell 45 | dir 46 | ``` 47 | 48 | ### 3.2 macOS 49 | 1. 同时按下 `Command + Space` 呼出 Spotlight,输入 `Terminal` 并回车。 50 | 2. 在终端中,复制粘贴使用常规快捷键 `Command + C` / `Command + V`。 51 | 3. 切换到下载好的项目目录(例如在「下载」文件夹内): 52 | ```bash 53 | cd ~/Downloads/Snap-Solver 54 | ``` 55 | 4. 查看当前文件夹内容: 56 | ```bash 57 | ls 58 | ``` 59 | 60 | ### 3.3 Linux(Ubuntu 示例) 61 | 1. 同时按 `Ctrl + Alt + T` 打开终端。 62 | 2. 切换到项目目录: 63 | ```bash 64 | cd ~/Snap-Solver 65 | ``` 66 | 3. 查看内容: 67 | ```bash 68 | ls 69 | ``` 70 | 71 | > **常用命令速记** 72 | > - `cd 路径`:进入某个文件夹(路径中有空格请用双引号包住,例如 `cd "C:\My Folder"`)。 73 | > - `dir`(Windows)/`ls`(macOS、Linux):查看当前文件夹下的文件。 74 | > - 键盘方向键 ↑ 可以快速调出上一条命令,避免重复输入。 75 | 76 | --- 77 | 78 | ## 4. 安装 Python 3 79 | 80 | Snap-Solver 基于 Python 3.9+,推荐使用 3.10 或 3.11 版本。 81 | 82 | ### 4.1 Windows 83 | 1. 打开浏览器访问:https://www.python.org/downloads/ 84 | 2. 点击最新的稳定版(例如 `Python 3.11.x`)的 **Download Windows installer (64-bit)**。 85 | 3. 双击下载的安装包,记得在第一步勾选 **Add Python to PATH**。 86 | 4. 按提示完成安装。 87 | 5. 打开命令行窗口,输入: 88 | ```powershell 89 | python --version 90 | pip --version 91 | ``` 92 | 若能看到版本号(如 `Python 3.11.7`),说明安装成功。 93 | 94 | ### 4.2 macOS 95 | 1. 访问 https://www.python.org/downloads/mac-osx/ 下载 `macOS 64-bit universal2 installer`。 96 | 2. 双击 `.pkg` 文件按提示安装。 97 | 3. 打开终端输入: 98 | ```bash 99 | python3 --version 100 | pip3 --version 101 | ``` 102 | 如果输出版本号,表示安装完成。后续命令中的 `python`、`pip` 均可替换为 `python3`、`pip3`。 103 | 104 | ### 4.3 Linux(Ubuntu 示例) 105 | ```bash 106 | sudo apt update 107 | sudo apt install python3 python3-venv python3-pip -y 108 | python3 --version 109 | pip3 --version 110 | ``` 111 | 112 | --- 113 | 114 | ## 5. 
(可选)安装 Git 115 | 116 | Git 方便后续更新项目,也可以用来下载代码。 117 | - Windows:https://git-scm.com/download/win 118 | - macOS:在终端输入 `xcode-select --install` 或从 https://git-scm.com/download/mac 获取 119 | - Linux:`sudo apt install git -y` 120 | 121 | 如果暂时不想安装 Git,也可以稍后直接下载压缩包。 122 | 123 | --- 124 | 125 | ## 6. 获取 Snap-Solver 项目代码 126 | 127 | 任选其一: 128 | 1. **使用 Git 克隆(推荐)** 129 | ```bash 130 | git clone https://github.com/Zippland/Snap-Solver.git 131 | cd Snap-Solver 132 | ``` 133 | 2. **下载压缩包** 134 | - 打开项目主页:https://github.com/Zippland/Snap-Solver 135 | - 点击右侧 `Release` → `Source code (zip)` 136 | - 解压缩后,将文件夹重命名为 `Snap-Solver` 并记住路径 137 | 138 | 后续步骤默认你已经位于项目根目录(包含 `app.py`、`requirements.txt` 的那个文件夹)。如果忘记位置,可再次查看文件夹并使用 `cd` 进入。 139 | 140 | --- 141 | 142 | ## 7. 创建虚拟环境并安装依赖 143 | 144 | 虚拟环境可以把项目依赖和系统环境隔离,避免冲突。 145 | 146 | ### 7.1 创建虚拟环境 147 | 148 | - **Windows PowerShell** 149 | ```powershell 150 | python -m venv .venv 151 | .\.venv\Scripts\Activate 152 | ``` 153 | - **macOS / Linux** 154 | ```bash 155 | python3 -m venv .venv 156 | source .venv/bin/activate 157 | ``` 158 | 159 | 激活成功后,命令行前面会出现 `(.venv)` 前缀。若你关闭了命令行窗口,需要重新进入项目目录并再次执行激活命令。 160 | 161 | ### 7.2 安装依赖 162 | 163 | ```bash 164 | pip install --upgrade pip 165 | pip install -r requirements.txt 166 | ``` 167 | 168 | 常见依赖(Flask、PyAutoGUI、Pillow 等)都会自动安装。首次安装可能用时 1~5 分钟,请耐心等待。 169 | 170 | > **如果安装失败**:请检查网络、切换镜像源或参考文末常见问题。 171 | 172 | --- 173 | 174 | ## 8. 首次启动与访问 175 | 176 | 1. 保证虚拟环境处于激活状态。 177 | 2. 在项目根目录执行: 178 | ```bash 179 | python app.py 180 | ``` 181 | 3. 终端中会看到 Flask/SocketIO 的日志,最后出现 `Running on http://127.0.0.1:5000` 表示启动成功。 182 | 4. 若需要在手机/平板访问,请在**同一局域网下**输入 `http://<电脑IP>:5000`。电脑 IP 可在终端日志中看到,例如 `http://192.168.1.8:5000`(可能是别的,每次打开都会刷新)。 183 | 184 | > **暂停服务**:在终端按 `Ctrl + C` 即可停止运行。再次启动时,只需重新激活虚拟环境并执行 `python app.py`。 185 | 186 | --- 187 | 188 | ## 9. 
配置 API 密钥与基础设置 189 | 190 | 启动网页后,点击右上角的齿轮图标进入「设置」面板,建议先完成以下几项: 191 | 192 | ### 9.1 填写模型 API Key 193 | 194 | - 根据你手上的 Key,将对应值填入设置页面的输入框中; 195 | - 常用字段: 196 | - `OpenaiApiKey`:OpenAI 模型(如 GPT-4o、o3-mini) 197 | - `AnthropicApiKey`:Claude 系列 198 | - `DeepseekApiKey`:DeepSeek 199 | - `AlibabaApiKey`:通义千问 / Qwen / QVQ 200 | - `GoogleApiKey`:Gemini 系列 201 | - `MathpixAppId` & `MathpixAppKey`:用于高精度公式识别 202 | - 点击保存后,信息会写入 `config/api_keys.json` 方便下次启动直接读取。 203 | 204 | ### 9.2 设置代理与中转(可选) 205 | 206 | - 若你需要走代理或企业中转通道,可在设置面板中开启代理选项; 207 | - 对应的 JSON 文件是 `config/proxy_api.json`,可直接编辑来指定各模型的自定义 `base_url`; 208 | - 修改后需重启应用才能生效。 209 | 210 | ### 9.3 如何确认 VPN/代理端口 211 | 212 | 很多加速器或 VPN 客户端会在本地启动一个「系统代理」服务(常见端口如 `7890`、`1080` 等)。具体端口位置通常可以通过以下途径找到: 213 | - 打开 VPN 客户端的设置页面,寻找「本地监听端口」「HTTP(S) 代理」「SOCKS 代理」等字样; 214 | - Windows 用户也可以在「设置 → 网络和 Internet → 代理」里查看「使用代理服务器」的地址和端口; 215 | - macOS 用户可在「系统设置 → 网络 → Wi-Fi(或以太网)→ 详情 → 代理」里查看勾选的服务和端口; 216 | - 高级用户可以在命令行里运行 `netstat -ano | findstr 127.0.0.1`(Windows)或 `lsof -iTCP -sTCP:LISTEN | grep 127.0.0.1`(macOS/Linux)确认本地监听端口。 217 | 218 | 拿到端口后,在 Snap-Solver 的代理设置中填入对应的地址(通常是 `127.0.0.1:<端口>`),就能让模型请求走 VPN。不同工具的界面名称可能略有差异,重点是找出「本地监听地址 + 端口号」这一对信息。 219 | 220 | --- 221 | 222 | ## 10. 获取常用 API Key(详细教程) 223 | 224 | API Key 相当于你在各大模型平台上的「门票」。不同平台的获取流程不同,以下列出了最常用的几个来源。申请过程中务必保护好个人隐私与账号安全,切勿向他人泄露密钥。 225 | 226 | ### 10.1 OpenAI(GPT-4o / o3-mini 等) 227 | 1. 打开 https://platform.openai.com/ 并使用邮箱或第三方账号注册 / 登录。 228 | 2. 首次使用需完成实名和支付方式绑定(可选择信用卡或预付费余额)。 229 | 3. 登录后点击右上角头像 → `View API keys`。 230 | 4. 点击 `Create new secret key`,复制生成的密钥(形如 `sk-...`)。 231 | 5. 将该密钥粘贴到 Snap-Solver 的 `OpenaiApiKey` 输入框,并妥善保存。 232 | 233 | ### 10.2 Anthropic(Claude 系列) 234 | 1. 打开 https://console.anthropic.com/ 并注册账号。 235 | 2. 按提示完成手机号验证和支付方式绑定(部分国家需排队开通)。 236 | 3. 登录后进入 `API Keys` 页面,点击 `Create Key`。 237 | 4. 复制生成的密钥(形如 `sk-ant-...`),粘贴到 Snap-Solver 的 `AnthropicApiKey`。 238 | 239 | ### 10.3 DeepSeek 240 | 1. 访问 https://platform.deepseek.com/ 并注册登录。 241 | 2. 
如果需要人民币支付,可在「账号设置」绑定支付宝;海外用户可使用信用卡。 242 | 3. 进入 `API Keys`,点击 `新建密钥`。 243 | 4. 复制生成的密钥(形如 `sk-xxx`),填入 `DeepseekApiKey`。 244 | 245 | ### 10.4 阿里云通义千问 / Qwen / QVQ 246 | 1. 打开 https://dashscope.console.aliyun.com/ 并使用阿里云账号登录。 247 | 2. 进入「API Key 管理」页面,点击 `创建 API Key`。 248 | 3. 复制密钥(形如 `sk-yourkey`)填入 `AlibabaApiKey`。 249 | 4. 如需开通收费模型,请在「计费与配额」中先完成实名认证并开通付费策略。 250 | 251 | ### 10.5 Google Gemini 252 | 1. 前往 https://ai.google.dev/ 并登录 Google 账号。 253 | 2. 点击右上角 `Get API key`。 254 | 3. 选择或创建项目,生成新的 API Key。 255 | 4. 将密钥填入 `GoogleApiKey`。 256 | 257 | ### 10.6 Mathpix(高精度公式识别) 258 | 1. 访问 https://dashboard.mathpix.com/ 注册账号。 259 | 2. 完成邮箱验证后,在侧边栏找到 `API Keys`。 260 | 3. 创建新的 App,复制 `App ID` 和 `App Key`。 261 | 4. 分别填入 Snap-Solver 的 `MathpixAppId` 与 `MathpixAppKey` 字段。 262 | 263 | > **安全小贴士** 264 | > - API Key 和密码一样重要,泄露后他人可能代你调用接口、消耗额度。 265 | > - 建议为不同用途创建多个密钥,定期检查和撤销不用的密钥。 266 | > - 如果平台支持额度上限、IP 白名单等功能,可以酌情启用以降低风险。 267 | 268 | --- 269 | 270 | ## 11. 完成第一次题目解析 271 | 272 | 1. 确认右上角的「连接状态」显示为绿色的「已连接」。 273 | 2. 点击顶部的「开始截图」,按提示框拖拽需要识别的题目区域。 274 | 3. 截图完成后,预览区会显示图片,并出现「发送至 AI」或「提取文本」按钮: 275 | - **发送至 AI**:直接让所选模型解析图像; 276 | - **提取文本**:先做 OCR,把文字复制出来,再发送给模型。 277 | 4. 在右侧的「分析结果」面板可以查看: 278 | - AI 的思考过程(可折叠); 279 | - 最终解答、代码或步骤; 280 | - 中间日志与计时。 281 | 5. 若需要改用其他模型,重新打开设置面板即可实时切换。 282 | 283 | > **小技巧**:长按或双击分析结果中的文本,可快速复制粘贴;终端会实时输出请求日志,方便排查问题。 284 | 285 | --- 286 | 287 | ## 12. 常见问题速查 288 | 289 | - **`python` 命令找不到**:在 Windows 上打开新的终端后请重启电脑,或使用 `py` 命令;macOS/Linux 请尝试 `python3`。 290 | - **`pip install` 超时**:可以临时使用清华源 `pip install -i https://pypi.tuna.tsinghua.edu.cn/simple -r requirements.txt`。 291 | - **启动后网页打不开**:确认终端没有报错;检查防火墙、端口占用,或尝试 `http://127.0.0.1:5000`。 292 | - **截图没反应**:Windows/macOS 需要授权「辅助功能 / 截屏」权限给 Python;macOS 在「系统设置 - 隐私与安全」中勾选 `python` 或终端应用。 293 | - **模型报 401/403**:检查 API Key 是否正确、账号余额是否充足,必要时在设置里更换模型或填入自定义域名。 294 | - **手机访问失败**:确保手机和电脑在同一个 Wi-Fi 下,且电脑未开启 VPN 导致局域网隔离。 295 | 296 | --- 297 | 298 | ## 13. 
进一步探索 299 | 300 | - `config/models.json`:自定义展示在下拉框的模型列表,包含模型名称、供应商、能力标签等,可按需添加。 301 | - `config/prompts.json`:定义默认 prompt,可根据学科优化。 302 | - 更新项目:如果是 Git 克隆,执行 `git pull`;压缩包用户可重新下载覆盖。 303 | 304 | 完成以上步骤后,你已经具备运行和日常使用 Snap-Solver 的全部基础。如果你有新的需求或遇到无法解决的问题,可以先查看 README 或在 Issues 中搜索 / 提问。祝你学习顺利,刷题提效! 305 | -------------------------------------------------------------------------------- /models/openai.py: -------------------------------------------------------------------------------- 1 | import os 2 | from typing import Generator, Dict, Optional 3 | from openai import OpenAI 4 | from .base import BaseModel 5 | 6 | class OpenAIModel(BaseModel): 7 | def __init__(self, api_key, temperature=0.7, system_prompt=None, language=None, api_base_url=None, model_identifier=None): 8 | super().__init__(api_key, temperature, system_prompt, language) 9 | # 设置API基础URL,默认为OpenAI官方API 10 | self.api_base_url = api_base_url 11 | # 允许从外部配置显式指定模型标识符 12 | self.model_identifier = model_identifier or "gpt-4o-2024-11-20" 13 | 14 | def get_default_system_prompt(self) -> str: 15 | return """You are an expert at analyzing questions and providing detailed solutions. When presented with an image of a question: 16 | 1. First read and understand the question carefully 17 | 2. Break down the key components of the question 18 | 3. Provide a clear, step-by-step solution 19 | 4. If relevant, explain any concepts or theories involved 20 | 5. 
If there are multiple approaches, explain the most efficient one first""" 21 | 22 | def get_model_identifier(self) -> str: 23 | return self.model_identifier 24 | 25 | def analyze_text(self, text: str, proxies: dict = None) -> Generator[dict, None, None]: 26 | """Stream GPT-4o's response for text analysis""" 27 | try: 28 | # Initial status 29 | yield {"status": "started", "content": ""} 30 | 31 | # Save original environment state 32 | original_env = { 33 | 'http_proxy': os.environ.get('http_proxy'), 34 | 'https_proxy': os.environ.get('https_proxy') 35 | } 36 | 37 | try: 38 | # Set proxy environment variables if provided 39 | if proxies: 40 | if 'http' in proxies: 41 | os.environ['http_proxy'] = proxies['http'] 42 | if 'https' in proxies: 43 | os.environ['https_proxy'] = proxies['https'] 44 | 45 | # Initialize OpenAI client with base_url if provided 46 | if self.api_base_url: 47 | client = OpenAI(api_key=self.api_key, base_url=self.api_base_url) 48 | else: 49 | client = OpenAI(api_key=self.api_key) 50 | 51 | # Prepare messages 52 | messages = [ 53 | { 54 | "role": "system", 55 | "content": self.system_prompt 56 | }, 57 | { 58 | "role": "user", 59 | "content": text 60 | } 61 | ] 62 | 63 | response = client.chat.completions.create( 64 | model=self.get_model_identifier(), 65 | messages=messages, 66 | temperature=self.temperature, 67 | stream=True, 68 | max_tokens=4000 69 | ) 70 | 71 | # 使用累积缓冲区 72 | response_buffer = "" 73 | 74 | for chunk in response: 75 | if hasattr(chunk.choices[0].delta, 'content'): 76 | content = chunk.choices[0].delta.content 77 | if content: 78 | # 累积内容 79 | response_buffer += content 80 | 81 | # 只在累积一定数量的字符或遇到句子结束标记时才发送 82 | if len(content) >= 10 or content.endswith(('.', '!', '?', '。', '!', '?', '\n')): 83 | yield { 84 | "status": "streaming", 85 | "content": response_buffer 86 | } 87 | 88 | # 确保发送最终完整内容 89 | if response_buffer: 90 | yield { 91 | "status": "streaming", 92 | "content": response_buffer 93 | } 94 | 95 | # Send completion status 96 
| yield { 97 | "status": "completed", 98 | "content": response_buffer 99 | } 100 | 101 | finally: 102 | # Restore original environment state 103 | for key, value in original_env.items(): 104 | if value is None: 105 | if key in os.environ: 106 | del os.environ[key] 107 | else: 108 | os.environ[key] = value 109 | 110 | except Exception as e: 111 | yield { 112 | "status": "error", 113 | "error": str(e) 114 | } 115 | 116 | def analyze_image(self, image_data: str, proxies: dict = None) -> Generator[dict, None, None]: 117 | """Stream GPT-4o's response for image analysis""" 118 | try: 119 | # Initial status 120 | yield {"status": "started", "content": ""} 121 | 122 | # Save original environment state 123 | original_env = { 124 | 'http_proxy': os.environ.get('http_proxy'), 125 | 'https_proxy': os.environ.get('https_proxy') 126 | } 127 | 128 | try: 129 | # Set proxy environment variables if provided 130 | if proxies: 131 | if 'http' in proxies: 132 | os.environ['http_proxy'] = proxies['http'] 133 | if 'https' in proxies: 134 | os.environ['https_proxy'] = proxies['https'] 135 | 136 | # Initialize OpenAI client with base_url if provided 137 | if self.api_base_url: 138 | client = OpenAI(api_key=self.api_key, base_url=self.api_base_url) 139 | else: 140 | client = OpenAI(api_key=self.api_key) 141 | 142 | # 使用系统提供的系统提示词,不再自动添加语言指令 143 | system_prompt = self.system_prompt 144 | 145 | # Prepare messages with image 146 | messages = [ 147 | { 148 | "role": "system", 149 | "content": system_prompt 150 | }, 151 | { 152 | "role": "user", 153 | "content": [ 154 | { 155 | "type": "image_url", 156 | "image_url": { 157 | "url": f"data:image/jpeg;base64,{image_data}" 158 | } 159 | }, 160 | { 161 | "type": "text", 162 | "text": "Please analyze this image and provide a detailed solution." 
163 | } 164 | ] 165 | } 166 | ] 167 | 168 | response = client.chat.completions.create( 169 | model=self.get_model_identifier(), 170 | messages=messages, 171 | temperature=self.temperature, 172 | stream=True, 173 | max_tokens=4000 174 | ) 175 | 176 | # 使用累积缓冲区 177 | response_buffer = "" 178 | 179 | for chunk in response: 180 | if hasattr(chunk.choices[0].delta, 'content'): 181 | content = chunk.choices[0].delta.content 182 | if content: 183 | # 累积内容 184 | response_buffer += content 185 | 186 | # 只在累积一定数量的字符或遇到句子结束标记时才发送 187 | if len(content) >= 10 or content.endswith(('.', '!', '?', '。', '!', '?', '\n')): 188 | yield { 189 | "status": "streaming", 190 | "content": response_buffer 191 | } 192 | 193 | # 确保发送最终完整内容 194 | if response_buffer: 195 | yield { 196 | "status": "streaming", 197 | "content": response_buffer 198 | } 199 | 200 | # Send completion status 201 | yield { 202 | "status": "completed", 203 | "content": response_buffer 204 | } 205 | 206 | finally: 207 | # Restore original environment state 208 | for key, value in original_env.items(): 209 | if value is None: 210 | if key in os.environ: 211 | del os.environ[key] 212 | else: 213 | os.environ[key] = value 214 | 215 | except Exception as e: 216 | yield { 217 | "status": "error", 218 | "error": str(e) 219 | } 220 | -------------------------------------------------------------------------------- /models/google.py: -------------------------------------------------------------------------------- 1 | import json 2 | import os 3 | import base64 4 | from typing import Generator, Dict, Any, Optional, List 5 | import google.generativeai as genai 6 | from .base import BaseModel 7 | 8 | class GoogleModel(BaseModel): 9 | """ 10 | Google Gemini API模型实现类 11 | 支持Gemini 2.5 Pro等模型,可处理文本和图像输入 12 | """ 13 | 14 | def __init__(self, api_key: str, temperature: float = 0.7, system_prompt: str = None, language: str = None, model_name: str = None, api_base_url: str = None): 15 | """ 16 | 初始化Google模型 17 | 18 | Args: 19 | api_key: 
Google API密钥 20 | temperature: 生成温度 21 | system_prompt: 系统提示词 22 | language: 首选语言 23 | model_name: 指定具体模型名称,如不指定则使用默认值 24 | api_base_url: API基础URL,用于设置自定义API端点 25 | """ 26 | super().__init__(api_key, temperature, system_prompt, language) 27 | self.model_name = model_name or self.get_model_identifier() 28 | self.max_tokens = 8192 # 默认最大输出token数 29 | self.api_base_url = api_base_url 30 | 31 | # 配置Google API 32 | if api_base_url: 33 | # 配置中转API - 使用环境变量方式 34 | # 移除末尾的斜杠以避免重复路径问题 35 | clean_base_url = api_base_url.rstrip('/') 36 | # 设置环境变量来指定API端点 37 | os.environ['GOOGLE_AI_API_ENDPOINT'] = clean_base_url 38 | genai.configure(api_key=api_key) 39 | else: 40 | # 使用默认API端点 41 | # 清除可能存在的自定义端点环境变量 42 | if 'GOOGLE_AI_API_ENDPOINT' in os.environ: 43 | del os.environ['GOOGLE_AI_API_ENDPOINT'] 44 | genai.configure(api_key=api_key) 45 | 46 | def get_default_system_prompt(self) -> str: 47 | return """You are an expert at analyzing questions and providing detailed solutions. When presented with an image of a question: 48 | 1. First read and understand the question carefully 49 | 2. Break down the key components of the question 50 | 3. Provide a clear, step-by-step solution 51 | 4. If relevant, explain any concepts or theories involved 52 | 5. 
If there are multiple approaches, explain the most efficient one first""" 53 | 54 | def get_model_identifier(self) -> str: 55 | """返回默认的模型标识符""" 56 | return "gemini-2.0-flash" # 使用有免费配额的模型作为默认值 57 | 58 | def analyze_text(self, text: str, proxies: dict = None) -> Generator[dict, None, None]: 59 | """流式生成文本响应""" 60 | try: 61 | yield {"status": "started"} 62 | 63 | # 设置环境变量代理(如果提供) 64 | original_proxies = None 65 | if proxies: 66 | original_proxies = { 67 | 'http_proxy': os.environ.get('http_proxy'), 68 | 'https_proxy': os.environ.get('https_proxy') 69 | } 70 | if 'http' in proxies: 71 | os.environ['http_proxy'] = proxies['http'] 72 | if 'https' in proxies: 73 | os.environ['https_proxy'] = proxies['https'] 74 | 75 | try: 76 | # 初始化模型 77 | model = genai.GenerativeModel(self.model_name) 78 | 79 | # 获取最大输出Token设置 80 | max_tokens = self.max_tokens if hasattr(self, 'max_tokens') else 8192 81 | 82 | # 创建配置参数 83 | generation_config = { 84 | 'temperature': self.temperature, 85 | 'max_output_tokens': max_tokens, 86 | 'top_p': 0.95, 87 | 'top_k': 64, 88 | } 89 | 90 | # 构建提示 91 | prompt_parts = [] 92 | 93 | # 添加系统提示词 94 | if self.system_prompt: 95 | prompt_parts.append(self.system_prompt) 96 | 97 | # 添加用户查询 98 | if self.language and self.language != 'auto': 99 | prompt_parts.append(f"请使用{self.language}回答以下问题: {text}") 100 | else: 101 | prompt_parts.append(text) 102 | 103 | # 初始化响应缓冲区 104 | response_buffer = "" 105 | 106 | # 流式生成响应 107 | response = model.generate_content( 108 | prompt_parts, 109 | generation_config=generation_config, 110 | stream=True 111 | ) 112 | 113 | for chunk in response: 114 | if not chunk.text: 115 | continue 116 | 117 | # 累积响应文本 118 | response_buffer += chunk.text 119 | 120 | # 发送响应进度 121 | if len(chunk.text) >= 10 or chunk.text.endswith(('.', '!', '?', '。', '!', '?', '\n')): 122 | yield { 123 | "status": "streaming", 124 | "content": response_buffer 125 | } 126 | 127 | # 确保发送完整的最终内容 128 | yield { 129 | "status": "completed", 130 | "content": 
response_buffer 131 | } 132 | 133 | finally: 134 | # 恢复原始代理设置 135 | if original_proxies: 136 | for key, value in original_proxies.items(): 137 | if value is None: 138 | if key in os.environ: 139 | del os.environ[key] 140 | else: 141 | os.environ[key] = value 142 | 143 | except Exception as e: 144 | yield { 145 | "status": "error", 146 | "error": f"Gemini API错误: {str(e)}" 147 | } 148 | 149 | def analyze_image(self, image_data: str, proxies: dict = None) -> Generator[dict, None, None]: 150 | """分析图像并流式生成响应""" 151 | try: 152 | yield {"status": "started"} 153 | 154 | # 设置环境变量代理(如果提供) 155 | original_proxies = None 156 | if proxies: 157 | original_proxies = { 158 | 'http_proxy': os.environ.get('http_proxy'), 159 | 'https_proxy': os.environ.get('https_proxy') 160 | } 161 | if 'http' in proxies: 162 | os.environ['http_proxy'] = proxies['http'] 163 | if 'https' in proxies: 164 | os.environ['https_proxy'] = proxies['https'] 165 | 166 | try: 167 | # 初始化模型 168 | model = genai.GenerativeModel(self.model_name) 169 | 170 | # 获取最大输出Token设置 171 | max_tokens = self.max_tokens if hasattr(self, 'max_tokens') else 8192 172 | 173 | # 创建配置参数 174 | generation_config = { 175 | 'temperature': self.temperature, 176 | 'max_output_tokens': max_tokens, 177 | 'top_p': 0.95, 178 | 'top_k': 64, 179 | } 180 | 181 | # 构建提示词 182 | prompt_parts = [] 183 | 184 | # 添加系统提示词 185 | if self.system_prompt: 186 | prompt_parts.append(self.system_prompt) 187 | 188 | # 添加默认图像分析指令 189 | if self.language and self.language != 'auto': 190 | prompt_parts.append(f"请使用{self.language}分析这张图片并提供详细解答。") 191 | else: 192 | prompt_parts.append("请分析这张图片并提供详细解答。") 193 | 194 | # 处理图像数据 195 | if image_data.startswith('data:image'): 196 | # 如果是data URI,提取base64部分 197 | image_data = image_data.split(',', 1)[1] 198 | 199 | # 使用genai的特定方法处理图像 200 | image_part = { 201 | "mime_type": "image/jpeg", 202 | "data": base64.b64decode(image_data) 203 | } 204 | prompt_parts.append(image_part) 205 | 206 | # 初始化响应缓冲区 207 | response_buffer = "" 
208 | 209 | # 流式生成响应 210 | response = model.generate_content( 211 | prompt_parts, 212 | generation_config=generation_config, 213 | stream=True 214 | ) 215 | 216 | for chunk in response: 217 | if not chunk.text: 218 | continue 219 | 220 | # 累积响应文本 221 | response_buffer += chunk.text 222 | 223 | # 发送响应进度 224 | if len(chunk.text) >= 10 or chunk.text.endswith(('.', '!', '?', '。', '!', '?', '\n')): 225 | yield { 226 | "status": "streaming", 227 | "content": response_buffer 228 | } 229 | 230 | # 确保发送完整的最终内容 231 | yield { 232 | "status": "completed", 233 | "content": response_buffer 234 | } 235 | 236 | finally: 237 | # 恢复原始代理设置 238 | if original_proxies: 239 | for key, value in original_proxies.items(): 240 | if value is None: 241 | if key in os.environ: 242 | del os.environ[key] 243 | else: 244 | os.environ[key] = value 245 | 246 | except Exception as e: 247 | yield { 248 | "status": "error", 249 | "error": f"Gemini图像分析错误: {str(e)}" 250 | } -------------------------------------------------------------------------------- /static/js/ui.js: -------------------------------------------------------------------------------- 1 | class UIManager { 2 | constructor() { 3 | // 延迟初始化,确保DOM已加载 4 | if (document.readyState === 'loading') { 5 | document.addEventListener('DOMContentLoaded', () => this.init()); 6 | } else { 7 | // 如果DOM已经加载完成,则立即初始化 8 | this.init(); 9 | } 10 | } 11 | 12 | init() { 13 | console.log('初始化UI管理器...'); 14 | // UI elements 15 | this.settingsPanel = document.getElementById('settingsPanel'); 16 | this.settingsToggle = document.getElementById('settingsToggle'); 17 | this.closeSettings = document.getElementById('closeSettings'); 18 | this.themeToggle = document.getElementById('themeToggle'); 19 | this.toastContainer = document.getElementById('toastContainer'); 20 | 21 | // 验证关键元素是否存在 22 | if (!this.themeToggle) { 23 | console.error('主题切换按钮未找到!'); 24 | return; 25 | } 26 | 27 | if (!this.toastContainer) { 28 | console.error('Toast容器未找到!'); 29 | // 尝试创建Toast容器 30 | 
this.toastContainer = this.createToastContainer(); 31 | } 32 | 33 | // Check for preferred color scheme 34 | this.checkPreferredColorScheme(); 35 | 36 | // Initialize event listeners 37 | this.setupEventListeners(); 38 | 39 | console.log('UI管理器初始化完成'); 40 | } 41 | 42 | createToastContainer() { 43 | console.log('创建Toast容器'); 44 | const container = document.createElement('div'); 45 | container.id = 'toastContainer'; 46 | container.className = 'toast-container'; 47 | document.body.appendChild(container); 48 | return container; 49 | } 50 | 51 | checkPreferredColorScheme() { 52 | const savedTheme = localStorage.getItem('theme'); 53 | const prefersDark = window.matchMedia('(prefers-color-scheme: dark)'); 54 | 55 | if (savedTheme) { 56 | this.setTheme(savedTheme === 'dark'); 57 | } else { 58 | this.setTheme(prefersDark.matches); 59 | } 60 | 61 | prefersDark.addEventListener('change', (e) => this.setTheme(e.matches)); 62 | } 63 | 64 | setTheme(isDark) { 65 | try { 66 | document.documentElement.setAttribute('data-theme', isDark ? 'dark' : 'light'); 67 | if (this.themeToggle) { 68 | this.themeToggle.innerHTML = ``; 69 | } 70 | localStorage.setItem('theme', isDark ? 'dark' : 'light'); 71 | console.log(`主题已切换为: ${isDark ? 
    /**
     * Show a toast notification.
     * @param {string} message  Text to display.
     * @param {string} type     One of 'success', 'error', 'info', 'warning'.
     * @param {number} displayTime  Milliseconds to show; -1 keeps the toast
     *                              until manually closed; undefined picks a
     *                              default based on type and message length.
     * @returns {HTMLElement|null} The created (or reused) toast element.
     */
    showToast(message, type = 'success', displayTime) {
        try {
            if (!message) {
                console.warn('尝试显示空消息');
                message = '';
            }

            // Recreate the container if it disappeared (e.g. DOM was rebuilt).
            if (!this.toastContainer) {
                console.error('Toast容器不存在,正在创建新容器');
                this.toastContainer = this.createToastContainer();
                if (!this.toastContainer) {
                    console.error('无法创建Toast容器,放弃显示消息');
                    return null;
                }
            }

            // De-duplicate: if a toast with identical text is already visible,
            // return it instead of stacking another copy.
            try {
                const existingToasts = this.toastContainer.querySelectorAll('.toast');
                for (const existingToast of existingToasts) {
                    try {
                        const spanElement = existingToast.querySelector('span');
                        if (spanElement && spanElement.textContent === message) {
                            return existingToast;
                        }
                    } catch (e) {
                        console.warn('检查现有toast时出错:', e);
                        // Keep scanning the remaining toasts.
                    }
                }
            } catch (e) {
                console.warn('查询现有toast时出错:', e);
                // Fall through and create a new toast anyway.
            }

            const toast = document.createElement('div');
            toast.className = `toast ${type}`;

            // Pick an icon name by message type.
            let icon = 'check-circle';
            if (type === 'error') icon = 'exclamation-circle';
            else if (type === 'warning') icon = 'exclamation-triangle';
            else if (type === 'info') icon = 'info-circle';

            // NOTE(review): the icon markup inside this template appears to
            // have been stripped during extraction — confirm against the
            // original file before relying on this literal.
            toast.innerHTML = `
                
                ${message}
            `;

            // Persistent toasts (-1) get an explicit close button.
            if (displayTime === -1) {
                const closeButton = document.createElement('button');
                closeButton.className = 'toast-close';
                closeButton.innerHTML = '';
                closeButton.addEventListener('click', (e) => {
                    this.hideToast(toast);
                });
                toast.appendChild(closeButton);
                toast.classList.add('persistent');
            }

            this.toastContainer.appendChild(toast);

            // Auto-hide unless persistent; default duration depends on type
            // and message length.
            if (displayTime !== -1) {
                if (displayTime === undefined) {
                    displayTime = message === '截图成功' ? 1500 :
                                 type === 'error' ? 5000 :
                                 message.length > 50 ? 4000 : 3000;
                }

                setTimeout(() => {
                    this.hideToast(toast);
                }, displayTime);
            }

            return toast;
        } catch (error) {
            console.error('显示Toast消息时出错:', error);
            return null;
        }
    }

    /**
     * Fade out and remove a toast element.
     * @param {HTMLElement} toast  The toast to hide.
     */
    hideToast(toast) {
        if (!toast || !toast.parentNode) return;

        toast.style.opacity = '0';
        // Wait for the CSS fade transition before removing from the DOM.
        setTimeout(() => {
            if (toast.parentNode) {
                toast.remove();
            }
        }, 300);
    }

    // Close every floating panel (currently only the settings panel).
    closeAllPanels() {
        if (this.settingsPanel) {
            this.settingsPanel.classList.remove('active');
        }
    }

    // Hide the settings panel.
    hideSettingsPanel() {
        if (this.settingsPanel) {
            this.settingsPanel.classList.remove('active');
        }
    }

    // Toggle the settings panel open/closed.
    toggleSettingsPanel() {
        if (this.settingsPanel) {
            this.settingsPanel.classList.toggle('active');
        }
    }

    // Close the settings panel (alias of hideSettingsPanel, kept for callers).
    closeSettingsPanel() {
        if (this.settingsPanel) {
            this.settingsPanel.classList.remove('active');
        }
    }

    // Close the settings panel when a click lands outside both the panel
    // and its toggle button.
    checkClickOutsideSettings(e) {
        if (this.settingsPanel &&
            !this.settingsPanel.contains(e.target) &&
            !e.target.closest('#settingsToggle')) {
            this.settingsPanel.classList.remove('active');
        }
    }

    setupEventListeners() {
        // Abort if required controls are missing (defensive: partial DOM).
        if (!this.settingsToggle || !this.closeSettings || !this.themeToggle) {
            console.error('无法设置事件监听器:一些UI元素未找到');
            return;
        }

        // Settings panel open/close.
        this.settingsToggle.addEventListener('click', () => {
            this.closeAllPanels();
            this.settingsPanel.classList.toggle('active');
        });

        this.closeSettings.addEventListener('click', () => {
            this.settingsPanel.classList.remove('active');
        });

        // Theme toggle flips between light and dark.
        this.themeToggle.addEventListener('click', () => {
            try {
                const currentTheme = document.documentElement.getAttribute('data-theme');
                console.log('当前主题:', currentTheme);
                this.setTheme(currentTheme !== 'dark');
            } catch (error) {
                console.error('切换主题时出错:', error);
            }
        });

        // Clicking anywhere outside the settings panel closes it.
        document.addEventListener('click', (e) => {
            this.checkClickOutsideSettings(e);
        });
    }
}

// Expose the class globally so other scripts can instantiate it.
window.UIManager = UIManager;

// Instantiate the singleton once the DOM is ready (or immediately if it
// already is).
if (document.readyState === 'loading') {
    document.addEventListener('DOMContentLoaded', () => {
        window.uiManager = new UIManager();
    });
} else {
    window.uiManager = new UIManager();
}

// Global helper: show a toast via the singleton.
window.showToast = (message, type) => {
    if (window.uiManager) {
        return window.uiManager.showToast(message, type);
    } else {
        console.error('UI管理器未初始化,无法显示Toast');
        return null;
    }
};

// Global helper: close all panels via the singleton.
window.closeAllPanels = () => {
    if (window.uiManager) {
        window.uiManager.closeAllPanels();
    } else {
        console.error('UI管理器未初始化,无法关闭面板');
    }
};
class ModelFactory:
    """Factory that creates and registers AI model backends.

    Model metadata (provider, capabilities, display name) is loaded from
    ``config/models.json`` by :meth:`initialize`.  Two OCR-only tools
    (Mathpix and Baidu OCR) are always registered on top of the configured
    models and are hidden from the regular model listings.
    """

    # model_id -> {'class', 'provider_id', 'is_multimodal', 'is_reasoning',
    #              'display_name', 'description', optional 'is_ocr_only'}
    _models: Dict[str, Dict[str, Any]] = {}
    # provider_id -> model class dynamically imported from this package
    _class_map: Dict[str, "Type[BaseModel]"] = {}

    @classmethod
    def initialize(cls):
        """Load model definitions from ``config/models.json``.

        Falls back to :meth:`_initialize_defaults` (OCR tools only) when
        the configuration cannot be read or parsed.
        """
        try:
            config_path = os.path.join(os.path.dirname(__file__), '..', 'config', 'models.json')
            with open(config_path, 'r', encoding='utf-8') as f:
                config = json.load(f)

            # Resolve each provider's implementation class by importing the
            # sibling module named after the provider id (e.g. ".openai").
            for provider_id, provider_info in config.get('providers', {}).items():
                class_name = provider_info.get('class_name')
                if class_name:
                    module = importlib.import_module(f'.{provider_id.lower()}', package=__package__)
                    cls._class_map[provider_id] = getattr(module, class_name)

            # Register every configured model whose provider resolved.
            for model_id, model_info in config.get('models', {}).items():
                provider_id = model_info.get('provider')
                if provider_id and provider_id in cls._class_map:
                    cls._models[model_id] = {
                        'class': cls._class_map[provider_id],
                        'provider_id': provider_id,
                        'is_multimodal': model_info.get('supportsMultimodal', False),
                        'is_reasoning': model_info.get('isReasoning', False),
                        'display_name': model_info.get('name', model_id),
                        'description': model_info.get('description', ''),
                    }

            # The OCR tools are not defined in the config file.
            cls._register_ocr_tools()

            print(f"已从配置加载 {len(cls._models)} 个模型")
        except Exception as e:
            print(f"加载模型配置失败: {str(e)}")
            cls._initialize_defaults()

    @classmethod
    def _register_ocr_tools(cls):
        """Register the built-in OCR-only tools (Mathpix and Baidu OCR).

        Each tool is registered independently so a failure with one does
        not prevent the other from loading.
        """
        try:
            cls._models['mathpix'] = {
                'class': MathpixModel,
                'is_multimodal': True,
                'is_reasoning': False,
                'display_name': 'Mathpix OCR',
                'description': '数学公式识别工具,适用于复杂数学内容',
                'is_ocr_only': True,
            }
        except Exception as e:
            print(f"无法加载Mathpix OCR工具: {str(e)}")

        try:
            cls._models['baidu-ocr'] = {
                'class': BaiduOCRModel,
                'is_multimodal': True,
                'is_reasoning': False,
                'display_name': '百度OCR',
                'description': '通用文字识别工具,支持中文识别',
                'is_ocr_only': True,
            }
        except Exception as e:
            print(f"无法加载百度OCR工具: {str(e)}")

    @classmethod
    def _initialize_defaults(cls):
        """Fallback used when the configuration cannot be loaded.

        No provider models are hard-coded; only the built-in OCR tools
        remain available.
        """
        print("配置加载失败,使用空模型列表")
        cls._models = {}
        cls._register_ocr_tools()

    @classmethod
    def create_model(cls, model_name: str, api_key: str, temperature: float = 0.7,
                     system_prompt: Optional[str] = None, language: Optional[str] = None,
                     api_base_url: Optional[str] = None) -> "BaseModel":
        """
        Create a model instance based on the model name.

        Args:
            model_name: The identifier for the model
            api_key: The API key for the model service
            temperature: The temperature to use for generation
            system_prompt: The system prompt to use
            language: The preferred language for responses
            api_base_url: The base URL for API requests

        Returns:
            A model instance

        Raises:
            ValueError: If ``model_name`` is not a registered model.
        """
        if model_name not in cls._models:
            raise ValueError(f"Unknown model: {model_name}")

        model_info = cls._models[model_name]
        kwargs = cls._build_constructor_kwargs(
            model_name, model_info.get('provider_id'), api_key,
            temperature, system_prompt, language, api_base_url)
        return model_info['class'](**kwargs)

    @classmethod
    def _build_constructor_kwargs(cls, model_name: str, provider_id: Optional[str],
                                  api_key: str, temperature: float,
                                  system_prompt: Optional[str], language: Optional[str],
                                  api_base_url: Optional[str]) -> Dict[str, Any]:
        """Build the constructor arguments expected by each backend.

        The branch order mirrors the historical dispatch exactly: the
        provider id is checked for OpenAI first, then the model name is
        matched by substring for the remaining providers.
        """
        kwargs: Dict[str, Any] = {
            'api_key': api_key,
            'temperature': temperature,
            'system_prompt': system_prompt,
        }
        name = model_name.lower()

        if provider_id == 'openai':
            # OpenAI backends take the concrete model id as model_identifier.
            kwargs.update(language=language, api_base_url=api_base_url,
                          model_identifier=model_name)
        elif 'deepseek' in name:
            # DeepSeek needs the concrete model name forwarded.
            kwargs.update(language=language, model_name=model_name,
                          api_base_url=api_base_url)
        elif 'qwen' in name or 'qvq' in name or 'alibaba' in name:
            # Alibaba/DashScope models take a model name but no base URL.
            kwargs.update(language=language, model_name=model_name)
        elif 'gemini' in name or 'google' in name:
            kwargs.update(language=language, model_name=model_name,
                          api_base_url=api_base_url)
        elif 'doubao' in name:
            kwargs.update(language=language, model_name=model_name,
                          api_base_url=api_base_url)
        elif model_name in ('mathpix', 'baidu-ocr'):
            # OCR tools take neither a language nor a base URL.  For Baidu
            # OCR the api_key may carry the "API_KEY:SECRET_KEY" format.
            pass
        elif 'claude' in name or 'anthropic' in name:
            # Anthropic backends take the concrete model id as model_identifier.
            kwargs.update(language=language, api_base_url=api_base_url,
                          model_identifier=model_name)
        else:
            # Any other backend gets only the standard parameters.
            kwargs.update(language=language, api_base_url=api_base_url)
        return kwargs

    @classmethod
    def get_available_models(cls) -> list[Dict[str, Any]]:
        """Return metadata for all selectable models (OCR-only tools excluded)."""
        return [
            {
                'id': model_id,
                'display_name': info.get('display_name', model_id),
                'description': info.get('description', ''),
                'is_multimodal': info.get('is_multimodal', False),
                'is_reasoning': info.get('is_reasoning', False),
            }
            for model_id, info in cls._models.items()
            if not info.get('is_ocr_only', False)
        ]

    @classmethod
    def get_model_ids(cls) -> list[str]:
        """Return the identifiers of all selectable models (OCR tools excluded)."""
        return [model_id for model_id, info in cls._models.items()
                if not info.get('is_ocr_only', False)]

    @classmethod
    def is_multimodal(cls, model_name: str) -> bool:
        """Return True if the model accepts image input; False for unknown models."""
        return cls._models.get(model_name, {}).get('is_multimodal', False)

    @classmethod
    def is_reasoning(cls, model_name: str) -> bool:
        """Return True if the model exposes a reasoning stream; False for unknown models."""
        return cls._models.get(model_name, {}).get('is_reasoning', False)

    @classmethod
    def get_model_display_name(cls, model_name: str) -> str:
        """Return the human-readable model name, falling back to the raw id."""
        return cls._models.get(model_name, {}).get('display_name', model_name)

    @classmethod
    def register_model(cls, model_name: str, model_class: "Type[BaseModel]",
                       is_multimodal: bool = False, is_reasoning: bool = False,
                       display_name: Optional[str] = None, description: Optional[str] = None) -> None:
        """
        Register a new model type with the factory.

        Args:
            model_name: The identifier for the model
            model_class: The model class to register
            is_multimodal: Whether the model supports image input
            is_reasoning: Whether the model provides reasoning process
            display_name: Human-readable name for the model
            description: Description of the model
        """
        cls._models[model_name] = {
            'class': model_class,
            'is_multimodal': is_multimodal,
            'is_reasoning': is_reasoning,
            'display_name': display_name or model_name,
            'description': description or '',
        }
14 | 15 | "Legal Entity" shall mean the union of the acting entity and all 16 | other entities that control, are controlled by, or are under common 17 | control with that entity. For the purposes of this definition, 18 | "control" means (i) the power, direct or indirect, to cause the 19 | direction or management of such entity, whether by contract or 20 | otherwise, or (ii) ownership of fifty percent (50%) or more of the 21 | outstanding shares, or (iii) beneficial ownership of such entity. 22 | 23 | "You" (or "Your") shall mean an individual or Legal Entity 24 | exercising permissions granted by this License. 25 | 26 | "Source" form shall mean the preferred form for making modifications, 27 | including but not limited to software source code, documentation 28 | source, and configuration files. 29 | 30 | "Object" form shall mean any form resulting from mechanical 31 | transformation or translation of a Source form, including but 32 | not limited to compiled object code, generated documentation, 33 | and conversions to other media types. 34 | 35 | "Work" shall mean the work of authorship, whether in Source or 36 | Object form, made available under the License, as indicated by a 37 | copyright notice that is included in or attached to the work 38 | (an example is provided in the Appendix below). 39 | 40 | "Derivative Works" shall mean any work, whether in Source or Object 41 | form, that is based on (or derived from) the Work and for which the 42 | editorial revisions, annotations, elaborations, or other modifications 43 | represent, as a whole, an original work of authorship. For the purposes 44 | of this License, Derivative Works shall not include works that remain 45 | separable from, or merely link (or bind by name) to the interfaces of, 46 | the Work and Derivative Works thereof. 
47 | 48 | "Contribution" shall mean any work of authorship, including 49 | the original version of the Work and any modifications or additions 50 | to that Work or Derivative Works thereof, that is intentionally 51 | submitted to Licensor for inclusion in the Work by the copyright owner 52 | or by an individual or Legal Entity authorized to submit on behalf of 53 | the copyright owner. For the purposes of this definition, "submitted" 54 | means any form of electronic, verbal, or written communication sent 55 | to the Licensor or its representatives, including but not limited to 56 | communication on electronic mailing lists, source code control systems, 57 | and issue tracking systems that are managed by, or on behalf of, the 58 | Licensor for the purpose of discussing and improving the Work, but 59 | excluding communication that is conspicuously marked or otherwise 60 | designated in writing by the copyright owner as "Not a Contribution." 61 | 62 | "Contributor" shall mean Licensor and any individual or Legal Entity 63 | on behalf of whom a Contribution has been received by Licensor and 64 | subsequently incorporated within the Work. 65 | 66 | 2. Grant of Copyright License. Subject to the terms and conditions of 67 | this License, each Contributor hereby grants to You a perpetual, 68 | worldwide, non-exclusive, no-charge, royalty-free, irrevocable 69 | copyright license to reproduce, prepare Derivative Works of, 70 | publicly display, publicly perform, sublicense, and distribute the 71 | Work and such Derivative Works in Source or Object form. 72 | 73 | 3. Grant of Patent License. 
Subject to the terms and conditions of 74 | this License, each Contributor hereby grants to You a perpetual, 75 | worldwide, non-exclusive, no-charge, royalty-free, irrevocable 76 | (except as stated in this section) patent license to make, have made, 77 | use, offer to sell, sell, import, and otherwise transfer the Work, 78 | where such license applies only to those patent claims licensable 79 | by such Contributor that are necessarily infringed by their 80 | Contribution(s) alone or by combination of their Contribution(s) 81 | with the Work to which such Contribution(s) was submitted. If You 82 | institute patent litigation against any entity (including a 83 | cross-claim or counterclaim in a lawsuit) alleging that the Work 84 | or a Contribution incorporated within the Work constitutes direct 85 | or contributory patent infringement, then any patent licenses 86 | granted to You under this License for that Work shall terminate 87 | as of the date such litigation is filed. 88 | 89 | 4. Redistribution. 
You may reproduce and distribute copies of the 90 | Work or Derivative Works thereof in any medium, with or without 91 | modifications, and in Source or Object form, provided that You 92 | meet the following conditions: 93 | 94 | (a) You must give any other recipients of the Work or 95 | Derivative Works a copy of this License; and 96 | 97 | (b) You must cause any modified files to carry prominent notices 98 | stating that You changed the files; and 99 | 100 | (c) You must retain, in the Source form of any Derivative Works 101 | that You distribute, all copyright, patent, trademark, and 102 | attribution notices from the Source form of the Work, 103 | excluding those notices that do not pertain to any part of 104 | the Derivative Works; and 105 | 106 | (d) If the Work includes a "NOTICE" text file as part of its 107 | distribution, then any Derivative Works that You distribute must 108 | include a readable copy of the attribution notices contained 109 | within such NOTICE file, excluding those notices that do not 110 | pertain to any part of the Derivative Works, in at least one 111 | of the following places: within a NOTICE text file distributed 112 | as part of the Derivative Works; within the Source form or 113 | documentation, if provided along with the Derivative Works; or, 114 | within a display generated by the Derivative Works, if and 115 | wherever such third-party notices normally appear. The contents 116 | of the NOTICE file are for informational purposes only and 117 | do not modify the License. You may add Your own attribution 118 | notices within Derivative Works that You distribute, alongside 119 | or as an addendum to the NOTICE text from the Work, provided 120 | that such additional attribution notices cannot be construed 121 | as modifying the License. 
122 | 123 | You may add Your own copyright statement to Your modifications and 124 | may provide additional or different license terms and conditions 125 | for use, reproduction, or distribution of Your modifications, or 126 | for any such Derivative Works as a whole, provided Your use, 127 | reproduction, and distribution of the Work otherwise complies with 128 | the conditions stated in this License. 129 | 130 | 5. Submission of Contributions. Unless You explicitly state otherwise, 131 | any Contribution intentionally submitted for inclusion in the Work 132 | by You to the Licensor shall be under the terms and conditions of 133 | this License, without any additional terms or conditions. 134 | Notwithstanding the above, nothing herein shall supersede or modify 135 | the terms of any separate license agreement you may have executed 136 | with Licensor regarding such Contributions. 137 | 138 | 6. Trademarks. This License does not grant permission to use the trade 139 | names, trademarks, service marks, or product names of the Licensor, 140 | except as required for reasonable and customary use in describing the 141 | origin of the Work and reproducing the content of the NOTICE file. 142 | 143 | 7. Disclaimer of Warranty. Unless required by applicable law or 144 | agreed to in writing, Licensor provides the Work (and each 145 | Contributor provides its Contributions) on an "AS IS" BASIS, 146 | WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or 147 | implied, including, without limitation, any warranties or conditions 148 | of TITLE, NON-INFRINGEMENT, MERCHANTABILITY, or FITNESS FOR A 149 | PARTICULAR PURPOSE. You are solely responsible for determining the 150 | appropriateness of using or redistributing the Work and assume any 151 | risks associated with Your exercise of permissions under this License. 152 | 153 | 8. Limitation of Liability. 
In no event and under no legal theory, 154 | whether in tort (including negligence), contract, or otherwise, 155 | unless required by applicable law (such as deliberate and grossly 156 | negligent acts) or agreed to in writing, shall any Contributor be 157 | liable to You for damages, including any direct, indirect, special, 158 | incidental, or consequential damages of any character arising as a 159 | result of this License or out of the use or inability to use the 160 | Work (including but not limited to damages for loss of goodwill, 161 | work stoppage, computer failure or malfunction, or any and all 162 | other commercial damages or losses), even if such Contributor 163 | has been advised of the possibility of such damages. 164 | 165 | 9. Accepting Warranty or Additional Liability. While redistributing 166 | the Work or Derivative Works thereof, You may choose to offer, 167 | and charge a fee for, acceptance of support, warranty, indemnity, 168 | or other liability obligations and/or rights consistent with this 169 | License. However, in accepting such obligations, You may act only 170 | on Your own behalf and on Your sole responsibility, not on behalf 171 | of any other Contributor, and only if You agree to indemnify, 172 | defend, and hold each Contributor harmless for any liability 173 | incurred by, or claims asserted against, such Contributor by reason 174 | of your accepting any such warranty or additional liability. 175 | 176 | END OF TERMS AND CONDITIONS 177 | 178 | APPENDIX: How to apply the Apache License to your work. 179 | 180 | To apply the Apache License to your work, attach the following 181 | boilerplate notice, with the fields enclosed by brackets "[]" 182 | replaced with your own identifying information. (Don't include 183 | the brackets!) The text should be enclosed in the appropriate 184 | comment syntax for the file format. 
import os
from typing import Generator, Dict, Optional, Any

from openai import OpenAI

from .base import BaseModel


class AlibabaModel(BaseModel):
    """Alibaba DashScope backend for QVQ (reasoning) and Qwen-VL (vision) models.

    Talks to DashScope's OpenAI-compatible endpoint and streams results as
    a sequence of status dicts (``started`` / ``reasoning`` /
    ``reasoning_complete`` / ``streaming`` / ``completed`` / ``error``)
    consumed by the rest of the application.
    """

    # DashScope's OpenAI-compatible endpoint.  This is fixed; the
    # api_base_url constructor argument is kept only for signature parity
    # with the other backends and is not used for requests.
    _BASE_URL = "https://dashscope.aliyuncs.com/compatible-mode/v1"

    def __init__(self, api_key: str, temperature: float = 0.7, system_prompt: str = None,
                 language: str = None, model_name: str = None, api_base_url: str = None):
        # model_name must be set before super().__init__ runs, because the
        # base constructor calls get_default_system_prompt(), which keys
        # off the model name.
        self.model_name = model_name if model_name else "QVQ-Max-2025-03-25"
        print(f"初始化阿里巴巴模型: {self.model_name}")
        super().__init__(api_key, temperature, system_prompt, language)
        self.api_base_url = api_base_url  # stored but unused; see _BASE_URL

    def get_default_system_prompt(self) -> str:
        """Return a model-appropriate default system prompt (Qwen-VL vs QVQ)."""
        if self.model_name and "qwen-vl" in self.model_name:
            return """你是通义千问VL视觉语言助手,擅长图像理解、文字识别、内容分析和创作。请根据用户提供的图像:
1. 仔细阅读并理解问题
2. 分析问题的关键组成部分
3. 提供清晰的、逐步的解决方案
4. 如果相关,解释涉及的概念或理论
5. 如果有多种解决方法,先解释最高效的方法"""
        # QVQ models use the original prompt.
        return """你是一位专业的问题分析与解答助手。当看到一个问题图片时,请:
1. 仔细阅读并理解问题
2. 分析问题的关键组成部分
3. 提供清晰的、逐步的解决方案
4. 如果相关,解释涉及的概念或理论
5. 如果有多种解决方法,先解释最高效的方法"""

    def get_model_identifier(self) -> str:
        """Map the configured model name to the DashScope API identifier."""
        # Exact mapping from model ids to DashScope API identifiers.
        model_mapping = {
            "QVQ-Max-2025-03-25": "qvq-max",
            "qwen-vl-max-latest": "qwen-vl-max",  # corrected API identifier
        }

        print(f"模型名称: {self.model_name}")

        model_id = model_mapping.get(self.model_name)
        if model_id:
            print(f"从映射表中获取到模型标识符: {model_id}")
            return model_id

        name = (self.model_name or "").lower()

        # No exact match: fall back to prefix/substring heuristics.
        if self.model_name and "qwen-vl" in name:
            if "max" in name:
                print(f"识别为qwen-vl-max模型")
                return "qwen-vl-max"
            elif "plus" in name:
                print(f"识别为qwen-vl-plus模型")
                return "qwen-vl-plus"
            elif "lite" in name:
                print(f"识别为qwen-vl-lite模型")
                return "qwen-vl-lite"
            print(f"默认使用qwen-vl-max模型")
            return "qwen-vl-max"  # default to the strongest variant

        if self.model_name and ("qvq" in name or "alibaba" in name):
            print(f"识别为QVQ模型,使用qvq-max")
            return "qvq-max"

        # Last-resort default.
        print(f"警告:无法识别的模型名称 {self.model_name},默认使用qvq-max")
        return "qvq-max"

    def analyze_text(self, text: str, proxies: dict = None) -> Generator[dict, None, None]:
        """Stream the model's response for a text-only prompt."""
        messages = [
            {
                "role": "system",
                "content": [{"type": "text", "text": self.system_prompt}]
            },
            {
                "role": "user",
                "content": [{"type": "text", "text": text}]
            }
        ]
        yield from self._stream_chat(messages, proxies, log_label="分析文本")

    def analyze_image(self, image_data: str, proxies: dict = None) -> Generator[dict, None, None]:
        """Stream the model's response for a base64-encoded JPEG image."""
        messages = [
            {
                "role": "system",
                "content": [{"type": "text", "text": self.system_prompt}]
            },
            {
                "role": "user",
                "content": [
                    {
                        "type": "image_url",
                        "image_url": {
                            "url": f"data:image/jpeg;base64,{image_data}"
                        }
                    },
                    {
                        "type": "text",
                        "text": "请分析这个图片并提供详细的解答。"
                    }
                ]
            }
        ]
        yield from self._stream_chat(messages, proxies, log_label="分析图像")

    def _stream_chat(self, messages: list, proxies: Optional[dict],
                     log_label: str) -> Generator[dict, None, None]:
        """Shared streaming loop for text and image analysis.

        Yields status dicts: ``started``, incremental ``reasoning`` chunks
        (QVQ only), ``reasoning_complete`` once the answer begins, cumulative
        ``streaming`` chunks, a final ``completed`` dict, or ``error`` on
        any failure.  Proxy environment variables are set for the duration
        of the request and always restored afterwards.
        """
        try:
            yield {"status": "started", "content": ""}

            # Snapshot the proxy environment so it can be restored exactly.
            original_env = {
                'http_proxy': os.environ.get('http_proxy'),
                'https_proxy': os.environ.get('https_proxy')
            }

            try:
                if proxies:
                    if 'http' in proxies:
                        os.environ['http_proxy'] = proxies['http']
                    if 'https' in proxies:
                        os.environ['https_proxy'] = proxies['https']

                # OpenAI-compatible client pointed at DashScope.
                client = OpenAI(
                    api_key=self.api_key,
                    base_url=self._BASE_URL
                )

                response = client.chat.completions.create(
                    model=self.get_model_identifier(),
                    messages=messages,
                    temperature=self.temperature,
                    stream=True,
                    max_tokens=self._get_max_tokens()
                )

                reasoning_content = ""
                answer_content = ""
                is_answering = False

                # Qwen-VL models do not emit a reasoning_content stream.
                is_qwen_vl = "qwen-vl" in self.get_model_identifier().lower()
                print(f"{log_label}使用模型标识符: {self.get_model_identifier()}, 是否为千问VL模型: {is_qwen_vl}")

                for chunk in response:
                    if not chunk.choices:
                        continue

                    delta = chunk.choices[0].delta

                    # Reasoning stream (QVQ models only).
                    if not is_qwen_vl and hasattr(delta, 'reasoning_content') and delta.reasoning_content is not None:
                        reasoning_content += delta.reasoning_content
                        yield {
                            "status": "reasoning",
                            "content": reasoning_content,
                            "is_reasoning": True
                        }
                    elif delta.content != "":
                        # First answer token: flush the full reasoning once.
                        if not is_answering and not is_qwen_vl:
                            is_answering = True
                            if reasoning_content:
                                yield {
                                    "status": "reasoning_complete",
                                    "content": reasoning_content,
                                    "is_reasoning": True
                                }

                        answer_content += delta.content
                        yield {
                            "status": "streaming",
                            "content": answer_content
                        }

                # Emit the final accumulated answer.
                if answer_content:
                    yield {
                        "status": "completed",
                        "content": answer_content
                    }

            finally:
                # Restore the original proxy environment exactly.
                for key, value in original_env.items():
                    if value is None:
                        os.environ.pop(key, None)
                    else:
                        os.environ[key] = value

        except Exception as e:
            yield {
                "status": "error",
                "error": str(e)
            }

    def _get_max_tokens(self) -> int:
        """Return an appropriate max_tokens value for the active model."""
        if "qwen-vl" in self.get_model_identifier():
            return 2000  # Qwen-VL caps at 2048; leave some headroom
        # QVQ or other models; a max_tokens attribute is optional.
        return self.max_tokens if hasattr(self, 'max_tokens') and self.max_tokens else 4000
如果有多种方法,优先解释最有效的方法""" 39 | 40 | def get_model_identifier(self) -> str: 41 | """返回默认的模型标识符""" 42 | return "doubao-seed-1-6-250615" # Doubao-Seed-1.6 43 | 44 | def get_actual_model_name(self) -> str: 45 | """根据配置的模型名称返回实际的API调用标识符""" 46 | # 豆包API的实际模型名称映射 47 | model_mapping = { 48 | "doubao-seed-1-6-250615": "doubao-seed-1-6-250615" 49 | } 50 | 51 | return model_mapping.get(self.model_name, "doubao-seed-1-6-250615") 52 | 53 | def analyze_text(self, text: str, proxies: dict = None) -> Generator[dict, None, None]: 54 | """流式生成文本响应""" 55 | try: 56 | yield {"status": "started"} 57 | 58 | # 设置环境变量代理(如果提供) 59 | original_proxies = None 60 | if proxies: 61 | original_proxies = { 62 | 'http_proxy': os.environ.get('http_proxy'), 63 | 'https_proxy': os.environ.get('https_proxy') 64 | } 65 | if 'http' in proxies: 66 | os.environ['http_proxy'] = proxies['http'] 67 | if 'https' in proxies: 68 | os.environ['https_proxy'] = proxies['https'] 69 | 70 | try: 71 | # 构建请求头 72 | headers = { 73 | "Authorization": f"Bearer {self.api_key}", 74 | "Content-Type": "application/json" 75 | } 76 | 77 | # 构建消息 - 添加系统提示词 78 | messages = [] 79 | 80 | # 添加系统提示词 81 | if self.system_prompt: 82 | messages.append({ 83 | "role": "system", 84 | "content": self.system_prompt 85 | }) 86 | 87 | # 添加用户查询 88 | user_content = text 89 | if self.language and self.language != 'auto': 90 | user_content = f"请使用{self.language}回答以下问题: {text}" 91 | 92 | messages.append({ 93 | "role": "user", 94 | "content": user_content 95 | }) 96 | 97 | # 处理推理配置 98 | thinking = { 99 | "type": "auto" # 默认值 100 | } 101 | 102 | if hasattr(self, 'reasoning_config') and self.reasoning_config: 103 | # 从reasoning_config中获取thinking_mode 104 | thinking_mode = self.reasoning_config.get('thinking_mode', "auto") 105 | thinking = { 106 | "type": thinking_mode 107 | } 108 | 109 | # 构建请求数据 110 | data = { 111 | "model": self.get_actual_model_name(), 112 | "messages": messages, 113 | "thinking": thinking, 114 | "temperature": self.temperature, 115 | 
"max_tokens": self.max_tokens, 116 | "stream": True 117 | } 118 | 119 | # 发送流式请求 120 | response = requests.post( 121 | f"{self.base_url}/chat/completions", 122 | headers=headers, 123 | json=data, 124 | stream=True, 125 | proxies=proxies if proxies else None, 126 | timeout=60 127 | ) 128 | 129 | if response.status_code != 200: 130 | error_text = response.text 131 | raise Exception(f"HTTP {response.status_code}: {error_text}") 132 | 133 | response.raise_for_status() 134 | 135 | # 初始化响应缓冲区 136 | response_buffer = "" 137 | 138 | # 处理流式响应 139 | for line in response.iter_lines(): 140 | if not line: 141 | continue 142 | 143 | line = line.decode('utf-8') 144 | if not line.startswith('data: '): 145 | continue 146 | 147 | line = line[6:] # 移除 'data: ' 前缀 148 | 149 | if line == '[DONE]': 150 | break 151 | 152 | try: 153 | chunk_data = json.loads(line) 154 | choices = chunk_data.get('choices', []) 155 | 156 | if choices and len(choices) > 0: 157 | delta = choices[0].get('delta', {}) 158 | content = delta.get('content', '') 159 | 160 | if content: 161 | response_buffer += content 162 | 163 | # 发送响应进度 164 | yield { 165 | "status": "streaming", 166 | "content": response_buffer 167 | } 168 | 169 | except json.JSONDecodeError: 170 | continue 171 | 172 | # 确保发送完整的最终内容 173 | yield { 174 | "status": "completed", 175 | "content": response_buffer 176 | } 177 | 178 | finally: 179 | # 恢复原始代理设置 180 | if original_proxies: 181 | for key, value in original_proxies.items(): 182 | if value is None: 183 | if key in os.environ: 184 | del os.environ[key] 185 | else: 186 | os.environ[key] = value 187 | 188 | except Exception as e: 189 | yield { 190 | "status": "error", 191 | "error": f"豆包API错误: {str(e)}" 192 | } 193 | 194 | def analyze_image(self, image_data: str, proxies: dict = None) -> Generator[dict, None, None]: 195 | """分析图像并流式生成响应""" 196 | try: 197 | yield {"status": "started"} 198 | 199 | # 设置环境变量代理(如果提供) 200 | original_proxies = None 201 | if proxies: 202 | original_proxies = { 203 | 
'http_proxy': os.environ.get('http_proxy'), 204 | 'https_proxy': os.environ.get('https_proxy') 205 | } 206 | if 'http' in proxies: 207 | os.environ['http_proxy'] = proxies['http'] 208 | if 'https' in proxies: 209 | os.environ['https_proxy'] = proxies['https'] 210 | 211 | try: 212 | # 构建请求头 213 | headers = { 214 | "Authorization": f"Bearer {self.api_key}", 215 | "Content-Type": "application/json" 216 | } 217 | 218 | # 处理图像数据 219 | if image_data.startswith('data:image'): 220 | # 如果是data URI,提取base64部分 221 | image_data = image_data.split(',', 1)[1] 222 | 223 | # 构建用户消息 - 使用豆包API官方示例格式 224 | # 首先检查图像数据的格式,确保是有效的图像 225 | image_format = "jpeg" # 默认使用jpeg 226 | if image_data.startswith('/9j/'): # JPEG magic number in base64 227 | image_format = "jpeg" 228 | elif image_data.startswith('iVBORw0KGgo'): # PNG magic number in base64 229 | image_format = "png" 230 | 231 | # 构建消息 232 | messages = [] 233 | 234 | # 添加系统提示词 235 | if self.system_prompt: 236 | messages.append({ 237 | "role": "system", 238 | "content": self.system_prompt 239 | }) 240 | 241 | user_content = [ 242 | { 243 | "type": "text", 244 | "text": f"请使用{self.language}分析这张图片并提供详细解答。" if self.language and self.language != 'auto' else "请分析这张图片并提供详细解答?" 
245 | }, 246 | { 247 | "type": "image_url", 248 | "image_url": { 249 | "url": f"data:image/{image_format};base64,{image_data}" 250 | } 251 | } 252 | ] 253 | 254 | messages.append({ 255 | "role": "user", 256 | "content": user_content 257 | }) 258 | 259 | # 处理推理配置 260 | thinking = { 261 | "type": "auto" # 默认值 262 | } 263 | 264 | if hasattr(self, 'reasoning_config') and self.reasoning_config: 265 | # 从reasoning_config中获取thinking_mode 266 | thinking_mode = self.reasoning_config.get('thinking_mode', "auto") 267 | thinking = { 268 | "type": thinking_mode 269 | } 270 | 271 | # 构建请求数据 272 | data = { 273 | "model": self.get_actual_model_name(), 274 | "messages": messages, 275 | "thinking": thinking, 276 | "temperature": self.temperature, 277 | "max_tokens": self.max_tokens, 278 | "stream": True 279 | } 280 | 281 | # 发送流式请求 282 | response = requests.post( 283 | f"{self.base_url}/chat/completions", 284 | headers=headers, 285 | json=data, 286 | stream=True, 287 | proxies=proxies if proxies else None, 288 | timeout=60 289 | ) 290 | 291 | if response.status_code != 200: 292 | error_text = response.text 293 | raise Exception(f"HTTP {response.status_code}: {error_text}") 294 | 295 | response.raise_for_status() 296 | 297 | # 初始化响应缓冲区 298 | response_buffer = "" 299 | 300 | # 处理流式响应 301 | for line in response.iter_lines(): 302 | if not line: 303 | continue 304 | 305 | line = line.decode('utf-8') 306 | if not line.startswith('data: '): 307 | continue 308 | 309 | line = line[6:] # 移除 'data: ' 前缀 310 | 311 | if line == '[DONE]': 312 | break 313 | 314 | try: 315 | chunk_data = json.loads(line) 316 | choices = chunk_data.get('choices', []) 317 | 318 | if choices and len(choices) > 0: 319 | delta = choices[0].get('delta', {}) 320 | content = delta.get('content', '') 321 | 322 | if content: 323 | response_buffer += content 324 | 325 | # 发送响应进度 326 | yield { 327 | "status": "streaming", 328 | "content": response_buffer 329 | } 330 | 331 | except json.JSONDecodeError: 332 | continue 333 | 334 
| # 确保发送完整的最终内容 335 | yield { 336 | "status": "completed", 337 | "content": response_buffer 338 | } 339 | 340 | finally: 341 | # 恢复原始代理设置 342 | if original_proxies: 343 | for key, value in original_proxies.items(): 344 | if value is None: 345 | if key in os.environ: 346 | del os.environ[key] 347 | else: 348 | os.environ[key] = value 349 | 350 | except Exception as e: 351 | yield { 352 | "status": "error", 353 | "error": f"豆包图像分析错误: {str(e)}" 354 | } 355 | -------------------------------------------------------------------------------- /models/mathpix.py: -------------------------------------------------------------------------------- 1 | from typing import Generator, Dict, Any 2 | import json 3 | import requests 4 | from .base import BaseModel 5 | 6 | class MathpixModel(BaseModel): 7 | """ 8 | Mathpix OCR model for processing images containing mathematical formulas, 9 | text, and tables. 10 | """ 11 | 12 | def __init__(self, api_key: str, temperature: float = 0.7, system_prompt: str = None): 13 | """ 14 | Initialize the Mathpix model. 
15 | 16 | Args: 17 | api_key: Mathpix API key in format "app_id:app_key" 18 | temperature: Not used for Mathpix but kept for BaseModel compatibility 19 | system_prompt: Not used for Mathpix but kept for BaseModel compatibility 20 | 21 | Raises: 22 | ValueError: If the API key format is invalid 23 | """ 24 | # 只传递必需的参数,不传递language参数 25 | super().__init__(api_key, temperature, system_prompt) 26 | try: 27 | self.app_id, self.app_key = api_key.split(':') 28 | except ValueError: 29 | raise ValueError("Mathpix API key must be in format 'app_id:app_key'") 30 | 31 | self.api_url = "https://api.mathpix.com/v3/text" 32 | self.headers = { 33 | "app_id": self.app_id, 34 | "app_key": self.app_key, 35 | "Content-Type": "application/json" 36 | } 37 | 38 | # Content type presets 39 | self.presets = { 40 | "math": { 41 | "formats": ["latex_normal", "latex_styled", "asciimath"], 42 | "data_options": { 43 | "include_asciimath": True, 44 | "include_latex": True, 45 | "include_mathml": True 46 | }, 47 | "ocr_options": { 48 | "detect_formulas": True, 49 | "enable_math_ocr": True, 50 | "enable_handwritten": True, 51 | "rm_spaces": True 52 | } 53 | }, 54 | "text": { 55 | "formats": ["text"], 56 | "data_options": { 57 | "include_latex": False, 58 | "include_asciimath": False 59 | }, 60 | "ocr_options": { 61 | "enable_spell_check": True, 62 | "enable_handwritten": True, 63 | "rm_spaces": False 64 | } 65 | }, 66 | "table": { 67 | "formats": ["text", "data"], 68 | "data_options": { 69 | "include_latex": True 70 | }, 71 | "ocr_options": { 72 | "detect_tables": True, 73 | "enable_spell_check": True, 74 | "rm_spaces": True 75 | } 76 | }, 77 | "full_text": { 78 | "formats": ["text"], 79 | "data_options": { 80 | "include_latex": False, 81 | "include_asciimath": False 82 | }, 83 | "ocr_options": { 84 | "enable_spell_check": True, 85 | "enable_handwritten": True, 86 | "rm_spaces": False, 87 | "detect_paragraphs": True, 88 | "enable_tables": False, 89 | "enable_math_ocr": False 90 | } 91 | } 92 | } 
93 | 94 | # Default to math preset 95 | self.current_preset = "math" 96 | 97 | def analyze_image(self, image_data: str, proxies: dict = None, content_type: str = None, 98 | confidence_threshold: float = 0.8, max_retries: int = 3) -> Generator[dict, None, None]: 99 | """ 100 | Analyze an image using Mathpix OCR API. 101 | 102 | Args: 103 | image_data: Base64 encoded image data 104 | proxies: Optional proxy configuration 105 | content_type: Type of content to analyze ('math', 'text', or 'table') 106 | confidence_threshold: Minimum confidence score to accept (0.0 to 1.0) 107 | max_retries: Maximum number of retry attempts for failed requests 108 | 109 | Yields: 110 | dict: Response chunks with status and content 111 | """ 112 | if content_type and content_type in self.presets: 113 | self.current_preset = content_type 114 | 115 | preset = self.presets[self.current_preset] 116 | 117 | try: 118 | # Prepare request payload 119 | payload = { 120 | "src": f"data:image/jpeg;base64,{image_data}", 121 | "formats": preset["formats"], 122 | "data_options": preset["data_options"], 123 | "ocr_options": preset["ocr_options"] 124 | } 125 | 126 | # Initialize retry counter 127 | retry_count = 0 128 | 129 | while retry_count < max_retries: 130 | try: 131 | # Send request to Mathpix API with timeout 132 | response = requests.post( 133 | self.api_url, 134 | headers=self.headers, 135 | json=payload, 136 | proxies=proxies, 137 | timeout=25 # 25 second timeout 138 | ) 139 | 140 | # Handle specific API error codes 141 | if response.status_code == 429: # Rate limit exceeded 142 | if retry_count < max_retries - 1: 143 | retry_count += 1 144 | continue 145 | else: 146 | raise requests.exceptions.RequestException("Rate limit exceeded") 147 | 148 | response.raise_for_status() 149 | result = response.json() 150 | 151 | # Check confidence threshold 152 | if 'confidence' in result and result['confidence'] < confidence_threshold: 153 | yield { 154 | "status": "warning", 155 | "content": f"Low 
confidence score: {result['confidence']:.2%}" 156 | } 157 | 158 | break # Success, exit retry loop 159 | 160 | except (requests.exceptions.Timeout, requests.exceptions.ConnectionError): 161 | if retry_count < max_retries - 1: 162 | retry_count += 1 163 | continue 164 | raise 165 | 166 | # Format the response 167 | formatted_response = self._format_response(result) 168 | 169 | # Yield initial status 170 | yield { 171 | "status": "started", 172 | "content": "" 173 | } 174 | 175 | # Yield the formatted response 176 | yield { 177 | "status": "completed", 178 | "content": formatted_response, 179 | "model": self.get_model_identifier() 180 | } 181 | 182 | except requests.exceptions.RequestException as e: 183 | yield { 184 | "status": "error", 185 | "error": f"Mathpix API error: {str(e)}" 186 | } 187 | except Exception as e: 188 | yield { 189 | "status": "error", 190 | "error": f"Error processing image: {str(e)}" 191 | } 192 | 193 | def analyze_text(self, text: str, proxies: dict = None) -> Generator[dict, None, None]: 194 | """ 195 | Not implemented for Mathpix model as it only processes images. 196 | """ 197 | yield { 198 | "status": "error", 199 | "error": "Text analysis is not supported by Mathpix model" 200 | } 201 | 202 | def get_default_system_prompt(self) -> str: 203 | """ 204 | Not used for Mathpix model. 205 | """ 206 | return "" 207 | 208 | def get_model_identifier(self) -> str: 209 | """ 210 | Return the model identifier. 211 | """ 212 | return "mathpix" 213 | 214 | def _format_response(self, result: Dict[str, Any]) -> str: 215 | """ 216 | Format the Mathpix API response into a readable string. 
217 | 218 | Args: 219 | result: Raw API response from Mathpix 220 | 221 | Returns: 222 | str: Formatted response string with all available formats 223 | """ 224 | formatted_parts = [] 225 | 226 | # Add confidence score if available 227 | if 'confidence' in result: 228 | formatted_parts.append(f"Confidence: {result['confidence']:.2%}\n") 229 | 230 | # Add text content 231 | if 'text' in result: 232 | formatted_parts.append("Text Content:") 233 | formatted_parts.append(result['text']) 234 | formatted_parts.append("") 235 | 236 | # Add LaTeX content 237 | if 'latex_normal' in result: 238 | formatted_parts.append("LaTeX (Normal):") 239 | formatted_parts.append(result['latex_normal']) 240 | formatted_parts.append("") 241 | 242 | if 'latex_styled' in result: 243 | formatted_parts.append("LaTeX (Styled):") 244 | formatted_parts.append(result['latex_styled']) 245 | formatted_parts.append("") 246 | 247 | # Add data formats (ASCII math, MathML) 248 | if 'data' in result and isinstance(result['data'], list): 249 | for item in result['data']: 250 | item_type = item.get('type', '') 251 | if item_type and 'value' in item: 252 | formatted_parts.append(f"{item_type.upper()}:") 253 | formatted_parts.append(item['value']) 254 | formatted_parts.append("") 255 | 256 | # Add table data if present 257 | if 'tables' in result and result['tables']: 258 | formatted_parts.append("Tables Detected:") 259 | for i, table in enumerate(result['tables'], 1): 260 | formatted_parts.append(f"Table {i}:") 261 | if 'cells' in table: 262 | # Format table as a grid 263 | cells = table['cells'] 264 | if cells: 265 | max_col = max(cell.get('col', 0) for cell in cells) + 1 266 | max_row = max(cell.get('row', 0) for cell in cells) + 1 267 | grid = [['' for _ in range(max_col)] for _ in range(max_row)] 268 | 269 | for cell in cells: 270 | row = cell.get('row', 0) 271 | col = cell.get('col', 0) 272 | text = cell.get('text', '') 273 | grid[row][col] = text 274 | 275 | # Format grid as table 276 | col_widths = 
[max(len(str(grid[r][c])) for r in range(max_row)) for c in range(max_col)] 277 | for row in grid: 278 | row_str = ' | '.join(f"{str(cell):<{width}}" for cell, width in zip(row, col_widths)) 279 | formatted_parts.append(f"| {row_str} |") 280 | formatted_parts.append("") 281 | 282 | # Add error message if present 283 | if 'error' in result: 284 | error_msg = result['error'] 285 | if isinstance(error_msg, dict): 286 | error_msg = error_msg.get('message', str(error_msg)) 287 | formatted_parts.append(f"Error: {error_msg}") 288 | 289 | return "\n".join(formatted_parts).strip() 290 | 291 | def extract_full_text(self, image_data: str, proxies: dict = None, max_retries: int = 3) -> str: 292 | """ 293 | 专门用于提取图像中的全部文本内容,忽略数学公式和表格等其他元素。 294 | 295 | Args: 296 | image_data: Base64编码的图像数据 297 | proxies: 可选的代理配置 298 | max_retries: 请求失败时的最大重试次数 299 | 300 | Returns: 301 | str: 图像中提取的完整文本内容 302 | """ 303 | try: 304 | # 准备请求负载,使用专为全文提取配置的参数 305 | payload = { 306 | "src": f"data:image/jpeg;base64,{image_data}", 307 | "formats": ["text"], 308 | "data_options": { 309 | "include_latex": False, 310 | "include_asciimath": False 311 | }, 312 | "ocr_options": { 313 | "enable_spell_check": True, 314 | "enable_handwritten": True, 315 | "rm_spaces": False, 316 | "detect_paragraphs": True, 317 | "enable_tables": False, 318 | "enable_math_ocr": False 319 | } 320 | } 321 | 322 | # 初始化重试计数器 323 | retry_count = 0 324 | 325 | while retry_count < max_retries: 326 | try: 327 | # 发送请求到Mathpix API 328 | response = requests.post( 329 | self.api_url, 330 | headers=self.headers, 331 | json=payload, 332 | proxies=proxies, 333 | timeout=30 # 30秒超时 334 | ) 335 | 336 | # 处理特定API错误代码 337 | if response.status_code == 429: # 超出速率限制 338 | if retry_count < max_retries - 1: 339 | retry_count += 1 340 | continue 341 | else: 342 | raise requests.exceptions.RequestException("超出API速率限制") 343 | 344 | response.raise_for_status() 345 | result = response.json() 346 | 347 | # 直接返回文本内容 348 | if 'text' in result: 349 | return 
result['text'] 350 | else: 351 | return "未能提取到文本内容" 352 | 353 | except (requests.exceptions.Timeout, requests.exceptions.ConnectionError): 354 | if retry_count < max_retries - 1: 355 | retry_count += 1 356 | continue 357 | raise 358 | 359 | except requests.exceptions.RequestException as e: 360 | return f"Mathpix API错误: {str(e)}" 361 | except Exception as e: 362 | return f"处理图像时出错: {str(e)}" 363 | -------------------------------------------------------------------------------- /models/deepseek.py: -------------------------------------------------------------------------------- 1 | import json 2 | import requests 3 | import os 4 | from typing import Generator 5 | from openai import OpenAI 6 | from .base import BaseModel 7 | 8 | class DeepSeekModel(BaseModel): 9 | def __init__(self, api_key: str, temperature: float = 0.7, system_prompt: str = None, language: str = None, model_name: str = "deepseek-reasoner", api_base_url: str = None): 10 | super().__init__(api_key, temperature, system_prompt, language) 11 | self.model_name = model_name 12 | self.api_base_url = api_base_url # 存储API基础URL 13 | 14 | def get_default_system_prompt(self) -> str: 15 | return """You are an expert at analyzing questions and providing detailed solutions. When presented with an image of a question: 16 | 1. First read and understand the question carefully 17 | 2. Break down the key components of the question 18 | 3. Provide a clear, step-by-step solution 19 | 4. If relevant, explain any concepts or theories involved 20 | 5. 
If there are multiple approaches, explain the most efficient one first""" 21 | 22 | def get_model_identifier(self) -> str: 23 | """根据模型名称返回正确的API标识符""" 24 | # 通过模型名称来确定实际的API调用标识符 25 | if self.model_name == "deepseek-chat": 26 | return "deepseek-chat" 27 | # 如果是deepseek-reasoner或包含reasoner的模型名称,返回推理模型标识符 28 | if "reasoner" in self.model_name.lower(): 29 | return "deepseek-reasoner" 30 | # 对于deepseek-chat也返回对应的模型名称 31 | if "chat" in self.model_name.lower() or self.model_name == "deepseek-chat": 32 | return "deepseek-chat" 33 | 34 | # 根据配置中的模型ID来确定实际的模型类型 35 | if self.model_name == "deepseek-reasoner": 36 | return "deepseek-reasoner" 37 | elif self.model_name == "deepseek-chat": 38 | return "deepseek-chat" 39 | 40 | # 默认使用deepseek-chat作为API标识符 41 | print(f"未知的DeepSeek模型名称: {self.model_name},使用deepseek-chat作为默认值") 42 | return "deepseek-chat" 43 | 44 | def analyze_text(self, text: str, proxies: dict = None) -> Generator[dict, None, None]: 45 | """Stream DeepSeek's response for text analysis""" 46 | try: 47 | # Initial status 48 | yield {"status": "started", "content": ""} 49 | 50 | # 保存原始环境变量 51 | original_env = { 52 | 'http_proxy': os.environ.get('http_proxy'), 53 | 'https_proxy': os.environ.get('https_proxy') 54 | } 55 | 56 | try: 57 | # 如果提供了代理设置,通过环境变量设置 58 | if proxies: 59 | if 'http' in proxies: 60 | os.environ['http_proxy'] = proxies['http'] 61 | if 'https' in proxies: 62 | os.environ['https_proxy'] = proxies['https'] 63 | 64 | # 初始化DeepSeek客户端,不再使用session对象 65 | client = OpenAI( 66 | api_key=self.api_key, 67 | base_url="https://api.deepseek.com" 68 | ) 69 | 70 | # 使用系统提供的系统提示词,不再自动添加语言指令 71 | system_prompt = self.system_prompt 72 | 73 | # 构建请求参数 74 | params = { 75 | "model": self.get_model_identifier(), 76 | "messages": [ 77 | { 78 | 'role': 'system', 79 | 'content': system_prompt 80 | }, 81 | { 82 | 'role': 'user', 83 | 'content': text 84 | } 85 | ], 86 | "stream": True 87 | } 88 | 89 | # 只有非推理模型才设置temperature参数 90 | if not 
self.get_model_identifier().endswith('reasoner') and self.temperature is not None: 91 | params["temperature"] = self.temperature 92 | 93 | print(f"调用DeepSeek API: {self.get_model_identifier()}, 是否设置温度: {not self.get_model_identifier().endswith('reasoner')}, 温度值: {self.temperature if not self.get_model_identifier().endswith('reasoner') else 'N/A'}") 94 | 95 | response = client.chat.completions.create(**params) 96 | 97 | # 使用两个缓冲区,分别用于常规内容和思考内容 98 | response_buffer = "" 99 | thinking_buffer = "" 100 | 101 | for chunk in response: 102 | # 打印chunk以调试 103 | try: 104 | print(f"DeepSeek API返回chunk: {chunk}") 105 | except: 106 | print("无法打印chunk") 107 | 108 | try: 109 | # 同时处理两种不同的内容,确保正确区分思考内容和最终内容 110 | delta = chunk.choices[0].delta 111 | 112 | # 处理推理模型的思考内容 113 | if hasattr(delta, 'reasoning_content') and delta.reasoning_content: 114 | content = delta.reasoning_content 115 | thinking_buffer += content 116 | 117 | # 发送思考内容更新 118 | if len(content) >= 20 or content.endswith(('.', '!', '?', '。', '!', '?', '\n')): 119 | yield { 120 | "status": "thinking", 121 | "content": thinking_buffer 122 | } 123 | 124 | # 处理最终结果内容 - 即使在推理模型中也会有content字段 125 | if hasattr(delta, 'content') and delta.content: 126 | content = delta.content 127 | response_buffer += content 128 | print(f"累积响应内容: '{content}', 当前buffer: '{response_buffer}'") 129 | 130 | # 发送结果内容更新 131 | if len(content) >= 10 or content.endswith(('.', '!', '?', '。', '!', '?', '\n')): 132 | yield { 133 | "status": "streaming", 134 | "content": response_buffer 135 | } 136 | 137 | # 处理消息结束 138 | if hasattr(chunk.choices[0], 'finish_reason') and chunk.choices[0].finish_reason: 139 | print(f"生成结束,原因: {chunk.choices[0].finish_reason}") 140 | # 注意:不要在这里把思考内容作为正文,因为这可能导致重复内容 141 | except Exception as e: 142 | print(f"解析响应chunk时出错: {str(e)}") 143 | continue 144 | 145 | # 确保发送最终的缓冲内容 146 | if thinking_buffer: 147 | yield { 148 | "status": "thinking_complete", 149 | "content": thinking_buffer 150 | } 151 | 152 | # 发送最终响应内容 153 | if 
response_buffer: 154 | yield { 155 | "status": "completed", 156 | "content": response_buffer 157 | } 158 | 159 | # 如果没有正常的响应内容,但有思考内容,则将思考内容作为最终结果 160 | elif thinking_buffer: 161 | yield { 162 | "status": "completed", 163 | "content": thinking_buffer 164 | } 165 | else: 166 | # 如果两者都没有,返回一个空结果 167 | yield { 168 | "status": "completed", 169 | "content": "没有获取到内容" 170 | } 171 | 172 | except Exception as e: 173 | error_msg = str(e) 174 | print(f"DeepSeek API调用出错: {error_msg}") 175 | 176 | # 提供具体的错误信息 177 | if "invalid_api_key" in error_msg.lower(): 178 | error_msg = "DeepSeek API密钥无效,请检查您的API密钥" 179 | elif "rate_limit" in error_msg.lower(): 180 | error_msg = "DeepSeek API请求频率超限,请稍后再试" 181 | elif "quota_exceeded" in error_msg.lower(): 182 | error_msg = "DeepSeek API配额已用完,请续费或等待下个计费周期" 183 | 184 | yield { 185 | "status": "error", 186 | "error": f"DeepSeek API错误: {error_msg}" 187 | } 188 | finally: 189 | # 恢复原始环境变量 190 | for key, value in original_env.items(): 191 | if value is None: 192 | if key in os.environ: 193 | del os.environ[key] 194 | else: 195 | os.environ[key] = value 196 | 197 | except Exception as e: 198 | error_msg = str(e) 199 | print(f"调用DeepSeek模型时发生错误: {error_msg}") 200 | 201 | if "invalid_api_key" in error_msg.lower(): 202 | error_msg = "API密钥无效,请检查设置" 203 | elif "rate_limit" in error_msg.lower(): 204 | error_msg = "API请求频率超限,请稍后再试" 205 | 206 | yield { 207 | "status": "error", 208 | "error": f"DeepSeek API错误: {error_msg}" 209 | } 210 | 211 | def analyze_image(self, image_data: str, proxies: dict = None) -> Generator[dict, None, None]: 212 | """Stream DeepSeek's response for image analysis""" 213 | try: 214 | # 检查我们是否有支持图像的模型 215 | if self.model_name == "deepseek-chat" or self.model_name == "deepseek-reasoner": 216 | yield { 217 | "status": "error", 218 | "error": "当前DeepSeek模型不支持图像分析,请使用Anthropic或OpenAI的多模态模型" 219 | } 220 | return 221 | 222 | # Initial status 223 | yield {"status": "started", "content": ""} 224 | 225 | # 保存原始环境变量 226 | original_env = { 
227 | 'http_proxy': os.environ.get('http_proxy'), 228 | 'https_proxy': os.environ.get('https_proxy') 229 | } 230 | 231 | try: 232 | # 如果提供了代理设置,通过环境变量设置 233 | if proxies: 234 | if 'http' in proxies: 235 | os.environ['http_proxy'] = proxies['http'] 236 | if 'https' in proxies: 237 | os.environ['https_proxy'] = proxies['https'] 238 | 239 | # 初始化DeepSeek客户端,不再使用session对象 240 | client = OpenAI( 241 | api_key=self.api_key, 242 | base_url="https://api.deepseek.com" 243 | ) 244 | 245 | # 使用系统提供的系统提示词,不再自动添加语言指令 246 | system_prompt = self.system_prompt 247 | 248 | # 构建请求参数 249 | params = { 250 | "model": self.get_model_identifier(), 251 | "messages": [ 252 | { 253 | 'role': 'system', 254 | 'content': system_prompt 255 | }, 256 | { 257 | 'role': 'user', 258 | 'content': f"Here's an image of a question to analyze: data:image/png;base64,{image_data}" 259 | } 260 | ], 261 | "stream": True 262 | } 263 | 264 | # 只有非推理模型才设置temperature参数 265 | if not self.get_model_identifier().endswith('reasoner') and self.temperature is not None: 266 | params["temperature"] = self.temperature 267 | 268 | response = client.chat.completions.create(**params) 269 | 270 | # 使用两个缓冲区,分别用于常规内容和思考内容 271 | response_buffer = "" 272 | thinking_buffer = "" 273 | 274 | for chunk in response: 275 | # 打印chunk以调试 276 | try: 277 | print(f"DeepSeek图像API返回chunk: {chunk}") 278 | except: 279 | print("无法打印chunk") 280 | 281 | try: 282 | # 同时处理两种不同的内容,确保正确区分思考内容和最终内容 283 | delta = chunk.choices[0].delta 284 | 285 | # 处理推理模型的思考内容 286 | if hasattr(delta, 'reasoning_content') and delta.reasoning_content: 287 | content = delta.reasoning_content 288 | thinking_buffer += content 289 | 290 | # 发送思考内容更新 291 | if len(content) >= 20 or content.endswith(('.', '!', '?', '。', '!', '?', '\n')): 292 | yield { 293 | "status": "thinking", 294 | "content": thinking_buffer 295 | } 296 | 297 | # 处理最终结果内容 - 即使在推理模型中也会有content字段 298 | if hasattr(delta, 'content') and delta.content: 299 | content = delta.content 300 | response_buffer += 
content 301 | print(f"累积图像响应内容: '{content}', 当前buffer: '{response_buffer}'") 302 | 303 | # 发送结果内容更新 304 | if len(content) >= 10 or content.endswith(('.', '!', '?', '。', '!', '?', '\n')): 305 | yield { 306 | "status": "streaming", 307 | "content": response_buffer 308 | } 309 | 310 | # 处理消息结束 311 | if hasattr(chunk.choices[0], 'finish_reason') and chunk.choices[0].finish_reason: 312 | print(f"图像生成结束,原因: {chunk.choices[0].finish_reason}") 313 | except Exception as e: 314 | print(f"解析图像响应chunk时出错: {str(e)}") 315 | continue 316 | 317 | # 确保发送最终的缓冲内容 318 | if thinking_buffer: 319 | yield { 320 | "status": "thinking_complete", 321 | "content": thinking_buffer 322 | } 323 | 324 | # 发送最终响应内容 325 | if response_buffer: 326 | yield { 327 | "status": "completed", 328 | "content": response_buffer 329 | } 330 | 331 | except Exception as e: 332 | error_msg = str(e) 333 | print(f"DeepSeek API调用出错: {error_msg}") 334 | 335 | # 提供具体的错误信息 336 | if "invalid_api_key" in error_msg.lower(): 337 | error_msg = "DeepSeek API密钥无效,请检查您的API密钥" 338 | elif "rate_limit" in error_msg.lower(): 339 | error_msg = "DeepSeek API请求频率超限,请稍后再试" 340 | 341 | yield { 342 | "status": "error", 343 | "error": f"DeepSeek API错误: {error_msg}" 344 | } 345 | finally: 346 | # 恢复原始环境变量 347 | for key, value in original_env.items(): 348 | if value is None: 349 | if key in os.environ: 350 | del os.environ[key] 351 | else: 352 | os.environ[key] = value 353 | 354 | except Exception as e: 355 | error_msg = str(e) 356 | if "invalid_api_key" in error_msg.lower(): 357 | error_msg = "API密钥无效,请检查设置" 358 | elif "rate_limit" in error_msg.lower(): 359 | error_msg = "API请求频率超限,请稍后再试" 360 | 361 | yield { 362 | "status": "error", 363 | "error": f"DeepSeek API错误: {error_msg}" 364 | } 365 | -------------------------------------------------------------------------------- /models/anthropic.py: -------------------------------------------------------------------------------- 1 | import json 2 | import requests 3 | from typing import 
Generator, Optional 4 | from .base import BaseModel 5 | 6 | class AnthropicModel(BaseModel): 7 | def __init__(self, api_key, temperature=0.7, system_prompt=None, language=None, api_base_url=None, model_identifier=None): 8 | super().__init__(api_key, temperature, system_prompt or self.get_default_system_prompt(), language or "en") 9 | # 设置API基础URL,默认为Anthropic官方API 10 | self.api_base_url = api_base_url or "https://api.anthropic.com/v1" 11 | # 设置模型标识符,支持动态选择 12 | self.model_identifier = model_identifier or "claude-3-7-sonnet-20250219" 13 | # 初始化推理配置 14 | self.reasoning_config = None 15 | # 初始化最大Token数 16 | self.max_tokens = None 17 | 18 | def get_default_system_prompt(self) -> str: 19 | return """You are an expert at analyzing questions and providing detailed solutions. When presented with an image of a question: 20 | 1. First read and understand the question carefully 21 | 2. Break down the key components of the question 22 | 3. Provide a clear, step-by-step solution 23 | 4. If relevant, explain any concepts or theories involved 24 | 5. 
If there are multiple approaches, explain the most efficient one first""" 25 | 26 | def get_model_identifier(self) -> str: 27 | return self.model_identifier 28 | 29 | def analyze_text(self, text: str, proxies: Optional[dict] = None) -> Generator[dict, None, None]: 30 | """Stream Claude's response for text analysis""" 31 | try: 32 | yield {"status": "started"} 33 | 34 | api_key = self.api_key 35 | if api_key.startswith('Bearer '): 36 | api_key = api_key[7:] 37 | 38 | headers = { 39 | 'x-api-key': api_key, 40 | 'anthropic-version': '2023-06-01', 41 | 'content-type': 'application/json', 42 | 'accept': 'application/json', 43 | } 44 | 45 | # 获取最大输出Token设置 46 | max_tokens = 8192 # 默认值 47 | if hasattr(self, 'max_tokens') and self.max_tokens: 48 | max_tokens = self.max_tokens 49 | 50 | payload = { 51 | 'model': self.get_model_identifier(), 52 | 'stream': True, 53 | 'max_tokens': max_tokens, 54 | 'temperature': 1, 55 | 'system': self.system_prompt, 56 | 'messages': [{ 57 | 'role': 'user', 58 | 'content': [ 59 | { 60 | 'type': 'text', 61 | 'text': text 62 | } 63 | ] 64 | }] 65 | } 66 | 67 | # 处理推理配置 68 | if hasattr(self, 'reasoning_config') and self.reasoning_config: 69 | # 如果设置了extended reasoning 70 | if self.reasoning_config.get('reasoning_depth') == 'extended': 71 | think_budget = self.reasoning_config.get('think_budget', max_tokens // 2) 72 | payload['thinking'] = { 73 | 'type': 'enabled', 74 | 'budget_tokens': think_budget 75 | } 76 | # 如果设置了instant模式 77 | elif self.reasoning_config.get('speed_mode') == 'instant': 78 | # 确保当使用speed_mode时不包含thinking参数 79 | if 'thinking' in payload: 80 | del payload['thinking'] 81 | # 默认启用思考但使用较小的预算 82 | else: 83 | payload['thinking'] = { 84 | 'type': 'enabled', 85 | 'budget_tokens': min(4096, max_tokens // 4) 86 | } 87 | # 默认设置 88 | else: 89 | payload['thinking'] = { 90 | 'type': 'enabled', 91 | 'budget_tokens': min(4096, max_tokens // 4) 92 | } 93 | 94 | print(f"Debug - 推理配置: max_tokens={max_tokens}, thinking={payload.get('thinking', 
payload.get('speed_mode', 'default'))}") 95 | 96 | # 使用配置的API基础URL 97 | api_endpoint = f"{self.api_base_url}/messages" 98 | 99 | response = requests.post( 100 | api_endpoint, 101 | headers=headers, 102 | json=payload, 103 | stream=True, 104 | proxies=proxies, 105 | timeout=60 106 | ) 107 | 108 | if response.status_code != 200: 109 | error_msg = f'API error: {response.status_code}' 110 | try: 111 | error_data = response.json() 112 | if 'error' in error_data: 113 | error_msg += f" - {error_data['error']['message']}" 114 | except: 115 | error_msg += f" - {response.text}" 116 | yield {"status": "error", "error": error_msg} 117 | return 118 | 119 | thinking_content = "" 120 | response_buffer = "" 121 | 122 | for chunk in response.iter_lines(): 123 | if not chunk: 124 | continue 125 | 126 | try: 127 | chunk_str = chunk.decode('utf-8') 128 | if not chunk_str.startswith('data: '): 129 | continue 130 | 131 | chunk_str = chunk_str[6:] 132 | data = json.loads(chunk_str) 133 | 134 | if data.get('type') == 'content_block_delta': 135 | if 'delta' in data: 136 | if 'text' in data['delta']: 137 | text_chunk = data['delta']['text'] 138 | response_buffer += text_chunk 139 | # 只在每累积一定数量的字符后才发送,减少UI跳变 140 | if len(text_chunk) >= 10 or text_chunk.endswith(('.', '!', '?', '。', '!', '?', '\n')): 141 | yield { 142 | "status": "streaming", 143 | "content": response_buffer 144 | } 145 | 146 | elif 'thinking' in data['delta']: 147 | thinking_chunk = data['delta']['thinking'] 148 | thinking_content += thinking_chunk 149 | # 只在每累积一定数量的字符后才发送,减少UI跳变 150 | if len(thinking_chunk) >= 20 or thinking_chunk.endswith(('.', '!', '?', '。', '!', '?', '\n')): 151 | yield { 152 | "status": "thinking", 153 | "content": thinking_content 154 | } 155 | 156 | # 处理新的extended_thinking格式 157 | elif data.get('type') == 'extended_thinking_delta': 158 | if 'delta' in data and 'text' in data['delta']: 159 | thinking_chunk = data['delta']['text'] 160 | thinking_content += thinking_chunk 161 | # 只在每累积一定数量的字符后才发送,减少UI跳变 
162 | if len(thinking_chunk) >= 20 or thinking_chunk.endswith(('.', '!', '?', '。', '!', '?', '\n')): 163 | yield { 164 | "status": "thinking", 165 | "content": thinking_content 166 | } 167 | 168 | elif data.get('type') == 'message_stop': 169 | # 确保发送完整的思考内容 170 | if thinking_content: 171 | yield { 172 | "status": "thinking_complete", 173 | "content": thinking_content 174 | } 175 | # 确保发送完整的响应内容 176 | yield { 177 | "status": "completed", 178 | "content": response_buffer 179 | } 180 | 181 | elif data.get('type') == 'error': 182 | error_msg = data.get('error', {}).get('message', 'Unknown error') 183 | yield { 184 | "status": "error", 185 | "error": error_msg 186 | } 187 | break 188 | 189 | except json.JSONDecodeError as e: 190 | print(f"JSON decode error: {str(e)}") 191 | continue 192 | 193 | except Exception as e: 194 | yield { 195 | "status": "error", 196 | "error": f"Streaming error: {str(e)}" 197 | } 198 | 199 | def analyze_image(self, image_data, proxies: Optional[dict] = None): 200 | yield {"status": "started"} 201 | 202 | api_key = self.api_key 203 | if api_key.startswith('Bearer '): 204 | api_key = api_key[7:] 205 | 206 | headers = { 207 | 'x-api-key': api_key, 208 | 'anthropic-version': '2023-06-01', 209 | 'content-type': 'application/json' 210 | } 211 | 212 | # 使用系统提供的系统提示词,不再自动添加语言指令 213 | system_prompt = self.system_prompt 214 | 215 | # 获取最大输出Token设置 216 | max_tokens = 8192 # 默认值 217 | if hasattr(self, 'max_tokens') and self.max_tokens: 218 | max_tokens = self.max_tokens 219 | 220 | payload = { 221 | 'model': self.get_model_identifier(), 222 | 'stream': True, 223 | 'max_tokens': max_tokens, 224 | 'temperature': 1, 225 | 'system': system_prompt, 226 | 'messages': [{ 227 | 'role': 'user', 228 | 'content': [ 229 | { 230 | 'type': 'image', 231 | 'source': { 232 | 'type': 'base64', 233 | 'media_type': 'image/png', 234 | 'data': image_data 235 | } 236 | }, 237 | { 238 | 'type': 'text', 239 | 'text': "请分析这个问题并提供详细的解决方案。如果你看到多个问题,请逐一解决。" 240 | } 241 | ] 242 | }] 
243 | } 244 | 245 | # 处理推理配置 246 | if hasattr(self, 'reasoning_config') and self.reasoning_config: 247 | # 如果设置了extended reasoning 248 | if self.reasoning_config.get('reasoning_depth') == 'extended': 249 | think_budget = self.reasoning_config.get('think_budget', max_tokens // 2) 250 | payload['thinking'] = { 251 | 'type': 'enabled', 252 | 'budget_tokens': think_budget 253 | } 254 | # 如果设置了instant模式 255 | elif self.reasoning_config.get('speed_mode') == 'instant': 256 | # 只需确保不包含thinking参数,不添加speed_mode参数 257 | if 'thinking' in payload: 258 | del payload['thinking'] 259 | # 默认启用思考但使用较小的预算 260 | else: 261 | payload['thinking'] = { 262 | 'type': 'enabled', 263 | 'budget_tokens': min(4096, max_tokens // 4) 264 | } 265 | # 默认设置 266 | else: 267 | payload['thinking'] = { 268 | 'type': 'enabled', 269 | 'budget_tokens': min(4096, max_tokens // 4) 270 | } 271 | 272 | print(f"Debug - 图像分析推理配置: max_tokens={max_tokens}, thinking={payload.get('thinking', payload.get('speed_mode', 'default'))}") 273 | 274 | # 使用配置的API基础URL 275 | api_endpoint = f"{self.api_base_url}/messages" 276 | 277 | response = requests.post( 278 | api_endpoint, 279 | headers=headers, 280 | json=payload, 281 | stream=True, 282 | proxies=proxies, 283 | timeout=60 284 | ) 285 | 286 | if response.status_code != 200: 287 | error_msg = f'API error: {response.status_code}' 288 | try: 289 | error_data = response.json() 290 | if 'error' in error_data: 291 | error_msg += f" - {error_data['error']['message']}" 292 | except: 293 | error_msg += f" - {response.text}" 294 | yield {"status": "error", "error": error_msg} 295 | return 296 | 297 | thinking_content = "" 298 | response_buffer = "" 299 | 300 | for chunk in response.iter_lines(): 301 | if not chunk: 302 | continue 303 | 304 | try: 305 | chunk_str = chunk.decode('utf-8') 306 | if not chunk_str.startswith('data: '): 307 | continue 308 | 309 | chunk_str = chunk_str[6:] 310 | data = json.loads(chunk_str) 311 | 312 | if data.get('type') == 'content_block_delta': 313 | if 
'delta' in data: 314 | if 'text' in data['delta']: 315 | text_chunk = data['delta']['text'] 316 | response_buffer += text_chunk 317 | # 只在每累积一定数量的字符后才发送,减少UI跳变 318 | if len(text_chunk) >= 10 or text_chunk.endswith(('.', '!', '?', '。', '!', '?', '\n')): 319 | yield { 320 | "status": "streaming", 321 | "content": response_buffer 322 | } 323 | 324 | elif 'thinking' in data['delta']: 325 | thinking_chunk = data['delta']['thinking'] 326 | thinking_content += thinking_chunk 327 | # 只在每累积一定数量的字符后才发送,减少UI跳变 328 | if len(thinking_chunk) >= 20 or thinking_chunk.endswith(('.', '!', '?', '。', '!', '?', '\n')): 329 | yield { 330 | "status": "thinking", 331 | "content": thinking_content 332 | } 333 | 334 | # 处理新的extended_thinking格式 335 | elif data.get('type') == 'extended_thinking_delta': 336 | if 'delta' in data and 'text' in data['delta']: 337 | thinking_chunk = data['delta']['text'] 338 | thinking_content += thinking_chunk 339 | # 只在每累积一定数量的字符后才发送,减少UI跳变 340 | if len(thinking_chunk) >= 20 or thinking_chunk.endswith(('.', '!', '?', '。', '!', '?', '\n')): 341 | yield { 342 | "status": "thinking", 343 | "content": thinking_content 344 | } 345 | 346 | elif data.get('type') == 'message_stop': 347 | # 确保发送完整的思考内容 348 | if thinking_content: 349 | yield { 350 | "status": "thinking_complete", 351 | "content": thinking_content 352 | } 353 | # 确保发送完整的响应内容 354 | yield { 355 | "status": "completed", 356 | "content": response_buffer 357 | } 358 | 359 | elif data.get('type') == 'error': 360 | error_message = data.get('error', {}).get('message', 'Unknown error') 361 | yield { 362 | "status": "error", 363 | "error": error_message 364 | } 365 | 366 | except Exception as e: 367 | yield { 368 | "status": "error", 369 | "error": f"Error processing response: {str(e)}" 370 | } 371 | break 372 | -------------------------------------------------------------------------------- /app.py: -------------------------------------------------------------------------------- 1 | from flask import Flask, 
jsonify, render_template, request, send_from_directory 2 | from flask_socketio import SocketIO 3 | import pyautogui 4 | import base64 5 | from io import BytesIO 6 | import socket 7 | from threading import Thread, Event 8 | import threading 9 | from PIL import Image 10 | import pyperclip 11 | from models import ModelFactory 12 | import time 13 | import os 14 | import json 15 | import traceback 16 | import requests 17 | from datetime import datetime 18 | import sys 19 | 20 | app = Flask(__name__) 21 | socketio = SocketIO( 22 | app, 23 | cors_allowed_origins="*", 24 | ping_timeout=30, 25 | ping_interval=5, 26 | max_http_buffer_size=50 * 1024 * 1024, 27 | async_mode='threading', # 使用threading模式提高兼容性 28 | engineio_logger=True, # 启用引擎日志,便于调试 29 | logger=True # 启用Socket.IO日志 30 | ) 31 | 32 | # 常量定义 33 | CURRENT_DIR = os.path.dirname(os.path.abspath(__file__)) 34 | CONFIG_DIR = os.path.join(CURRENT_DIR, 'config') 35 | STATIC_DIR = os.path.join(CURRENT_DIR, 'static') 36 | # 确保配置目录存在 37 | os.makedirs(CONFIG_DIR, exist_ok=True) 38 | 39 | # 密钥和其他配置文件路径 40 | API_KEYS_FILE = os.path.join(CONFIG_DIR, 'api_keys.json') 41 | API_BASE_URLS_FILE = os.path.join(CONFIG_DIR, 'api_base_urls.json') 42 | VERSION_FILE = os.path.join(CONFIG_DIR, 'version.json') 43 | UPDATE_INFO_FILE = os.path.join(CONFIG_DIR, 'update_info.json') 44 | PROMPT_FILE = os.path.join(CONFIG_DIR, 'prompts.json') # 新增提示词配置文件路径 45 | PROXY_API_FILE = os.path.join(CONFIG_DIR, 'proxy_api.json') # 新增中转API配置文件路径 46 | 47 | DEFAULT_API_BASE_URLS = { 48 | "AnthropicApiBaseUrl": "", 49 | "OpenaiApiBaseUrl": "", 50 | "DeepseekApiBaseUrl": "", 51 | "AlibabaApiBaseUrl": "", 52 | "GoogleApiBaseUrl": "", 53 | "DoubaoApiBaseUrl": "" 54 | } 55 | 56 | def ensure_api_base_urls_file(): 57 | """确保 API 基础 URL 配置文件存在并包含所有占位符""" 58 | try: 59 | file_exists = os.path.exists(API_BASE_URLS_FILE) 60 | base_urls = {} 61 | if file_exists: 62 | try: 63 | with open(API_BASE_URLS_FILE, 'r', encoding='utf-8') as f: 64 | loaded = json.load(f) 65 | if 
isinstance(loaded, dict): 66 | base_urls = loaded 67 | else: 68 | file_exists = False 69 | except json.JSONDecodeError: 70 | file_exists = False 71 | 72 | missing_key_added = False 73 | for key, default_value in DEFAULT_API_BASE_URLS.items(): 74 | if key not in base_urls: 75 | base_urls[key] = default_value 76 | missing_key_added = True 77 | 78 | if not file_exists or missing_key_added or not base_urls: 79 | with open(API_BASE_URLS_FILE, 'w', encoding='utf-8') as f: 80 | json.dump(base_urls or DEFAULT_API_BASE_URLS, f, ensure_ascii=False, indent=2) 81 | except Exception as e: 82 | print(f"初始化API基础URL配置失败: {e}") 83 | 84 | # 确保API基础URL文件已经生成 85 | ensure_api_base_urls_file() 86 | 87 | # 跟踪用户生成任务的字典 88 | generation_tasks = {} 89 | 90 | # 初始化模型工厂 91 | ModelFactory.initialize() 92 | 93 | def get_local_ip(): 94 | try: 95 | # Get local IP address 96 | s = socket.socket(socket.AF_INET, socket.SOCK_DGRAM) 97 | s.connect(("8.8.8.8", 80)) 98 | ip = s.getsockname()[0] 99 | s.close() 100 | return ip 101 | except Exception: 102 | return "127.0.0.1" 103 | 104 | @app.route('/') 105 | def index(): 106 | local_ip = get_local_ip() 107 | 108 | # 检查更新 109 | try: 110 | update_info = check_for_updates() 111 | except: 112 | update_info = {'has_update': False} 113 | 114 | return render_template('index.html', local_ip=local_ip, update_info=update_info) 115 | 116 | @socketio.on('connect') 117 | def handle_connect(): 118 | print('Client connected') 119 | 120 | @socketio.on('disconnect') 121 | def handle_disconnect(): 122 | print('Client disconnected') 123 | 124 | def create_model_instance(model_id, settings, is_reasoning=False): 125 | """创建模型实例""" 126 | # 提取API密钥 127 | api_keys = settings.get('apiKeys', {}) 128 | 129 | # 确定需要哪个API密钥 130 | api_key_id = None 131 | # 特殊情况:o3-mini使用OpenAI API密钥 132 | if model_id.lower() == "o3-mini": 133 | api_key_id = "OpenaiApiKey" 134 | # 其他Anthropic/Claude模型 135 | elif "claude" in model_id.lower() or "anthropic" in model_id.lower(): 136 | api_key_id = 
"AnthropicApiKey" 137 | elif any(keyword in model_id.lower() for keyword in ["gpt", "openai"]): 138 | api_key_id = "OpenaiApiKey" 139 | elif "deepseek" in model_id.lower(): 140 | api_key_id = "DeepseekApiKey" 141 | elif "qvq" in model_id.lower() or "alibaba" in model_id.lower() or "qwen" in model_id.lower(): 142 | api_key_id = "AlibabaApiKey" 143 | elif "gemini" in model_id.lower() or "google" in model_id.lower(): 144 | api_key_id = "GoogleApiKey" 145 | elif "doubao" in model_id.lower(): 146 | api_key_id = "DoubaoApiKey" 147 | 148 | # 首先尝试从本地配置获取API密钥 149 | api_key = get_api_key(api_key_id) 150 | 151 | # 如果本地没有配置,尝试使用前端传递的密钥(向后兼容) 152 | if not api_key: 153 | api_key = api_keys.get(api_key_id) 154 | 155 | if not api_key: 156 | raise ValueError(f"API key is required for the selected model (keyId: {api_key_id})") 157 | 158 | # 获取maxTokens参数,默认为8192 159 | max_tokens = int(settings.get('maxTokens', 8192)) 160 | 161 | # 检查是否启用中转API 162 | proxy_api_config = load_proxy_api() 163 | base_url = None 164 | 165 | if proxy_api_config.get('enabled', False): 166 | # 根据模型类型选择对应的中转API 167 | if "claude" in model_id.lower() or "anthropic" in model_id.lower(): 168 | base_url = proxy_api_config.get('apis', {}).get('anthropic', '') 169 | elif any(keyword in model_id.lower() for keyword in ["gpt", "openai"]): 170 | base_url = proxy_api_config.get('apis', {}).get('openai', '') 171 | elif "deepseek" in model_id.lower(): 172 | base_url = proxy_api_config.get('apis', {}).get('deepseek', '') 173 | elif "qvq" in model_id.lower() or "alibaba" in model_id.lower() or "qwen" in model_id.lower(): 174 | base_url = proxy_api_config.get('apis', {}).get('alibaba', '') 175 | elif "gemini" in model_id.lower() or "google" in model_id.lower(): 176 | base_url = proxy_api_config.get('apis', {}).get('google', '') 177 | 178 | # 从前端设置获取自定义API基础URL (apiBaseUrls) 179 | api_base_urls = settings.get('apiBaseUrls', {}) 180 | if api_base_urls: 181 | # 根据模型类型选择对应的自定义API基础URL 182 | if "claude" in model_id.lower() or 
"anthropic" in model_id.lower(): 183 | custom_base_url = api_base_urls.get('anthropic') 184 | if custom_base_url: 185 | base_url = custom_base_url 186 | elif any(keyword in model_id.lower() for keyword in ["gpt", "openai"]): 187 | custom_base_url = api_base_urls.get('openai') 188 | if custom_base_url: 189 | base_url = custom_base_url 190 | elif "deepseek" in model_id.lower(): 191 | custom_base_url = api_base_urls.get('deepseek') 192 | if custom_base_url: 193 | base_url = custom_base_url 194 | elif "qvq" in model_id.lower() or "alibaba" in model_id.lower() or "qwen" in model_id.lower(): 195 | custom_base_url = api_base_urls.get('alibaba') 196 | if custom_base_url: 197 | base_url = custom_base_url 198 | elif "gemini" in model_id.lower() or "google" in model_id.lower(): 199 | custom_base_url = api_base_urls.get('google') 200 | if custom_base_url: 201 | base_url = custom_base_url 202 | elif "doubao" in model_id.lower(): 203 | custom_base_url = api_base_urls.get('doubao') 204 | if custom_base_url: 205 | base_url = custom_base_url 206 | 207 | # 创建模型实例 208 | model_instance = ModelFactory.create_model( 209 | model_name=model_id, 210 | api_key=api_key, 211 | temperature=None if is_reasoning else float(settings.get('temperature', 0.7)), 212 | system_prompt=settings.get('systemPrompt'), 213 | language=settings.get('language', '中文'), 214 | api_base_url=base_url # 现在BaseModel支持api_base_url参数 215 | ) 216 | 217 | # 设置最大输出Token,但不为阿里巴巴模型设置(它们有自己内部的处理逻辑) 218 | is_alibaba_model = "qvq" in model_id.lower() or "alibaba" in model_id.lower() or "qwen" in model_id.lower() 219 | if not is_alibaba_model: 220 | model_instance.max_tokens = max_tokens 221 | 222 | return model_instance 223 | 224 | def stream_model_response(response_generator, sid, model_name=None): 225 | """Stream model responses to the client""" 226 | try: 227 | print("Starting response streaming...") 228 | 229 | # 判断模型是否为推理模型 230 | is_reasoning = model_name and ModelFactory.is_reasoning(model_name) 231 | if is_reasoning: 232 
| print(f"使用推理模型 {model_name},将显示思考过程") 233 | 234 | # 初始化:发送开始状态 235 | socketio.emit('ai_response', { 236 | 'status': 'started', 237 | 'content': '', 238 | 'is_reasoning': is_reasoning 239 | }, room=sid) 240 | print("Sent initial status to client") 241 | 242 | # 维护服务端缓冲区以累积完整内容 243 | response_buffer = "" 244 | thinking_buffer = "" 245 | 246 | # 上次发送的时间戳,用于控制发送频率 247 | last_emit_time = time.time() 248 | 249 | # 流式处理响应 250 | for response in response_generator: 251 | # 处理Mathpix响应 252 | if isinstance(response.get('content', ''), str) and 'mathpix' in response.get('model', ''): 253 | if current_time - last_emit_time >= 0.3: 254 | socketio.emit('ai_response', { 255 | 'status': 'thinking', 256 | 'content': thinking_buffer, 257 | 'is_reasoning': True 258 | }, room=sid) 259 | last_emit_time = current_time 260 | 261 | elif status == 'thinking_complete': 262 | # 仅对推理模型处理思考过程 263 | if is_reasoning: 264 | # 直接使用完整的思考内容 265 | thinking_buffer = content 266 | 267 | print(f"Thinking complete, total length: {len(thinking_buffer)} chars") 268 | socketio.emit('ai_response', { 269 | 'status': 'thinking_complete', 270 | 'content': thinking_buffer, 271 | 'is_reasoning': True 272 | }, room=sid) 273 | 274 | elif status == 'streaming': 275 | # 直接使用模型提供的完整内容 276 | response_buffer = content 277 | 278 | # 控制发送频率,至少间隔0.3秒 279 | current_time = time.time() 280 | if current_time - last_emit_time >= 0.3: 281 | socketio.emit('ai_response', { 282 | 'status': 'streaming', 283 | 'content': response_buffer, 284 | 'is_reasoning': is_reasoning 285 | }, room=sid) 286 | last_emit_time = current_time 287 | 288 | elif status == 'completed': 289 | # 确保发送最终完整内容 290 | socketio.emit('ai_response', { 291 | 'status': 'completed', 292 | 'content': content or response_buffer, 293 | 'is_reasoning': is_reasoning 294 | }, room=sid) 295 | print("Response completed") 296 | 297 | elif status == 'error': 298 | # 错误状态直接转发 299 | response['is_reasoning'] = is_reasoning 300 | socketio.emit('ai_response', response, room=sid) 
301 | print(f"Error: {response.get('error', 'Unknown error')}") 302 | 303 | # 其他状态直接转发 304 | else: 305 | response['is_reasoning'] = is_reasoning 306 | socketio.emit('ai_response', response, room=sid) 307 | 308 | except Exception as e: 309 | error_msg = f"Streaming error: {str(e)}" 310 | print(error_msg) 311 | socketio.emit('ai_response', { 312 | 'status': 'error', 313 | 'error': error_msg, 314 | 'is_reasoning': model_name and ModelFactory.is_reasoning(model_name) 315 | }, room=sid) 316 | 317 | @socketio.on('request_screenshot') 318 | def handle_screenshot_request(): 319 | try: 320 | # 添加调试信息 321 | print("DEBUG: 执行request_screenshot截图") 322 | 323 | # Capture the screen 324 | screenshot = pyautogui.screenshot() 325 | 326 | # Convert the image to base64 string 327 | buffered = BytesIO() 328 | screenshot.save(buffered, format="PNG") 329 | img_str = base64.b64encode(buffered.getvalue()).decode() 330 | 331 | # Emit the screenshot back to the client,不打印base64数据 332 | print("DEBUG: 完成request_screenshot截图,图片大小: {} KB".format(len(img_str) // 1024)) 333 | socketio.emit('screenshot_response', { 334 | 'success': True, 335 | 'image': img_str 336 | }) 337 | except Exception as e: 338 | socketio.emit('screenshot_response', { 339 | 'success': False, 340 | 'error': str(e) 341 | }) 342 | 343 | @socketio.on('extract_text') 344 | def handle_text_extraction(data): 345 | try: 346 | print("Starting text extraction...") 347 | 348 | # Validate input data 349 | if not data or not isinstance(data, dict): 350 | raise ValueError("Invalid request data") 351 | 352 | if 'image' not in data: 353 | raise ValueError("No image data provided") 354 | 355 | image_data = data['image'] 356 | if not isinstance(image_data, str): 357 | raise ValueError("Invalid image data format") 358 | 359 | # 检查图像大小,避免处理过大的图像导致断开连接 360 | image_size_bytes = len(image_data) * 3 / 4 # 估算base64的实际大小 361 | if image_size_bytes > 10 * 1024 * 1024: # 10MB 362 | raise ValueError("Image too large, please crop to a smaller area") 363 
| 364 | settings = data.get('settings', {}) 365 | if not isinstance(settings, dict): 366 | raise ValueError("Invalid settings format") 367 | 368 | # 优先使用百度OCR,如果没有配置则使用Mathpix 369 | # 首先尝试获取百度OCR API密钥 370 | baidu_api_key = get_api_key('BaiduApiKey') 371 | baidu_secret_key = get_api_key('BaiduSecretKey') 372 | 373 | # 构建百度OCR API密钥(格式:api_key:secret_key) 374 | ocr_key = None 375 | ocr_model = None 376 | 377 | if baidu_api_key and baidu_secret_key: 378 | ocr_key = f"{baidu_api_key}:{baidu_secret_key}" 379 | ocr_model = 'baidu-ocr' 380 | print("Using Baidu OCR for text extraction...") 381 | else: 382 | # 回退到Mathpix 383 | mathpix_app_id = get_api_key('MathpixAppId') 384 | mathpix_app_key = get_api_key('MathpixAppKey') 385 | 386 | # 构建完整的Mathpix API密钥(格式:app_id:app_key) 387 | mathpix_key = f"{mathpix_app_id}:{mathpix_app_key}" if mathpix_app_id and mathpix_app_key else None 388 | 389 | # 如果本地没有配置,尝试使用前端传递的密钥(向后兼容) 390 | if not mathpix_key: 391 | mathpix_key = settings.get('mathpixApiKey') 392 | 393 | if mathpix_key: 394 | ocr_key = mathpix_key 395 | ocr_model = 'mathpix' 396 | print("Using Mathpix OCR for text extraction...") 397 | 398 | if not ocr_key: 399 | raise ValueError("OCR API key is required. 
Please configure Baidu OCR (API Key + Secret Key) or Mathpix (App ID + App Key)") 400 | 401 | # 先回复客户端,确认已收到请求,防止超时断开 402 | # 注意:这里不能使用return,否则后续代码不会执行 403 | socketio.emit('request_acknowledged', { 404 | 'status': 'received', 405 | 'message': f'Image received, text extraction in progress using {ocr_model}' 406 | }, room=request.sid) 407 | 408 | try: 409 | if ocr_model == 'baidu-ocr': 410 | api_key, secret_key = ocr_key.split(':') 411 | if not api_key.strip() or not secret_key.strip(): 412 | raise ValueError() 413 | elif ocr_model == 'mathpix': 414 | app_id, app_key = ocr_key.split(':') 415 | if not app_id.strip() or not app_key.strip(): 416 | raise ValueError() 417 | except ValueError: 418 | if ocr_model == 'baidu-ocr': 419 | raise ValueError("Invalid Baidu OCR API key format. Expected format: 'API_KEY:SECRET_KEY'") 420 | else: 421 | raise ValueError("Invalid Mathpix API key format. Expected format: 'app_id:app_key'") 422 | 423 | print(f"Creating {ocr_model} model instance...") 424 | # ModelFactory.create_model会处理不同模型类型 425 | model = ModelFactory.create_model( 426 | model_name=ocr_model, 427 | api_key=ocr_key 428 | ) 429 | 430 | print("Starting text extraction...") 431 | # 使用新的extract_full_text方法直接提取完整文本 432 | extracted_text = model.extract_full_text(image_data) 433 | 434 | # 直接返回文本结果 435 | socketio.emit('text_extracted', { 436 | 'content': extracted_text 437 | }, room=request.sid) 438 | 439 | except ValueError as e: 440 | error_msg = str(e) 441 | print(f"Validation error: {error_msg}") 442 | socketio.emit('text_extracted', { 443 | 'error': error_msg 444 | }, room=request.sid) 445 | except Exception as e: 446 | error_msg = f"Text extraction error: {str(e)}" 447 | print(f"Unexpected error: {error_msg}") 448 | print(f"Error details: {type(e).__name__}") 449 | socketio.emit('text_extracted', { 450 | 'error': error_msg 451 | }, room=request.sid) 452 | 453 | @socketio.on('stop_generation') 454 | def handle_stop_generation(): 455 | """处理停止生成请求""" 456 | sid = request.sid 
457 | print(f"接收到停止生成请求: {sid}") 458 | 459 | if sid in generation_tasks: 460 | # 设置停止标志 461 | stop_event = generation_tasks[sid] 462 | stop_event.set() 463 | 464 | # 发送已停止状态 465 | socketio.emit('ai_response', { 466 | 'status': 'stopped', 467 | 'content': '生成已停止' 468 | }, room=sid) 469 | 470 | print(f"已停止用户 {sid} 的生成任务") 471 | else: 472 | print(f"未找到用户 {sid} 的生成任务") 473 | 474 | @socketio.on('analyze_text') 475 | def handle_analyze_text(data): 476 | try: 477 | text = data.get('text', '') 478 | settings = data.get('settings', {}) 479 | 480 | # 获取推理配置 481 | reasoning_config = settings.get('reasoningConfig', {}) 482 | 483 | # 获取maxTokens 484 | max_tokens = int(settings.get('maxTokens', 8192)) 485 | 486 | print(f"Debug - 文本分析请求: {text[:50]}...") 487 | print(f"Debug - 最大Token: {max_tokens}, 推理配置: {reasoning_config}") 488 | 489 | # 获取模型和API密钥 490 | model_id = settings.get('model', 'claude-3-7-sonnet-20250219') 491 | 492 | if not text: 493 | socketio.emit('error', {'message': '文本内容不能为空'}) 494 | return 495 | 496 | # 获取模型信息,判断是否为推理模型 497 | model_info = settings.get('modelInfo', {}) 498 | is_reasoning = model_info.get('isReasoning', False) 499 | 500 | model_instance = create_model_instance(model_id, settings, is_reasoning) 501 | 502 | # 将推理配置传递给模型 503 | if reasoning_config: 504 | model_instance.reasoning_config = reasoning_config 505 | 506 | # 如果启用代理,配置代理设置 507 | proxies = None 508 | if settings.get('proxyEnabled'): 509 | proxies = { 510 | 'http': f"http://{settings.get('proxyHost')}:{settings.get('proxyPort')}", 511 | 'https': f"http://{settings.get('proxyHost')}:{settings.get('proxyPort')}" 512 | } 513 | 514 | # 创建用于停止生成的事件 515 | sid = request.sid 516 | stop_event = Event() 517 | generation_tasks[sid] = stop_event 518 | 519 | try: 520 | for response in model_instance.analyze_text(text, proxies=proxies): 521 | # 检查是否收到停止信号 522 | if stop_event.is_set(): 523 | print(f"分析文本生成被用户 {sid} 停止") 524 | break 525 | 526 | socketio.emit('ai_response', response, room=sid) 527 | finally: 
528 | # 清理任务 529 | if sid in generation_tasks: 530 | del generation_tasks[sid] 531 | 532 | except Exception as e: 533 | print(f"Error in analyze_text: {str(e)}") 534 | traceback.print_exc() 535 | socketio.emit('error', {'message': f'分析文本时出错: {str(e)}'}) 536 | 537 | @socketio.on('analyze_image') 538 | def handle_analyze_image(data): 539 | try: 540 | image_data = data.get('image') 541 | settings = data.get('settings', {}) 542 | 543 | # 获取推理配置 544 | reasoning_config = settings.get('reasoningConfig', {}) 545 | 546 | # 获取maxTokens 547 | max_tokens = int(settings.get('maxTokens', 8192)) 548 | 549 | print(f"Debug - 图像分析请求") 550 | print(f"Debug - 最大Token: {max_tokens}, 推理配置: {reasoning_config}") 551 | 552 | # 获取模型和API密钥 553 | model_id = settings.get('model', 'claude-3-7-sonnet-20250219') 554 | 555 | if not image_data: 556 | socketio.emit('error', {'message': '图像数据不能为空'}) 557 | return 558 | 559 | # 获取模型信息,判断是否为推理模型 560 | model_info = settings.get('modelInfo', {}) 561 | is_reasoning = model_info.get('isReasoning', False) 562 | 563 | model_instance = create_model_instance(model_id, settings, is_reasoning) 564 | 565 | # 将推理配置传递给模型 566 | if reasoning_config: 567 | model_instance.reasoning_config = reasoning_config 568 | 569 | # 如果启用代理,配置代理设置 570 | proxies = None 571 | if settings.get('proxyEnabled'): 572 | proxies = { 573 | 'http': f"http://{settings.get('proxyHost')}:{settings.get('proxyPort')}", 574 | 'https': f"http://{settings.get('proxyHost')}:{settings.get('proxyPort')}" 575 | } 576 | 577 | # 创建用于停止生成的事件 578 | sid = request.sid 579 | stop_event = Event() 580 | generation_tasks[sid] = stop_event 581 | 582 | try: 583 | for response in model_instance.analyze_image(image_data, proxies=proxies): 584 | # 检查是否收到停止信号 585 | if stop_event.is_set(): 586 | print(f"分析图像生成被用户 {sid} 停止") 587 | break 588 | 589 | socketio.emit('ai_response', response, room=sid) 590 | finally: 591 | # 清理任务 592 | if sid in generation_tasks: 593 | del generation_tasks[sid] 594 | 595 | except Exception as e: 
596 | print(f"Error in analyze_image: {str(e)}") 597 | traceback.print_exc() 598 | socketio.emit('error', {'message': f'分析图像时出错: {str(e)}'}) 599 | 600 | @socketio.on('capture_screenshot') 601 | def handle_capture_screenshot(data): 602 | try: 603 | # 添加调试信息 604 | print("DEBUG: 执行capture_screenshot截图") 605 | 606 | # Capture the screen 607 | screenshot = pyautogui.screenshot() 608 | 609 | # Convert the image to base64 string 610 | buffered = BytesIO() 611 | screenshot.save(buffered, format="PNG") 612 | img_str = base64.b64encode(buffered.getvalue()).decode() 613 | 614 | # Emit the screenshot back to the client,不打印base64数据 615 | print("DEBUG: 完成capture_screenshot截图,图片大小: {} KB".format(len(img_str) // 1024)) 616 | socketio.emit('screenshot_complete', { 617 | 'success': True, 618 | 'image': img_str 619 | }, room=request.sid) 620 | except Exception as e: 621 | error_msg = f"Screenshot error: {str(e)}" 622 | print(f"Error capturing screenshot: {error_msg}") 623 | socketio.emit('screenshot_complete', { 624 | 'success': False, 625 | 'error': error_msg 626 | }, room=request.sid) 627 | 628 | def load_model_config(): 629 | """加载模型配置信息""" 630 | try: 631 | config_path = os.path.join(CONFIG_DIR, 'models.json') 632 | with open(config_path, 'r', encoding='utf-8') as f: 633 | config = json.load(f) 634 | return config 635 | except Exception as e: 636 | print(f"加载模型配置失败: {e}") 637 | return { 638 | "providers": {}, 639 | "models": {} 640 | } 641 | 642 | def load_prompts(): 643 | """加载系统提示词配置""" 644 | try: 645 | if os.path.exists(PROMPT_FILE): 646 | with open(PROMPT_FILE, 'r', encoding='utf-8') as f: 647 | return json.load(f) 648 | else: 649 | # 如果文件不存在,创建默认提示词配置 650 | default_prompts = { 651 | "default": { 652 | "name": "默认提示词", 653 | "content": "您是一位专业的问题解决专家。请逐步分析问题,找出问题所在,并提供详细的解决方案。始终使用用户偏好的语言回答。", 654 | "description": "通用问题解决提示词" 655 | } 656 | } 657 | with open(PROMPT_FILE, 'w', encoding='utf-8') as f: 658 | json.dump(default_prompts, f, ensure_ascii=False, indent=4) 659 | return 
default_prompts 660 | except Exception as e: 661 | print(f"加载提示词配置失败: {e}") 662 | return { 663 | "default": { 664 | "name": "默认提示词", 665 | "content": "您是一位专业的问题解决专家。请逐步分析问题,找出问题所在,并提供详细的解决方案。始终使用用户偏好的语言回答。", 666 | "description": "通用问题解决提示词" 667 | } 668 | } 669 | 670 | def save_prompt(prompt_id, prompt_data): 671 | """保存单个提示词到配置文件""" 672 | try: 673 | prompts = load_prompts() 674 | prompts[prompt_id] = prompt_data 675 | with open(PROMPT_FILE, 'w', encoding='utf-8') as f: 676 | json.dump(prompts, f, ensure_ascii=False, indent=4) 677 | return True 678 | except Exception as e: 679 | print(f"保存提示词配置失败: {e}") 680 | return False 681 | 682 | def delete_prompt(prompt_id): 683 | """从配置文件中删除一个提示词""" 684 | try: 685 | prompts = load_prompts() 686 | if prompt_id in prompts: 687 | del prompts[prompt_id] 688 | with open(PROMPT_FILE, 'w', encoding='utf-8') as f: 689 | json.dump(prompts, f, ensure_ascii=False, indent=4) 690 | return True 691 | return False 692 | except Exception as e: 693 | print(f"删除提示词配置失败: {e}") 694 | return False 695 | 696 | # 替换 before_first_request 装饰器 697 | def init_model_config(): 698 | """初始化模型配置""" 699 | try: 700 | model_config = load_model_config() 701 | # 更新ModelFactory的模型信息 702 | if hasattr(ModelFactory, 'update_model_capabilities'): 703 | ModelFactory.update_model_capabilities(model_config) 704 | print("已加载模型配置") 705 | except Exception as e: 706 | print(f"初始化模型配置失败: {e}") 707 | 708 | # 在请求处理前注册初始化函数 709 | @app.before_request 710 | def before_request_handler(): 711 | # 使用全局变量跟踪是否已初始化 712 | if not getattr(app, '_model_config_initialized', False): 713 | init_model_config() 714 | app._model_config_initialized = True 715 | 716 | # 版本检查函数 717 | def check_for_updates(): 718 | """检查GitHub上是否有新版本""" 719 | try: 720 | # 读取当前版本信息 721 | version_file = os.path.join(CONFIG_DIR, 'version.json') 722 | with open(version_file, 'r', encoding='utf-8') as f: 723 | version_info = json.load(f) 724 | 725 | current_version = version_info.get('version', '0.0.0') 726 | repo = 
version_info.get('github_repo', 'Zippland/Snap-Solver') 727 | 728 | # 请求GitHub API获取最新发布版本 729 | api_url = f"https://api.github.com/repos/{repo}/releases/latest" 730 | 731 | # 添加User-Agent以符合GitHub API要求 732 | headers = {'User-Agent': 'Snap-Solver-Update-Checker'} 733 | 734 | response = requests.get(api_url, headers=headers, timeout=5) 735 | if response.status_code == 200: 736 | latest_release = response.json() 737 | latest_version = latest_release.get('tag_name', '').lstrip('v') 738 | 739 | # 如果版本号为空,尝试从名称中提取 740 | if not latest_version and 'name' in latest_release: 741 | import re 742 | version_match = re.search(r'v?(\d+\.\d+\.\d+)', latest_release['name']) 743 | if version_match: 744 | latest_version = version_match.group(1) 745 | 746 | # 比较版本号(简单比较,可以改进为更复杂的语义版本比较) 747 | has_update = compare_versions(latest_version, current_version) 748 | 749 | update_info = { 750 | 'has_update': has_update, 751 | 'current_version': current_version, 752 | 'latest_version': latest_version, 753 | 'release_url': latest_release.get('html_url', f"https://github.com/{repo}/releases/latest"), 754 | 'release_date': latest_release.get('published_at', ''), 755 | 'release_notes': latest_release.get('body', ''), 756 | } 757 | 758 | # 缓存更新信息 759 | update_info_file = os.path.join(CONFIG_DIR, 'update_info.json') 760 | with open(update_info_file, 'w', encoding='utf-8') as f: 761 | json.dump(update_info, f, ensure_ascii=False, indent=2) 762 | 763 | return update_info 764 | 765 | # 如果无法连接GitHub,尝试读取缓存的更新信息 766 | update_info_file = os.path.join(CONFIG_DIR, 'update_info.json') 767 | if os.path.exists(update_info_file): 768 | with open(update_info_file, 'r', encoding='utf-8') as f: 769 | return json.load(f) 770 | 771 | return {'has_update': False, 'current_version': current_version} 772 | 773 | except Exception as e: 774 | print(f"检查更新失败: {str(e)}") 775 | # 出错时返回一个默认的值 776 | return {'has_update': False, 'error': str(e)} 777 | 778 | def compare_versions(version1, version2): 779 | 
"""比较两个版本号,如果version1比version2更新,则返回True""" 780 | try: 781 | v1_parts = [int(x) for x in version1.split('.')] 782 | v2_parts = [int(x) for x in version2.split('.')] 783 | 784 | # 确保两个版本号的组成部分长度相同 785 | while len(v1_parts) < len(v2_parts): 786 | v1_parts.append(0) 787 | while len(v2_parts) < len(v1_parts): 788 | v2_parts.append(0) 789 | 790 | # 逐部分比较 791 | for i in range(len(v1_parts)): 792 | if v1_parts[i] > v2_parts[i]: 793 | return True 794 | elif v1_parts[i] < v2_parts[i]: 795 | return False 796 | 797 | # 完全相同的版本 798 | return False 799 | except: 800 | # 如果解析出错,默认不更新 801 | return False 802 | 803 | @app.route('/api/check-update', methods=['GET']) 804 | def api_check_update(): 805 | """检查更新的API端点""" 806 | update_info = check_for_updates() 807 | return jsonify(update_info) 808 | 809 | # 添加配置文件路由 810 | @app.route('/config/') 811 | def serve_config(filename): 812 | return send_from_directory(CONFIG_DIR, filename) 813 | 814 | # 添加用于获取所有模型信息的API 815 | @app.route('/api/models', methods=['GET']) 816 | def get_models(): 817 | """返回可用的模型列表""" 818 | models = ModelFactory.get_available_models() 819 | return jsonify(models) 820 | 821 | # 获取所有API密钥 822 | @app.route('/api/keys', methods=['GET']) 823 | def get_api_keys(): 824 | """获取所有API密钥""" 825 | api_keys = load_api_keys() 826 | return jsonify(api_keys) 827 | 828 | # 保存API密钥 829 | @app.route('/api/keys', methods=['POST']) 830 | def update_api_keys(): 831 | """更新API密钥配置""" 832 | try: 833 | new_keys = request.json 834 | if not isinstance(new_keys, dict): 835 | return jsonify({"success": False, "message": "无效的API密钥格式"}), 400 836 | 837 | # 加载当前密钥 838 | current_keys = load_api_keys() 839 | 840 | # 更新密钥 841 | for key, value in new_keys.items(): 842 | current_keys[key] = value 843 | 844 | # 保存回文件 845 | if save_api_keys(current_keys): 846 | return jsonify({"success": True, "message": "API密钥已保存"}) 847 | else: 848 | return jsonify({"success": False, "message": "保存API密钥失败"}), 500 849 | 850 | except Exception as e: 851 | return 
jsonify({"success": False, "message": f"更新API密钥错误: {str(e)}"}), 500 852 | 853 | # 加载API密钥配置 854 | def load_api_keys(): 855 | """从配置文件加载API密钥""" 856 | try: 857 | default_keys = { 858 | "AnthropicApiKey": "", 859 | "OpenaiApiKey": "", 860 | "DeepseekApiKey": "", 861 | "AlibabaApiKey": "", 862 | "MathpixAppId": "", 863 | "MathpixAppKey": "", 864 | "GoogleApiKey": "", 865 | "DoubaoApiKey": "", 866 | "BaiduApiKey": "", 867 | "BaiduSecretKey": "" 868 | } 869 | if os.path.exists(API_KEYS_FILE): 870 | with open(API_KEYS_FILE, 'r', encoding='utf-8') as f: 871 | api_keys = json.load(f) 872 | 873 | # 确保新增的密钥占位符能自动补充 874 | missing_key_added = False 875 | for key, default_value in default_keys.items(): 876 | if key not in api_keys: 877 | api_keys[key] = default_value 878 | missing_key_added = True 879 | 880 | if missing_key_added: 881 | save_api_keys(api_keys) 882 | 883 | return api_keys 884 | else: 885 | # 如果文件不存在,创建默认配置 886 | save_api_keys(default_keys) 887 | return default_keys 888 | except Exception as e: 889 | print(f"加载API密钥配置失败: {e}") 890 | return {} 891 | 892 | # 加载中转API配置 893 | def load_proxy_api(): 894 | """从配置文件加载中转API配置""" 895 | try: 896 | if os.path.exists(PROXY_API_FILE): 897 | with open(PROXY_API_FILE, 'r', encoding='utf-8') as f: 898 | return json.load(f) 899 | else: 900 | # 如果文件不存在,创建默认配置 901 | default_proxy_apis = { 902 | "enabled": False, 903 | "apis": { 904 | "anthropic": "", 905 | "openai": "", 906 | "deepseek": "", 907 | "alibaba": "", 908 | "google": "" 909 | } 910 | } 911 | save_proxy_api(default_proxy_apis) 912 | return default_proxy_apis 913 | except Exception as e: 914 | print(f"加载中转API配置失败: {e}") 915 | return {"enabled": False, "apis": {}} 916 | 917 | # 保存中转API配置 918 | def save_proxy_api(proxy_api_config): 919 | """保存中转API配置到文件""" 920 | try: 921 | # 确保配置目录存在 922 | os.makedirs(os.path.dirname(PROXY_API_FILE), exist_ok=True) 923 | 924 | with open(PROXY_API_FILE, 'w', encoding='utf-8') as f: 925 | json.dump(proxy_api_config, f, ensure_ascii=False, 
indent=2) 926 | return True 927 | except Exception as e: 928 | print(f"保存中转API配置失败: {e}") 929 | return False 930 | 931 | # 保存API密钥配置 932 | def save_api_keys(api_keys): 933 | try: 934 | # 确保配置目录存在 935 | os.makedirs(os.path.dirname(API_KEYS_FILE), exist_ok=True) 936 | 937 | with open(API_KEYS_FILE, 'w', encoding='utf-8') as f: 938 | json.dump(api_keys, f, ensure_ascii=False, indent=2) 939 | return True 940 | except Exception as e: 941 | print(f"保存API密钥配置失败: {e}") 942 | return False 943 | 944 | # 获取特定API密钥 945 | def get_api_key(key_name): 946 | """获取指定的API密钥""" 947 | api_keys = load_api_keys() 948 | return api_keys.get(key_name, "") 949 | 950 | @app.route('/api/models') 951 | def api_models(): 952 | """API端点:获取可用模型列表""" 953 | try: 954 | # 加载模型配置 955 | config = load_model_config() 956 | 957 | # 转换为前端需要的格式 958 | models = [] 959 | for model_id, model_info in config['models'].items(): 960 | models.append({ 961 | 'id': model_id, 962 | 'display_name': model_info.get('name', model_id), 963 | 'is_multimodal': model_info.get('supportsMultimodal', False), 964 | 'is_reasoning': model_info.get('isReasoning', False), 965 | 'description': model_info.get('description', ''), 966 | 'version': model_info.get('version', 'latest') 967 | }) 968 | 969 | # 返回模型列表 970 | return jsonify(models) 971 | except Exception as e: 972 | print(f"获取模型列表时出错: {e}") 973 | return jsonify([]), 500 974 | 975 | @app.route('/api/prompts', methods=['GET']) 976 | def get_prompts(): 977 | """API端点:获取所有系统提示词""" 978 | try: 979 | prompts = load_prompts() 980 | return jsonify(prompts) 981 | except Exception as e: 982 | print(f"获取提示词列表时出错: {e}") 983 | return jsonify({"error": str(e)}), 500 984 | 985 | @app.route('/api/prompts/', methods=['GET']) 986 | def get_prompt(prompt_id): 987 | """API端点:获取单个系统提示词""" 988 | try: 989 | prompts = load_prompts() 990 | if prompt_id in prompts: 991 | return jsonify(prompts[prompt_id]) 992 | else: 993 | return jsonify({"error": "提示词不存在"}), 404 994 | except Exception as e: 995 | 
print(f"获取提示词时出错: {e}") 996 | return jsonify({"error": str(e)}), 500 997 | 998 | @app.route('/api/prompts', methods=['POST']) 999 | def add_prompt(): 1000 | """API端点:添加或更新系统提示词""" 1001 | try: 1002 | data = request.json 1003 | if not data or not isinstance(data, dict): 1004 | return jsonify({"error": "无效的请求数据"}), 400 1005 | 1006 | prompt_id = data.get('id') 1007 | if not prompt_id: 1008 | return jsonify({"error": "提示词ID不能为空"}), 400 1009 | 1010 | prompt_data = { 1011 | "name": data.get('name', f"提示词{prompt_id}"), 1012 | "content": data.get('content', ""), 1013 | "description": data.get('description', "") 1014 | } 1015 | 1016 | save_prompt(prompt_id, prompt_data) 1017 | return jsonify({"success": True, "id": prompt_id}) 1018 | except Exception as e: 1019 | print(f"保存提示词时出错: {e}") 1020 | return jsonify({"error": str(e)}), 500 1021 | 1022 | @app.route('/api/prompts/', methods=['DELETE']) 1023 | def remove_prompt(prompt_id): 1024 | """API端点:删除系统提示词""" 1025 | try: 1026 | success = delete_prompt(prompt_id) 1027 | if success: 1028 | return jsonify({"success": True}) 1029 | else: 1030 | return jsonify({"error": "提示词不存在或删除失败"}), 404 1031 | except Exception as e: 1032 | print(f"删除提示词时出错: {e}") 1033 | return jsonify({"error": str(e)}), 500 1034 | 1035 | @app.route('/api/proxy-api', methods=['GET']) 1036 | def get_proxy_api(): 1037 | """API端点:获取中转API配置""" 1038 | try: 1039 | proxy_api_config = load_proxy_api() 1040 | return jsonify(proxy_api_config) 1041 | except Exception as e: 1042 | print(f"获取中转API配置时出错: {e}") 1043 | return jsonify({"error": str(e)}), 500 1044 | 1045 | @app.route('/api/proxy-api', methods=['POST']) 1046 | def update_proxy_api(): 1047 | """API端点:更新中转API配置""" 1048 | try: 1049 | new_config = request.json 1050 | if not isinstance(new_config, dict): 1051 | return jsonify({"success": False, "message": "无效的中转API配置格式"}), 400 1052 | 1053 | # 保存回文件 1054 | if save_proxy_api(new_config): 1055 | return jsonify({"success": True, "message": "中转API配置已保存"}) 1056 | else: 1057 | 
return jsonify({"success": False, "message": "保存中转API配置失败"}), 500 1058 | 1059 | except Exception as e: 1060 | return jsonify({"success": False, "message": f"更新中转API配置错误: {str(e)}"}), 500 1061 | 1062 | @app.route('/api/clipboard', methods=['POST']) 1063 | def update_clipboard(): 1064 | """将文本复制到服务器剪贴板""" 1065 | try: 1066 | data = request.get_json(silent=True) or {} 1067 | text = data.get('text', '') 1068 | 1069 | if not isinstance(text, str) or not text.strip(): 1070 | return jsonify({"success": False, "message": "剪贴板内容不能为空"}), 400 1071 | 1072 | # 直接尝试复制,不使用is_available()检查 1073 | try: 1074 | pyperclip.copy(text) 1075 | return jsonify({"success": True}) 1076 | except Exception as e: 1077 | return jsonify({"success": False, "message": f"复制到剪贴板失败: {str(e)}"}), 500 1078 | except Exception as e: 1079 | app.logger.exception("更新剪贴板时发生异常") 1080 | return jsonify({"success": False, "message": f"服务器内部错误: {str(e)}"}), 500 1081 | 1082 | @app.route('/api/clipboard', methods=['GET']) 1083 | def get_clipboard(): 1084 | """从服务器剪贴板读取文本""" 1085 | try: 1086 | # 直接尝试读取,不使用is_available()检查 1087 | try: 1088 | text = pyperclip.paste() 1089 | if text is None: 1090 | text = "" 1091 | 1092 | return jsonify({ 1093 | "success": True, 1094 | "text": text, 1095 | "message": "成功读取剪贴板内容" 1096 | }) 1097 | except Exception as e: 1098 | return jsonify({"success": False, "message": f"读取剪贴板失败: {str(e)}"}), 500 1099 | except Exception as e: 1100 | app.logger.exception("读取剪贴板时发生异常") 1101 | return jsonify({"success": False, "message": f"服务器内部错误: {str(e)}"}), 500 1102 | 1103 | if __name__ == '__main__': 1104 | # 尝试使用5000端口,如果被占用则使用5001 1105 | port = 5000 1106 | import socket 1107 | try: 1108 | s = socket.socket(socket.AF_INET, socket.SOCK_STREAM) 1109 | s.bind(('0.0.0.0', port)) 1110 | s.close() 1111 | except OSError: 1112 | port = 5001 1113 | print(f"端口5000被占用,将使用端口{port}") 1114 | 1115 | local_ip = get_local_ip() 1116 | print(f"Local IP Address: {local_ip}") 1117 | print(f"Connect from your mobile device 
using: {local_ip}:{port}") 1118 | 1119 | # 加载模型配置 1120 | model_config = load_model_config() 1121 | if hasattr(ModelFactory, 'update_model_capabilities'): 1122 | ModelFactory.update_model_capabilities(model_config) 1123 | print("已加载模型配置信息") 1124 | 1125 | # Run Flask in the main thread without debug mode 1126 | socketio.run(app, host='0.0.0.0', port=port, allow_unsafe_werkzeug=True) 1127 | --------------------------------------------------------------------------------