158 |
159 |
模型选择
160 |
168 |
169 |
170 |
193 |
194 | {isOpen && (
195 | <>
196 |
setIsOpen(false)}
199 | />
200 |
201 | {models.map((model) => (
202 |
231 | ))}
232 |
233 | >
234 | )}
235 |
236 |
237 | 不同模型效果不同,建议自行测试
238 |
239 |
240 | )
241 | }
242 |
--------------------------------------------------------------------------------
/backend/app/routers/model.py:
--------------------------------------------------------------------------------
1 | from fastapi import APIRouter, HTTPException
2 | from pydantic import BaseModel
3 | from openai import OpenAI
4 | import google.generativeai as genai
5 | from app.utils.response import ResponseWrapper as R
6 | from app.utils.logger import get_logger
7 |
8 | logger = get_logger(__name__)
9 | router = APIRouter()
10 |
11 | # 内置提供商配置
12 | BUILTIN_PROVIDERS = [
13 | {
14 | "id": "openai",
15 | "name": "OpenAI",
16 | "type": "built-in",
17 | "logo": "OpenAI",
18 | "base_url": "https://api.openai.com/v1"
19 | },
20 | {
21 | "id": "deepseek",
22 | "name": "DeepSeek",
23 | "type": "built-in",
24 | "logo": "DeepSeek",
25 | "base_url": "https://api.deepseek.com"
26 | },
27 | {
28 | "id": "qwen",
29 | "name": "Qwen",
30 | "type": "built-in",
31 | "logo": "Qwen",
32 | "base_url": "https://dashscope.aliyuncs.com/compatible-mode/v1"
33 | },
34 | {
35 | "id": "claude",
36 | "name": "Claude",
37 | "type": "built-in",
38 | "logo": "Claude",
39 | "base_url": "https://api.anthropic.com/v1"
40 | },
41 | {
42 | "id": "gemini",
43 | "name": "Gemini",
44 | "type": "built-in",
45 | "logo": "Gemini",
46 | "base_url": "https://generativelanguage.googleapis.com/v1beta/openai/"
47 | },
48 | {
49 | "id": "groq",
50 | "name": "Groq",
51 | "type": "built-in",
52 | "logo": "Groq",
53 | "base_url": "https://api.groq.com/openai/v1"
54 | },
55 | {
56 | "id": "ollama",
57 | "name": "Ollama",
58 | "type": "built-in",
59 | "logo": "Ollama",
60 | "base_url": "http://127.0.0.1:11434/v1"
61 | }
62 | ]
63 |
64 | class ModelConfigRequest(BaseModel):
65 | provider: str
66 | api_key: str
67 | base_url: str | None = None
68 |
69 | class ModelItem(BaseModel):
70 | id: str
71 | name: str
72 | provider: str
73 |
74 | @router.get("/providers")
75 | def get_providers():
76 | """获取所有可用的提供商列表"""
77 | return R.success(BUILTIN_PROVIDERS, msg="获取提供商列表成功")
78 |
79 | @router.post("/models/list")
80 | def get_model_list(config: ModelConfigRequest):
81 | """获取指定提供商的模型列表"""
82 | try:
83 | if config.provider == 'gemini':
84 | return get_gemini_models(config.api_key)
85 | else:
86 | # 其他厂商都使用 OpenAI 兼容 API
87 | return get_openai_compatible_models(config.provider, config.api_key, config.base_url)
88 | except Exception as e:
89 | logger.error(f"获取模型列表失败: {e}", exc_info=True)
90 | return R.error(f"获取模型列表失败: {str(e)}")
91 |
92 | def get_openai_compatible_models(provider: str, api_key: str, base_url: str = None):
93 | """获取 OpenAI 兼容 API 的模型列表(OpenAI, DeepSeek, Qwen, Claude, Groq, Ollama 等)"""
94 | try:
95 | # Ollama 不需要 API Key
96 | if provider == 'ollama':
97 | api_key = api_key or 'ollama' # Ollama 可以使用任意值或空值
98 |
99 | # 如果没有提供 base_url,从内置配置中查找
100 | if not base_url:
101 | provider_config = next((p for p in BUILTIN_PROVIDERS if p["id"] == provider), None)
102 | if provider_config:
103 | base_url = provider_config.get("base_url")
104 | else:
105 | # 默认使用 OpenAI
106 | base_url = "https://api.openai.com/v1"
107 |
108 | client = OpenAI(api_key=api_key, base_url=base_url)
109 |
110 | # 获取模型列表
111 | models_response = client.models.list()
112 |
113 | # 过滤出可用的聊天模型
114 | chat_models = []
115 | for model in models_response.data:
116 | model_id = model.id
117 |
118 | # 根据提供商过滤模型
119 | if provider == 'openai':
120 | if any(x in model_id.lower() for x in ['gpt-4', 'gpt-3.5', 'gpt-4o']):
121 | chat_models.append({
122 | "id": model_id,
123 | "name": model_id,
124 | "provider": provider
125 | })
126 | elif provider == 'deepseek':
127 | if 'deepseek' in model_id.lower():
128 | chat_models.append({
129 | "id": model_id,
130 | "name": model_id,
131 | "provider": provider
132 | })
133 | elif provider == 'qwen':
134 | if 'qwen' in model_id.lower():
135 | chat_models.append({
136 | "id": model_id,
137 | "name": model_id,
138 | "provider": provider
139 | })
140 | elif provider == 'claude':
141 | if 'claude' in model_id.lower():
142 | chat_models.append({
143 | "id": model_id,
144 | "name": model_id,
145 | "provider": provider
146 | })
147 | elif provider == 'groq':
148 | if any(x in model_id.lower() for x in ['llama', 'mixtral', 'gemma']):
149 | chat_models.append({
150 | "id": model_id,
151 | "name": model_id,
152 | "provider": provider
153 | })
154 | elif provider == 'ollama':
155 | # Ollama 返回所有模型
156 | chat_models.append({
157 | "id": model_id,
158 | "name": model_id,
159 | "provider": provider
160 | })
161 | else:
162 | # 其他提供商,返回所有模型
163 | chat_models.append({
164 | "id": model_id,
165 | "name": model_id,
166 | "provider": provider
167 | })
168 |
169 | # 如果没有获取到,返回常用模型列表
170 | if not chat_models:
171 | chat_models = get_default_models(provider)
172 |
173 | return R.success(chat_models, msg=f"获取 {provider} 模型列表成功")
174 | except Exception as e:
175 | logger.error(f"获取 {provider} 模型列表失败: {e}")
176 | # 返回默认模型列表
177 | return R.success(get_default_models(provider), msg=f"获取 {provider} 模型列表成功(使用默认列表)")
178 |
179 | def get_default_models(provider: str):
180 | """获取默认模型列表"""
181 | defaults = {
182 | "openai": [
183 | {"id": "gpt-4o", "name": "GPT-4o", "provider": "openai"},
184 | {"id": "gpt-4o-mini", "name": "GPT-4o Mini", "provider": "openai"},
185 | {"id": "gpt-4-turbo", "name": "GPT-4 Turbo", "provider": "openai"},
186 | {"id": "gpt-3.5-turbo", "name": "GPT-3.5 Turbo", "provider": "openai"},
187 | ],
188 | "deepseek": [
189 | {"id": "deepseek-chat", "name": "DeepSeek Chat", "provider": "deepseek"},
190 | {"id": "deepseek-coder", "name": "DeepSeek Coder", "provider": "deepseek"},
191 | ],
192 | "qwen": [
193 | {"id": "qwen-turbo", "name": "Qwen Turbo", "provider": "qwen"},
194 | {"id": "qwen-plus", "name": "Qwen Plus", "provider": "qwen"},
195 | {"id": "qwen-max", "name": "Qwen Max", "provider": "qwen"},
196 | ],
197 | "claude": [
198 | {"id": "claude-3-5-sonnet-20241022", "name": "Claude 3.5 Sonnet", "provider": "claude"},
199 | {"id": "claude-3-opus-20240229", "name": "Claude 3 Opus", "provider": "claude"},
200 | {"id": "claude-3-sonnet-20240229", "name": "Claude 3 Sonnet", "provider": "claude"},
201 | ],
202 | "groq": [
203 | {"id": "llama-3.1-70b-versatile", "name": "Llama 3.1 70B", "provider": "groq"},
204 | {"id": "mixtral-8x7b-32768", "name": "Mixtral 8x7B", "provider": "groq"},
205 | {"id": "gemma-7b-it", "name": "Gemma 7B", "provider": "groq"},
206 | ],
207 | "ollama": [
208 | {"id": "llama2", "name": "Llama 2", "provider": "ollama"},
209 | {"id": "mistral", "name": "Mistral", "provider": "ollama"},
210 | {"id": "codellama", "name": "CodeLlama", "provider": "ollama"},
211 | ],
212 | }
213 | return defaults.get(provider, [])
214 |
215 | def get_gemini_models(api_key: str):
216 | """获取 Gemini 模型列表"""
217 | try:
218 | genai.configure(api_key=api_key)
219 |
220 | # 获取可用模型列表
221 | models = genai.list_models()
222 |
223 | # 过滤出生成模型
224 | chat_models = []
225 | for model in models:
226 | # 只包含生成模型
227 | if 'generateContent' in model.supported_generation_methods:
228 | chat_models.append({
229 | "id": model.name.split('/')[-1], # 提取模型名称
230 | "name": model.display_name or model.name.split('/')[-1],
231 | "provider": "gemini"
232 | })
233 |
234 | # 如果没有获取到,返回常用模型列表
235 | if not chat_models:
236 | chat_models = [
237 | {"id": "gemini-2.0-flash-exp", "name": "Gemini 2.0 Flash (Experimental)", "provider": "gemini"},
238 | {"id": "gemini-1.5-pro", "name": "Gemini 1.5 Pro", "provider": "gemini"},
239 | {"id": "gemini-1.5-flash", "name": "Gemini 1.5 Flash", "provider": "gemini"},
240 | {"id": "gemini-pro", "name": "Gemini Pro", "provider": "gemini"},
241 | ]
242 |
243 | return R.success(chat_models, msg="获取 Gemini 模型列表成功")
244 | except Exception as e:
245 | logger.error(f"获取 Gemini 模型列表失败: {e}")
246 | raise
247 |
248 | @router.post("/models/test")
249 | def test_model_connection(config: ModelConfigRequest):
250 | """测试模型连接"""
251 | try:
252 | if config.provider == 'gemini':
253 | genai.configure(api_key=config.api_key)
254 | # 尝试列出模型来测试连接
255 | list(genai.list_models())
256 | return R.success(None, msg="Gemini 连接成功")
257 | else:
258 | # OpenAI 兼容 API
259 | # Ollama 不需要 API Key
260 | api_key = config.api_key
261 | if config.provider == 'ollama':
262 | api_key = api_key or 'ollama' # Ollama 可以使用任意值或空值
263 |
264 | if not config.base_url:
265 | provider_config = next((p for p in BUILTIN_PROVIDERS if p["id"] == config.provider), None)
266 | if provider_config:
267 | base_url = provider_config.get("base_url")
268 | else:
269 | base_url = "https://api.openai.com/v1"
270 | else:
271 | base_url = config.base_url
272 |
273 | client = OpenAI(api_key=api_key, base_url=base_url)
274 | # 尝试获取模型列表来测试连接
275 | client.models.list()
276 | return R.success(None, msg=f"{config.provider} 连接成功")
277 | except Exception as e:
278 | logger.error(f"测试连接失败: {e}", exc_info=True)
279 | return R.error(f"连接失败: {str(e)}")
280 |
--------------------------------------------------------------------------------
/frontend/src/components/ModelSelectorPanel.tsx:
--------------------------------------------------------------------------------
1 | import { useState, useEffect } from 'react'
2 | import { Brain, ChevronDown, CheckCircle2 } from 'lucide-react'
3 | import ProviderIcon from './ProviderIcon'
4 | import toast from 'react-hot-toast'
5 |
6 | interface ModelOption {
7 | id: string
8 | name: string
9 | provider: string
10 | providerName: string
11 | modelId: string
12 | }
13 |
14 | const PROVIDER_LABELS: Record<string, string> = {
15 | openai: 'OpenAI',
16 | deepseek: 'DeepSeek',
17 | qwen: 'Qwen',
18 | claude: 'Claude',
19 | gemini: 'Gemini',
20 | groq: 'Groq',
21 | ollama: 'Ollama',
22 | }
23 |
24 | const PROVIDER_COLORS: Record<string, string> = {
25 | openai: 'bg-green-100 text-green-700 border-green-200',
26 | deepseek: 'bg-blue-100 text-blue-700 border-blue-200',
27 | qwen: 'bg-purple-100 text-purple-700 border-purple-200',
28 | claude: 'bg-orange-100 text-orange-700 border-orange-200',
29 | gemini: 'bg-yellow-100 text-yellow-700 border-yellow-200',
30 | groq: 'bg-indigo-100 text-indigo-700 border-indigo-200',
31 | ollama: 'bg-teal-100 text-teal-700 border-teal-200',
32 | }
33 |
34 | export default function ModelSelectorPanel() {
35 | const [selectedModel, setSelectedModel] = useState('')
36 | const [availableModels, setAvailableModels] = useState<ModelOption[]>([])
37 | const [isOpen, setIsOpen] = useState(false)
38 |
39 | // 从 localStorage 加载所有已配置的模型
40 | const loadModels = () => {
41 | try {
42 | const savedConfigs = localStorage.getItem('modelConfigs')
43 |
44 | if (!savedConfigs) {
45 | setAvailableModels([])
46 | setSelectedModel('')
47 | return
48 | }
49 |
50 | const configs = JSON.parse(savedConfigs)
51 | const modelList: ModelOption[] = []
52 |
53 | // 遍历所有配置,提取已配置的模型
54 | Object.entries(configs).forEach(([providerId, config]: [string, any]) => {
55 | if (!config || typeof config !== 'object') {
56 | return
57 | }
58 |
59 | // 检查是否已配置:有模型ID(支持 models 数组或 model 字符串),且(Ollama 或 有 API Key)
60 | // 优先使用 models 数组,如果没有则使用 model 字符串(兼容旧版本)
61 | const modelIds = config.models && Array.isArray(config.models) && config.models.length > 0
62 | ? config.models
63 | : (config.model && typeof config.model === 'string' && config.model.trim() ? [config.model.trim()] : [])
64 |
65 | // Ollama 不需要 API Key,其他提供商需要
66 | const hasApiKey = providerId === 'ollama' || (config.apiKey && typeof config.apiKey === 'string' && config.apiKey.trim())
67 |
68 | // 只有同时满足:有模型 且 (Ollama 或 有 API Key)才添加
69 | if (modelIds.length > 0 && hasApiKey) {
70 | // 遍历所有选中的模型
71 | modelIds.forEach((modelId: string) => {
72 | const trimmedModelId = typeof modelId === 'string' ? modelId.trim() : String(modelId).trim()
73 | if (!trimmedModelId) return
74 |
75 | // 模型名称处理
76 | let modelName = trimmedModelId
77 |
78 | // 如果 modelId 包含提供商前缀(如 openai-gpt-4o),提取后面的部分
79 | if (trimmedModelId.startsWith(providerId + '-')) {
80 | modelName = trimmedModelId.substring(providerId.length + 1)
81 | }
82 |
83 | // 处理特殊格式的模型名称(如 hf.co/unsloth/Qwen3-4B-GGUF:Q6_K_XL)
84 | // 提取最后一部分作为显示名称
85 | if (modelName.includes('/')) {
86 | const parts = modelName.split('/')
87 | modelName = parts[parts.length - 1]
88 | }
89 |
90 | // 处理量化格式(如 :Q6_K_XL),保留量化信息
91 | if (modelName.includes(':')) {
92 | const colonIndex = modelName.lastIndexOf(':')
93 | if (colonIndex > 0) {
94 | const baseName = modelName.substring(0, colonIndex)
95 | const quantInfo = modelName.substring(colonIndex + 1)
96 | modelName = `${baseName} (${quantInfo})`
97 | }
98 | }
99 |
100 | const modelOption: ModelOption = {
101 | id: `${providerId}-${trimmedModelId}`,
102 | name: modelName,
103 | provider: providerId,
104 | providerName: PROVIDER_LABELS[providerId] || providerId,
105 | modelId: trimmedModelId,
106 | }
107 |
108 | modelList.push(modelOption)
109 | })
110 | }
111 | })
112 |
113 | // 按提供商名称排序,然后按模型名称排序
114 | modelList.sort((a, b) => {
115 | if (a.providerName !== b.providerName) {
116 | return a.providerName.localeCompare(b.providerName)
117 | }
118 | return a.name.localeCompare(b.name)
119 | })
120 |
121 | setAvailableModels(modelList)
122 |
123 | // 加载已选择的模型
124 | const savedSelected = localStorage.getItem('selectedModel')
125 |
126 | if (savedSelected && modelList.find(m => m.id === savedSelected)) {
127 | setSelectedModel(savedSelected)
128 | } else if (modelList.length > 0) {
129 | // 如果没有保存的选择,选择第一个
130 | setSelectedModel(modelList[0].id)
131 | localStorage.setItem('selectedModel', modelList[0].id)
132 | // 触发自定义事件,通知其他组件模型已更改
133 | window.dispatchEvent(new Event('modelChanged'))
134 | } else {
135 | setSelectedModel('')
136 | }
137 | } catch (error) {
138 | console.error('加载模型列表失败:', error)
139 | setAvailableModels([])
140 | setSelectedModel('')
141 | }
142 | }
143 |
144 | useEffect(() => {
145 | loadModels()
146 |
147 | // 监听 storage 变化
148 | const handleStorageChange = (e: StorageEvent) => {
149 | if (e.key === 'modelConfigs' || e.key === 'selectedModel') {
150 | loadModels()
151 | }
152 | }
153 | window.addEventListener('storage', handleStorageChange)
154 |
155 | // 定期检查配置变化(因为同窗口的 localStorage 变化不会触发 storage 事件)
156 | const interval = setInterval(loadModels, 1000)
157 |
158 | return () => {
159 | window.removeEventListener('storage', handleStorageChange)
160 | clearInterval(interval)
161 | }
162 | }, [])
163 |
164 | const currentModel = availableModels.find(m => m.id === selectedModel)
165 |
166 | const handleModelChange = (modelId: string) => {
167 | setSelectedModel(modelId)
168 | localStorage.setItem('selectedModel', modelId)
169 | setIsOpen(false)
170 | const model = availableModels.find(m => m.id === modelId)
171 | if (model) {
172 | toast.success(`已切换到: ${model.providerName} - ${model.name}`)
173 | // 触发自定义事件,通知其他组件模型已更改
174 | window.dispatchEvent(new Event('modelChanged'))
175 | }
176 | }
177 |
178 | if (availableModels.length === 0) {
179 | return (
180 |
181 |
182 |
183 |
184 |
未配置模型
185 |
186 | 请先在下方配置至少一个提供商的 API Key 和模型,然后才能在此处选择使用。
187 |
188 |
189 |
190 |
191 | )
192 | }
193 |
194 | return (
195 |
196 |
选择当前使用的模型
197 |
198 |
232 |
233 | {isOpen && (
234 | <>
235 |
setIsOpen(false)}
238 | />
239 |
240 | {availableModels.length === 0 ? (
241 |
242 | 暂无可用模型
243 |
244 | ) : (
245 | <>
246 |
247 | 共 {availableModels.length} 个可用模型
248 |
249 | {availableModels.map((model) => (
250 |
277 | ))}
278 | >
279 | )}
280 |
281 | >
282 | )}
283 |
284 |
285 | 当前选择的模型将用于生成笔记。共 {availableModels.length} 个可用模型,可以在下方配置更多模型。
286 |
287 |
288 | )
289 | }
290 |
--------------------------------------------------------------------------------
/原理博客.md:
--------------------------------------------------------------------------------
1 | # Video AI Note: Technical Deep Dive
2 |
3 | ## Project Overview
4 |
5 | Video AI Note is an intelligent video note-taking tool: it automatically extracts audio from a video, transcribes it to text, and uses AI to generate a structured Markdown note. Processing is fully local to protect data privacy, and with local LLMs such as Ollama it can run entirely offline.
6 |
7 | ## System Architecture
8 |
9 | ### Overall Architecture
10 |
11 | ```mermaid
12 | graph TB
13 | subgraph "前端层 (Frontend)"
14 | A[React + TypeScript] --> B[组件层]
15 | B --> C[API 服务层]
16 | C --> D[状态管理]
17 | end
18 |
19 | subgraph "后端层 (Backend)"
20 | E[FastAPI 服务器] --> F[路由层]
21 | F --> G[服务层]
22 | G --> H[业务逻辑]
23 | end
24 |
25 | subgraph "核心处理模块"
26 | I[音频提取模块] --> J[FFmpeg]
27 | K[语音转文字模块] --> L[Fast-Whisper]
28 | M[AI 笔记生成模块] --> N[GPT/LLM]
29 | O[截图生成模块] --> P[FFmpeg]
30 | end
31 |
32 | subgraph "数据存储"
33 | Q[(SQLite 数据库)]
34 | R[文件系统]
35 | S[缓存文件]
36 | end
37 |
38 | C -->|HTTP API| E
39 | H --> I
40 | H --> K
41 | H --> M
42 | H --> O
43 | I --> R
44 | K --> S
45 | M --> S
46 | O --> R
47 | H --> Q
48 |
49 | style A fill:#61dafb
50 | style E fill:#009688
51 | style J fill:#ff6b6b
52 | style L fill:#4ecdc4
53 | style N fill:#95e1d3
54 | ```
55 |
56 | ## Core Workflow
57 |
58 | ### End-to-End Processing Flow
59 |
60 | ```mermaid
61 | sequenceDiagram
62 | participant User as 用户
63 | participant Frontend as 前端界面
64 | participant Backend as 后端服务
65 | participant FFmpeg as FFmpeg
66 | participant Whisper as Fast-Whisper
67 | participant LLM as AI 模型
68 | participant DB as 数据库
69 | participant FS as 文件系统
70 |
71 | User->>Frontend: 1. 上传视频文件
72 | Frontend->>Backend: POST /api/upload
73 | Backend->>FS: 保存视频文件
74 | Backend->>DB: 创建任务记录 (pending)
75 | Backend-->>Frontend: 返回 task_id
76 |
77 | User->>Frontend: 2. 执行步骤:提取音频
78 | Frontend->>Backend: POST /api/task/{task_id}/step/extract
79 | Backend->>FFmpeg: 提取音频 (16kHz, 单声道)
80 | FFmpeg->>FS: 保存音频文件
81 | Backend->>DB: 更新状态 (processing)
82 | Backend-->>Frontend: 返回成功
83 |
84 | User->>Frontend: 3. 执行步骤:转写文字
85 | Frontend->>Backend: POST /api/task/{task_id}/step/transcribe
86 | Backend->>FS: 检查缓存
87 | alt 缓存存在
88 | FS-->>Backend: 返回缓存结果
89 | else 缓存不存在
90 | Backend->>Whisper: 转录音频
91 | Whisper->>FS: 保存转录结果 (JSON)
92 | Backend->>DB: 更新状态 (transcribed)
93 | end
94 | Backend-->>Frontend: 返回转录文本
95 |
96 | User->>Frontend: 4. 执行步骤:生成笔记
97 | Frontend->>Backend: POST /api/task/{task_id}/step/summarize
98 | Backend->>FS: 检查缓存
99 | alt 缓存存在且无截图
100 | FS-->>Backend: 返回缓存笔记
101 | else 需要生成
102 | Backend->>LLM: 调用 AI 生成笔记
103 | Note over LLM: 构建 Prompt
包含转录文本
生成结构化 Markdown
104 | LLM-->>Backend: 返回 Markdown 笔记
105 | Backend->>FS: 保存笔记文件
106 | end
107 |
108 | opt 如果启用截图
109 | Backend->>FFmpeg: 根据时间戳生成截图
110 | FFmpeg->>FS: 保存截图文件
111 | Backend->>Backend: 替换 Markdown 中的截图标记
112 | end
113 |
114 | Backend->>DB: 更新状态 (completed)
115 | Backend-->>Frontend: 返回最终笔记
116 |
117 | User->>Frontend: 5. 下载笔记 (Markdown/PDF)
118 | Frontend->>Backend: GET /api/task/{task_id}/download
119 | Backend-->>Frontend: 返回文件
120 | ```
121 |
122 | ## Technical Modules in Detail
123 |
124 | ### 1. Audio Extraction
125 |
126 | Audio extraction uses FFmpeg to pull the audio stream out of the video file and convert it into a format suited to speech recognition.
127 |
128 | ```mermaid
129 | flowchart LR
130 | A[视频文件] --> B{检查 FFmpeg}
131 | B -->|系统已安装| C[使用系统 FFmpeg]
132 | B -->|未安装| D[自动下载 FFmpeg]
133 | D --> E[使用项目 FFmpeg]
134 | C --> F[执行提取命令]
135 | E --> F
136 | F --> G[输出音频文件]
137 |
138 | F --> H["ffmpeg -i video.mp4
-acodec pcm_s16le
-ac 1 -ar 16000
audio.wav"]
139 |
140 | style A fill:#ff6b6b
141 | style G fill:#4ecdc4
142 | style H fill:#ffe66d
143 | ```
144 |
145 | **Technical details:**
146 | - **Sample rate**: 16 kHz (recommended for Whisper models)
147 | - **Channels**: mono (reduces compute)
148 | - **Encoding**: 16-bit PCM (lossless, well suited to speech recognition)
149 | - **Automatic management**: `imageio-ffmpeg` downloads and manages the FFmpeg binary automatically (see the sketch below)
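
A minimal sketch of this step, assuming the backend shells out to FFmpeg via `subprocess`; the function name and paths are illustrative, not the project's actual API:

```python
import subprocess
from imageio_ffmpeg import get_ffmpeg_exe  # downloads a bundled FFmpeg binary on first use

def extract_audio(video_path: str, audio_path: str) -> str:
    """Extract a 16 kHz, mono, 16-bit PCM WAV from a video for speech recognition."""
    cmd = [
        get_ffmpeg_exe(), "-y",   # overwrite any existing output
        "-i", video_path,         # input video
        "-acodec", "pcm_s16le",   # 16-bit PCM
        "-ac", "1",               # mono
        "-ar", "16000",           # 16 kHz sample rate
        audio_path,
    ]
    subprocess.run(cmd, check=True, capture_output=True)
    return audio_path
```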
150 |
151 | ### 2. Speech-to-Text
152 |
153 | Speech recognition uses Fast-Whisper (a Whisper implementation built on CTranslate2), which runs roughly 4-5x faster than the original Whisper.
154 |
155 | ```mermaid
156 | graph TD
157 | A[音频文件] --> B[Fast-Whisper 模型]
158 | B --> C{模型加载}
159 | C -->|首次使用| D[下载模型]
160 | C -->|已存在| E[加载模型]
161 | D --> E
162 | E --> F[执行转录]
163 | F --> G[语音活动检测 VAD]
164 | G --> H[自动语言检测]
165 | H --> I[生成分段结果]
166 | I --> J[保存 JSON 缓存]
167 | J --> K[返回转录结果]
168 |
169 | K --> L["TranscriptResult:
- language: 检测语言
- full_text: 完整文本
- segments: 时间分段"]
170 |
171 | style B fill:#4ecdc4
172 | style G fill:#95e1d3
173 | style L fill:#ffe66d
174 | ```
175 |
176 | **Technical details:**
177 | - **Model size**: tiny/base/small/medium/large supported (base by default)
178 | - **VAD filtering**: voice-activity detection is enabled to skip silent segments
179 | - **Language detection**: the audio language is detected automatically
180 | - **Segmented output**: timestamps are preserved for downstream processing
181 | - **Caching**: transcription results are saved as JSON to avoid repeated work (see the sketch below)
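
A minimal transcription sketch using the `faster-whisper` package with the defaults described above; the cache file layout and result shape are assumptions for illustration:

```python
import json
from faster_whisper import WhisperModel

def transcribe(audio_path: str, cache_path: str, model_size: str = "base") -> dict:
    """Transcribe a WAV file with VAD filtering and cache the result as JSON."""
    model = WhisperModel(model_size, compute_type="int8")
    segments, info = model.transcribe(audio_path, vad_filter=True)

    result = {
        "language": info.language,  # auto-detected language
        "segments": [
            {"start": seg.start, "end": seg.end, "text": seg.text}
            for seg in segments
        ],
    }
    result["full_text"] = " ".join(s["text"].strip() for s in result["segments"])

    with open(cache_path, "w", encoding="utf-8") as fh:
        json.dump(result, fh, ensure_ascii=False, indent=2)
    return result
```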
182 |
183 | ### 3. AI Note Generation
184 |
185 | A large language model (LLM) turns the transcript into a structured Markdown note.
186 |
187 | ```mermaid
188 | graph TB
189 | A[转录文本] --> B[构建 Prompt]
190 | B --> C[Prompt 模板]
191 | C --> D{模型类型}
192 |
193 | D -->|Ollama| E[本地模型
http://localhost:11434/v1]
194 | D -->|OpenAI| F[OpenAI API
api.openai.com]
195 | D -->|DeepSeek| G[DeepSeek API]
196 | D -->|Qwen| H[Qwen API]
197 |
198 | E --> I[调用 LLM]
199 | F --> I
200 | G --> I
201 | H --> I
202 |
203 | I --> J[生成 Markdown]
204 | J --> K[后处理]
205 | K --> L[清理思考标签]
206 | L --> M[格式化输出]
207 | M --> N[保存缓存]
208 |
209 | style E fill:#95e1d3
210 | style I fill:#4ecdc4
211 | style J fill:#ffe66d
212 | ```
213 |
214 | **Prompt construction strategy:**
215 |
216 | ```mermaid
217 | graph LR
218 | A[转录分段] --> B[格式化分段文本]
219 | B --> C[添加指令]
220 | C --> D[语言要求]
221 | C --> E[格式要求]
222 | C --> F[内容要求]
223 | C --> G[截图标记要求]
224 |
225 | D --> H[最终 Prompt]
226 | E --> H
227 | F --> H
228 | G --> H
229 |
230 | H --> I["1. 完整信息
2. 去除无关内容
3. 保留关键细节
4. 可读布局
5. 数学公式 LaTeX
6. 截图标记 *Screenshot-[mm:ss]"]
231 |
232 | style H fill:#4ecdc4
233 | style I fill:#ffe66d
234 | ```
235 |
236 | **Supported model providers:**
237 | - **Ollama**: runs locally, fully offline, no API key required
238 | - **OpenAI**: GPT-3.5/GPT-4 series
239 | - **DeepSeek**: Chinese LLM provider
240 | - **Qwen**: Alibaba's Tongyi Qianwen
241 | - **Other OpenAI-compatible APIs**: configured via base_url (a call sketch follows below)
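
A minimal sketch of the generation call through the OpenAI-compatible client, here pointed at an Ollama endpoint; the default model name and the prompt wording are illustrative assumptions, not the project's actual template:

```python
from openai import OpenAI

def generate_note(transcript_text: str,
                  base_url: str = "http://localhost:11434/v1",  # Ollama's OpenAI-compatible endpoint
                  api_key: str = "ollama",                      # Ollama accepts any placeholder key
                  model: str = "qwen2.5") -> str:
    """Ask an OpenAI-compatible endpoint to turn a transcript into a Markdown note."""
    prompt = (
        "Turn the following video transcript into a structured Markdown note. "
        "Keep the key details, drop filler, use LaTeX for math, and insert "
        "screenshot markers of the form *Screenshot-[mm:ss] where a frame would help.\n\n"
        f"Transcript:\n{transcript_text}"
    )
    client = OpenAI(api_key=api_key, base_url=base_url)
    resp = client.chat.completions.create(
        model=model,
        messages=[{"role": "user", "content": prompt}],
    )
    return resp.choices[0].message.content
```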
242 |
243 | ### 4. Screenshot Generation
244 |
245 | Frames are extracted from the video at the timestamps referenced by the screenshot markers in the Markdown.
246 |
247 | ```mermaid
248 | flowchart TD
249 | A[Markdown 笔记] --> B[解析截图标记]
250 | B --> C["*Screenshot-[mm:ss]"]
251 | C --> D[提取时间戳]
252 | D --> E[转换为秒数]
253 | E --> F[FFmpeg 提取帧]
254 | F --> G[保存截图文件]
255 | G --> H[生成图片 URL]
256 | H --> I[替换 Markdown 标记]
257 | I --> J[更新笔记文件]
258 |
259 | F --> K["ffmpeg -ss {timestamp}
-i video.mp4
-vframes 1
screenshot.jpg"]
260 |
261 | style C fill:#ff6b6b
262 | style F fill:#4ecdc4
263 | style I fill:#95e1d3
264 | ```
265 |
266 | **Screenshot marker format:**
267 | - Marker syntax: `*Screenshot-[mm:ss]` (for example `*Screenshot-[01:23]`)
268 | - Placement: after the section content, separated by a blank line
269 | - Automatic replacement: once the screenshot is generated, the marker is replaced with the corresponding image link (see the sketch below)
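
A minimal sketch of marker handling, assuming the `*Screenshot-[mm:ss]` syntax above; the output file naming is an illustrative assumption:

```python
import re
import subprocess

MARKER = re.compile(r"\*Screenshot-\[(\d{2}):(\d{2})\]")

def insert_screenshots(markdown: str, video_path: str, out_dir: str) -> str:
    """Replace *Screenshot-[mm:ss] markers with image links to extracted frames."""
    def _replace(match: re.Match) -> str:
        minutes, seconds = int(match.group(1)), int(match.group(2))
        image_path = f"{out_dir}/shot_{minutes:02d}_{seconds:02d}.jpg"
        subprocess.run(
            ["ffmpeg", "-y", "-ss", str(minutes * 60 + seconds),  # seek to the timestamp
             "-i", video_path, "-vframes", "1", image_path],      # grab a single frame
            check=True, capture_output=True,
        )
        return f"![]({image_path})"

    return MARKER.sub(_replace, markdown)
```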
270 |
271 | ## Data Flow and State Management
272 |
273 | ### Task Status Transitions
274 |
275 | ```mermaid
276 | stateDiagram-v2
277 | [*] --> pending: 上传文件
278 | pending --> processing: 提取音频
279 | processing --> transcribing: 开始转写
280 | transcribing --> transcribed: 转写完成
281 | transcribed --> summarizing: 生成笔记
282 | summarizing --> completed: 笔记完成
283 | completed --> [*]
284 |
285 | processing --> failed: 错误
286 | transcribing --> failed: 错误
287 | summarizing --> failed: 错误
288 | failed --> [*]
289 | ```
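
The same lifecycle expressed as a small Python enum, assuming the status strings from the diagram (the class name is illustrative):

```python
from enum import Enum

class TaskStatus(str, Enum):
    """States a video task moves through."""
    PENDING = "pending"            # uploaded, waiting for audio extraction
    PROCESSING = "processing"      # extracting audio
    TRANSCRIBING = "transcribing"  # running speech-to-text
    TRANSCRIBED = "transcribed"    # transcript ready
    SUMMARIZING = "summarizing"    # generating the note
    COMPLETED = "completed"        # note finished
    FAILED = "failed"              # any step errored out
```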
290 |
291 | ### Data Storage Structure
292 |
293 | ```mermaid
294 | erDiagram
295 | VIDEO_TASK ||--o{ TASK_STATUS : has
296 | VIDEO_TASK ||--o{ CACHE_FILE : generates
297 |
298 | VIDEO_TASK {
299 | string task_id PK
300 | string filename
301 | string status
302 | bool screenshot
303 | datetime created_at
304 | }
305 |
306 | TASK_STATUS {
307 | string task_id FK
308 | string status
309 | string message
310 | datetime updated_at
311 | }
312 |
313 | CACHE_FILE {
314 | string task_id FK
315 | string type
316 | string path
317 | }
318 |
319 | CACHE_FILE ||--|| AUDIO_FILE : "task_id_audio.wav"
320 | CACHE_FILE ||--|| TRANSCRIPT_FILE : "task_id_transcript.json"
321 | CACHE_FILE ||--|| MARKDOWN_FILE : "task_id_markdown.md"
322 | ```
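
A minimal SQLAlchemy sketch of the main task table, based on the fields in the diagram; column types and the table name are assumptions:

```python
from datetime import datetime
from sqlalchemy import Boolean, Column, DateTime, String
from sqlalchemy.orm import declarative_base

Base = declarative_base()

class VideoTask(Base):
    """One uploaded video and its processing state."""
    __tablename__ = "video_tasks"

    task_id = Column(String, primary_key=True)
    filename = Column(String, nullable=False)
    status = Column(String, default="pending")    # see the status diagram above
    screenshot = Column(Boolean, default=False)   # whether screenshots should be inserted
    created_at = Column(DateTime, default=datetime.utcnow)
```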
323 |
324 | ## Frontend Architecture
325 |
326 | ### Component Hierarchy
327 |
328 | ```mermaid
329 | graph TD
330 | A[App.tsx] --> B[路由管理]
331 | B --> C[上传页面]
332 | B --> D[任务列表]
333 | B --> E[模型配置]
334 |
335 | C --> F[UploadForm 组件]
336 | F --> G[文件选择]
337 | F --> H[上传进度]
338 |
339 | D --> I[TaskList 组件]
340 | I --> J[TaskItem 组件]
341 | J --> K[TaskSteps 组件]
342 | K --> L[步骤执行按钮]
343 | K --> M[状态显示]
344 |
345 | J --> N[TaskDetailPanel]
346 | N --> O[视频预览]
347 | N --> P[转录文本查看]
348 | N --> Q[笔记预览]
349 | N --> R[下载按钮]
350 |
351 | E --> S[ModelConfig 组件]
352 | S --> T[模型选择]
353 | S --> U[API 配置]
354 |
355 | style A fill:#61dafb
356 | style F fill:#4ecdc4
357 | style I fill:#95e1d3
358 | style S fill:#ffe66d
359 | ```
360 |
361 | ### State Management
362 |
363 | ```mermaid
364 | graph LR
365 | A[Zustand Store] --> B[Task Store]
366 | B --> C[任务列表]
367 | B --> D[当前任务]
368 | B --> E[任务状态]
369 |
370 | A --> F[Model Store]
371 | F --> G[模型配置]
372 | F --> H[当前模型]
373 |
374 | C --> I[添加任务]
375 | C --> J[更新任务]
376 | C --> K[删除任务]
377 |
378 | style A fill:#61dafb
379 | style B fill:#4ecdc4
380 | style F fill:#95e1d3
381 | ```
382 |
383 | ## Key Technical Features
384 |
385 | ### 1. Fully Local Processing
386 |
387 | ```mermaid
388 | graph TB
389 | A[用户数据] --> B{处理方式}
390 | B -->|本地模式| C[Ollama 本地模型]
391 | B -->|云端模式| D[云端 API]
392 |
393 | C --> E[数据不上传]
394 | C --> F[完全离线]
395 | C --> G[隐私保护]
396 |
397 | D --> H[需要网络]
398 | D --> I[需要 API Key]
399 |
400 | style C fill:#95e1d3
401 | style E fill:#4ecdc4
402 | style F fill:#4ecdc4
403 | style G fill:#4ecdc4
404 | ```
405 |
406 | ### 2. Caching
407 |
408 | ```mermaid
409 | flowchart TD
410 | A[处理请求] --> B{检查缓存}
411 | B -->|存在| C[返回缓存]
412 | B -->|不存在| D[执行处理]
413 | D --> E[保存缓存]
414 | E --> F[返回结果]
415 |
416 | G[音频提取] --> H[task_id_audio.wav]
417 | I[转录结果] --> J[task_id_transcript.json]
418 | K[笔记生成] --> L[task_id_markdown.md]
419 |
420 | style B fill:#ffe66d
421 | style C fill:#95e1d3
422 | style E fill:#4ecdc4
423 | ```
424 |
425 | **Caching strategy:**
426 | - **Audio files**: saved after extraction so the same video is never extracted twice
427 | - **Transcripts**: JSON, including the full segment and timestamp information
428 | - **Note content**: Markdown, with incremental updates for the screenshot feature (see the sketch below)
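
A minimal cache-lookup sketch, assuming the per-task file naming shown above; the cache directory is an illustrative assumption:

```python
import json
from pathlib import Path

CACHE_DIR = Path("note_results")  # assumed location of cached artifacts

def load_cached_transcript(task_id: str):
    """Return the cached transcript for a task, or None if it has not been produced yet."""
    cache_file = CACHE_DIR / f"{task_id}_transcript.json"
    if cache_file.exists():
        with cache_file.open(encoding="utf-8") as fh:
            return json.load(fh)
    return None
```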
429 |
430 | ### 3. Step-by-Step Execution
431 |
432 | The pipeline runs as discrete steps, so the user decides when each one executes:
433 |
434 | ```mermaid
435 | graph LR
436 | A[上传文件] --> B[步骤 1: 提取音频]
437 | B --> C[步骤 2: 转写文字]
438 | C --> D[步骤 3: 生成笔记]
439 |
440 | B --> E[可查看音频]
441 | C --> F[可查看转录]
442 | D --> G[可查看笔记]
443 |
444 | style A fill:#ff6b6b
445 | style B fill:#4ecdc4
446 | style C fill:#95e1d3
447 | style D fill:#ffe66d
448 | ```
449 |
450 | **Advantages:**
451 | - Intermediate results can be inspected at any time
452 | - Any single step can be retried on its own
453 | - A failure is contained to the step that failed rather than the whole pipeline
454 | - Overall, a better user experience
455 |
456 | ## Performance Optimizations
457 |
458 | ### 1. Model Loading
459 |
460 | ```mermaid
461 | graph TD
462 | A[首次使用] --> B[延迟加载]
463 | B --> C[按需初始化]
464 | C --> D[单例模式]
465 | D --> E[复用模型实例]
466 |
467 | style B fill:#95e1d3
468 | style D fill:#4ecdc4
469 | ```
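
A minimal sketch of the lazy, single-instance loading pattern for the Whisper model (the helper name is illustrative):

```python
from functools import lru_cache
from faster_whisper import WhisperModel

@lru_cache(maxsize=1)
def get_whisper_model(model_size: str = "base") -> WhisperModel:
    """Load the model on first use and reuse the same instance afterwards."""
    return WhisperModel(model_size, compute_type="int8")
```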
470 |
471 | ### 2. Asynchronous Processing
472 |
473 | ```mermaid
474 | sequenceDiagram
475 | participant F as 前端
476 | participant B as 后端
477 | participant W as Worker
478 |
479 | F->>B: 提交任务
480 | B->>B: 创建任务记录
481 | B-->>F: 返回 task_id
482 | B->>W: 后台处理
483 | F->>B: 轮询状态
484 | B-->>F: 返回当前状态
485 | W->>B: 更新状态
486 | F->>B: 获取结果
487 | B-->>F: 返回最终结果
488 | ```
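
A minimal FastAPI sketch of the submit-then-poll pattern shown above; the route paths, in-memory registry, and `run_pipeline` helper are illustrative assumptions, not the project's actual endpoints:

```python
import uuid
from fastapi import BackgroundTasks, FastAPI

app = FastAPI()
tasks: dict = {}  # in-memory task registry, for illustration only

def run_pipeline(task_id: str) -> None:
    """Placeholder for the extract -> transcribe -> summarize pipeline."""
    tasks[task_id]["status"] = "completed"

@app.post("/api/upload")
def submit_task(background: BackgroundTasks) -> dict:
    task_id = uuid.uuid4().hex
    tasks[task_id] = {"status": "pending"}
    background.add_task(run_pipeline, task_id)  # processed after the response is sent
    return {"task_id": task_id}

@app.get("/api/task/{task_id}/status")
def poll_task(task_id: str) -> dict:
    return tasks.get(task_id, {"status": "unknown"})
```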
489 |
490 | ## Security and Privacy
491 |
492 | ### Data Privacy
493 |
494 | ```mermaid
495 | graph TB
496 | A[用户上传视频] --> B[本地存储]
497 | B --> C[本地处理]
498 | C --> D{使用本地模型?}
499 | D -->|是| E[完全离线]
500 | D -->|否| F[仅发送文本到 API]
501 |
502 | E --> G[数据不上传]
503 | F --> H[仅转录文本]
504 | H --> I[不包含视频内容]
505 |
506 | style E fill:#95e1d3
507 | style G fill:#4ecdc4
508 | style H fill:#ffe66d
509 | ```
510 |
511 | **Privacy measures:**
512 | 1. All files are stored on the local filesystem
513 | 2. Fully offline operation is supported (Ollama mode)
514 | 3. Cloud APIs receive only the transcript text, never the video or audio
515 | 4. No data collection or tracking
516 |
517 | ## Deployment Architecture
518 |
519 | ### Development Environment
520 |
521 | ```mermaid
522 | graph LR
523 | A[前端 Dev Server
:5173] --> B[Vite Proxy]
524 | B --> C[后端 API
:8483]
525 | C --> D[SQLite DB]
526 | C --> E[文件系统]
527 |
528 | style A fill:#61dafb
529 | style C fill:#009688
530 | ```
531 |
532 | ### Production Environment
533 |
534 | ```mermaid
535 | graph TB
536 | A[用户] --> B[Nginx]
537 | B --> C[前端静态文件]
538 | B --> D[后端 API]
539 | D --> E[SQLite DB]
540 | D --> F[文件系统]
541 | D --> G[FFmpeg]
542 | D --> H[Whisper Model]
543 | D --> I[Ollama/LLM]
544 |
545 | style B fill:#009688
546 | style D fill:#4ecdc4
547 | ```
548 |
549 | ## Summary
550 |
551 | Video AI Note generates intelligent video notes through the following techniques:
552 |
553 | 1. **Audio processing**: FFmpeg extracts and converts the audio
554 | 2. **Speech recognition**: Fast-Whisper provides fast, accurate transcription
555 | 3. **AI generation**: a large language model produces the structured note
556 | 4. **Screenshots**: timestamp-based screenshot insertion
557 | 5. **Local operation**: Ollama enables fully offline use
558 | 6. **Caching**: repeated work is avoided, improving performance
559 | 7. **Step-by-step execution**: flexible user control over each stage
560 |
561 | The overall design emphasizes **privacy protection**, **user experience**, and **performance**, making Video AI Note a feature-complete and technically solid video note-taking tool.
562 |
563 |
--------------------------------------------------------------------------------
/frontend/src/components/TaskDetailPanel.tsx:
--------------------------------------------------------------------------------
1 | import { useEffect, useState, useRef } from 'react'
2 | import { X, FileVideo, Music, FileText, BookOpen } from 'lucide-react'
3 | import { useTaskStore } from '../store/taskStore'
4 | import { getTaskStatus, confirmStep, regenerateNote } from '../services/api'
5 | import StepProgress, { StepStatus } from './StepProgress'
6 | import TranscriptViewer from './TranscriptViewer'
7 | import EnhancedMarkdownViewer from './EnhancedMarkdownViewer'
8 | import toast from 'react-hot-toast'
9 |
10 | interface TaskDetailPanelProps {
11 | taskId: string
12 | onClose: () => void
13 | }
14 |
15 | export default function TaskDetailPanel({ taskId, onClose }: TaskDetailPanelProps) {
16 | const { tasks, updateTask } = useTaskStore()
17 | const task = tasks.find((t) => t.id === taskId)
18 | const [steps, setSteps] = useState<any[]>([])
19 | const [autoProcess, setAutoProcess] = useState(false)
20 | const [transcript, setTranscript] = useState<any>(null)
21 |
22 | // 初始化步骤
23 | useEffect(() => {
24 | if (!task) return
25 |
26 | // 检查转录是否完成(通过 transcript 状态判断)
27 | const isTranscribeCompleted = transcript && transcript.segments && transcript.segments.length > 0
28 |
29 | const initialSteps = [
30 | {
31 | id: 'upload',
32 | name: '文件上传',
33 | description: '将文件上传到服务器',
34 | status: 'completed' as StepStatus,
35 | result: task.filename ? (
36 |
37 |
38 | {task.filename}
39 |
40 | ) : null,
41 | },
42 | {
43 | id: 'extract',
44 | name: '提取音频',
45 | description: '从视频文件中提取音频(如果是视频)',
46 | status: (task.status === 'pending'
47 | ? 'waiting_confirm'
48 | : ['processing', 'transcribing', 'summarizing', 'completed'].includes(task.status)
49 | ? 'completed'
50 | : 'pending') as StepStatus,
51 | canConfirm: task.status === 'pending',
52 | onConfirm: () => handleStepConfirm('extract'),
53 | result: task.status !== 'pending' ? (
54 |
55 |
56 | 音频提取完成
57 |
58 | ) : null,
59 | },
60 | {
61 | id: 'transcribe',
62 | name: '音频转写',
63 | description: '使用 AI 将音频转换为文字',
64 | status: (isTranscribeCompleted
65 | ? 'completed'
66 | : task.status === 'transcribing'
67 | ? 'processing'
68 | : task.status === 'processing'
69 | ? 'waiting_confirm'
70 | : 'pending') as StepStatus,
71 | canConfirm: task.status === 'processing',
72 | onConfirm: () => handleStepConfirm('transcribe'),
73 | result: isTranscribeCompleted ? (
74 |
75 |
76 | 转写完成,共 {transcript.segments.length} 条片段
77 |
78 | ) : null,
79 | },
80 | {
81 | id: 'summarize',
82 | name: '生成笔记',
83 | description: '使用 GPT 生成结构化笔记',
84 | status: (['summarizing', 'completed'].includes(task.status)
85 | ? task.status === 'summarizing'
86 | ? 'processing'
87 | : 'completed'
88 | : isTranscribeCompleted || task.status === 'transcribing'
89 | ? 'waiting_confirm'
90 | : 'pending') as StepStatus,
91 | canConfirm: isTranscribeCompleted, // 只有转录完成后才能生成笔记
92 | onConfirm: () => handleStepConfirm('summarize'),
93 | },
94 | ]
95 |
96 | setSteps(initialSteps)
97 | }, [task, transcript])
98 |
99 | // 使用 ref 来避免依赖问题
100 | const tasksRef = useRef(tasks)
101 | const updateTaskRef = useRef(updateTask)
102 |
103 | useEffect(() => {
104 | tasksRef.current = tasks
105 | updateTaskRef.current = updateTask
106 | }, [tasks, updateTask])
107 |
108 | const taskDetailLoadedRef = useRef<string | null>(null)
109 |
110 | // 初始加载任务详情
111 | useEffect(() => {
112 | if (!taskId) return
113 |
114 | // 防止重复加载同一个任务
115 | if (taskDetailLoadedRef.current === taskId) {
116 | return
117 | }
118 |
119 | const loadTaskDetail = async () => {
120 | taskDetailLoadedRef.current = taskId
121 | try {
122 | const response = await getTaskStatus(taskId)
123 | if (response.data.code === 200) {
124 | const taskData = response.data.data
125 | const currentTask = tasksRef.current.find((t) => t.id === taskId)
126 | updateTaskRef.current(taskId, {
127 | status: taskData.status,
128 | markdown: taskData.markdown || currentTask?.markdown || '',
129 | })
130 |
131 | // 更新转写结果
132 | if (taskData.transcript) {
133 | setTranscript(taskData.transcript)
134 | }
135 |
136 | // 更新步骤状态
137 | updateStepsStatus(taskData.status, taskData)
138 | }
139 | } catch (error) {
140 | console.error('加载任务详情失败:', error)
141 | // 加载失败时重置标记,允许重试
142 | if (taskDetailLoadedRef.current === taskId) {
143 | taskDetailLoadedRef.current = null
144 | }
145 | }
146 | }
147 |
148 | loadTaskDetail()
149 | }, [taskId])
150 |
151 | // 轮询任务状态
152 | useEffect(() => {
153 | if (!taskId) return
154 |
155 | const task = tasks.find((t) => t.id === taskId)
156 | // 如果任务已完成且不需要自动处理,直接返回,不启动轮询
157 | if (!task || (task.status === 'completed' && !autoProcess) || task.status === 'failed') {
158 | return
159 | }
160 |
161 | const interval = setInterval(async () => {
162 | try {
163 | const response = await getTaskStatus(taskId)
164 | if (response.data.code === 200) {
165 | const taskData = response.data.data
166 | const currentTask = tasksRef.current.find((t) => t.id === taskId)
167 |
168 | updateTaskRef.current(taskId, {
169 | status: taskData.status,
170 | markdown: taskData.markdown || currentTask?.markdown || '',
171 | })
172 |
173 | // 更新转写结果
174 | if (taskData.transcript) {
175 | setTranscript(taskData.transcript)
176 | }
177 |
178 | // 更新步骤状态
179 | updateStepsStatus(taskData.status, taskData)
180 |
181 | // 如果任务已完成或失败,停止轮询
182 | if (taskData.status === 'completed' || taskData.status === 'failed') {
183 | clearInterval(interval)
184 | setAutoProcess(false)
185 | }
186 | }
187 | } catch (error) {
188 | console.error('轮询失败:', error)
189 | }
190 | }, 2000)
191 |
192 | return () => clearInterval(interval)
193 | }, [taskId, autoProcess, task?.status])
194 |
195 | const updateStepsStatus = (status: string, taskData: any) => {
196 | setSteps((prev) =>
197 | prev.map((step) => {
198 | // 检查转录步骤:如果有 transcript 数据,即使状态是 transcribing,也标记为完成
199 | if (step.id === 'transcribe') {
200 | if (taskData.transcript && taskData.transcript.segments) {
201 | // 转录已完成
202 | return {
203 | ...step,
204 | status: 'completed' as StepStatus,
205 | result: (
206 |
207 |
208 | 转写完成,共 {taskData.transcript.segments.length} 条片段
209 |
210 | ),
211 | }
212 | } else if (status === 'transcribing') {
213 | // 正在转录中
214 | return { ...step, status: 'processing' as StepStatus }
215 | }
216 | }
217 |
218 | // 提取音频步骤
219 | if (status === 'processing' && step.id === 'extract') {
220 | return { ...step, status: 'processing' as StepStatus }
221 | }
222 |
223 | // 如果状态从 processing 变为其他状态,且提取已完成
224 | if (status !== 'pending' && step.id === 'extract' && step.status === 'processing') {
225 | return {
226 | ...step,
227 | status: 'completed' as StepStatus,
228 | result: (
229 |
230 |
231 | 音频提取完成
232 |
233 | ),
234 | }
235 | }
236 |
237 | // 生成笔记步骤
238 | if (status === 'summarizing' && step.id === 'summarize') {
239 | return { ...step, status: 'processing' as StepStatus }
240 | }
241 |
242 | // 所有步骤完成
243 | if (status === 'completed') {
244 | if (step.status === 'processing') {
245 | let result = null
246 | if (step.id === 'extract') {
247 | result = (
248 |
249 |
250 | 音频提取完成
251 |
252 | )
253 | } else if (step.id === 'transcribe' && taskData.transcript) {
254 | result = (
255 |
256 |
257 | 转写完成,共 {taskData.transcript.segments?.length || 0} 条片段
258 |
259 | )
260 | } else if (step.id === 'summarize' && taskData.markdown) {
261 | result = (
262 |
263 |
264 | 笔记生成完成
265 |
266 | )
267 | }
268 | return { ...step, status: 'completed' as StepStatus, result }
269 | }
270 | }
271 | return step
272 | })
273 | )
274 | }
275 |
276 | const handleStepConfirm = async (stepId: string) => {
277 | try {
278 | // 更新步骤状态为处理中
279 | setSteps((prev) =>
280 | prev.map((step) =>
281 | step.id === stepId ? { ...step, status: 'processing' as StepStatus } : step
282 | )
283 | )
284 |
285 | // 调用后端确认步骤(目前后端自动处理,这里主要是触发状态更新)
286 | await confirmStep(taskId, stepId)
287 | setAutoProcess(true)
288 | } catch (error) {
289 | console.error('确认步骤失败:', error)
290 | // 恢复状态
291 | setSteps((prev) =>
292 | prev.map((step) =>
293 | step.id === stepId ? { ...step, status: 'waiting_confirm' as StepStatus } : step
294 | )
295 | )
296 | }
297 | }
298 |
299 | if (!task) return null
300 |
301 | const currentStepIndex = steps.findIndex((s) => s.status === 'processing' || s.status === 'waiting_confirm')
302 |
303 | return (
304 |
305 |
306 |
307 |
308 |
{task.filename}
309 |
任务 ID: {task.id}
310 |
311 |
317 |
318 |
319 |
320 | {/* 左侧:步骤和结果 */}
321 |
322 |
处理步骤
323 |
324 |
325 |
326 | {/* 右侧:内容预览 */}
327 |
328 |
329 |
内容预览
330 | {task.status === 'completed' && task.markdown && (
331 |
345 | )}
346 |
347 |
348 |
349 | {(() => {
350 | // 优先显示笔记(如果存在且已完成)
351 | if (task.markdown && (task.status === 'completed' || task.markdown.length > 0)) {
352 | return
353 | }
354 |
355 | // 其次显示转写结果(如果存在)
356 | if (transcript && transcript.segments && transcript.segments.length > 0) {
357 | return
358 | }
359 |
360 | // 处理中显示加载
361 | if (task.status === 'processing' || task.status === 'transcribing' || task.status === 'summarizing') {
362 | return (
363 |
364 |
365 |
366 |
正在处理中...
367 |
368 |
369 | )
370 | }
371 |
372 | // 等待状态
373 | return (
374 |
377 | )
378 | })()}
379 |
380 |
381 |
382 |
383 |
384 | )
385 | }
386 |
387 |
--------------------------------------------------------------------------------
/backend/app/routers/download.py:
--------------------------------------------------------------------------------
1 | import asyncio
2 | import base64
3 | import json
4 | import os
5 | import shutil
6 | import subprocess
7 | import tempfile
8 | import uuid
9 | from typing import Optional
10 | from pathlib import Path
11 |
12 | from fastapi import APIRouter, BackgroundTasks, HTTPException
13 | from pydantic import BaseModel
14 | from app.utils.logger import get_logger
15 |
16 | logger = get_logger(__name__)
17 |
18 | router = APIRouter()
19 |
20 | # 用于跟踪登录会话
21 | _login_sessions: dict = {}
22 |
23 | # 用于跟踪下载合并任务
24 | _download_tasks: dict = {}
25 |
26 |
27 | class DownloadRequest(BaseModel):
28 | url: str
29 | cookie: Optional[str] = ''
30 | quality: Optional[str] = 'best'
31 |
32 |
33 | class StartLoginResponse(BaseModel):
34 | session_id: str
35 | qr_image_base64: str
36 |
37 |
38 | @router.post("/download/bilibili/start_login", response_model=StartLoginResponse)
39 | async def start_bilibili_login():
40 | """
41 | 使用 Playwright 打开哔哩哔哩登录页面,截取二维码并返回 base64 图片以及 session_id。
42 | 后续客户端应定期轮询 /download/bilibili/login_status?session_id=... 来检查是否登录成功。
43 | """
44 | # 将整个启动流程外层捕获异常并记录,方便定位导致 500 的问题
45 | try:
46 | try:
47 | from playwright.async_api import async_playwright
48 | except Exception as e:
49 | logger.error("Playwright 未安装: %s", e)
50 | raise HTTPException(status_code=500, detail="服务器未安装 playwright,请安装并运行 'playwright install' 后重试")
51 |
52 | session_id = uuid.uuid4().hex
53 | tmpdir = Path(tempfile.mkdtemp(prefix=f"bili_login_{session_id}_"))
54 | qr_path = tmpdir / "qr.png"
55 | storage_path = tmpdir / "storage_state.json"
56 |
57 | async def _login_task():
58 | try:
59 | async with async_playwright() as pw:
60 | # Headless mode is controlled via the BILI_PLAYWRIGHT_HEADLESS env var (defaults to headed so the login QR can render; set it to 1/true on servers without a display)
61 | headless_env = os.getenv("BILI_PLAYWRIGHT_HEADLESS", "0").lower()
62 | headless = not (headless_env in ("0", "false", "no"))
63 | browser = await pw.chromium.launch(headless=headless)
64 | context = await browser.new_context()
65 | page = await context.new_page()
66 | await page.goto("https://passport.bilibili.com/login")
67 |
68 | # 等待二维码元素出现并截图
69 | try:
70 | qr_el = await page.wait_for_selector("img.qrcode-img, img[data-type='qrcode']", timeout=15000)
71 | except Exception:
72 | # 有时候页面需要点击“二维码登录”切换
73 | try:
74 | btn = await page.query_selector("a[href*='qrcode']")
75 | if btn:
76 | await btn.click()
77 | qr_el = await page.wait_for_selector("img.qrcode-img, img[data-type='qrcode']", timeout=10000)
78 | else:
79 | qr_el = None
80 | except Exception:
81 | qr_el = None
82 |
83 | if qr_el:
84 | await qr_el.screenshot(path=str(qr_path))
85 | else:
86 | # fallback:截图整个页面
87 | await page.screenshot(path=str(qr_path), full_page=False)
88 |
89 | # 轮询等待登录完成(检查是否存在登录用户的 cookie)
90 | logged_in = False
91 | for _ in range(180): # 最多等待 ~180*1s = 3分钟
92 | cookies = await context.cookies()
93 | cookie_names = {c.get("name", "").lower() for c in cookies}
94 | # 要避免误判,仅在至少有 sessdata(哔哩哔哩关键登录 cookie)时认为已登录
95 | if "sessdata" in cookie_names:
96 | logged_in = True
97 | # 仅保存 storage_state 到文件(不在日志中输出 cookie 内容)
98 | await context.storage_state(path=str(storage_path))
99 | break
100 | await asyncio.sleep(1)
101 |
102 | # 关闭浏览器
103 | await browser.close()
104 | # 标记会话
105 | _login_sessions[session_id]["finished"] = logged_in
106 | if logged_in:
107 | _login_sessions[session_id]["storage"] = str(storage_path)
108 | else:
109 | _login_sessions[session_id]["storage"] = None
110 | except Exception as e:
111 | logger.exception("Playwright 登录任务失败: %s", e)
112 | _login_sessions[session_id]["error"] = str(e)
113 |
114 | # 保存会话元信息并启动后台任务
115 | _login_sessions[session_id] = {
116 | "tmpdir": str(tmpdir),
117 | "qr_path": str(qr_path),
118 | "storage": None,
119 | "finished": False,
120 | "error": None,
121 | }
122 |
123 | # Start the login flow as a background task
124 | asyncio.create_task(_login_task())
125 |
126 | # 等待一段时间让 qr.png 生成(最长等待 15 秒),并在任务出错时提前返回错误信息
127 | for _ in range(75): # 75 * 0.2 = 15s
128 | if qr_path.exists():
129 | break
130 | # 如果后台任务已记录错误,返回详细信息
131 | sess_info = _login_sessions.get(session_id)
132 | if sess_info and sess_info.get("error"):
133 | raise HTTPException(status_code=500, detail=f"启动登录任务失败: {sess_info.get('error')}")
134 | await asyncio.sleep(0.2)
135 |
136 | if not qr_path.exists():
137 | # 如果没有生成二维码,记录更多调试信息(但不记录敏感 cookie)
138 | sess_info = _login_sessions.get(session_id, {})
139 | err = sess_info.get("error") or "无法生成二维码图片,请检查 Playwright 是否可用或环境是否允许打开浏览器"
140 | try:
141 | logger.error("start_bilibili_login: 未生成二维码,session=%s tmpdir=%s error=%s", session_id, sess_info.get("tmpdir"), sess_info.get("error"))
142 | td = Path(sess_info.get("tmpdir") or "")
143 | if td.exists() and td.is_dir():
144 | contents = [p.name for p in td.iterdir()]
145 | logger.error("start_bilibili_login: tmpdir 内容: %s", contents)
146 | else:
147 | logger.error("start_bilibili_login: tmpdir 不存在或不可访问: %s", td)
148 | except Exception:
149 | logger.exception("记录 tmpdir 内容时出错")
150 | raise HTTPException(status_code=500, detail=err)
151 |
152 | b64 = base64.b64encode(qr_path.read_bytes()).decode("utf-8")
153 | return {"session_id": session_id, "qr_image_base64": b64}
154 | except HTTPException:
155 | # 已是明确的 HTTP 错误,直接抛出,便于前端显示
156 | raise
157 | except Exception as e:
158 | # 捕获其它未处理异常并记录完整堆栈,返回简洁错误给前端
159 | logger.exception("start_bilibili_login 未捕获异常: %s", e)
160 | raise HTTPException(status_code=500, detail=f"启动登录失败(查看后端日志获取详细信息)")
161 |
162 |
163 | @router.get("/download/bilibili/login_status")
164 | async def bilibili_login_status(session_id: str):
165 | """
166 | 查询登录状态,返回 { finished: bool, error: str|null }
167 | """
168 | sess = _login_sessions.get(session_id)
169 | if not sess:
170 | raise HTTPException(status_code=404, detail="session_id 未找到")
171 | return {"finished": bool(sess.get("finished")), "error": sess.get("error")}
172 |
173 |
174 | def _write_netscape_cookies(cookies, out_path: str):
175 | """
176 | 将 Playwright storage_state 中的 cookies 写入 Netscape cookies.txt 格式,供 yt-dlp 使用 --cookies 参数。
177 | """
178 | lines = []
179 | for c in cookies:
180 | domain = c.get("domain", "")
181 | flag = "TRUE" if domain.startswith(".") else "FALSE"
182 | path = c.get("path", "/")
183 | secure = "TRUE" if c.get("secure", False) else "FALSE"
184 | expires = str(int(c.get("expires", 0))) if c.get("expires") else "0"
185 | name = c.get("name", "")
186 | value = c.get("value", "")
187 | lines.append("\t".join([domain, flag, path, secure, expires, name, value]))
188 | with open(out_path, "w", encoding="utf-8") as fh:
189 | fh.write("# Netscape HTTP Cookie File\n")
190 | fh.write("\n".join(lines))
191 |
192 |
193 | @router.post("/download/bilibili")
194 | async def download_bilibili(req: DownloadRequest, background_tasks: BackgroundTasks):
195 | """
196 | 最终下载接口。优先尝试使用提供的 cookie(或直接使用 yt-dlp),
197 | 如果传入 session_id(来自登录流程),将使用 Playwright 保存下来的 storage_state 中的 cookie。
198 | """
199 | url = req.url
200 | cookie = (req.cookie or "").strip()
201 | quality = req.quality or "best"
202 |
203 | # 检查 yt-dlp
204 | try:
205 | subprocess.run(["yt-dlp", "--version"], capture_output=True, text=True, check=True)
206 | except Exception as e:
207 | logger.error("yt-dlp 未安装或不可用: %s", e)
208 | raise HTTPException(status_code=500, detail="服务器未安装 yt-dlp,请先安装 yt-dlp 后重试")
209 |
210 | # 如果 cookie 字符串看起来是 session_id(我们在 start_login 中返回),优先使用 storage_state
211 | storage_state_path = None
212 | if cookie and cookie.startswith("session:"):
213 | session_id = cookie.split("session:", 1)[1]
214 | sess = _login_sessions.get(session_id)
215 | if not sess:
216 | raise HTTPException(status_code=400, detail="无效的 session_id")
217 | if not sess.get("finished") or not sess.get("storage"):
218 | raise HTTPException(status_code=400, detail="会话尚未完成登录")
219 | storage_state_path = sess.get("storage")
220 |
221 | # 如果提供了 storage_state_path,读取 cookies 并写为 cookies.txt
222 | cookies_file = None
223 | if storage_state_path:
224 | try:
225 | with open(storage_state_path, "r", encoding="utf-8") as fh:
226 | st = json.load(fh)
227 | cookies = st.get("cookies", [])
228 | cookies_file = tempfile.mktemp(prefix="bili_cookies_", suffix=".txt")
229 | _write_netscape_cookies(cookies, cookies_file)
230 | except Exception as e:
231 | logger.exception("读取 storage_state 失败: %s", e)
232 | raise HTTPException(status_code=500, detail="读取登录会话的 cookie 失败")
233 |
234 | # 如果直接提供了 cookie 字符串(纯 Netscape 或 "name=val; ..."),使用 --add-header 或 --cookies
235 | cmd = ["yt-dlp", "-j", url]
236 | if cookies_file:
237 | cmd += ["--cookies", cookies_file]
238 | elif cookie:
239 | # 简单地将原始 Cookie 字符串作为请求头传入
240 | cmd += ["--add-header", f"Cookie: {cookie}"]
241 |
242 | try:
243 | proc = subprocess.run(cmd, capture_output=True, text=True, check=True, timeout=120)
244 | info_json = json.loads(proc.stdout)
245 | except subprocess.CalledProcessError as e:
246 | logger.error("yt-dlp 获取信息失败: %s %s", e, e.stderr)
247 | raise HTTPException(status_code=500, detail=f"解析视频信息失败: {e.stderr[:200]}")
248 | except Exception as e:
249 | logger.exception("解析 yt-dlp 输出失败: %s", e)
250 | raise HTTPException(status_code=500, detail="解析视频信息失败")
251 |
252 | # 清理临时 cookies 文件(延迟清理)
253 | if cookies_file:
254 | background_tasks.add_task(lambda p: os.remove(p) if os.path.exists(p) else None, cookies_file)
255 |
256 | # 选择格式(与之前逻辑相同)
257 | formats = info_json.get("formats", []) or []
258 | selected = None
259 | if quality == "best":
260 | formats_with_height = [f for f in formats if f.get("height")]
261 | if formats_with_height:
262 | formats_with_height.sort(key=lambda x: (x.get("height") or 0, x.get("tbr") or 0), reverse=True)
263 | selected = formats_with_height[0]
264 | else:
265 | try:
266 | target_h = int(quality.replace("p", ""))
267 | cand = [f for f in formats if (f.get("height") or 0) == target_h]
268 | if cand:
269 | cand.sort(key=lambda x: x.get("tbr") or 0, reverse=True)
270 | selected = cand[0]
271 | except Exception:
272 | selected = None
273 |
274 | if not selected:
275 | if info_json.get("url"):
276 | return {"download_url": info_json.get("url"), "filename": info_json.get("title")}
277 | if formats:
278 | formats.sort(key=lambda x: (x.get("filesize") or 0, x.get("tbr") or 0), reverse=True)
279 | selected = formats[0]
280 |
281 | if not selected:
282 | raise HTTPException(status_code=500, detail="未能找到可下载的格式")
283 |
284 | download_url = selected.get("url")
285 | filename = info_json.get("title") or "video"
286 | ext = selected.get("ext")
287 | if ext:
288 | filename = f"{filename}.{ext}"
289 | # 如果下载地址是 m3u8(或协议为 m3u8_native),则尝试使用 yt-dlp 下载并合并为单一文件(保存到 UPLOAD_DIR),然后返回静态 URL
290 | try:
291 | upload_dir = os.getenv("UPLOAD_DIR", "uploads")
292 | Path(upload_dir).mkdir(parents=True, exist_ok=True)
293 |
294 | is_m3u8 = False
295 | proto = selected.get("protocol") or ""
296 | if proto == "m3u8_native":
297 | is_m3u8 = True
298 | if download_url and (".m3u8" in download_url or (selected.get("ext") or "") == "m3u8"):
299 | is_m3u8 = True
300 |
301 | if is_m3u8:
302 | # 输出文件名:使用标题 + uuid,强制 mp4
303 | safe_title = "".join(c for c in (info_json.get("title") or "video") if c.isalnum() or c in " _-").strip()[:120] or "video"
304 | out_basename = f"{safe_title}_{uuid.uuid4().hex[:8]}.mp4"
305 | out_path = os.path.join(upload_dir, out_basename)
306 |
307 | # 构建 yt-dlp 下载命令,使用 --merge-output-format mp4 以确保合并
308 | ytdlp_cmd = ["yt-dlp", "-f", "best", "--merge-output-format", "mp4", "-o", out_path, url]
309 | if cookies_file:
310 | ytdlp_cmd += ["--cookies", cookies_file]
311 | elif cookie:
312 | ytdlp_cmd += ["--add-header", f"Cookie: {cookie}"]
313 |
314 | # 将下载/合并任务提交为后台任务(非阻塞)
315 | task_id = uuid.uuid4().hex
316 | _download_tasks[task_id] = {
317 | "status": "pending",
318 | "progress": 0,
319 | "log": "",
320 | "output": None,
321 | "error": None,
322 | }
323 |
324 | def _run_task(tid: str, cmd: list, outp: str, cookiesfile: Optional[str]):
325 | try:
326 | _download_tasks[tid]["status"] = "running"
327 | _download_tasks[tid]["log"] += f"执行命令: {' '.join(cmd)}\n"
328 | # 使用 subprocess.Popen 以便实时读取输出并更新日志/进度
329 | with subprocess.Popen(cmd, stdout=subprocess.PIPE, stderr=subprocess.STDOUT, text=True, bufsize=1) as proc:
330 | for line in proc.stdout:
331 | _download_tasks[tid]["log"] += line
332 | # 简单根据输出判断进度(若包含 %)
333 | try:
334 | if "%" in line:
335 | # 提取第一个出现的百分比数字
336 | import re
337 | m = re.search(r"(\d{1,3}\.\d|\d{1,3})%", line)
338 | if m:
339 | p = float(m.group(1))
340 | _download_tasks[tid]["progress"] = int(min(max(p, 0), 100))
341 | except Exception:
342 | pass
343 | ret = proc.wait()
344 | if ret != 0:
345 | _download_tasks[tid]["status"] = "failed"
346 | _download_tasks[tid]["error"] = f"yt-dlp 退出码 {ret}"
347 | return
348 |
349 | # 成功后标记输出并返回静态 URL
350 | _download_tasks[tid]["status"] = "completed"
351 | _download_tasks[tid]["output"] = outp
352 | except Exception as e:
353 | logger.exception("后台下载任务失败: %s", e)
354 | _download_tasks[tid]["status"] = "failed"
355 | _download_tasks[tid]["error"] = str(e)
356 | finally:
357 | # 清理 cookies 临时文件
358 | try:
359 | if cookiesfile and os.path.exists(cookiesfile):
360 | os.remove(cookiesfile)
361 | except Exception:
362 | pass
363 |
364 | # 启动后台线程
365 | import threading
366 | thread_cmd = ytdlp_cmd.copy()
367 | thread = threading.Thread(target=_run_task, args=(task_id, thread_cmd, out_path, cookies_file), daemon=True)
368 | thread.start()
369 |
370 | # 返回任务 id,前端可以轮询 /download/bilibili/task_status?task_id=...
371 | return {"task_id": task_id, "message": "已开始后台合并,使用 task_id 查询进度"}
372 |
373 | except Exception as e:
374 | logger.exception("合并处理时发生错误: %s", e)
375 |
376 | return {"download_url": download_url, "filename": filename, "format_note": selected.get("format_note")}
377 |
378 |
379 | @router.get("/download/bilibili/task_status")
380 | async def bilibili_task_status(task_id: str):
381 | """
382 | 查询后台下载/合并任务状态,返回:
383 | { status: 'pending'|'running'|'completed'|'failed', progress: int, log: str, output: str|null, error: str|null }
384 | 如果 status == 'completed',output 为服务器静态路径,例如 /api/uploads/xxx.mp4
385 | """
386 | task = _download_tasks.get(task_id)
387 | if not task:
388 | raise HTTPException(status_code=404, detail="task_id 未找到")
389 | return {
390 | "status": task.get("status"),
391 | "progress": task.get("progress", 0),
392 | "log": task.get("log", ""),
393 | "output": task.get("output"),
394 | "error": task.get("error"),
395 | }
396 |
397 |
398 |
--------------------------------------------------------------------------------
/frontend/src/components/EnhancedMarkdownViewer.tsx:
--------------------------------------------------------------------------------
1 | import ReactMarkdown from 'react-markdown'
2 | import remarkGfm from 'remark-gfm'
3 | import 'github-markdown-css/github-markdown.css'
4 | import { Copy, Download, FileDown } from 'lucide-react'
5 | import { useState, useRef } from 'react'
6 | import toast from 'react-hot-toast'
7 | import Zoom from 'react-medium-image-zoom'
8 | import 'react-medium-image-zoom/dist/styles.css'
9 | import jsPDF from 'jspdf'
10 | import html2canvas from 'html2canvas'
11 | import { exportPDF } from '../services/api'
12 |
13 | interface EnhancedMarkdownViewerProps {
14 | markdown: string
15 | filename?: string
16 | taskId?: string
17 | }
18 |
19 | // 获取API基础URL
20 | const getBaseURL = () => {
21 | const baseURL = import.meta.env.VITE_API_BASE_URL || '/api'
22 | return baseURL.replace(/\/$/, '')
23 | }
24 |
25 | export default function EnhancedMarkdownViewer({
26 | markdown,
27 | filename,
28 | taskId,
29 | }: EnhancedMarkdownViewerProps) {
30 | const [copied, setCopied] = useState(false)
31 | const markdownRef = useRef<HTMLDivElement>(null)
32 | const baseURL = getBaseURL()
33 |
34 | const handleCopy = async () => {
35 | try {
36 | await navigator.clipboard.writeText(markdown)
37 | setCopied(true)
38 | toast.success('已复制到剪贴板')
39 | setTimeout(() => setCopied(false), 2000)
40 | } catch (e) {
41 | toast.error('复制失败')
42 | }
43 | }
44 |
45 | const handleDownloadMarkdown = async () => {
46 | try {
47 | toast.loading('正在处理图片,请稍候...', { id: 'markdown-processing' })
48 |
49 | const name = filename?.replace(/\.[^/.]+$/, '') || 'note'
50 |
51 | // 将 markdown 中的图片路径转换为 base64
52 | let processedMarkdown = markdown
53 | const imageRegex = /!\[\]\((.*?)\)/g
54 | const imageMatches = Array.from(markdown.matchAll(imageRegex))
55 |
56 | // 处理所有图片
57 | for (const match of imageMatches) {
58 | const imageUrl = match[1]
59 |
60 | // 如果已经是 base64,跳过
61 | if (imageUrl.startsWith('data:')) {
62 | continue
63 | }
64 |
65 | try {
66 | // 构建完整的图片 URL
67 | let fullImageUrl = imageUrl
68 | if (imageUrl.startsWith('/api/')) {
69 | // 已经是完整路径,使用 baseURL
70 | const apiBaseURL = import.meta.env.VITE_API_BASE_URL || ''
71 | if (apiBaseURL && !imageUrl.startsWith('http')) {
72 | fullImageUrl = `${apiBaseURL}${imageUrl}`
73 | } else if (!imageUrl.startsWith('http')) {
74 | // 使用当前页面的 origin
75 | fullImageUrl = `${window.location.origin}${imageUrl}`
76 | }
77 | } else if (imageUrl.startsWith('/')) {
78 | fullImageUrl = `${window.location.origin}${imageUrl}`
79 | }
80 |
81 | // 获取图片并转换为 base64
82 | const response = await fetch(fullImageUrl)
83 | if (response.ok) {
84 | const blob = await response.blob()
85 | const reader = new FileReader()
86 | const base64 = await new Promise<string>((resolve, reject) => {
87 | reader.onloadend = () => {
88 | if (typeof reader.result === 'string') {
89 | resolve(reader.result)
90 | } else {
91 | reject(new Error('Failed to convert image to base64'))
92 | }
93 | }
94 | reader.onerror = reject
95 | reader.readAsDataURL(blob)
96 | })
97 |
98 | // 替换 markdown 中的图片路径
99 | processedMarkdown = processedMarkdown.replace(match[0], `![](${base64})`)
100 | } else {
101 | console.warn(`Failed to fetch image: ${fullImageUrl}`)
102 | }
103 | } catch (error) {
104 | console.error(`Error processing image ${imageUrl}:`, error)
105 | // 如果图片处理失败,保留原路径
106 | }
107 | }
108 |
109 | const blob = new Blob([processedMarkdown], { type: 'text/markdown;charset=utf-8' })
110 | const link = document.createElement('a')
111 | link.href = URL.createObjectURL(blob)
112 | link.download = `${name}.md`
113 | document.body.appendChild(link)
114 | link.click()
115 | document.body.removeChild(link)
116 | URL.revokeObjectURL(link.href)
117 |
118 | toast.dismiss('markdown-processing')
119 | toast.success('Markdown文件已下载(图片已嵌入)')
120 | } catch (error) {
121 | console.error('下载 Markdown 失败:', error)
122 | toast.dismiss('markdown-processing')
123 | toast.error('下载失败,请稍后重试')
124 | }
125 | }
126 |
127 | const handleDownloadPDF = async () => {
128 | // 如果提供了 taskId,优先使用后端 API 生成可复制文本的 PDF
129 | if (taskId) {
130 | try {
131 | toast.loading('正在生成PDF(可复制文本),请稍候...', { id: 'pdf-generating' })
132 | const response = await exportPDF(taskId)
133 |
134 | // 创建下载链接
135 | const blob = new Blob([response.data], { type: 'application/pdf' })
136 | const url = window.URL.createObjectURL(blob)
137 | const link = document.createElement('a')
138 | link.href = url
139 | const name = filename?.replace(/\.[^/.]+$/, '') || 'note'
140 | link.download = `${name}.pdf`
141 | document.body.appendChild(link)
142 | link.click()
143 | document.body.removeChild(link)
144 | window.URL.revokeObjectURL(url)
145 |
146 | toast.dismiss('pdf-generating')
147 | toast.success('PDF文件已下载(文字可复制)')
148 | return
149 | } catch (error: any) {
150 | console.error('后端PDF生成失败,使用前端生成:', error)
151 | // 如果后端失败,fallback 到前端生成
152 | if (error.response?.status !== 404) {
153 | toast.dismiss('pdf-generating')
154 | toast.error('后端PDF生成失败,使用前端生成方式')
155 | }
156 | }
157 | }
158 |
159 | // 前端生成 PDF(图片模式,文字不可复制)
160 | if (!markdownRef.current) {
161 | toast.error('无法生成PDF,请稍后重试')
162 | return
163 | }
164 |
165 | try {
166 | if (!taskId) {
167 | toast.loading('正在生成PDF(图片模式),请稍候...', { id: 'pdf-generating' })
168 | }
169 |
170 | // 等待所有图片加载完成
171 | const images = markdownRef.current.querySelectorAll('img')
172 | const imagePromises = Array.from(images).map((img) => {
173 | if (img.complete && img.naturalHeight !== 0) {
174 | return Promise.resolve()
175 | }
176 | return new Promise((resolve) => {
177 | const timeout = setTimeout(() => {
178 | resolve(null) // 超时也继续
179 | }, 10000)
180 |
181 | img.onload = () => {
182 | clearTimeout(timeout)
183 | resolve(null)
184 | }
185 | img.onerror = () => {
186 | clearTimeout(timeout)
187 | resolve(null) // 即使加载失败也继续
188 | }
189 | })
190 | })
191 | await Promise.all(imagePromises)
192 |
193 | // 使用html2canvas将内容转换为canvas,改进配置以避免图片截断
194 | const canvas = await html2canvas(markdownRef.current, {
195 | scale: 2,
196 | useCORS: true,
197 | allowTaint: false,
198 | logging: false,
199 | backgroundColor: '#ffffff',
200 | width: markdownRef.current.scrollWidth,
201 | height: markdownRef.current.scrollHeight,
202 | windowWidth: markdownRef.current.scrollWidth,
203 | windowHeight: markdownRef.current.scrollHeight,
204 | onclone: (clonedDoc) => {
205 | // 确保克隆文档中的图片都已加载
206 | const clonedImages = clonedDoc.querySelectorAll('img')
207 | clonedImages.forEach((img: HTMLImageElement) => {
208 | if (!img.complete) {
209 | img.style.display = 'none'
210 | }
211 | })
212 | }
213 | })
214 |
215 | // 计算PDF尺寸
216 | const imgWidth = canvas.width
217 | const imgHeight = canvas.height
218 | const pdfWidth = 210 // A4宽度(mm)
219 | const pdfHeight = (imgHeight * pdfWidth) / imgWidth
220 |
221 | // 创建PDF
222 | const pdf = new jsPDF('p', 'mm', 'a4')
223 | const pageHeight = pdf.internal.pageSize.height
224 | const pageWidth = pdf.internal.pageSize.width
225 | const margin = 10 // 页边距(mm)
226 | const contentWidth = pageWidth - 2 * margin
227 |
228 | // 计算每页可以容纳的高度
229 | const contentHeightPerPage = pageHeight - 2 * margin
230 | const totalPages = Math.ceil(pdfHeight / contentHeightPerPage)
231 |
232 | let yPosition = -margin // 从顶部开始,减去 margin 因为 addImage 的 y 是相对于页面的
233 |
234 | // 添加第一页
235 | pdf.addImage(
236 | canvas.toDataURL('image/png', 0.95),
237 | 'PNG',
238 | margin,
239 | yPosition,
240 | contentWidth,
241 | pdfHeight
242 | )
243 |
244 | // 如果内容超过一页,添加更多页面
245 | for (let page = 1; page < totalPages; page++) {
246 | pdf.addPage()
247 | yPosition = -margin - (page * contentHeightPerPage)
248 | pdf.addImage(
249 | canvas.toDataURL('image/png', 0.95),
250 | 'PNG',
251 | margin,
252 | yPosition,
253 | contentWidth,
254 | pdfHeight
255 | )
256 | }
257 |
258 | // 下载PDF
259 | const name = filename?.replace(/\.[^/.]+$/, '') || 'note'
260 | pdf.save(`${name}.pdf`)
261 |
262 | toast.dismiss('pdf-generating')
263 | toast.success('PDF文件已下载(图片模式,文字不可复制)')
264 | } catch (error) {
265 | console.error('生成PDF失败:', error)
266 | toast.dismiss('pdf-generating')
267 | toast.error('生成PDF失败,请稍后重试')
268 | }
269 | }
270 |
271 | // 处理图片URL,确保使用正确的baseURL
272 | const processMarkdown = (md: string) => {
273 | // 由于markdown中的路径已经是 /api/note_results/screenshots/...
274 | // 而vite代理已经配置了 /api 代理,所以直接返回即可
275 | // 不需要再添加baseURL,避免重复
276 | return md
277 | }
278 |
279 | if (!markdown) {
280 | return (
281 |
282 | 暂无笔记内容
283 |
284 | )
285 | }
286 |
287 | const processedMarkdown = processMarkdown(markdown)
288 |
289 | return (
290 |
291 | {/* 固定头部工具栏 */}
292 |
293 |
294 |
笔记预览
295 | {filename &&
{filename}
}
296 |
297 |
298 |
306 |
314 |
322 |
323 |
324 |
325 | {/* Markdown内容区域 - 内容自然高度,参与父容器滚动 */}
326 |
327 |
331 |
{
336 | let src = props.src || ''
337 | // 如果已经是完整URL(http/https/data),直接使用
338 | if (src.startsWith('http') || src.startsWith('data:')) {
339 | // 已经是完整URL,不需要处理
340 | } else if (src.startsWith('/api/')) {
341 | // 路径已经包含 /api/,vite代理会处理,直接使用
342 | // 不需要再添加baseURL
343 | } else if (src.startsWith('/')) {
344 | // 其他以 / 开头的路径,可能需要添加baseURL
345 | // 但通常markdown中的路径已经是 /api/... 格式
346 | src = src
347 | } else {
348 | // 相对路径,添加baseURL
349 | src = `${baseURL}/${src}`
350 | }
351 |
352 | // 使用 figure 标签包裹图片,避免在 p 标签内嵌套 div
353 | return (
354 |
355 |
356 |
{
363 | // 如果图片加载失败,尝试使用完整URL
364 | const target = e.target as HTMLImageElement
365 | const originalSrc = props.src || ''
366 | console.warn('图片加载失败:', originalSrc, '当前src:', target.src)
367 | // 如果原始路径是 /api/ 开头,说明路径是正确的,可能是服务器问题
368 | // 不需要再次尝试修改URL
369 | }}
370 | />
371 |
372 |
373 | )
374 | },
375 | // 改进标题样式
376 | h1: ({ children, ...props }) => (
377 |
381 | {children}
382 |
383 | ),
384 | h2: ({ children, ...props }) => (
385 |
389 | {children}
390 |
391 | ),
392 | h3: ({ children, ...props }) => (
393 |
397 | {children}
398 |
399 | ),
400 | // 改进段落样式
401 | p: ({ children, ...props }) => {
402 | // 如果段落只包含一个图片,直接返回图片(不包裹在p中)
403 | if (
404 | Array.isArray(children) &&
405 | children.length === 1 &&
406 | typeof children[0] === 'object' &&
407 | children[0] !== null &&
408 | 'type' in children[0] &&
409 | (children[0] as any).type === 'figure'
410 | ) {
411 | return <>{children}>
412 | }
413 |
414 | return (
415 |
416 | {children}
417 |
418 | )
419 | },
420 | // 改进列表样式
421 | ul: ({ children, ordered, ...props }) => (
422 |
425 | ),
426 | ol: ({ children, ordered, ...props }) => (
427 |
428 | {children}
429 |
430 | ),
431 | // 改进代码块样式
432 | code: ({ inline, className, children, ...props }) => {
433 | if (!inline) {
434 | return (
435 |
439 | {children}
440 |
441 | )
442 | }
443 | return (
444 |
448 | {children}
449 |
450 | )
451 | },
452 | // 改进引用样式
453 | blockquote: ({ children, ...props }) => (
454 |
458 | {children}
459 |
460 | ),
461 | }}
462 | >
463 | {processedMarkdown}
464 |
465 |
466 |
467 |
468 | )
469 | }
470 |
--------------------------------------------------------------------------------