158 |
159 |
模型选择
160 |
168 |
169 |
170 |
193 |
194 | {isOpen && (
195 | <>
196 |
setIsOpen(false)}
199 | />
200 |
201 | {models.map((model) => (
202 |
231 | ))}
232 |
233 | >
234 | )}
235 |
236 |
237 | 不同模型效果不同,建议自行测试
238 |
239 |
240 | )
241 | }
242 |
--------------------------------------------------------------------------------
/backend/app/routers/model.py:
--------------------------------------------------------------------------------
1 | from fastapi import APIRouter, HTTPException
2 | from pydantic import BaseModel
3 | from openai import OpenAI
4 | import google.generativeai as genai
5 | from app.utils.response import ResponseWrapper as R
6 | from app.utils.logger import get_logger
7 |
8 | logger = get_logger(__name__)
9 | router = APIRouter()
10 |
11 | # 内置提供商配置
12 | BUILTIN_PROVIDERS = [
13 | {
14 | "id": "openai",
15 | "name": "OpenAI",
16 | "type": "built-in",
17 | "logo": "OpenAI",
18 | "base_url": "https://api.openai.com/v1"
19 | },
20 | {
21 | "id": "deepseek",
22 | "name": "DeepSeek",
23 | "type": "built-in",
24 | "logo": "DeepSeek",
25 | "base_url": "https://api.deepseek.com"
26 | },
27 | {
28 | "id": "qwen",
29 | "name": "Qwen",
30 | "type": "built-in",
31 | "logo": "Qwen",
32 | "base_url": "https://dashscope.aliyuncs.com/compatible-mode/v1"
33 | },
34 | {
35 | "id": "claude",
36 | "name": "Claude",
37 | "type": "built-in",
38 | "logo": "Claude",
39 | "base_url": "https://api.anthropic.com/v1"
40 | },
41 | {
42 | "id": "gemini",
43 | "name": "Gemini",
44 | "type": "built-in",
45 | "logo": "Gemini",
46 | "base_url": "https://generativelanguage.googleapis.com/v1beta/openai/"
47 | },
48 | {
49 | "id": "groq",
50 | "name": "Groq",
51 | "type": "built-in",
52 | "logo": "Groq",
53 | "base_url": "https://api.groq.com/openai/v1"
54 | },
55 | {
56 | "id": "ollama",
57 | "name": "Ollama",
58 | "type": "built-in",
59 | "logo": "Ollama",
60 | "base_url": "http://127.0.0.1:11434/v1"
61 | }
62 | ]
63 |
64 | class ModelConfigRequest(BaseModel):
65 | provider: str
66 | api_key: str
67 | base_url: str | None = None
68 |
69 | class ModelItem(BaseModel):
70 | id: str
71 | name: str
72 | provider: str
73 |
74 | @router.get("/providers")
75 | def get_providers():
76 | """获取所有可用的提供商列表"""
77 | return R.success(BUILTIN_PROVIDERS, msg="获取提供商列表成功")
78 |
79 | @router.post("/models/list")
80 | def get_model_list(config: ModelConfigRequest):
81 | """获取指定提供商的模型列表"""
82 | try:
83 | if config.provider == 'gemini':
84 | return get_gemini_models(config.api_key)
85 | else:
86 | # 其他厂商都使用 OpenAI 兼容 API
87 | return get_openai_compatible_models(config.provider, config.api_key, config.base_url)
88 | except Exception as e:
89 | logger.error(f"获取模型列表失败: {e}", exc_info=True)
90 | return R.error(f"获取模型列表失败: {str(e)}")
91 |
92 | def get_openai_compatible_models(provider: str, api_key: str, base_url: str = None):
93 | """获取 OpenAI 兼容 API 的模型列表(OpenAI, DeepSeek, Qwen, Claude, Groq, Ollama 等)"""
94 | try:
95 | # Ollama 不需要 API Key
96 | if provider == 'ollama':
97 | api_key = api_key or 'ollama' # Ollama 可以使用任意值或空值
98 |
99 | # 如果没有提供 base_url,从内置配置中查找
100 | if not base_url:
101 | provider_config = next((p for p in BUILTIN_PROVIDERS if p["id"] == provider), None)
102 | if provider_config:
103 | base_url = provider_config.get("base_url")
104 | else:
105 | # 默认使用 OpenAI
106 | base_url = "https://api.openai.com/v1"
107 |
108 | client = OpenAI(api_key=api_key, base_url=base_url)
109 |
110 | # 获取模型列表
111 | models_response = client.models.list()
112 |
113 | # 过滤出可用的聊天模型
114 | chat_models = []
115 | for model in models_response.data:
116 | model_id = model.id
117 |
118 | # 根据提供商过滤模型
119 | if provider == 'openai':
120 | if any(x in model_id.lower() for x in ['gpt-4', 'gpt-3.5', 'gpt-4o']):
121 | chat_models.append({
122 | "id": model_id,
123 | "name": model_id,
124 | "provider": provider
125 | })
126 | elif provider == 'deepseek':
127 | if 'deepseek' in model_id.lower():
128 | chat_models.append({
129 | "id": model_id,
130 | "name": model_id,
131 | "provider": provider
132 | })
133 | elif provider == 'qwen':
134 | if 'qwen' in model_id.lower():
135 | chat_models.append({
136 | "id": model_id,
137 | "name": model_id,
138 | "provider": provider
139 | })
140 | elif provider == 'claude':
141 | if 'claude' in model_id.lower():
142 | chat_models.append({
143 | "id": model_id,
144 | "name": model_id,
145 | "provider": provider
146 | })
147 | elif provider == 'groq':
148 | if any(x in model_id.lower() for x in ['llama', 'mixtral', 'gemma']):
149 | chat_models.append({
150 | "id": model_id,
151 | "name": model_id,
152 | "provider": provider
153 | })
154 | elif provider == 'ollama':
155 | # Ollama 返回所有模型
156 | chat_models.append({
157 | "id": model_id,
158 | "name": model_id,
159 | "provider": provider
160 | })
161 | else:
162 | # 其他提供商,返回所有模型
163 | chat_models.append({
164 | "id": model_id,
165 | "name": model_id,
166 | "provider": provider
167 | })
168 |
169 | # 如果没有获取到,返回常用模型列表
170 | if not chat_models:
171 | chat_models = get_default_models(provider)
172 |
173 | return R.success(chat_models, msg=f"获取 {provider} 模型列表成功")
174 | except Exception as e:
175 | logger.error(f"获取 {provider} 模型列表失败: {e}")
176 | # 返回默认模型列表
177 | return R.success(get_default_models(provider), msg=f"获取 {provider} 模型列表成功(使用默认列表)")
178 |
179 | def get_default_models(provider: str):
180 | """获取默认模型列表"""
181 | defaults = {
182 | "openai": [
183 | {"id": "gpt-4o", "name": "GPT-4o", "provider": "openai"},
184 | {"id": "gpt-4o-mini", "name": "GPT-4o Mini", "provider": "openai"},
185 | {"id": "gpt-4-turbo", "name": "GPT-4 Turbo", "provider": "openai"},
186 | {"id": "gpt-3.5-turbo", "name": "GPT-3.5 Turbo", "provider": "openai"},
187 | ],
188 | "deepseek": [
189 | {"id": "deepseek-chat", "name": "DeepSeek Chat", "provider": "deepseek"},
190 | {"id": "deepseek-coder", "name": "DeepSeek Coder", "provider": "deepseek"},
191 | ],
192 | "qwen": [
193 | {"id": "qwen-turbo", "name": "Qwen Turbo", "provider": "qwen"},
194 | {"id": "qwen-plus", "name": "Qwen Plus", "provider": "qwen"},
195 | {"id": "qwen-max", "name": "Qwen Max", "provider": "qwen"},
196 | ],
197 | "claude": [
198 | {"id": "claude-3-5-sonnet-20241022", "name": "Claude 3.5 Sonnet", "provider": "claude"},
199 | {"id": "claude-3-opus-20240229", "name": "Claude 3 Opus", "provider": "claude"},
200 | {"id": "claude-3-sonnet-20240229", "name": "Claude 3 Sonnet", "provider": "claude"},
201 | ],
202 | "groq": [
203 | {"id": "llama-3.1-70b-versatile", "name": "Llama 3.1 70B", "provider": "groq"},
204 | {"id": "mixtral-8x7b-32768", "name": "Mixtral 8x7B", "provider": "groq"},
205 | {"id": "gemma-7b-it", "name": "Gemma 7B", "provider": "groq"},
206 | ],
207 | "ollama": [
208 | {"id": "llama2", "name": "Llama 2", "provider": "ollama"},
209 | {"id": "mistral", "name": "Mistral", "provider": "ollama"},
210 | {"id": "codellama", "name": "CodeLlama", "provider": "ollama"},
211 | ],
212 | }
213 | return defaults.get(provider, [])
214 |
215 | def get_gemini_models(api_key: str):
216 | """获取 Gemini 模型列表"""
217 | try:
218 | genai.configure(api_key=api_key)
219 |
220 | # 获取可用模型列表
221 | models = genai.list_models()
222 |
223 | # 过滤出生成模型
224 | chat_models = []
225 | for model in models:
226 | # 只包含生成模型
227 | if 'generateContent' in model.supported_generation_methods:
228 | chat_models.append({
229 | "id": model.name.split('/')[-1], # 提取模型名称
230 | "name": model.display_name or model.name.split('/')[-1],
231 | "provider": "gemini"
232 | })
233 |
234 | # 如果没有获取到,返回常用模型列表
235 | if not chat_models:
236 | chat_models = [
237 | {"id": "gemini-2.0-flash-exp", "name": "Gemini 2.0 Flash (Experimental)", "provider": "gemini"},
238 | {"id": "gemini-1.5-pro", "name": "Gemini 1.5 Pro", "provider": "gemini"},
239 | {"id": "gemini-1.5-flash", "name": "Gemini 1.5 Flash", "provider": "gemini"},
240 | {"id": "gemini-pro", "name": "Gemini Pro", "provider": "gemini"},
241 | ]
242 |
243 | return R.success(chat_models, msg="获取 Gemini 模型列表成功")
244 | except Exception as e:
245 | logger.error(f"获取 Gemini 模型列表失败: {e}")
246 | raise
247 |
248 | @router.post("/models/test")
249 | def test_model_connection(config: ModelConfigRequest):
250 | """测试模型连接"""
251 | try:
252 | if config.provider == 'gemini':
253 | genai.configure(api_key=config.api_key)
254 | # 尝试列出模型来测试连接
255 | list(genai.list_models())
256 | return R.success(None, msg="Gemini 连接成功")
257 | else:
258 | # OpenAI 兼容 API
259 | # Ollama 不需要 API Key
260 | api_key = config.api_key
261 | if config.provider == 'ollama':
262 | api_key = api_key or 'ollama' # Ollama 可以使用任意值或空值
263 |
264 | if not config.base_url:
265 | provider_config = next((p for p in BUILTIN_PROVIDERS if p["id"] == config.provider), None)
266 | if provider_config:
267 | base_url = provider_config.get("base_url")
268 | else:
269 | base_url = "https://api.openai.com/v1"
270 | else:
271 | base_url = config.base_url
272 |
273 | client = OpenAI(api_key=api_key, base_url=base_url)
274 | # 尝试获取模型列表来测试连接
275 | client.models.list()
276 | return R.success(None, msg=f"{config.provider} 连接成功")
277 | except Exception as e:
278 | logger.error(f"测试连接失败: {e}", exc_info=True)
279 | return R.error(f"连接失败: {str(e)}")
280 |
--------------------------------------------------------------------------------
/frontend/src/components/ModelSelectorPanel.tsx:
--------------------------------------------------------------------------------
1 | import { useState, useEffect } from 'react'
2 | import { Brain, ChevronDown, CheckCircle2 } from 'lucide-react'
3 | import ProviderIcon from './ProviderIcon'
4 | import toast from 'react-hot-toast'
5 |
6 | interface ModelOption {
7 | id: string
8 | name: string
9 | provider: string
10 | providerName: string
11 | modelId: string
12 | }
13 |
14 | const PROVIDER_LABELS: Record<string, string> = {
15 | openai: 'OpenAI',
16 | deepseek: 'DeepSeek',
17 | qwen: 'Qwen',
18 | claude: 'Claude',
19 | gemini: 'Gemini',
20 | groq: 'Groq',
21 | ollama: 'Ollama',
22 | }
23 |
24 | const PROVIDER_COLORS: Record<string, string> = {
25 | openai: 'bg-green-100 text-green-700 border-green-200',
26 | deepseek: 'bg-blue-100 text-blue-700 border-blue-200',
27 | qwen: 'bg-purple-100 text-purple-700 border-purple-200',
28 | claude: 'bg-orange-100 text-orange-700 border-orange-200',
29 | gemini: 'bg-yellow-100 text-yellow-700 border-yellow-200',
30 | groq: 'bg-indigo-100 text-indigo-700 border-indigo-200',
31 | ollama: 'bg-teal-100 text-teal-700 border-teal-200',
32 | }
33 |
34 | export default function ModelSelectorPanel() {
35 | const [selectedModel, setSelectedModel] = useState('')
36 | const [availableModels, setAvailableModels] = useState<ModelOption[]>([])
37 | const [isOpen, setIsOpen] = useState(false)
38 |
39 | // 从 localStorage 加载所有已配置的模型
40 | const loadModels = () => {
41 | try {
42 | const savedConfigs = localStorage.getItem('modelConfigs')
43 |
44 | if (!savedConfigs) {
45 | setAvailableModels([])
46 | setSelectedModel('')
47 | return
48 | }
49 |
50 | const configs = JSON.parse(savedConfigs)
51 | const modelList: ModelOption[] = []
52 |
53 | // 遍历所有配置,提取已配置的模型
54 | Object.entries(configs).forEach(([providerId, config]: [string, any]) => {
55 | if (!config || typeof config !== 'object') {
56 | return
57 | }
58 |
59 | // 检查是否已配置:有模型ID(支持 models 数组或 model 字符串),且(Ollama 或 有 API Key)
60 | // 优先使用 models 数组,如果没有则使用 model 字符串(兼容旧版本)
61 | const modelIds = config.models && Array.isArray(config.models) && config.models.length > 0
62 | ? config.models
63 | : (config.model && typeof config.model === 'string' && config.model.trim() ? [config.model.trim()] : [])
64 |
65 | // Ollama 不需要 API Key,其他提供商需要
66 | const hasApiKey = providerId === 'ollama' || (config.apiKey && typeof config.apiKey === 'string' && config.apiKey.trim())
67 |
68 | // 只有同时满足:有模型 且 (Ollama 或 有 API Key)才添加
69 | if (modelIds.length > 0 && hasApiKey) {
70 | // 遍历所有选中的模型
71 | modelIds.forEach((modelId: string) => {
72 | const trimmedModelId = typeof modelId === 'string' ? modelId.trim() : String(modelId).trim()
73 | if (!trimmedModelId) return
74 |
75 | // 模型名称处理
76 | let modelName = trimmedModelId
77 |
78 | // 如果 modelId 包含提供商前缀(如 openai-gpt-4o),提取后面的部分
79 | if (trimmedModelId.startsWith(providerId + '-')) {
80 | modelName = trimmedModelId.substring(providerId.length + 1)
81 | }
82 |
83 | // 处理特殊格式的模型名称(如 hf.co/unsloth/Qwen3-4B-GGUF:Q6_K_XL)
84 | // 提取最后一部分作为显示名称
85 | if (modelName.includes('/')) {
86 | const parts = modelName.split('/')
87 | modelName = parts[parts.length - 1]
88 | }
89 |
90 | // 处理量化格式(如 :Q6_K_XL),保留量化信息
91 | if (modelName.includes(':')) {
92 | const colonIndex = modelName.lastIndexOf(':')
93 | if (colonIndex > 0) {
94 | const baseName = modelName.substring(0, colonIndex)
95 | const quantInfo = modelName.substring(colonIndex + 1)
96 | modelName = `${baseName} (${quantInfo})`
97 | }
98 | }
99 |
100 | const modelOption: ModelOption = {
101 | id: `${providerId}-${trimmedModelId}`,
102 | name: modelName,
103 | provider: providerId,
104 | providerName: PROVIDER_LABELS[providerId] || providerId,
105 | modelId: trimmedModelId,
106 | }
107 |
108 | modelList.push(modelOption)
109 | })
110 | }
111 | })
112 |
113 | // 按提供商名称排序,然后按模型名称排序
114 | modelList.sort((a, b) => {
115 | if (a.providerName !== b.providerName) {
116 | return a.providerName.localeCompare(b.providerName)
117 | }
118 | return a.name.localeCompare(b.name)
119 | })
120 |
121 | setAvailableModels(modelList)
122 |
123 | // 加载已选择的模型
124 | const savedSelected = localStorage.getItem('selectedModel')
125 |
126 | if (savedSelected && modelList.find(m => m.id === savedSelected)) {
127 | setSelectedModel(savedSelected)
128 | } else if (modelList.length > 0) {
129 | // 如果没有保存的选择,选择第一个
130 | setSelectedModel(modelList[0].id)
131 | localStorage.setItem('selectedModel', modelList[0].id)
132 | // 触发自定义事件,通知其他组件模型已更改
133 | window.dispatchEvent(new Event('modelChanged'))
134 | } else {
135 | setSelectedModel('')
136 | }
137 | } catch (error) {
138 | console.error('加载模型列表失败:', error)
139 | setAvailableModels([])
140 | setSelectedModel('')
141 | }
142 | }
143 |
144 | useEffect(() => {
145 | loadModels()
146 |
147 | // 监听 storage 变化
148 | const handleStorageChange = (e: StorageEvent) => {
149 | if (e.key === 'modelConfigs' || e.key === 'selectedModel') {
150 | loadModels()
151 | }
152 | }
153 | window.addEventListener('storage', handleStorageChange)
154 |
155 | // 定期检查配置变化(因为同窗口的 localStorage 变化不会触发 storage 事件)
156 | const interval = setInterval(loadModels, 1000)
157 |
158 | return () => {
159 | window.removeEventListener('storage', handleStorageChange)
160 | clearInterval(interval)
161 | }
162 | }, [])
163 |
164 | const currentModel = availableModels.find(m => m.id === selectedModel)
165 |
166 | const handleModelChange = (modelId: string) => {
167 | setSelectedModel(modelId)
168 | localStorage.setItem('selectedModel', modelId)
169 | setIsOpen(false)
170 | const model = availableModels.find(m => m.id === modelId)
171 | if (model) {
172 | toast.success(`已切换到: ${model.providerName} - ${model.name}`)
173 | // 触发自定义事件,通知其他组件模型已更改
174 | window.dispatchEvent(new Event('modelChanged'))
175 | }
176 | }
177 |
178 | if (availableModels.length === 0) {
179 | return (
180 |
181 |
182 |
183 |
184 |
未配置模型
185 |
186 | 请先在下方配置至少一个提供商的 API Key 和模型,然后才能在此处选择使用。
187 |
188 |
189 |
190 |
191 | )
192 | }
193 |
194 | return (
195 |
196 |
选择当前使用的模型
197 |
198 |
232 |
233 | {isOpen && (
234 | <>
235 |
setIsOpen(false)}
238 | />
239 |
240 | {availableModels.length === 0 ? (
241 |
242 | 暂无可用模型
243 |
244 | ) : (
245 | <>
246 |
247 | 共 {availableModels.length} 个可用模型
248 |
249 | {availableModels.map((model) => (
250 |
277 | ))}
278 | >
279 | )}
280 |
281 | >
282 | )}
283 |
284 |
285 | 当前选择的模型将用于生成笔记。共 {availableModels.length} 个可用模型,可以在下方配置更多模型。
286 |
287 |
288 | )
289 | }
290 |
--------------------------------------------------------------------------------
/原理博客.md:
--------------------------------------------------------------------------------
1 | # Video AI Note: Technical Deep Dive
2 |
3 | ## Project Overview
4 |
5 | Video AI Note is an intelligent video note-taking tool: it automatically extracts audio from a video, transcribes it to text, and uses AI to generate a structured Markdown note. Processing is fully local to protect data privacy, and with local LLMs such as Ollama it can run entirely offline.
6 |
7 | ## System Architecture
8 |
9 | ### Overall Architecture
10 |
11 | ```mermaid
12 | graph TB
13 | subgraph "前端层 (Frontend)"
14 | A[React + TypeScript] --> B[组件层]
15 | B --> C[API 服务层]
16 | C --> D[状态管理]
17 | end
18 |
19 | subgraph "后端层 (Backend)"
20 | E[FastAPI 服务器] --> F[路由层]
21 | F --> G[服务层]
22 | G --> H[业务逻辑]
23 | end
24 |
25 | subgraph "核心处理模块"
26 | I[音频提取模块] --> J[FFmpeg]
27 | K[语音转文字模块] --> L[Fast-Whisper]
28 | M[AI 笔记生成模块] --> N[GPT/LLM]
29 | O[截图生成模块] --> P[FFmpeg]
30 | end
31 |
32 | subgraph "数据存储"
33 | Q[(SQLite 数据库)]
34 | R[文件系统]
35 | S[缓存文件]
36 | end
37 |
38 | C -->|HTTP API| E
39 | H --> I
40 | H --> K
41 | H --> M
42 | H --> O
43 | I --> R
44 | K --> S
45 | M --> S
46 | O --> R
47 | H --> Q
48 |
49 | style A fill:#61dafb
50 | style E fill:#009688
51 | style J fill:#ff6b6b
52 | style L fill:#4ecdc4
53 | style N fill:#95e1d3
54 | ```
55 |
56 | ## Core Workflow
57 |
58 | ### End-to-End Processing Flow
59 |
60 | ```mermaid
61 | sequenceDiagram
62 | participant User as 用户
63 | participant Frontend as 前端界面
64 | participant Backend as 后端服务
65 | participant FFmpeg as FFmpeg
66 | participant Whisper as Fast-Whisper
67 | participant LLM as AI 模型
68 | participant DB as 数据库
69 | participant FS as 文件系统
70 |
71 | User->>Frontend: 1. 上传视频文件
72 | Frontend->>Backend: POST /api/upload
73 | Backend->>FS: 保存视频文件
74 | Backend->>DB: 创建任务记录 (pending)
75 | Backend-->>Frontend: 返回 task_id
76 |
77 | User->>Frontend: 2. 执行步骤:提取音频
78 | Frontend->>Backend: POST /api/task/{task_id}/step/extract
79 | Backend->>FFmpeg: 提取音频 (16kHz, 单声道)
80 | FFmpeg->>FS: 保存音频文件
81 | Backend->>DB: 更新状态 (processing)
82 | Backend-->>Frontend: 返回成功
83 |
84 | User->>Frontend: 3. 执行步骤:转写文字
85 | Frontend->>Backend: POST /api/task/{task_id}/step/transcribe
86 | Backend->>FS: 检查缓存
87 | alt 缓存存在
88 | FS-->>Backend: 返回缓存结果
89 | else 缓存不存在
90 | Backend->>Whisper: 转录音频
91 | Whisper->>FS: 保存转录结果 (JSON)
92 | Backend->>DB: 更新状态 (transcribed)
93 | end
94 | Backend-->>Frontend: 返回转录文本
95 |
96 | User->>Frontend: 4. 执行步骤:生成笔记
97 | Frontend->>Backend: POST /api/task/{task_id}/step/summarize
98 | Backend->>FS: 检查缓存
99 | alt 缓存存在且无截图
100 | FS-->>Backend: 返回缓存笔记
101 | else 需要生成
102 | Backend->>LLM: 调用 AI 生成笔记
103 | Note over LLM: 构建 Prompt
包含转录文本
生成结构化 Markdown
104 | LLM-->>Backend: 返回 Markdown 笔记
105 | Backend->>FS: 保存笔记文件
106 | end
107 |
108 | opt 如果启用截图
109 | Backend->>FFmpeg: 根据时间戳生成截图
110 | FFmpeg->>FS: 保存截图文件
111 | Backend->>Backend: 替换 Markdown 中的截图标记
112 | end
113 |
114 | Backend->>DB: 更新状态 (completed)
115 | Backend-->>Frontend: 返回最终笔记
116 |
117 | User->>Frontend: 5. 下载笔记 (Markdown/PDF)
118 | Frontend->>Backend: GET /api/task/{task_id}/download
119 | Backend-->>Frontend: 返回文件
120 | ```
121 |
122 | ## Technical Modules in Detail
123 |
124 | ### 1. Audio Extraction
125 |
126 | Audio extraction uses FFmpeg to pull the audio stream out of the video file and convert it into a format suited to speech recognition.
127 |
128 | ```mermaid
129 | flowchart LR
130 | A[视频文件] --> B{检查 FFmpeg}
131 | B -->|系统已安装| C[使用系统 FFmpeg]
132 | B -->|未安装| D[自动下载 FFmpeg]
133 | D --> E[使用项目 FFmpeg]
134 | C --> F[执行提取命令]
135 | E --> F
136 | F --> G[输出音频文件]
137 |
138 | F --> H["ffmpeg -i video.mp4
-acodec pcm_s16le
-ac 1 -ar 16000
audio.wav"]
139 |
140 | style A fill:#ff6b6b
141 | style G fill:#4ecdc4
142 | style H fill:#ffe66d
143 | ```
144 |
145 | **Technical details:**
146 | - **Sample rate**: 16 kHz (recommended for Whisper models)
147 | - **Channels**: mono (reduces compute)
148 | - **Encoding**: 16-bit PCM (lossless, well suited to speech recognition)
149 | - **Automatic management**: `imageio-ffmpeg` downloads and manages the FFmpeg binary automatically (see the sketch below)
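
A minimal sketch of this step, assuming the backend shells out to FFmpeg via `subprocess`; the function name and paths are illustrative, not the project's actual API:

```python
import subprocess
from imageio_ffmpeg import get_ffmpeg_exe  # downloads a bundled FFmpeg binary on first use

def extract_audio(video_path: str, audio_path: str) -> str:
    """Extract a 16 kHz, mono, 16-bit PCM WAV from a video for speech recognition."""
    cmd = [
        get_ffmpeg_exe(), "-y",   # overwrite any existing output
        "-i", video_path,         # input video
        "-acodec", "pcm_s16le",   # 16-bit PCM
        "-ac", "1",               # mono
        "-ar", "16000",           # 16 kHz sample rate
        audio_path,
    ]
    subprocess.run(cmd, check=True, capture_output=True)
    return audio_path
```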
150 |
151 | ### 2. Speech-to-Text
152 |
153 | Speech recognition uses Fast-Whisper (a Whisper implementation built on CTranslate2), which runs roughly 4-5x faster than the original Whisper.
154 |
155 | ```mermaid
156 | graph TD
157 | A[音频文件] --> B[Fast-Whisper 模型]
158 | B --> C{模型加载}
159 | C -->|首次使用| D[下载模型]
160 | C -->|已存在| E[加载模型]
161 | D --> E
162 | E --> F[执行转录]
163 | F --> G[语音活动检测 VAD]
164 | G --> H[自动语言检测]
165 | H --> I[生成分段结果]
166 | I --> J[保存 JSON 缓存]
167 | J --> K[返回转录结果]
168 |
169 | K --> L["TranscriptResult:
- language: 检测语言
- full_text: 完整文本
- segments: 时间分段"]
170 |
171 | style B fill:#4ecdc4
172 | style G fill:#95e1d3
173 | style L fill:#ffe66d
174 | ```
175 |
176 | **Technical details:**
177 | - **Model size**: tiny/base/small/medium/large supported (base by default)
178 | - **VAD filtering**: voice-activity detection is enabled to skip silent segments
179 | - **Language detection**: the audio language is detected automatically
180 | - **Segmented output**: timestamps are preserved for downstream processing
181 | - **Caching**: transcription results are saved as JSON to avoid repeated work (see the sketch below)
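
A minimal transcription sketch using the `faster-whisper` package with the defaults described above; the cache file layout and result shape are assumptions for illustration:

```python
import json
from faster_whisper import WhisperModel

def transcribe(audio_path: str, cache_path: str, model_size: str = "base") -> dict:
    """Transcribe a WAV file with VAD filtering and cache the result as JSON."""
    model = WhisperModel(model_size, compute_type="int8")
    segments, info = model.transcribe(audio_path, vad_filter=True)

    result = {
        "language": info.language,  # auto-detected language
        "segments": [
            {"start": seg.start, "end": seg.end, "text": seg.text}
            for seg in segments
        ],
    }
    result["full_text"] = " ".join(s["text"].strip() for s in result["segments"])

    with open(cache_path, "w", encoding="utf-8") as fh:
        json.dump(result, fh, ensure_ascii=False, indent=2)
    return result
```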
182 |
183 | ### 3. AI Note Generation
184 |
185 | A large language model (LLM) turns the transcript into a structured Markdown note.
186 |
187 | ```mermaid
188 | graph TB
189 | A[转录文本] --> B[构建 Prompt]
190 | B --> C[Prompt 模板]
191 | C --> D{模型类型}
192 |
193 | D -->|Ollama| E[本地模型
http://localhost:11434/v1]
194 | D -->|OpenAI| F[OpenAI API
api.openai.com]
195 | D -->|DeepSeek| G[DeepSeek API]
196 | D -->|Qwen| H[Qwen API]
197 |
198 | E --> I[调用 LLM]
199 | F --> I
200 | G --> I
201 | H --> I
202 |
203 | I --> J[生成 Markdown]
204 | J --> K[后处理]
205 | K --> L[清理思考标签]
206 | L --> M[格式化输出]
207 | M --> N[保存缓存]
208 |
209 | style E fill:#95e1d3
210 | style I fill:#4ecdc4
211 | style J fill:#ffe66d
212 | ```
213 |
214 | **Prompt construction strategy:**
215 |
216 | ```mermaid
217 | graph LR
218 | A[转录分段] --> B[格式化分段文本]
219 | B --> C[添加指令]
220 | C --> D[语言要求]
221 | C --> E[格式要求]
222 | C --> F[内容要求]
223 | C --> G[截图标记要求]
224 |
225 | D --> H[最终 Prompt]
226 | E --> H
227 | F --> H
228 | G --> H
229 |
230 | H --> I["1. 完整信息
2. 去除无关内容
3. 保留关键细节
4. 可读布局
5. 数学公式 LaTeX
6. 截图标记 *Screenshot-[mm:ss]"]
231 |
232 | style H fill:#4ecdc4
233 | style I fill:#ffe66d
234 | ```
235 |
236 | **Supported model providers:**
237 | - **Ollama**: runs locally, fully offline, no API key required
238 | - **OpenAI**: GPT-3.5/GPT-4 series
239 | - **DeepSeek**: Chinese LLM provider
240 | - **Qwen**: Alibaba's Tongyi Qianwen
241 | - **Other OpenAI-compatible APIs**: configured via base_url (a call sketch follows below)
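
A minimal sketch of the generation call through the OpenAI-compatible client, here pointed at an Ollama endpoint; the default model name and the prompt wording are illustrative assumptions, not the project's actual template:

```python
from openai import OpenAI

def generate_note(transcript_text: str,
                  base_url: str = "http://localhost:11434/v1",  # Ollama's OpenAI-compatible endpoint
                  api_key: str = "ollama",                      # Ollama accepts any placeholder key
                  model: str = "qwen2.5") -> str:
    """Ask an OpenAI-compatible endpoint to turn a transcript into a Markdown note."""
    prompt = (
        "Turn the following video transcript into a structured Markdown note. "
        "Keep the key details, drop filler, use LaTeX for math, and insert "
        "screenshot markers of the form *Screenshot-[mm:ss] where a frame would help.\n\n"
        f"Transcript:\n{transcript_text}"
    )
    client = OpenAI(api_key=api_key, base_url=base_url)
    resp = client.chat.completions.create(
        model=model,
        messages=[{"role": "user", "content": prompt}],
    )
    return resp.choices[0].message.content
```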
242 |
243 | ### 4. Screenshot Generation
244 |
245 | Frames are extracted from the video at the timestamps referenced by the screenshot markers in the Markdown.
246 |
247 | ```mermaid
248 | flowchart TD
249 | A[Markdown 笔记] --> B[解析截图标记]
250 | B --> C["*Screenshot-[mm:ss]"]
251 | C --> D[提取时间戳]
252 | D --> E[转换为秒数]
253 | E --> F[FFmpeg 提取帧]
254 | F --> G[保存截图文件]
255 | G --> H[生成图片 URL]
256 | H --> I[替换 Markdown 标记]
257 | I --> J[更新笔记文件]
258 |
259 | F --> K["ffmpeg -ss {timestamp}
-i video.mp4
-vframes 1
screenshot.jpg"]
260 |
261 | style C fill:#ff6b6b
262 | style F fill:#4ecdc4
263 | style I fill:#95e1d3
264 | ```
265 |
266 | **Screenshot marker format:**
267 | - Marker syntax: `*Screenshot-[mm:ss]` (for example `*Screenshot-[01:23]`)
268 | - Placement: after the section content, separated by a blank line
269 | - Automatic replacement: once the screenshot is generated, the marker is replaced with the corresponding image link (see the sketch below)
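
A minimal sketch of marker handling, assuming the `*Screenshot-[mm:ss]` syntax above; the output file naming is an illustrative assumption:

```python
import re
import subprocess

MARKER = re.compile(r"\*Screenshot-\[(\d{2}):(\d{2})\]")

def insert_screenshots(markdown: str, video_path: str, out_dir: str) -> str:
    """Replace *Screenshot-[mm:ss] markers with image links to extracted frames."""
    def _replace(match: re.Match) -> str:
        minutes, seconds = int(match.group(1)), int(match.group(2))
        image_path = f"{out_dir}/shot_{minutes:02d}_{seconds:02d}.jpg"
        subprocess.run(
            ["ffmpeg", "-y", "-ss", str(minutes * 60 + seconds),  # seek to the timestamp
             "-i", video_path, "-vframes", "1", image_path],      # grab a single frame
            check=True, capture_output=True,
        )
        return f"![]({image_path})"

    return MARKER.sub(_replace, markdown)
```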
270 |
271 | ## Data Flow and State Management
272 |
273 | ### Task Status Transitions
274 |
275 | ```mermaid
276 | stateDiagram-v2
277 | [*] --> pending: 上传文件
278 | pending --> processing: 提取音频
279 | processing --> transcribing: 开始转写
280 | transcribing --> transcribed: 转写完成
281 | transcribed --> summarizing: 生成笔记
282 | summarizing --> completed: 笔记完成
283 | completed --> [*]
284 |
285 | processing --> failed: 错误
286 | transcribing --> failed: 错误
287 | summarizing --> failed: 错误
288 | failed --> [*]
289 | ```
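
The same lifecycle expressed as a small Python enum, assuming the status strings from the diagram (the class name is illustrative):

```python
from enum import Enum

class TaskStatus(str, Enum):
    """States a video task moves through."""
    PENDING = "pending"            # uploaded, waiting for audio extraction
    PROCESSING = "processing"      # extracting audio
    TRANSCRIBING = "transcribing"  # running speech-to-text
    TRANSCRIBED = "transcribed"    # transcript ready
    SUMMARIZING = "summarizing"    # generating the note
    COMPLETED = "completed"        # note finished
    FAILED = "failed"              # any step errored out
```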
290 |
291 | ### Data Storage Structure
292 |
293 | ```mermaid
294 | erDiagram
295 | VIDEO_TASK ||--o{ TASK_STATUS : has
296 | VIDEO_TASK ||--o{ CACHE_FILE : generates
297 |
298 | VIDEO_TASK {
299 | string task_id PK
300 | string filename
301 | string status
302 | bool screenshot
303 | datetime created_at
304 | }
305 |
306 | TASK_STATUS {
307 | string task_id FK
308 | string status
309 | string message
310 | datetime updated_at
311 | }
312 |
313 | CACHE_FILE {
314 | string task_id FK
315 | string type
316 | string path
317 | }
318 |
319 | CACHE_FILE ||--|| AUDIO_FILE : "task_id_audio.wav"
320 | CACHE_FILE ||--|| TRANSCRIPT_FILE : "task_id_transcript.json"
321 | CACHE_FILE ||--|| MARKDOWN_FILE : "task_id_markdown.md"
322 | ```
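
A minimal SQLAlchemy sketch of the main task table, based on the fields in the diagram; column types and the table name are assumptions:

```python
from datetime import datetime
from sqlalchemy import Boolean, Column, DateTime, String
from sqlalchemy.orm import declarative_base

Base = declarative_base()

class VideoTask(Base):
    """One uploaded video and its processing state."""
    __tablename__ = "video_tasks"

    task_id = Column(String, primary_key=True)
    filename = Column(String, nullable=False)
    status = Column(String, default="pending")    # see the status diagram above
    screenshot = Column(Boolean, default=False)   # whether screenshots should be inserted
    created_at = Column(DateTime, default=datetime.utcnow)
```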
323 |
324 | ## Frontend Architecture
325 |
326 | ### Component Hierarchy
327 |
328 | ```mermaid
329 | graph TD
330 | A[App.tsx] --> B[路由管理]
331 | B --> C[上传页面]
332 | B --> D[任务列表]
333 | B --> E[模型配置]
334 |
335 | C --> F[UploadForm 组件]
336 | F --> G[文件选择]
337 | F --> H[上传进度]
338 |
339 | D --> I[TaskList 组件]
340 | I --> J[TaskItem 组件]
341 | J --> K[TaskSteps 组件]
342 | K --> L[步骤执行按钮]
343 | K --> M[状态显示]
344 |
345 | J --> N[TaskDetailPanel]
346 | N --> O[视频预览]
347 | N --> P[转录文本查看]
348 | N --> Q[笔记预览]
349 | N --> R[下载按钮]
350 |
351 | E --> S[ModelConfig 组件]
352 | S --> T[模型选择]
353 | S --> U[API 配置]
354 |
355 | style A fill:#61dafb
356 | style F fill:#4ecdc4
357 | style I fill:#95e1d3
358 | style S fill:#ffe66d
359 | ```
360 |
361 | ### State Management
362 |
363 | ```mermaid
364 | graph LR
365 | A[Zustand Store] --> B[Task Store]
366 | B --> C[任务列表]
367 | B --> D[当前任务]
368 | B --> E[任务状态]
369 |
370 | A --> F[Model Store]
371 | F --> G[模型配置]
372 | F --> H[当前模型]
373 |
374 | C --> I[添加任务]
375 | C --> J[更新任务]
376 | C --> K[删除任务]
377 |
378 | style A fill:#61dafb
379 | style B fill:#4ecdc4
380 | style F fill:#95e1d3
381 | ```
382 |
383 | ## Key Technical Features
384 |
385 | ### 1. Fully Local Processing
386 |
387 | ```mermaid
388 | graph TB
389 | A[用户数据] --> B{处理方式}
390 | B -->|本地模式| C[Ollama 本地模型]
391 | B -->|云端模式| D[云端 API]
392 |
393 | C --> E[数据不上传]
394 | C --> F[完全离线]
395 | C --> G[隐私保护]
396 |
397 | D --> H[需要网络]
398 | D --> I[需要 API Key]
399 |
400 | style C fill:#95e1d3
401 | style E fill:#4ecdc4
402 | style F fill:#4ecdc4
403 | style G fill:#4ecdc4
404 | ```
405 |
406 | ### 2. Caching
407 |
408 | ```mermaid
409 | flowchart TD
410 | A[处理请求] --> B{检查缓存}
411 | B -->|存在| C[返回缓存]
412 | B -->|不存在| D[执行处理]
413 | D --> E[保存缓存]
414 | E --> F[返回结果]
415 |
416 | G[音频提取] --> H[task_id_audio.wav]
417 | I[转录结果] --> J[task_id_transcript.json]
418 | K[笔记生成] --> L[task_id_markdown.md]
419 |
420 | style B fill:#ffe66d
421 | style C fill:#95e1d3
422 | style E fill:#4ecdc4
423 | ```
424 |
425 | **Caching strategy:**
426 | - **Audio files**: saved after extraction so the same video is never extracted twice
427 | - **Transcripts**: JSON, including the full segment and timestamp information
428 | - **Note content**: Markdown, with incremental updates for the screenshot feature (see the sketch below)
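
A minimal cache-lookup sketch, assuming the per-task file naming shown above; the cache directory is an illustrative assumption:

```python
import json
from pathlib import Path

CACHE_DIR = Path("note_results")  # assumed location of cached artifacts

def load_cached_transcript(task_id: str):
    """Return the cached transcript for a task, or None if it has not been produced yet."""
    cache_file = CACHE_DIR / f"{task_id}_transcript.json"
    if cache_file.exists():
        with cache_file.open(encoding="utf-8") as fh:
            return json.load(fh)
    return None
```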
429 |
430 | ### 3. Step-by-Step Execution
431 |
432 | The pipeline runs as discrete steps, so the user decides when each one executes:
433 |
434 | ```mermaid
435 | graph LR
436 | A[上传文件] --> B[步骤 1: 提取音频]
437 | B --> C[步骤 2: 转写文字]
438 | C --> D[步骤 3: 生成笔记]
439 |
440 | B --> E[可查看音频]
441 | C --> F[可查看转录]
442 | D --> G[可查看笔记]
443 |
444 | style A fill:#ff6b6b
445 | style B fill:#4ecdc4
446 | style C fill:#95e1d3
447 | style D fill:#ffe66d
448 | ```
449 |
450 | **Advantages:**
451 | - Intermediate results can be inspected at any time
452 | - Any single step can be retried on its own
453 | - A failure is contained to the step that failed rather than the whole pipeline
454 | - Overall, a better user experience
455 |
456 | ## Performance Optimizations
457 |
458 | ### 1. Model Loading
459 |
460 | ```mermaid
461 | graph TD
462 | A[首次使用] --> B[延迟加载]
463 | B --> C[按需初始化]
464 | C --> D[单例模式]
465 | D --> E[复用模型实例]
466 |
467 | style B fill:#95e1d3
468 | style D fill:#4ecdc4
469 | ```
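
A minimal sketch of the lazy, single-instance loading pattern for the Whisper model (the helper name is illustrative):

```python
from functools import lru_cache
from faster_whisper import WhisperModel

@lru_cache(maxsize=1)
def get_whisper_model(model_size: str = "base") -> WhisperModel:
    """Load the model on first use and reuse the same instance afterwards."""
    return WhisperModel(model_size, compute_type="int8")
```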
470 |
471 | ### 2. Asynchronous Processing
472 |
473 | ```mermaid
474 | sequenceDiagram
475 | participant F as 前端
476 | participant B as 后端
477 | participant W as Worker
478 |
479 | F->>B: 提交任务
480 | B->>B: 创建任务记录
481 | B-->>F: 返回 task_id
482 | B->>W: 后台处理
483 | F->>B: 轮询状态
484 | B-->>F: 返回当前状态
485 | W->>B: 更新状态
486 | F->>B: 获取结果
487 | B-->>F: 返回最终结果
488 | ```
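
A minimal FastAPI sketch of the submit-then-poll pattern shown above; the route paths, in-memory registry, and `run_pipeline` helper are illustrative assumptions, not the project's actual endpoints:

```python
import uuid
from fastapi import BackgroundTasks, FastAPI

app = FastAPI()
tasks: dict = {}  # in-memory task registry, for illustration only

def run_pipeline(task_id: str) -> None:
    """Placeholder for the extract -> transcribe -> summarize pipeline."""
    tasks[task_id]["status"] = "completed"

@app.post("/api/upload")
def submit_task(background: BackgroundTasks) -> dict:
    task_id = uuid.uuid4().hex
    tasks[task_id] = {"status": "pending"}
    background.add_task(run_pipeline, task_id)  # processed after the response is sent
    return {"task_id": task_id}

@app.get("/api/task/{task_id}/status")
def poll_task(task_id: str) -> dict:
    return tasks.get(task_id, {"status": "unknown"})
```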
489 |
490 | ## Security and Privacy
491 |
492 | ### Data Privacy
493 |
494 | ```mermaid
495 | graph TB
496 | A[用户上传视频] --> B[本地存储]
497 | B --> C[本地处理]
498 | C --> D{使用本地模型?}
499 | D -->|是| E[完全离线]
500 | D -->|否| F[仅发送文本到 API]
501 |
502 | E --> G[数据不上传]
503 | F --> H[仅转录文本]
504 | H --> I[不包含视频内容]
505 |
506 | style E fill:#95e1d3
507 | style G fill:#4ecdc4
508 | style H fill:#ffe66d
509 | ```
510 |
511 | **Privacy measures:**
512 | 1. All files are stored on the local filesystem
513 | 2. Fully offline operation is supported (Ollama mode)
514 | 3. Cloud APIs receive only the transcript text, never the video or audio
515 | 4. No data collection or tracking
516 |
517 | ## Deployment Architecture
518 |
519 | ### Development Environment
520 |
521 | ```mermaid
522 | graph LR
523 | A[前端 Dev Server
:5173] --> B[Vite Proxy]
524 | B --> C[后端 API
:8483]
525 | C --> D[SQLite DB]
526 | C --> E[文件系统]
527 |
528 | style A fill:#61dafb
529 | style C fill:#009688
530 | ```
531 |
532 | ### Production Environment
533 |
534 | ```mermaid
535 | graph TB
536 | A[用户] --> B[Nginx]
537 | B --> C[前端静态文件]
538 | B --> D[后端 API]
539 | D --> E[SQLite DB]
540 | D --> F[文件系统]
541 | D --> G[FFmpeg]
542 | D --> H[Whisper Model]
543 | D --> I[Ollama/LLM]
544 |
545 | style B fill:#009688
546 | style D fill:#4ecdc4
547 | ```
548 |
549 | ## Summary
550 |
551 | Video AI Note generates intelligent video notes through the following techniques:
552 |
553 | 1. **Audio processing**: FFmpeg extracts and converts the audio
554 | 2. **Speech recognition**: Fast-Whisper provides fast, accurate transcription
555 | 3. **AI generation**: a large language model produces the structured note
556 | 4. **Screenshots**: timestamp-based screenshot insertion
557 | 5. **Local operation**: Ollama enables fully offline use
558 | 6. **Caching**: repeated work is avoided, improving performance
559 | 7. **Step-by-step execution**: flexible user control over each stage
560 |
561 | The overall design emphasizes **privacy protection**, **user experience**, and **performance**, making Video AI Note a feature-complete and technically solid video note-taking tool.
562 |
563 |
--------------------------------------------------------------------------------
/frontend/src/components/TaskDetailPanel.tsx:
--------------------------------------------------------------------------------
1 | import { useEffect, useState, useRef } from 'react'
2 | import { X, FileVideo, Music, FileText, BookOpen } from 'lucide-react'
3 | import { useTaskStore } from '../store/taskStore'
4 | import { getTaskStatus, confirmStep, regenerateNote } from '../services/api'
5 | import StepProgress, { StepStatus } from './StepProgress'
6 | import TranscriptViewer from './TranscriptViewer'
7 | import EnhancedMarkdownViewer from './EnhancedMarkdownViewer'
8 | import toast from 'react-hot-toast'
9 |
10 | interface TaskDetailPanelProps {
11 | taskId: string
12 | onClose: () => void
13 | }
14 |
15 | export default function TaskDetailPanel({ taskId, onClose }: TaskDetailPanelProps) {
16 | const { tasks, updateTask } = useTaskStore()
17 | const task = tasks.find((t) => t.id === taskId)
18 | const [steps, setSteps] = useState<any[]>([])
19 | const [autoProcess, setAutoProcess] = useState(false)
20 | const [transcript, setTranscript] = useState<any>(null)
21 |
22 | // 初始化步骤
23 | useEffect(() => {
24 | if (!task) return
25 |
26 | // 检查转录是否完成(通过 transcript 状态判断)
27 | const isTranscribeCompleted = transcript && transcript.segments && transcript.segments.length > 0
28 |
29 | const initialSteps = [
30 | {
31 | id: 'upload',
32 | name: '文件上传',
33 | description: '将文件上传到服务器',
34 | status: 'completed' as StepStatus,
35 | result: task.filename ? (
36 |
37 |
38 | {task.filename}
39 |
40 | ) : null,
41 | },
42 | {
43 | id: 'extract',
44 | name: '提取音频',
45 | description: '从视频文件中提取音频(如果是视频)',
46 | status: (task.status === 'pending'
47 | ? 'waiting_confirm'
48 | : ['processing', 'transcribing', 'summarizing', 'completed'].includes(task.status)
49 | ? 'completed'
50 | : 'pending') as StepStatus,
51 | canConfirm: task.status === 'pending',
52 | onConfirm: () => handleStepConfirm('extract'),
53 | result: task.status !== 'pending' ? (
54 |
55 |
56 | 音频提取完成
57 |
58 | ) : null,
59 | },
60 | {
61 | id: 'transcribe',
62 | name: '音频转写',
63 | description: '使用 AI 将音频转换为文字',
64 | status: (isTranscribeCompleted
65 | ? 'completed'
66 | : task.status === 'transcribing'
67 | ? 'processing'
68 | : task.status === 'processing'
69 | ? 'waiting_confirm'
70 | : 'pending') as StepStatus,
71 | canConfirm: task.status === 'processing',
72 | onConfirm: () => handleStepConfirm('transcribe'),
73 | result: isTranscribeCompleted ? (
74 |
75 |
76 | 转写完成,共 {transcript.segments.length} 条片段
77 |
78 | ) : null,
79 | },
80 | {
81 | id: 'summarize',
82 | name: '生成笔记',
83 | description: '使用 GPT 生成结构化笔记',
84 | status: (['summarizing', 'completed'].includes(task.status)
85 | ? task.status === 'summarizing'
86 | ? 'processing'
87 | : 'completed'
88 | : isTranscribeCompleted || task.status === 'transcribing'
89 | ? 'waiting_confirm'
90 | : 'pending') as StepStatus,
91 | canConfirm: isTranscribeCompleted, // 只有转录完成后才能生成笔记
92 | onConfirm: () => handleStepConfirm('summarize'),
93 | },
94 | ]
95 |
96 | setSteps(initialSteps)
97 | }, [task, transcript])
98 |
99 | // 使用 ref 来避免依赖问题
100 | const tasksRef = useRef(tasks)
101 | const updateTaskRef = useRef(updateTask)
102 |
103 | useEffect(() => {
104 | tasksRef.current = tasks
105 | updateTaskRef.current = updateTask
106 | }, [tasks, updateTask])
107 |
108 | const taskDetailLoadedRef = useRef<string | null>(null)
109 |
110 | // 初始加载任务详情
111 | useEffect(() => {
112 | if (!taskId) return
113 |
114 | // 防止重复加载同一个任务
115 | if (taskDetailLoadedRef.current === taskId) {
116 | return
117 | }
118 |
119 | const loadTaskDetail = async () => {
120 | taskDetailLoadedRef.current = taskId
121 | try {
122 | const response = await getTaskStatus(taskId)
123 | if (response.data.code === 200) {
124 | const taskData = response.data.data
125 | const currentTask = tasksRef.current.find((t) => t.id === taskId)
126 | updateTaskRef.current(taskId, {
127 | status: taskData.status,
128 | markdown: taskData.markdown || currentTask?.markdown || '',
129 | })
130 |
131 | // 更新转写结果
132 | if (taskData.transcript) {
133 | setTranscript(taskData.transcript)
134 | }
135 |
136 | // 更新步骤状态
137 | updateStepsStatus(taskData.status, taskData)
138 | }
139 | } catch (error) {
140 | console.error('加载任务详情失败:', error)
141 | // 加载失败时重置标记,允许重试
142 | if (taskDetailLoadedRef.current === taskId) {
143 | taskDetailLoadedRef.current = null
144 | }
145 | }
146 | }
147 |
148 | loadTaskDetail()
149 | }, [taskId])
150 |
151 | // 轮询任务状态
152 | useEffect(() => {
153 | if (!taskId) return
154 |
155 | const task = tasks.find((t) => t.id === taskId)
156 | // 如果任务已完成且不需要自动处理,直接返回,不启动轮询
157 | if (!task || (task.status === 'completed' && !autoProcess) || task.status === 'failed') {
158 | return
159 | }
160 |
161 | const interval = setInterval(async () => {
162 | try {
163 | const response = await getTaskStatus(taskId)
164 | if (response.data.code === 200) {
165 | const taskData = response.data.data
166 | const currentTask = tasksRef.current.find((t) => t.id === taskId)
167 |
168 | updateTaskRef.current(taskId, {
169 | status: taskData.status,
170 | markdown: taskData.markdown || currentTask?.markdown || '',
171 | })
172 |
173 | // 更新转写结果
174 | if (taskData.transcript) {
175 | setTranscript(taskData.transcript)
176 | }
177 |
178 | // 更新步骤状态
179 | updateStepsStatus(taskData.status, taskData)
180 |
181 | // 如果任务已完成或失败,停止轮询
182 | if (taskData.status === 'completed' || taskData.status === 'failed') {
183 | clearInterval(interval)
184 | setAutoProcess(false)
185 | }
186 | }
187 | } catch (error) {
188 | console.error('轮询失败:', error)
189 | }
190 | }, 2000)
191 |
192 | return () => clearInterval(interval)
193 | }, [taskId, autoProcess, task?.status])
194 |
195 | const updateStepsStatus = (status: string, taskData: any) => {
196 | setSteps((prev) =>
197 | prev.map((step) => {
198 | // 检查转录步骤:如果有 transcript 数据,即使状态是 transcribing,也标记为完成
199 | if (step.id === 'transcribe') {
200 | if (taskData.transcript && taskData.transcript.segments) {
201 | // 转录已完成
202 | return {
203 | ...step,
204 | status: 'completed' as StepStatus,
205 | result: (
206 |
207 |
208 | 转写完成,共 {taskData.transcript.segments.length} 条片段
209 |
210 | ),
211 | }
212 | } else if (status === 'transcribing') {
213 | // 正在转录中
214 | return { ...step, status: 'processing' as StepStatus }
215 | }
216 | }
217 |
218 | // 提取音频步骤
219 | if (status === 'processing' && step.id === 'extract') {
220 | return { ...step, status: 'processing' as StepStatus }
221 | }
222 |
223 | // 如果状态从 processing 变为其他状态,且提取已完成
224 | if (status !== 'pending' && step.id === 'extract' && step.status === 'processing') {
225 | return {
226 | ...step,
227 | status: 'completed' as StepStatus,
228 | result: (
229 |
230 |
231 | 音频提取完成
232 |
233 | ),
234 | }
235 | }
236 |
237 | // 生成笔记步骤
238 | if (status === 'summarizing' && step.id === 'summarize') {
239 | return { ...step, status: 'processing' as StepStatus }
240 | }
241 |
242 | // 所有步骤完成
243 | if (status === 'completed') {
244 | if (step.status === 'processing') {
245 | let result = null
246 | if (step.id === 'extract') {
247 | result = (
248 |
249 |
250 | 音频提取完成
251 |
252 | )
253 | } else if (step.id === 'transcribe' && taskData.transcript) {
254 | result = (
255 |
256 |
257 | 转写完成,共 {taskData.transcript.segments?.length || 0} 条片段
258 |
259 | )
260 | } else if (step.id === 'summarize' && taskData.markdown) {
261 | result = (
262 |
263 |
264 | 笔记生成完成
265 |
266 | )
267 | }
268 | return { ...step, status: 'completed' as StepStatus, result }
269 | }
270 | }
271 | return step
272 | })
273 | )
274 | }
275 |
276 | const handleStepConfirm = async (stepId: string) => {
277 | try {
278 | // 更新步骤状态为处理中
279 | setSteps((prev) =>
280 | prev.map((step) =>
281 | step.id === stepId ? { ...step, status: 'processing' as StepStatus } : step
282 | )
283 | )
284 |
285 | // 调用后端确认步骤(目前后端自动处理,这里主要是触发状态更新)
286 | await confirmStep(taskId, stepId)
287 | setAutoProcess(true)
288 | } catch (error) {
289 | console.error('确认步骤失败:', error)
290 | // 恢复状态
291 | setSteps((prev) =>
292 | prev.map((step) =>
293 | step.id === stepId ? { ...step, status: 'waiting_confirm' as StepStatus } : step
294 | )
295 | )
296 | }
297 | }
298 |
299 | if (!task) return null
300 |
301 | const currentStepIndex = steps.findIndex((s) => s.status === 'processing' || s.status === 'waiting_confirm')
302 |
303 | return (
304 |
305 |
306 |
307 |
308 |
{task.filename}
309 |
任务 ID: {task.id}
310 |
311 |
317 |
318 |
319 |
320 | {/* 左侧:步骤和结果 */}
321 |
322 |
处理步骤
323 |
324 |
325 |
326 | {/* 右侧:内容预览 */}
327 |
328 |
329 |
内容预览
330 | {task.status === 'completed' && task.markdown && (
331 |
345 | )}
346 |
347 |
348 |
349 | {(() => {
350 | // 优先显示笔记(如果存在且已完成)
351 | if (task.markdown && (task.status === 'completed' || task.markdown.length > 0)) {
352 | return
353 | }
354 |
355 | // 其次显示转写结果(如果存在)
356 | if (transcript && transcript.segments && transcript.segments.length > 0) {
357 | return
358 | }
359 |
360 | // 处理中显示加载
361 | if (task.status === 'processing' || task.status === 'transcribing' || task.status === 'summarizing') {
362 | return (
363 |
364 |
365 |
366 |
正在处理中...
367 |
368 |
369 | )
370 | }
371 |
372 | // 等待状态
373 | return (
374 |
377 | )
378 | })()}
379 |
380 |
381 |
382 |
383 |
384 | )
385 | }
386 |
387 |
--------------------------------------------------------------------------------
/backend/app/routers/download.py:
--------------------------------------------------------------------------------
1 | import asyncio
2 | import base64
3 | import json
4 | import os
5 | import shutil
6 | import subprocess
7 | import tempfile
8 | import uuid
9 | from typing import Optional
10 | from pathlib import Path
11 |
12 | from fastapi import APIRouter, BackgroundTasks, HTTPException
13 | from pydantic import BaseModel
14 | from app.utils.logger import get_logger
15 |
16 | logger = get_logger(__name__)
17 |
18 | router = APIRouter()
19 |
20 | # 用于跟踪登录会话
21 | _login_sessions: dict = {}
22 |
23 | # 用于跟踪下载合并任务
24 | _download_tasks: dict = {}
25 |
26 |
27 | class DownloadRequest(BaseModel):
28 | url: str
29 | cookie: Optional[str] = ''
30 | quality: Optional[str] = 'best'
31 |
32 |
33 | class StartLoginResponse(BaseModel):
34 | session_id: str
35 | qr_image_base64: str
36 |
37 |
38 | @router.post("/download/bilibili/start_login", response_model=StartLoginResponse)
39 | async def start_bilibili_login():
40 | """
41 | 使用 Playwright 打开哔哩哔哩登录页面,截取二维码并返回 base64 图片以及 session_id。
42 | 后续客户端应定期轮询 /download/bilibili/login_status?session_id=... 来检查是否登录成功。
43 | """
44 | # 将整个启动流程外层捕获异常并记录,方便定位导致 500 的问题
45 | try:
46 | try:
47 | from playwright.async_api import async_playwright
48 | except Exception as e:
49 | logger.error("Playwright 未安装: %s", e)
50 | raise HTTPException(status_code=500, detail="服务器未安装 playwright,请安装并运行 'playwright install' 后重试")
51 |
52 | session_id = uuid.uuid4().hex
53 | tmpdir = Path(tempfile.mkdtemp(prefix=f"bili_login_{session_id}_"))
54 | qr_path = tmpdir / "qr.png"
55 | storage_path = tmpdir / "storage_state.json"
56 |
57 | async def _login_task():
58 | try:
59 | async with async_playwright() as pw:
60 | # Headless mode is controlled via the BILI_PLAYWRIGHT_HEADLESS env var (defaults to headed so the login QR can render; set it to 1/true on servers without a display)
61 | headless_env = os.getenv("BILI_PLAYWRIGHT_HEADLESS", "0").lower()
62 | headless = not (headless_env in ("0", "false", "no"))
63 | browser = await pw.chromium.launch(headless=headless)
64 | context = await browser.new_context()
65 | page = await context.new_page()
66 | await page.goto("https://passport.bilibili.com/login")
67 |
68 | # 等待二维码元素出现并截图
69 | try:
70 | qr_el = await page.wait_for_selector("img.qrcode-img, img[data-type='qrcode']", timeout=15000)
71 | except Exception:
72 | # 有时候页面需要点击“二维码登录”切换
73 | try:
74 | btn = await page.query_selector("a[href*='qrcode']")
75 | if btn:
76 | await btn.click()
77 | qr_el = await page.wait_for_selector("img.qrcode-img, img[data-type='qrcode']", timeout=10000)
78 | else:
79 | qr_el = None
80 | except Exception:
81 | qr_el = None
82 |
83 | if qr_el:
84 | await qr_el.screenshot(path=str(qr_path))
85 | else:
86 | # fallback:截图整个页面
87 | await page.screenshot(path=str(qr_path), full_page=False)
88 |
89 | # 轮询等待登录完成(检查是否存在登录用户的 cookie)
90 | logged_in = False
91 | for _ in range(180): # 最多等待 ~180*1s = 3分钟
92 | cookies = await context.cookies()
93 | cookie_names = {c.get("name", "").lower() for c in cookies}
94 | # 要避免误判,仅在至少有 sessdata(哔哩哔哩关键登录 cookie)时认为已登录
95 | if "sessdata" in cookie_names:
96 | logged_in = True
97 | # 仅保存 storage_state 到文件(不在日志中输出 cookie 内容)
98 | await context.storage_state(path=str(storage_path))
99 | break
100 | await asyncio.sleep(1)
101 |
102 | # 关闭浏览器
103 | await browser.close()
104 | # 标记会话
105 | _login_sessions[session_id]["finished"] = logged_in
106 | if logged_in:
107 | _login_sessions[session_id]["storage"] = str(storage_path)
108 | else:
109 | _login_sessions[session_id]["storage"] = None
110 | except Exception as e:
111 | logger.exception("Playwright 登录任务失败: %s", e)
112 | _login_sessions[session_id]["error"] = str(e)
113 |
114 | # 保存会话元信息并启动后台任务
115 | _login_sessions[session_id] = {
116 | "tmpdir": str(tmpdir),
117 | "qr_path": str(qr_path),
118 | "storage": None,
119 | "finished": False,
120 | "error": None,
121 | }
122 |
123 | # Start the login flow as a background task
124 | asyncio.create_task(_login_task())
125 |
126 | # 等待一段时间让 qr.png 生成(最长等待 15 秒),并在任务出错时提前返回错误信息
127 | for _ in range(75): # 75 * 0.2 = 15s
128 | if qr_path.exists():
129 | break
130 | # 如果后台任务已记录错误,返回详细信息
131 | sess_info = _login_sessions.get(session_id)
132 | if sess_info and sess_info.get("error"):
133 | raise HTTPException(status_code=500, detail=f"启动登录任务失败: {sess_info.get('error')}")
134 | await asyncio.sleep(0.2)
135 |
136 | if not qr_path.exists():
137 | # 如果没有生成二维码,记录更多调试信息(但不记录敏感 cookie)
138 | sess_info = _login_sessions.get(session_id, {})
139 | err = sess_info.get("error") or "无法生成二维码图片,请检查 Playwright 是否可用或环境是否允许打开浏览器"
140 | try:
141 | logger.error("start_bilibili_login: 未生成二维码,session=%s tmpdir=%s error=%s", session_id, sess_info.get("tmpdir"), sess_info.get("error"))
142 | td = Path(sess_info.get("tmpdir") or "")
143 | if td.exists() and td.is_dir():
144 | contents = [p.name for p in td.iterdir()]
145 | logger.error("start_bilibili_login: tmpdir 内容: %s", contents)
146 | else:
147 | logger.error("start_bilibili_login: tmpdir 不存在或不可访问: %s", td)
148 | except Exception:
149 | logger.exception("记录 tmpdir 内容时出错")
150 | raise HTTPException(status_code=500, detail=err)
151 |
152 | b64 = base64.b64encode(qr_path.read_bytes()).decode("utf-8")
153 | return {"session_id": session_id, "qr_image_base64": b64}
154 | except HTTPException:
155 | # 已是明确的 HTTP 错误,直接抛出,便于前端显示
156 | raise
157 | except Exception as e:
158 | # 捕获其它未处理异常并记录完整堆栈,返回简洁错误给前端
159 | logger.exception("start_bilibili_login 未捕获异常: %s", e)
160 | raise HTTPException(status_code=500, detail=f"启动登录失败(查看后端日志获取详细信息)")
161 |
162 |
163 | @router.get("/download/bilibili/login_status")
164 | async def bilibili_login_status(session_id: str):
165 | """
166 | 查询登录状态,返回 { finished: bool, error: str|null }
167 | """
168 | sess = _login_sessions.get(session_id)
169 | if not sess:
170 | raise HTTPException(status_code=404, detail="session_id 未找到")
171 | return {"finished": bool(sess.get("finished")), "error": sess.get("error")}
172 |
173 |
174 | def _write_netscape_cookies(cookies, out_path: str):
175 | """
176 | 将 Playwright storage_state 中的 cookies 写入 Netscape cookies.txt 格式,供 yt-dlp 使用 --cookies 参数。
177 | """
178 | lines = []
179 | for c in cookies:
180 | domain = c.get("domain", "")
181 | flag = "TRUE" if domain.startswith(".") else "FALSE"
182 | path = c.get("path", "/")
183 | secure = "TRUE" if c.get("secure", False) else "FALSE"
184 | expires = str(int(c.get("expires", 0))) if c.get("expires") else "0"
185 | name = c.get("name", "")
186 | value = c.get("value", "")
187 | lines.append("\t".join([domain, flag, path, secure, expires, name, value]))
188 | with open(out_path, "w", encoding="utf-8") as fh:
189 | fh.write("# Netscape HTTP Cookie File\n")
190 | fh.write("\n".join(lines))
191 |
192 |
193 | @router.post("/download/bilibili")
194 | async def download_bilibili(req: DownloadRequest, background_tasks: BackgroundTasks):
195 | """
196 | 最终下载接口。优先尝试使用提供的 cookie(或直接使用 yt-dlp),
197 | 如果传入 session_id(来自登录流程),将使用 Playwright 保存下来的 storage_state 中的 cookie。
198 | """
199 | url = req.url
200 | cookie = (req.cookie or "").strip()
201 | quality = req.quality or "best"
202 |
203 | # 检查 yt-dlp
204 | try:
205 | subprocess.run(["yt-dlp", "--version"], capture_output=True, text=True, check=True)
206 | except Exception as e:
207 | logger.error("yt-dlp 未安装或不可用: %s", e)
208 | raise HTTPException(status_code=500, detail="服务器未安装 yt-dlp,请先安装 yt-dlp 后重试")
209 |
210 | # 如果 cookie 字符串看起来是 session_id(我们在 start_login 中返回),优先使用 storage_state
211 | storage_state_path = None
212 | if cookie and cookie.startswith("session:"):
213 | session_id = cookie.split("session:", 1)[1]
214 | sess = _login_sessions.get(session_id)
215 | if not sess:
216 | raise HTTPException(status_code=400, detail="无效的 session_id")
217 | if not sess.get("finished") or not sess.get("storage"):
218 | raise HTTPException(status_code=400, detail="会话尚未完成登录")
219 | storage_state_path = sess.get("storage")
220 |
221 | # 如果提供了 storage_state_path,读取 cookies 并写为 cookies.txt
222 | cookies_file = None
223 | if storage_state_path:
224 | try:
225 | with open(storage_state_path, "r", encoding="utf-8") as fh:
226 | st = json.load(fh)
227 | cookies = st.get("cookies", [])
228 | cookies_file = tempfile.mktemp(prefix="bili_cookies_", suffix=".txt")
229 | _write_netscape_cookies(cookies, cookies_file)
230 | except Exception as e:
231 | logger.exception("读取 storage_state 失败: %s", e)
232 | raise HTTPException(status_code=500, detail="读取登录会话的 cookie 失败")
233 |
234 | # 如果直接提供了 cookie 字符串(纯 Netscape 或 "name=val; ..."),使用 --add-header 或 --cookies
235 | cmd = ["yt-dlp", "-j", url]
236 | if cookies_file:
237 | cmd += ["--cookies", cookies_file]
238 | elif cookie:
239 | # 简单地将原始 Cookie 字符串作为请求头传入
240 | cmd += ["--add-header", f"Cookie: {cookie}"]
241 |
242 | try:
243 | proc = subprocess.run(cmd, capture_output=True, text=True, check=True, timeout=120)
244 | info_json = json.loads(proc.stdout)
245 | except subprocess.CalledProcessError as e:
246 | logger.error("yt-dlp 获取信息失败: %s %s", e, e.stderr)
247 | raise HTTPException(status_code=500, detail=f"解析视频信息失败: {e.stderr[:200]}")
248 | except Exception as e:
249 | logger.exception("解析 yt-dlp 输出失败: %s", e)
250 | raise HTTPException(status_code=500, detail="解析视频信息失败")
251 |
252 | # 清理临时 cookies 文件(延迟清理)
253 | if cookies_file:
254 | background_tasks.add_task(lambda p: os.remove(p) if os.path.exists(p) else None, cookies_file)
255 |
256 | # 选择格式(与之前逻辑相同)
257 | formats = info_json.get("formats", []) or []
258 | selected = None
259 | if quality == "best":
260 | formats_with_height = [f for f in formats if f.get("height")]
261 | if formats_with_height:
262 | formats_with_height.sort(key=lambda x: (x.get("height") or 0, x.get("tbr") or 0), reverse=True)
263 | selected = formats_with_height[0]
264 | else:
265 | try:
266 | target_h = int(quality.replace("p", ""))
267 | cand = [f for f in formats if (f.get("height") or 0) == target_h]
268 | if cand:
269 | cand.sort(key=lambda x: x.get("tbr") or 0, reverse=True)
270 | selected = cand[0]
271 | except Exception:
272 | selected = None
273 |
274 | if not selected:
275 | if info_json.get("url"):
276 | return {"download_url": info_json.get("url"), "filename": info_json.get("title")}
277 | if formats:
278 | formats.sort(key=lambda x: (x.get("filesize") or 0, x.get("tbr") or 0), reverse=True)
279 | selected = formats[0]
280 |
281 | if not selected:
282 | raise HTTPException(status_code=500, detail="未能找到可下载的格式")
283 |
284 | download_url = selected.get("url")
285 | filename = info_json.get("title") or "video"
286 | ext = selected.get("ext")
287 | if ext:
288 | filename = f"{filename}.{ext}"
289 | # 如果下载地址是 m3u8(或协议为 m3u8_native),则尝试使用 yt-dlp 下载并合并为单一文件(保存到 UPLOAD_DIR),然后返回静态 URL
290 | try:
291 | upload_dir = os.getenv("UPLOAD_DIR", "uploads")
292 | Path(upload_dir).mkdir(parents=True, exist_ok=True)
293 |
294 | is_m3u8 = False
295 | proto = selected.get("protocol") or ""
296 | if proto == "m3u8_native":
297 | is_m3u8 = True
298 | if download_url and (".m3u8" in download_url or (selected.get("ext") or "") == "m3u8"):
299 | is_m3u8 = True
300 |
301 | if is_m3u8:
302 | # 输出文件名:使用标题 + uuid,强制 mp4
303 | safe_title = "".join(c for c in (info_json.get("title") or "video") if c.isalnum() or c in " _-").strip()[:120] or "video"
304 | out_basename = f"{safe_title}_{uuid.uuid4().hex[:8]}.mp4"
305 | out_path = os.path.join(upload_dir, out_basename)
306 |
307 | # 构建 yt-dlp 下载命令,使用 --merge-output-format mp4 以确保合并
308 | ytdlp_cmd = ["yt-dlp", "-f", "best", "--merge-output-format", "mp4", "-o", out_path, url]
309 | if cookies_file:
310 | ytdlp_cmd += ["--cookies", cookies_file]
311 | elif cookie:
312 | ytdlp_cmd += ["--add-header", f"Cookie: {cookie}"]
313 |
314 | # 将下载/合并任务提交为后台任务(非阻塞)
315 | task_id = uuid.uuid4().hex
316 | _download_tasks[task_id] = {
317 | "status": "pending",
318 | "progress": 0,
319 | "log": "",
320 | "output": None,
321 | "error": None,
322 | }
323 |
324 | def _run_task(tid: str, cmd: list, outp: str, cookiesfile: Optional[str]):
325 | try:
326 | _download_tasks[tid]["status"] = "running"
327 | _download_tasks[tid]["log"] += f"执行命令: {' '.join(cmd)}\n"
328 | # 使用 subprocess.Popen 以便实时读取输出并更新日志/进度
329 | with subprocess.Popen(cmd, stdout=subprocess.PIPE, stderr=subprocess.STDOUT, text=True, bufsize=1) as proc:
330 | for line in proc.stdout:
331 | _download_tasks[tid]["log"] += line
332 | # 简单根据输出判断进度(若包含 %)
333 | try:
334 | if "%" in line:
335 | # 提取第一个出现的百分比数字
336 | import re
337 | m = re.search(r"(\d{1,3}\.\d|\d{1,3})%", line)
338 | if m:
339 | p = float(m.group(1))
340 | _download_tasks[tid]["progress"] = int(min(max(p, 0), 100))
341 | except Exception:
342 | pass
343 | ret = proc.wait()
344 | if ret != 0:
345 | _download_tasks[tid]["status"] = "failed"
346 | _download_tasks[tid]["error"] = f"yt-dlp 退出码 {ret}"
347 | return
348 |
349 | # 成功后标记输出并返回静态 URL
350 | _download_tasks[tid]["status"] = "completed"
351 | _download_tasks[tid]["output"] = outp
352 | except Exception as e:
353 | logger.exception("后台下载任务失败: %s", e)
354 | _download_tasks[tid]["status"] = "failed"
355 | _download_tasks[tid]["error"] = str(e)
356 | finally:
357 | # 清理 cookies 临时文件
358 | try:
359 | if cookiesfile and os.path.exists(cookiesfile):
360 | os.remove(cookiesfile)
361 | except Exception:
362 | pass
363 |
364 | # 启动后台线程
365 | import threading
366 | thread_cmd = ytdlp_cmd.copy()
367 | thread = threading.Thread(target=_run_task, args=(task_id, thread_cmd, out_path, cookies_file), daemon=True)
368 | thread.start()
369 |
370 | # 返回任务 id,前端可以轮询 /download/bilibili/task_status?task_id=...
371 | return {"task_id": task_id, "message": "已开始后台合并,使用 task_id 查询进度"}
372 |
373 | except Exception as e:
374 | logger.exception("合并处理时发生错误: %s", e)
375 |
376 | return {"download_url": download_url, "filename": filename, "format_note": selected.get("format_note")}
377 |
378 |
379 | @router.get("/download/bilibili/task_status")
380 | async def bilibili_task_status(task_id: str):
381 | """
382 | 查询后台下载/合并任务状态,返回:
383 | { status: 'pending'|'running'|'completed'|'failed', progress: int, log: str, output: str|null, error: str|null }
384 | 如果 status == 'completed',output 为服务器静态路径,例如 /api/uploads/xxx.mp4
385 | """
386 | task = _download_tasks.get(task_id)
387 | if not task:
388 | raise HTTPException(status_code=404, detail="task_id 未找到")
389 | return {
390 | "status": task.get("status"),
391 | "progress": task.get("progress", 0),
392 | "log": task.get("log", ""),
393 | "output": task.get("output"),
394 | "error": task.get("error"),
395 | }
396 |
397 |
398 |
--------------------------------------------------------------------------------
/frontend/src/components/EnhancedMarkdownViewer.tsx:
--------------------------------------------------------------------------------
1 | import ReactMarkdown from 'react-markdown'
2 | import remarkGfm from 'remark-gfm'
3 | import 'github-markdown-css/github-markdown.css'
4 | import { Copy, Download, FileDown } from 'lucide-react'
5 | import { useState, useRef } from 'react'
6 | import toast from 'react-hot-toast'
7 | import Zoom from 'react-medium-image-zoom'
8 | import 'react-medium-image-zoom/dist/styles.css'
9 | import jsPDF from 'jspdf'
10 | import html2canvas from 'html2canvas'
11 | import { exportPDF } from '../services/api'
12 |
13 | interface EnhancedMarkdownViewerProps {
14 | markdown: string
15 | filename?: string
16 | taskId?: string
17 | }
18 |
19 | // 获取API基础URL
20 | const getBaseURL = () => {
21 | const baseURL = import.meta.env.VITE_API_BASE_URL || '/api'
22 | return baseURL.replace(/\/$/, '')
23 | }
24 |
25 | export default function EnhancedMarkdownViewer({
26 | markdown,
27 | filename,
28 | taskId,
29 | }: EnhancedMarkdownViewerProps) {
30 | const [copied, setCopied] = useState(false)
31 | const markdownRef = useRef<HTMLDivElement>(null)
32 | const baseURL = getBaseURL()
33 |
34 | const handleCopy = async () => {
35 | try {
36 | await navigator.clipboard.writeText(markdown)
37 | setCopied(true)
38 | toast.success('已复制到剪贴板')
39 | setTimeout(() => setCopied(false), 2000)
40 | } catch (e) {
41 | toast.error('复制失败')
42 | }
43 | }
44 |
45 | const handleDownloadMarkdown = async () => {
46 | try {
47 | toast.loading('正在处理图片,请稍候...', { id: 'markdown-processing' })
48 |
49 | const name = filename?.replace(/\.[^/.]+$/, '') || 'note'
50 |
51 | // 将 markdown 中的图片路径转换为 base64
52 | let processedMarkdown = markdown
53 | const imageRegex = /!\[\]\((.*?)\)/g
54 | const imageMatches = Array.from(markdown.matchAll(imageRegex))
55 |
56 | // 处理所有图片
57 | for (const match of imageMatches) {
58 | const imageUrl = match[1]
59 |
60 | // 如果已经是 base64,跳过
61 | if (imageUrl.startsWith('data:')) {
62 | continue
63 | }
64 |
65 | try {
66 | // 构建完整的图片 URL
67 | let fullImageUrl = imageUrl
68 | if (imageUrl.startsWith('/api/')) {
69 | // 已经是完整路径,使用 baseURL
70 | const apiBaseURL = import.meta.env.VITE_API_BASE_URL || ''
71 | if (apiBaseURL && !imageUrl.startsWith('http')) {
72 | fullImageUrl = `${apiBaseURL}${imageUrl}`
73 | } else if (!imageUrl.startsWith('http')) {
74 | // 使用当前页面的 origin
75 | fullImageUrl = `${window.location.origin}${imageUrl}`
76 | }
77 | } else if (imageUrl.startsWith('/')) {
78 | fullImageUrl = `${window.location.origin}${imageUrl}`
79 | }
80 |
81 | // 获取图片并转换为 base64
82 | const response = await fetch(fullImageUrl)
83 | if (response.ok) {
84 | const blob = await response.blob()
85 | const reader = new FileReader()
86 | const base64 = await new Promise<string>((resolve, reject) => {
87 | reader.onloadend = () => {
88 | if (typeof reader.result === 'string') {
89 | resolve(reader.result)
90 | } else {
91 | reject(new Error('Failed to convert image to base64'))
92 | }
93 | }
94 | reader.onerror = reject
95 | reader.readAsDataURL(blob)
96 | })
97 |
98 | // 替换 markdown 中的图片路径
99 | processedMarkdown = processedMarkdown.replace(match[0], `![](${base64})`)
100 | } else {
101 | console.warn(`Failed to fetch image: ${fullImageUrl}`)
102 | }
103 | } catch (error) {
104 | console.error(`Error processing image ${imageUrl}:`, error)
105 | // 如果图片处理失败,保留原路径
106 | }
107 | }
108 |
109 | const blob = new Blob([processedMarkdown], { type: 'text/markdown;charset=utf-8' })
110 | const link = document.createElement('a')
111 | link.href = URL.createObjectURL(blob)
112 | link.download = `${name}.md`
113 | document.body.appendChild(link)
114 | link.click()
115 | document.body.removeChild(link)
116 | URL.revokeObjectURL(link.href)
117 |
118 | toast.dismiss('markdown-processing')
119 | toast.success('Markdown文件已下载(图片已嵌入)')
120 | } catch (error) {
121 | console.error('下载 Markdown 失败:', error)
122 | toast.dismiss('markdown-processing')
123 | toast.error('下载失败,请稍后重试')
124 | }
125 | }
126 |
127 | const handleDownloadPDF = async () => {
128 | // 如果提供了 taskId,优先使用后端 API 生成可复制文本的 PDF
129 | if (taskId) {
130 | try {
131 | toast.loading('正在生成PDF(可复制文本),请稍候...', { id: 'pdf-generating' })
132 | const response = await exportPDF(taskId)
133 |
134 | // 创建下载链接
135 | const blob = new Blob([response.data], { type: 'application/pdf' })
136 | const url = window.URL.createObjectURL(blob)
137 | const link = document.createElement('a')
138 | link.href = url
139 | const name = filename?.replace(/\.[^/.]+$/, '') || 'note'
140 | link.download = `${name}.pdf`
141 | document.body.appendChild(link)
142 | link.click()
143 | document.body.removeChild(link)
144 | window.URL.revokeObjectURL(url)
145 |
146 | toast.dismiss('pdf-generating')
147 | toast.success('PDF文件已下载(文字可复制)')
148 | return
149 | } catch (error: any) {
150 | console.error('后端PDF生成失败,使用前端生成:', error)
151 | // 如果后端失败,fallback 到前端生成
152 | if (error.response?.status !== 404) {
153 | toast.dismiss('pdf-generating')
154 | toast.error('后端PDF生成失败,使用前端生成方式')
155 | }
156 | }
157 | }
158 |
159 | // 前端生成 PDF(图片模式,文字不可复制)
160 | if (!markdownRef.current) {
161 | toast.error('无法生成PDF,请稍后重试')
162 | return
163 | }
164 |
165 | try {
166 | if (!taskId) {
167 | toast.loading('正在生成PDF(图片模式),请稍候...', { id: 'pdf-generating' })
168 | }
169 |
170 | // 等待所有图片加载完成
171 | const images = markdownRef.current.querySelectorAll('img')
172 | const imagePromises = Array.from(images).map((img) => {
173 | if (img.complete && img.naturalHeight !== 0) {
174 | return Promise.resolve()
175 | }
176 | return new Promise((resolve) => {
177 | const timeout = setTimeout(() => {
178 | resolve(null) // 超时也继续
179 | }, 10000)
180 |
181 | img.onload = () => {
182 | clearTimeout(timeout)
183 | resolve(null)
184 | }
185 | img.onerror = () => {
186 | clearTimeout(timeout)
187 | resolve(null) // 即使加载失败也继续
188 | }
189 | })
190 | })
191 | await Promise.all(imagePromises)
192 |
193 | // 使用html2canvas将内容转换为canvas,改进配置以避免图片截断
194 | const canvas = await html2canvas(markdownRef.current, {
195 | scale: 2,
196 | useCORS: true,
197 | allowTaint: false,
198 | logging: false,
199 | backgroundColor: '#ffffff',
200 | width: markdownRef.current.scrollWidth,
201 | height: markdownRef.current.scrollHeight,
202 | windowWidth: markdownRef.current.scrollWidth,
203 | windowHeight: markdownRef.current.scrollHeight,
204 | onclone: (clonedDoc) => {
205 | // 确保克隆文档中的图片都已加载
206 | const clonedImages = clonedDoc.querySelectorAll('img')
207 | clonedImages.forEach((img: HTMLImageElement) => {
208 | if (!img.complete) {
209 | img.style.display = 'none'
210 | }
211 | })
212 | }
213 | })
214 |
215 | // 计算PDF尺寸
216 | const imgWidth = canvas.width
217 | const imgHeight = canvas.height
218 | const pdfWidth = 210 // A4宽度(mm)
219 | const pdfHeight = (imgHeight * pdfWidth) / imgWidth
220 |
221 | // 创建PDF
222 | const pdf = new jsPDF('p', 'mm', 'a4')
223 | const pageHeight = pdf.internal.pageSize.height
224 | const pageWidth = pdf.internal.pageSize.width
225 | const margin = 10 // 页边距(mm)
226 | const contentWidth = pageWidth - 2 * margin
227 |
228 | // 计算每页可以容纳的高度
229 | const contentHeightPerPage = pageHeight - 2 * margin
230 | const totalPages = Math.ceil(pdfHeight / contentHeightPerPage)
231 |
232 | let yPosition = -margin // 从顶部开始,减去 margin 因为 addImage 的 y 是相对于页面的
233 |
234 | // 添加第一页
235 | pdf.addImage(
236 | canvas.toDataURL('image/png', 0.95),
237 | 'PNG',
238 | margin,
239 | yPosition,
240 | contentWidth,
241 | pdfHeight
242 | )
243 |
244 | // 如果内容超过一页,添加更多页面
245 | for (let page = 1; page < totalPages; page++) {
246 | pdf.addPage()
247 | yPosition = -margin - (page * contentHeightPerPage)
248 | pdf.addImage(
249 | canvas.toDataURL('image/png', 0.95),
250 | 'PNG',
251 | margin,
252 | yPosition,
253 | contentWidth,
254 | pdfHeight
255 | )
256 | }
257 |
258 | // 下载PDF
259 | const name = filename?.replace(/\.[^/.]+$/, '') || 'note'
260 | pdf.save(`${name}.pdf`)
261 |
262 | toast.dismiss('pdf-generating')
263 | toast.success('PDF文件已下载(图片模式,文字不可复制)')
264 | } catch (error) {
265 | console.error('生成PDF失败:', error)
266 | toast.dismiss('pdf-generating')
267 | toast.error('生成PDF失败,请稍后重试')
268 | }
269 | }
270 |
271 | // 处理图片URL,确保使用正确的baseURL
272 | const processMarkdown = (md: string) => {
273 | // 由于markdown中的路径已经是 /api/note_results/screenshots/...
274 | // 而vite代理已经配置了 /api 代理,所以直接返回即可
275 | // 不需要再添加baseURL,避免重复
276 | return md
277 | }
278 |
279 | if (!markdown) {
280 | return (
281 |
282 | 暂无笔记内容
283 |
284 | )
285 | }
286 |
287 | const processedMarkdown = processMarkdown(markdown)
288 |
289 | return (
290 |
291 | {/* 固定头部工具栏 */}
292 |
293 |
294 |
笔记预览
295 | {filename &&
{filename}
}
296 |
297 |
298 |
306 |
314 |
322 |
323 |
324 |
325 | {/* Markdown内容区域 - 内容自然高度,参与父容器滚动 */}
326 |
327 |
331 |
{
336 | let src = props.src || ''
337 | // 如果已经是完整URL(http/https/data),直接使用
338 | if (src.startsWith('http') || src.startsWith('data:')) {
339 | // 已经是完整URL,不需要处理
340 | } else if (src.startsWith('/api/')) {
341 | // 路径已经包含 /api/,vite代理会处理,直接使用
342 | // 不需要再添加baseURL
343 | } else if (src.startsWith('/')) {
344 | // 其他以 / 开头的路径,可能需要添加baseURL
345 | // 但通常markdown中的路径已经是 /api/... 格式
346 | src = src
347 | } else {
348 | // 相对路径,添加baseURL
349 | src = `${baseURL}/${src}`
350 | }
351 |
352 | // 使用 figure 标签包裹图片,避免在 p 标签内嵌套 div
353 | return (
354 |
355 |
356 |
{
363 | // 如果图片加载失败,尝试使用完整URL
364 | const target = e.target as HTMLImageElement
365 | const originalSrc = props.src || ''
366 | console.warn('图片加载失败:', originalSrc, '当前src:', target.src)
367 | // 如果原始路径是 /api/ 开头,说明路径是正确的,可能是服务器问题
368 | // 不需要再次尝试修改URL
369 | }}
370 | />
371 |
372 |
373 | )
374 | },
375 | // 改进标题样式
376 | h1: ({ children, ...props }) => (
377 |
381 | {children}
382 |
383 | ),
384 | h2: ({ children, ...props }) => (
385 |
389 | {children}
390 |
391 | ),
392 | h3: ({ children, ...props }) => (
393 |
397 | {children}
398 |
399 | ),
400 | // 改进段落样式
401 | p: ({ children, ...props }) => {
402 | // 如果段落只包含一个图片,直接返回图片(不包裹在p中)
403 | if (
404 | Array.isArray(children) &&
405 | children.length === 1 &&
406 | typeof children[0] === 'object' &&
407 | children[0] !== null &&
408 | 'type' in children[0] &&
409 | (children[0] as any).type === 'figure'
410 | ) {
411 | return <>{children}>
412 | }
413 |
414 | return (
415 |
416 | {children}
417 |
418 | )
419 | },
420 | // 改进列表样式
421 | ul: ({ children, ordered, ...props }) => (
422 |
425 | ),
426 | ol: ({ children, ordered, ...props }) => (
427 |
428 | {children}
429 |
430 | ),
431 | // 改进代码块样式
432 | code: ({ inline, className, children, ...props }) => {
433 | if (!inline) {
434 | return (
435 |
439 | {children}
440 |
441 | )
442 | }
443 | return (
444 |
448 | {children}
449 |
450 | )
451 | },
452 | // 改进引用样式
453 | blockquote: ({ children, ...props }) => (
454 |
458 | {children}
459 |
460 | ),
461 | }}
462 | >
463 | {processedMarkdown}
464 |
465 |
466 |
467 |
468 | )
469 | }
470 |
--------------------------------------------------------------------------------