├── backend
    ├── db
    │   ├── __init__.py
    │   ├── job_store.py
    │   ├── transcript_crud.py
    │   ├── transcript_summary_crud.py
    │   └── transcript_translation_crud.py
    ├── ReAct
    │   ├── tools
    │   │   └── __init__.py
    │   ├── chat_tools_config.json
    │   ├── __init__.py
    │   ├── models.py
    │   ├── fastmcp_client_example.py
    │   ├── utils.py
    │   ├── test_chat_agent.py
    │   ├── tools_server.py
    │   ├── test_chat_request.py
    │   ├── action_parser.py
    │   ├── chat_prompt_builder.py
    │   ├── actions.py
    │   └── example.py
    ├── queues
    │   ├── __init__.py
    │   ├── tasks.py
    │   ├── __main__.py
    │   ├── tasks
    │   │   ├── __init__.py
    │   │   ├── progress_utils.py
    │   │   └── knowledge_base_stage.py
    │   └── worker_launcher.py
    ├── utils
    │   ├── __init__.py
    │   └── typing_defs.py
    ├── __init__.py
    ├── services
    │   ├── __init__.py
    │   ├── docs
    │   │   ├── embedding_文件名增强设计文档.md
    │   │   └── 多视频聊天服务调试经验.md
    │   ├── embedding_litellm_example.py
    │   ├── example_chat_service.py
    │   ├── upload_service.py
    │   └── chat_knowledge_service.py
    ├── text_process
    │   ├── __init__.py
    │   ├── translate
    │   │   └── __init__.py
    │   ├── docs
    │   │   ├── 翻译服务性能测试报告.md
    │   │   └── ChatGPT翻译技巧说明.md
    │   └── translate_batch_service.py
    ├── media_processing
    │   ├── audio
    │   │   ├── __init__.py
    │   │   ├── download
    │   │   │   ├── __init__.py
    │   │   │   └── xiaoyuzhou
    │   │   │   │   └── __init__.py
    │   │   └── local
    │   │   │   ├── __init__.py
    │   │   │   └── upload_handler.py
    │   ├── video
    │   │   ├── __init__.py
    │   │   ├── download
    │   │   │   ├── __init__.py
    │   │   │   ├── bilibili
    │   │   │   │   ├── __init__.py
    │   │   │   │   └── test_login_handler.py
    │   │   │   └── youtube
    │   │   │   │   ├── __init__.py
    │   │   │   │   ├── docs
    │   │   │   │       └── 下载问题修复记录.md
    │   │   │   │   ├── example.py
    │   │   │   │   └── test_login_handler.py
    │   │   └── local
    │   │   │   └── __init__.py
    │   ├── __init__.py
    │   ├── docs
    │   │   └── downloader_factory_design.md
    │   └── upload_handler.py
    ├── common_interfaces.py
    ├── routers
    │   ├── chat
    │   │   ├── __init__.py
    │   │   └── summarize_router.py
    │   └── __init__.py
    ├── main.py
    ├── requirements.txt
    ├── tests
    │   ├── test_asr_sentence_segments.py
    │   ├── test_audio2text.py
    │   ├── test_download_vedio.py
    │   ├── test_SenseVoiceSmall2.py
    │   ├── test_paraformer_a2t.py
    │   ├── test_SenseVoiceSmall.py
    │   ├── test_summarize.py
    │   ├── test_multi_platform_downloader.py
    │   ├── test_stream_translate.py
    │   ├── download_media_file.py
    │   └── test_download_media.py
    ├── schemas.py
    ├── .env.example
    ├── llm_test.py
    ├── Dockerfile
    ├── docker-compose.yml
    ├── app.py
    └── test_litellm.py
├── ASRBackend
    ├── tests
    │   ├── __init__.py
    │   └── test_asr_segments.py
    ├── supabase_utils
    │   ├── example_tests
    │   │   ├── test.txt
    │   │   └── test_upload_file.py
    │   ├── test_supabase_upload.py
    │   └── docs
    │   │   └── supabase_upload_design.md
    ├── __init__.py
    ├── .dockerignore
    ├── services
    │   └── __init__.py
    ├── routers
    │   └── __init__.py
    ├── asr_functions
    │   ├── utils
    │   │   ├── __init__.py
    │   │   └── language_utils.py
    │   ├── __init__.py
    │   └── docs
    │   │   ├── asr_sentence_segments设计文档.md
    │   │   └── dashscope_paraformer_v2_transcription设计文档.md
    ├── .gitignore
    ├── requirements-cloud.txt
    ├── docker-compose.cloud.yml
    ├── docker-compose.local.yml
    ├── requirements-local.txt
    ├── Dockerfile.cloud
    ├── Dockerfile.local
    ├── .env.example
    └── main.py
├── frontend
    ├── src
    │   ├── components
    │   │   ├── LeftPanel
    │   │   │   ├── index.ts
    │   │   │   ├── docs
    │   │   │   │   └── 组件设计说明.md
    │   │   │   ├── hooks.tsx
    │   │   │   └── LeftPanel.tsx
    │   │   ├── RightPanel
    │   │   │   ├── index.ts
    │   │   │   ├── types.ts
    │   │   │   ├── Chat
    │   │   │   │   ├── index.ts
    │   │   │   │   ├── ChatToolbar.tsx
    │   │   │   │   ├── MessageInput.tsx
    │   │   │   │   └── docs
    │   │   │   │   │   └── 消息列表排版设计说明.md
    │   │   │   ├── hooks
    │   │   │   │   ├── index.ts
    │   │   │   │   ├── useSegmentHandlers.ts
    │   │   │   │   ├── useTranslationLanguages.ts
    │   │   │   │   ├── useSearchHandlers.ts
    │   │   │   │   └── useLanguageSwitch.ts
    │   │   │   ├── docs
    │   │   │   │   ├── 组件设计说明.md
    │   │   │   │   └── 翻译对话框设计说明.md
    │   │   │   ├── LanguageSwitcher.tsx
    │   │   │   └── TranscriptTab.tsx
    │   │   ├── ui
    │   │   │   ├── label.tsx
    │   │   │   ├── separator.tsx
    │   │   │   ├── textarea.tsx
    │   │   │   ├── input.tsx
    │   │   │   ├── docs
    │   │   │   │   └── 组件设计说明.md
    │   │   │   ├── switch.tsx
    │   │   │   ├── scroll-area.tsx
    │   │   │   ├── resizable.tsx
    │   │   │   ├── tabs.tsx
    │   │   │   ├── button.tsx
    │   │   │   └── card.tsx
    │   │   ├── MarkdownRenderer.tsx
    │   │   └── docs
    │   │   │   ├── ProgressCard设计说明.md
    │   │   │   ├── VideoPlayer设计说明.md
    │   │   │   └── FileUploader设计说明.md
    │   ├── App.tsx
    │   ├── hooks
    │   │   ├── index.ts
    │   │   ├── docs
    │   │   │   └── 设计说明.md
    │   │   └── useVideoSync.ts
    │   ├── lib
    │   │   ├── utils.ts
    │   │   └── docs
    │   │   │   └── 工具库设计说明.md
    │   ├── utils
    │   │   ├── pendingUrl.ts
    │   │   ├── message.ts
    │   │   └── language-detector.ts
    │   ├── types
    │   │   ├── react-markdown.d.ts
    │   │   └── global.d.ts
    │   ├── main.tsx
    │   ├── routes.tsx
    │   ├── services
    │   │   ├── progressService.ts
    │   │   ├── downloadService.ts
    │   │   ├── transcriptService.ts
    │   │   ├── thumbnailService.ts
    │   │   └── summaryService.ts
    │   ├── HomePage
    │   │   └── docs
    │   │   │   ├── 组件设计说明.md
    │   │   │   └── 主页组件设计说明.md
    │   ├── App.css
    │   ├── assets
    │   │   └── docs
    │   │   │   └── 资源管理说明.md
    │   └── features
    │   │   └── app
    │   │       ├── docs
    │   │           ├── App组件设计说明.md
    │   │           ├── 滚动条问题解决记录.md
    │   │           └── 文稿Tab页滚动问题解决记录.md
    │   │       └── components
    │   │           └── UploadDialog.tsx
    ├── public
    │   ├── logo.png
    │   ├── human.png
    │   └── chatbot.png
    ├── tsconfig.json
    ├── .gitignore
    ├── Dockerfile
    ├── index.html
    ├── components.json
    ├── docker-compose.yml
    ├── tsconfig.node.json
    ├── tsconfig.app.json
    ├── eslint.config.js
    ├── vite.config.ts
    └── package.json
├── example_tests
    ├── __init__.py
    └── 音频转视频测试.html
├── docs
    ├── mermaid图汇集
    │   ├── ARM设备直接构建流程图.md
    │   ├── ARM设备交叉构建流程图.md
    │   ├── ReAct设计文档-记忆管理机制图.md
    │   ├── embedding文件名增强设计文档-流程图.md
    │   ├── 翻译服务设计文档-翻译流程图.md
    │   ├── ASR_Backend启动流程概览图.md
    │   ├── ASR_MODE选择模式图.md
    │   ├── 知识库结构设计文档-整体架构图.md
    │   ├── ReAct设计文档-工作流程图.md
    │   ├── 知识库结构设计文档-数据流图.md
    │   ├── ReAct设计文档-工具管理流程图.md
    │   ├── 前端架构设计文档-文件上传流程图.md
    │   ├── 前端架构设计文档-聊天交互流程图.md
    │   ├── 前端架构设计文档-转写结果显示流程图.md
    │   ├── 前端拖拽侧边栏功能设计文档-拖拽流程图.md
    │   ├── 知识库使用设计文档-使用流程图.md
    │   └── ReAct设计文档-类层次结构图.md
    ├── 测试示例.md
    └── api_文档导航.md
├── main.ipynb
├── .dockerignore
└── lefthook.yml


/backend/db/__init__.py:
--------------------------------------------------------------------------------
1 | # db package


--------------------------------------------------------------------------------
/backend/ReAct/tools/__init__.py:
--------------------------------------------------------------------------------
1 | # tools 工具模块


--------------------------------------------------------------------------------
/ASRBackend/tests/__init__.py:
--------------------------------------------------------------------------------
1 | """测试模块初始化"""
2 | 


--------------------------------------------------------------------------------
/ASRBackend/supabase_utils/example_tests/test.txt:
--------------------------------------------------------------------------------
1 | 测试使用的文件。


--------------------------------------------------------------------------------
/backend/ReAct/chat_tools_config.json:
--------------------------------------------------------------------------------
1 | ["knowledge_retrieval"]


--------------------------------------------------------------------------------
/backend/queues/__init__.py:
--------------------------------------------------------------------------------
1 | # -*- coding: utf-8 -*-
2 | """任务队列模块"""
3 | 


--------------------------------------------------------------------------------
/backend/utils/__init__.py:
--------------------------------------------------------------------------------
1 | # -*- coding: utf-8 -*-
2 | # utils package
3 | 


--------------------------------------------------------------------------------
/ASRBackend/__init__.py:
--------------------------------------------------------------------------------
1 | """ASRBackend 模块初始化"""
2 | __version__ = "0.1.0"
3 | 


--------------------------------------------------------------------------------
/backend/__init__.py:
--------------------------------------------------------------------------------
1 | # Make backend a regular package for reliable imports.
2 | 


--------------------------------------------------------------------------------
/backend/services/__init__.py:
--------------------------------------------------------------------------------
1 | # -*- coding: utf-8 -*-
2 | # services package
3 | 


--------------------------------------------------------------------------------
/backend/text_process/__init__.py:
--------------------------------------------------------------------------------
1 | # -*- coding: utf-8 -*-
2 | # text_process package
3 | 


--------------------------------------------------------------------------------
/frontend/src/components/LeftPanel/index.ts:
--------------------------------------------------------------------------------
1 | export { default } from "./LeftPanel"
2 | 


--------------------------------------------------------------------------------
/frontend/src/components/RightPanel/index.ts:
--------------------------------------------------------------------------------
1 | export { default } from "./RightPanel"
2 | 


--------------------------------------------------------------------------------
/ASRBackend/.dockerignore:
--------------------------------------------------------------------------------
1 | __pycache__/
2 | *.py[cod]
3 | datas/
4 | .env
5 | results/
6 | 


--------------------------------------------------------------------------------
/ASRBackend/services/__init__.py:
--------------------------------------------------------------------------------
1 | """ASR Backend 服务模块
2 | 
3 | 包含业务逻辑服务层的实现。
4 | """
5 | 


--------------------------------------------------------------------------------
/example_tests/__init__.py:
--------------------------------------------------------------------------------
1 | # -*- coding: utf-8 -*-
2 | """
3 | example_tests 包初始化文件
4 | """


--------------------------------------------------------------------------------
/ASRBackend/routers/__init__.py:
--------------------------------------------------------------------------------
1 | """ASR Backend 路由模块
2 | 
3 | 包含所有 API 路由的定义和注册。
4 | """
5 | 


--------------------------------------------------------------------------------
/backend/queues/tasks.py:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/li-xiu-qi/HearSight/HEAD/backend/queues/tasks.py


--------------------------------------------------------------------------------
/frontend/public/logo.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/li-xiu-qi/HearSight/HEAD/frontend/public/logo.png


--------------------------------------------------------------------------------
/frontend/public/human.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/li-xiu-qi/HearSight/HEAD/frontend/public/human.png


--------------------------------------------------------------------------------
/backend/media_processing/audio/__init__.py:
--------------------------------------------------------------------------------
1 | # -*- coding: utf-8 -*-
2 | """音频处理模块"""
3 | 
4 | __all__ = []
5 | 


--------------------------------------------------------------------------------
/backend/media_processing/video/__init__.py:
--------------------------------------------------------------------------------
1 | # -*- coding: utf-8 -*-
2 | """视频处理模块"""
3 | 
4 | __all__ = []
5 | 


--------------------------------------------------------------------------------
/frontend/public/chatbot.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/li-xiu-qi/HearSight/HEAD/frontend/public/chatbot.png


--------------------------------------------------------------------------------
/backend/media_processing/audio/download/__init__.py:
--------------------------------------------------------------------------------
1 | # -*- coding: utf-8 -*-
2 | """音频下载模块"""
3 | 
4 | __all__ = []
5 | 


--------------------------------------------------------------------------------
/backend/media_processing/video/download/__init__.py:
--------------------------------------------------------------------------------
1 | # -*- coding: utf-8 -*-
2 | """视频下载模块"""
3 | 
4 | __all__ = []
5 | 


--------------------------------------------------------------------------------
/ASRBackend/asr_functions/utils/__init__.py:
--------------------------------------------------------------------------------
1 | """语言检测工具模块"""
2 | 
3 | from .language_utils import detect_language
4 | 
5 | __all__ = ["detect_language"]
6 | 


--------------------------------------------------------------------------------
/frontend/src/App.tsx:
--------------------------------------------------------------------------------
1 | import AppPage from "@/features/app/AppPage"
2 | 
3 | function App() {
4 |   return <AppPage />
5 | }
6 | 
7 | export default App
8 | 


--------------------------------------------------------------------------------
/frontend/src/hooks/index.ts:
--------------------------------------------------------------------------------
1 | export { useUrlHandler } from './useUrlHandler'
2 | export { useDataLoader } from './useDataLoader'
3 | export { useVideoSync } from './useVideoSync'
4 | 


--------------------------------------------------------------------------------
/backend/queues/__main__.py:
--------------------------------------------------------------------------------
1 | # -*- coding: utf-8 -*-
2 | """Celery worker模块入口"""
3 | 
4 | from backend.queues.worker_launcher import main
5 | 
6 | if __name__ == "__main__":
7 |     main()
8 | 


--------------------------------------------------------------------------------
/backend/media_processing/audio/local/__init__.py:
--------------------------------------------------------------------------------
1 | # -*- coding: utf-8 -*-
2 | """本地音频处理模块"""
3 | 
4 | from .upload_handler import process_uploaded_audio
5 | 
6 | __all__ = ['process_uploaded_audio']
7 | 


--------------------------------------------------------------------------------
/backend/media_processing/video/local/__init__.py:
--------------------------------------------------------------------------------
1 | # -*- coding: utf-8 -*-
2 | """本地视频处理模块"""
3 | 
4 | from .upload_handler import process_uploaded_video
5 | 
6 | __all__ = ['process_uploaded_video']
7 | 


--------------------------------------------------------------------------------
/frontend/src/lib/utils.ts:
--------------------------------------------------------------------------------
1 | import { clsx, type ClassValue } from "clsx"
2 | import { twMerge } from "tailwind-merge"
3 | 
4 | export function cn(...inputs: ClassValue[]) {
5 |   return twMerge(clsx(inputs))
6 | }
7 | 


--------------------------------------------------------------------------------
/backend/text_process/translate/__init__.py:
--------------------------------------------------------------------------------
1 | # -*- coding: utf-8 -*-
2 | """
3 | 翻译模块：支持分批翻译、上下文感知、JSON格式化输出。
4 | """
5 | from .core import translate_segments_async
6 | 
7 | __all__ = ["translate_segments_async"]
8 | 


--------------------------------------------------------------------------------
/docs/mermaid图汇集/ARM设备直接构建流程图.md:
--------------------------------------------------------------------------------
 1 | # ARM设备直接构建流程图
 2 | 
 3 | ```mermaid
 4 | flowchart TD
 5 |     A[在ARM设备上安装Docker] --> B[传输项目代码]
 6 |     B --> C[配置环境变量]
 7 |     C --> D[运行docker-compose build]
 8 |     D --> E[运行docker-compose up]
 9 | ```
10 | 


--------------------------------------------------------------------------------
/backend/queues/tasks/__init__.py:
--------------------------------------------------------------------------------
1 | # -*- coding: utf-8 -*-
2 | """Celery异步任务定义 - 模块化版本"""
3 | 
4 | # 导入分解后的任务模块
5 | from .process_job_task import process_job_task, knowledge_retrieval_task
6 | 
7 | __all__ = ["process_job_task", "knowledge_retrieval_task"]


--------------------------------------------------------------------------------
/docs/mermaid图汇集/ARM设备交叉构建流程图.md:
--------------------------------------------------------------------------------
 1 | # ARM设备交叉构建流程图
 2 | 
 3 | ```mermaid
 4 | flowchart TD
 5 |     A[启用Docker Buildx] --> B[检查Dockerfile兼容性]
 6 |     B --> C[构建多架构镜像]
 7 |     C --> D[保存镜像为文件]
 8 |     D --> E[传输到ARM设备]
 9 |     E --> F[加载并运行镜像]
10 | ```
11 | 


--------------------------------------------------------------------------------
/docs/mermaid图汇集/ReAct设计文档-记忆管理机制图.md:
--------------------------------------------------------------------------------
 1 | # ReAct设计文档 - 记忆管理机制图
 2 | 
 3 | ```mermaid
 4 | graph TD
 5 |     A[新消息] --> B{是否需要总结}
 6 |     B -->|是| C[生成总结]
 7 |     B -->|否| D[添加到缓冲区]
 8 |     C --> E[压缩消息缓冲区]
 9 |     E --> F[更新上下文]
10 |     D --> F
11 |     F --> G[继续对话]
12 | ```
13 | 


--------------------------------------------------------------------------------
/docs/mermaid图汇集/embedding文件名增强设计文档-流程图.md:
--------------------------------------------------------------------------------
 1 | # embedding文件名增强设计文档 - 流程图
 2 | 
 3 | ```mermaid
 4 | graph TD
 5 |     A[接收segments和metadata] --> B[分组为chunks]
 6 |     B --> C[获取文件名]
 7 |     C --> D[生成增强chunk_text]
 8 |     D --> E[计算embedding]
 9 |     E --> F[存储到ChromaDB]
10 | ```
11 | 


--------------------------------------------------------------------------------
/backend/media_processing/video/download/bilibili/__init__.py:
--------------------------------------------------------------------------------
1 | # -*- coding: utf-8 -*-
2 | """b站视频下载模块"""
3 | 
4 | from .bilibili_downloader import BilibiliDownloader, get_downloader, download_bilibili_video
5 | 
6 | __all__ = ['BilibiliDownloader', 'get_downloader', 'download_bilibili_video']
7 | 


--------------------------------------------------------------------------------
/backend/media_processing/video/download/youtube/__init__.py:
--------------------------------------------------------------------------------
1 | # -*- coding: utf-8 -*-
2 | """YouTube视频下载模块"""
3 | 
4 | from .youtube_downloader import YoutubeDownloader, get_downloader, download_youtube_video
5 | 
6 | __all__ = ['YoutubeDownloader', 'get_downloader', 'download_youtube_video']
7 | 


--------------------------------------------------------------------------------
/frontend/src/components/RightPanel/types.ts:
--------------------------------------------------------------------------------
1 | export interface TranslateProgress {
2 |   status: 'idle' | 'detecting' | 'translating' | 'done' | 'error'
3 |   progress: number
4 |   message: string
5 |   detectionInfo?: string
6 |   targetLanguage?: string
7 |   newTranscriptId?: number
8 | }
9 | 


--------------------------------------------------------------------------------
/frontend/tsconfig.json:
--------------------------------------------------------------------------------
 1 | {
 2 |   "files": [],
 3 |   "references": [
 4 |     { "path": "./tsconfig.app.json" },
 5 |     { "path": "./tsconfig.node.json" }
 6 |   ],
 7 |   "compilerOptions": {
 8 |     "baseUrl": ".",
 9 |     "paths": {
10 |       "@/*": ["./src/*"]
11 |     }
12 |   }
13 | }
14 | 


--------------------------------------------------------------------------------
/backend/db/job_store.py:
--------------------------------------------------------------------------------
1 | # -*- coding: utf-8 -*-
2 | """任务存储模块，统一导出所有任务相关函数"""
3 | 
4 | from .job_base_store import create_job, get_job
5 | from .job_status_store import update_job_status, update_job_celery_task_id
6 | from .job_result_store import finish_job_success, finish_job_failed, update_job_result


--------------------------------------------------------------------------------
/frontend/src/utils/pendingUrl.ts:
--------------------------------------------------------------------------------
 1 | let pendingUrl: string | null = null
 2 | 
 3 | export const setPendingUrl = (url: string | null) => {
 4 |   pendingUrl = url
 5 | }
 6 | 
 7 | export const getPendingUrl = () => {
 8 |   const url = pendingUrl
 9 |   pendingUrl = null
10 |   return url
11 | }


--------------------------------------------------------------------------------
/main.ipynb:
--------------------------------------------------------------------------------
 1 | {
 2 |  "cells": [
 3 |   {
 4 |    "cell_type": "markdown",
 5 |    "id": "0c9365e3",
 6 |    "metadata": {},
 7 |    "source": []
 8 |   }
 9 |  ],
10 |  "metadata": {
11 |   "language_info": {
12 |    "name": "python"
13 |   }
14 |  },
15 |  "nbformat": 4,
16 |  "nbformat_minor": 5
17 | }
18 | 


--------------------------------------------------------------------------------
/backend/media_processing/audio/download/xiaoyuzhou/__init__.py:
--------------------------------------------------------------------------------
1 | # -*- coding: utf-8 -*-
2 | """小宇宙下载模块"""
3 | 
4 | from .xiaoyuzhou_downloader import XiaoyuzhouDownloader, get_downloader, download_xiaoyuzhou_episode
5 | 
6 | __all__ = ['XiaoyuzhouDownloader', 'get_downloader', 'download_xiaoyuzhou_episode']
7 | 


--------------------------------------------------------------------------------
/docs/mermaid图汇集/翻译服务设计文档-翻译流程图.md:
--------------------------------------------------------------------------------
 1 | # 翻译服务设计文档 - 翻译流程图
 2 | 
 3 | ```mermaid
 4 | graph TD
 5 |     A[输入句子] --> B[语言验证]
 6 |     B --> C[分批处理]
 7 |     C --> D[第一步：直译]
 8 |     D --> E[解析直译结果]
 9 |     E --> F[第二步：意译]
10 |     F --> G[解析意译结果]
11 |     G --> H[质量检查]
12 |     H --> I[结果整合]
13 |     I --> J[输出翻译]
14 | ```
15 | 


--------------------------------------------------------------------------------
/backend/text_process/docs/翻译服务性能测试报告.md:
--------------------------------------------------------------------------------
 1 | # 翻译服务性能测试报告
 2 | 
 3 | ## 测试概述
 4 | 
 5 | 两步翻译系统，使用DeepSeek-V3.2-Exp模型，基于translate目录下的example.py进行测试。
 6 | 
 7 | ## 测试结果
 8 | 
 9 | - 英文→中文：3句，耗时约11秒，平均3.7秒/句
10 | - 中文→英文：3句，耗时约10秒，平均3.3秒/句
11 | - 性能测试：5句，耗时13.16秒，平均2.63秒/句
12 | 
13 | ## 结论
14 | 
15 | 两步翻译系统性能良好，平均约2.6–3.7秒/句，质量高，适合使用。
16 | 


--------------------------------------------------------------------------------
/docs/mermaid图汇集/ASR_Backend启动流程概览图.md:
--------------------------------------------------------------------------------
 1 | # ASR Backend 启动流程概览图
 2 | 
 3 | ```mermaid
 4 | flowchart TD
 5 |     A[前置准备] --> B{选择运行模式}
 6 |     B -->|云端模式| C[获取API Key]
 7 |     B -->|本地模式| D[配置本地环境]
 8 |     C --> E[配置环境变量]
 9 |     D --> E
10 |     E --> F[安装依赖]
11 |     F --> G[启动服务]
12 |     G --> H[验证服务]
13 |     H --> I[开始使用]
14 | ```
15 | 


--------------------------------------------------------------------------------
/docs/mermaid图汇集/ASR_MODE选择模式图.md:
--------------------------------------------------------------------------------
 1 | # ASR_MODE 选择模式图
 2 | 
 3 | ```mermaid
 4 | graph TD
 5 |     A[ASR_MODE] --> B{选择模式}
 6 |     B -->|cloud| C[云端模式]
 7 |     B -->|local| D[本地模式]
 8 |     
 9 |     C --> E[轻量级部署]
10 |     C --> F[需要API Key]
11 |     C --> G[支持多语言]
12 |     
13 |     D --> H[完全离线]
14 |     D --> I[需要GPU]
15 |     D --> J[单语言支持]
16 | ```
17 | 


--------------------------------------------------------------------------------
/docs/mermaid图汇集/知识库结构设计文档-整体架构图.md:
--------------------------------------------------------------------------------
 1 | # 知识库结构设计文档 - 整体架构图
 2 | 
 3 | ```mermaid
 4 | graph TB
 5 |     A[视频转写] --> B[句子段数组]
 6 |     B --> C[ChatService]
 7 |     C --> D[KnowledgeBaseService]
 8 |     D --> E[ChromaDB]
 9 |     
10 |     F[用户查询] --> C
11 |     C --> G[LLM API]
12 |     G --> H[回答]
13 |     
14 |     I[转写删除] --> D
15 |     D --> J[清理向量数据]
16 | ```
17 | 


--------------------------------------------------------------------------------
/frontend/.gitignore:
--------------------------------------------------------------------------------
 1 | # Logs
 2 | logs
 3 | *.log
 4 | npm-debug.log*
 5 | yarn-debug.log*
 6 | yarn-error.log*
 7 | pnpm-debug.log*
 8 | lerna-debug.log*
 9 | 
10 | node_modules
11 | dist
12 | dist-ssr
13 | *.local
14 | 
15 | # Editor directories and files
16 | .vscode/*
17 | !.vscode/extensions.json
18 | .idea
19 | .DS_Store
20 | *.suo
21 | *.ntvs*
22 | *.njsproj
23 | *.sln
24 | *.sw?
25 | 


--------------------------------------------------------------------------------
/frontend/Dockerfile:
--------------------------------------------------------------------------------
 1 | FROM node:22-alpine
 2 | 
 3 | WORKDIR /app
 4 | 
 5 | # Copy package files
 6 | COPY package*.json ./
 7 | 
 8 | # Install dependencies
 9 | RUN npm ci
10 | 
11 | # Copy source code
12 | COPY . .
13 | 
14 | # Expose port 5173 for Vite dev server
15 | EXPOSE 5173
16 | 
17 | # Start Vite dev server
18 | CMD ["npm", "run", "dev", "--", "--host", "0.0.0.0", "--port", "5173"]


--------------------------------------------------------------------------------
/frontend/src/components/RightPanel/Chat/index.ts:
--------------------------------------------------------------------------------
1 | export { default as MessageList } from './MessageList'
2 | export { default as VideoSelector } from './VideoSelector'
3 | export { default as ChatToolbar } from './ChatToolbar'
4 | export { default as MessageInput } from './MessageInput'
5 | export { default as ChatSessionManager } from './ChatSessionManager'
6 | export { default as ChatView } from './ChatView'


--------------------------------------------------------------------------------
/docs/mermaid图汇集/ReAct设计文档-工作流程图.md:
--------------------------------------------------------------------------------
 1 | # ReAct设计文档 - 工作流程图
 2 | 
 3 | ```mermaid
 4 | graph TD
 5 |     A[用户输入问题] --> B[初始化推理上下文]
 6 |     B --> C[构建系统提示]
 7 |     C --> D[进入推理循环]
 8 |     D --> E[调用LLM生成响应]
 9 |     E --> F[解析响应]
10 |     F --> G{是最终答案?}
11 |     G -->|是| H[返回结果]
12 |     G -->|否| I[执行工具]
13 |     I --> J[获取观察结果]
14 |     J --> K[反馈到LLM]
15 |     K --> D
16 |     H --> L[结束]
17 | ```
18 | 


--------------------------------------------------------------------------------
/frontend/src/types/react-markdown.d.ts:
--------------------------------------------------------------------------------
 1 | declare module 'react-markdown' {
 2 |   import React from 'react';
 3 |   
 4 |   interface ReactMarkdownProps {
 5 |     children: string;
 6 |     className?: string;
 7 |     components?: Record<string, React.ComponentType<unknown>>;
 8 |     [key: string]: unknown;
 9 |   }
10 |   
11 |   const ReactMarkdown: React.FC<ReactMarkdownProps>;
12 |   export default ReactMarkdown;
13 | }


--------------------------------------------------------------------------------
/docs/mermaid图汇集/知识库结构设计文档-数据流图.md:
--------------------------------------------------------------------------------
 1 | # 知识库结构设计文档 - 数据流图
 2 | 
 3 | ```mermaid
 4 | flowchart TD
 5 |     A[输入segments] --> B{transcript_id提供?}
 6 |     B -->|否| C[计算tokens]
 7 |     B -->|是| D[从DB获取完整转录稿]
 8 |     D --> E[计算完整tokens]
 9 |     E --> F{完整tokens ≤ 阈值?}
10 |     F -->|是| G[使用完整转录稿]
11 |     F -->|否| H[知识库检索分块]
12 |     C --> I[构建提示词]
13 |     G --> I
14 |     H --> I
15 |     I --> J[调用LLM]
16 |     J --> K[返回回答]
17 | ```
18 | 


--------------------------------------------------------------------------------
/frontend/index.html:
--------------------------------------------------------------------------------
 1 | <!doctype html>
 2 | <html lang="zh-CN">
 3 |   <head>
 4 |     <meta charset="UTF-8" />
 5 |     <link rel="icon" type="image/png" href="/logo.png" />
 6 |     <meta name="viewport" content="width=device-width, initial-scale=1.0" />
 7 |     <title>HearSight：智能视频内容分析与理解平台</title>
 8 |   </head>
 9 |   <body>
10 |     <div id="root"></div>
11 |     <script type="module" src="/src/main.tsx"></script>
12 |   </body>
13 | </html>
14 | 


--------------------------------------------------------------------------------
/ASRBackend/.gitignore:
--------------------------------------------------------------------------------
 1 | __pycache__/
 2 | *.py[cod]
 3 | *$py.class
 4 | *.so
 5 | .Python
 6 | build/
 7 | develop-eggs/
 8 | dist/
 9 | downloads/
10 | eggs/
11 | .eggs/
12 | lib/
13 | lib64/
14 | parts/
15 | sdist/
16 | var/
17 | wheels/
18 | *.egg-info/
19 | .installed.cfg
20 | *.egg
21 | .env
22 | .venv
23 | env/
24 | venv/
25 | ENV/
26 | datas/
27 | results/
28 | *.log
29 | .pytest_cache/
30 | .coverage
31 | htmlcov/
32 | .DS_Store
33 | 


--------------------------------------------------------------------------------
/ASRBackend/requirements-cloud.txt:
--------------------------------------------------------------------------------
 1 | # ASR Backend - 云端版本
 2 | # 轻量级依赖，仅支持阿里云 DashScope API 调用
 3 | # 不包含任何本地模型和 torch，节省空间
 4 | 
 5 | fastapi
 6 | uvicorn
 7 | pydantic
 8 | pydantic-settings
 9 | python-multipart
10 | pytest
11 | pytest-asyncio
12 | httpx
13 | requests
14 | 
15 | # 云端 API 支持
16 | dashscope>=1.3.0
17 | 
18 | # Supabase 支持
19 | supabase
20 | 
21 | # 可选：如果需要保留语言检测功能
22 | langdetect
23 | 
24 | # 环境变量支持
25 | python-dotenv>=1.0.0
26 | 


--------------------------------------------------------------------------------
/frontend/src/main.tsx:
--------------------------------------------------------------------------------
 1 | import { StrictMode } from "react"
 2 | import { createRoot } from "react-dom/client"
 3 | import { Toaster } from "sonner"
 4 | import "./index.css"
 5 | import Routes from "./routes.tsx"
 6 | 
 7 | createRoot(document.getElementById("root")!).render(
 8 |   <StrictMode>
 9 |     <Routes />
10 |     <Toaster
11 |       position="top-center"
12 |       richColors
13 |       toastOptions={{ closeButton: true }}
14 |     />
15 |   </StrictMode>,
16 | )
17 | 


--------------------------------------------------------------------------------
/docs/mermaid图汇集/ReAct设计文档-工具管理流程图.md:
--------------------------------------------------------------------------------
 1 | # ReAct设计文档 - 工具管理流程图
 2 | 
 3 | ```mermaid
 4 | sequenceDiagram
 5 |     participant A as Agent
 6 |     participant TM as ToolManager
 7 |     participant C as FastMCP Client
 8 |     participant T as Tools Server
 9 |     
10 |     A->>TM: 获取可用工具
11 |     TM->>C: 连接工具后端
12 |     C->>T: 获取工具列表
13 |     T-->>C: 工具列表
14 |     C-->>TM: 工具列表
15 |     TM->>TM: 过滤允许工具
16 |     TM->>TM: 生成工具描述
17 |     TM-->>A: 可用工具和描述
18 | ```
19 | 


--------------------------------------------------------------------------------
/frontend/src/routes.tsx:
--------------------------------------------------------------------------------
 1 | import { createBrowserRouter, RouterProvider } from "react-router-dom"
 2 | import HomePage from "./HomePage/HomePage"
 3 | import App from "./App"
 4 | 
 5 | const router = createBrowserRouter([
 6 |   {
 7 |     path: "/",
 8 |     element: <HomePage />,
 9 |   },
10 |   {
11 |     path: "/app",
12 |     element: <App />,
13 |   },
14 | ])
15 | 
16 | export default function Routes() {
17 |   return <RouterProvider router={router} />
18 | }
19 | 


--------------------------------------------------------------------------------
/frontend/src/components/RightPanel/hooks/index.ts:
--------------------------------------------------------------------------------
1 | export { useScrollHandlers } from './useScrollHandlers'
2 | export { useSegmentHandlers } from './useSegmentHandlers'
3 | export { useSearchHandlers } from './useSearchHandlers'
4 | export { useSummaryHandlers } from './useSummaryHandlers'
5 | export { useTranslateHandlers } from './useTranslateHandlers'
6 | export { useLanguageSwitch } from './useLanguageSwitch'
7 | export { useRightPanelController } from './useRightPanelController'
8 | 


--------------------------------------------------------------------------------
/docs/mermaid图汇集/前端架构设计文档-文件上传流程图.md:
--------------------------------------------------------------------------------
 1 | # 前端架构设计文档 - 文件上传流程图
 2 | 
 3 | ```mermaid
 4 | sequenceDiagram
 5 |     participant User
 6 |     participant FileUploader
 7 |     participant UploadService
 8 |     participant Backend
 9 |     
10 |     User->>FileUploader: 选择文件
11 |     FileUploader->>UploadService: 调用上传接口
12 |     UploadService->>Backend: 发送文件数据
13 |     Backend-->>UploadService: 返回上传结果
14 |     UploadService-->>FileUploader: 更新上传状态
15 |     FileUploader-->>User: 显示上传结果
16 | ```
17 | 


--------------------------------------------------------------------------------
/frontend/src/utils/message.ts:
--------------------------------------------------------------------------------
 1 | import { toast } from "sonner"
 2 | 
 3 | export const message = {
 4 |   success: (content: string) => {
 5 |     toast.success(content, { dismissible: true })
 6 |   },
 7 |   error: (content: string) => {
 8 |     toast.error(content, { dismissible: true })
 9 |   },
10 |   warning: (content: string) => {
11 |     toast.warning(content, { dismissible: true })
12 |   },
13 |   info: (content: string) => {
14 |     toast.info(content, { dismissible: true })
15 |   },
16 | }
17 | 


--------------------------------------------------------------------------------
/ASRBackend/asr_functions/__init__.py:
--------------------------------------------------------------------------------
 1 | """ASR functions package initializer.
 2 | 
 3 | Avoid importing heavy/local-specific modules at package import time so
 4 | that submodule-level imports (e.g. asr_functions.asr_sentence_segments)
 5 | do not accidentally trigger imports for all implementations (local/cloud).
 6 | Modules should import submodules directly, e.g.:
 7 |     from asr_functions.asr_sentence_segments import process
 8 | or rely on providers to import the appropriate submodule lazily.
 9 | """
10 | 
11 | __all__ = []
12 | 


--------------------------------------------------------------------------------
/docs/mermaid图汇集/前端架构设计文档-聊天交互流程图.md:
--------------------------------------------------------------------------------
 1 | # 前端架构设计文档 - 聊天交互流程图
 2 | 
 3 | ```mermaid
 4 | sequenceDiagram
 5 |     participant User
 6 |     participant ChatView
 7 |     participant ChatService
 8 |     participant Backend
 9 |     participant Store
10 |     
11 |     User->>ChatView: 输入消息并发送
12 |     ChatView->>ChatService: 发起聊天请求
13 |     ChatService->>Backend: POST /chat/stream
14 |     Backend-->>ChatService: 流式返回响应
15 |     ChatService->>Store: 更新聊天状态
16 |     Store-->>ChatView: 提供聊天数据
17 |     ChatView-->>User: 显示AI回复
18 | ```
19 | 


--------------------------------------------------------------------------------
/backend/media_processing/__init__.py:
--------------------------------------------------------------------------------
 1 | # -*- coding: utf-8 -*-
 2 | """媒体处理模块"""
 3 | 
 4 | from backend.common_interfaces import DownloadResult
 5 | from .upload_handler import process_uploaded_file
 6 | from .audio.local import process_uploaded_audio
 7 | from .video.local import process_uploaded_video
 8 | from .downloader_factory import MediaDownloaderFactory
 9 | 
10 | __all__ = [
11 |     'DownloadResult',
12 |     'MediaDownloaderFactory',
13 |     'process_uploaded_file',
14 |     'process_uploaded_audio',
15 |     'process_uploaded_video',
16 | ]
17 | 


--------------------------------------------------------------------------------
/frontend/components.json:
--------------------------------------------------------------------------------
 1 | {
 2 |   "$schema": "https://ui.shadcn.com/schema.json",
 3 |   "style": "new-york",
 4 |   "rsc": false,
 5 |   "tsx": true,
 6 |   "tailwind": {
 7 |     "config": "",
 8 |     "css": "src/index.css",
 9 |     "baseColor": "neutral",
10 |     "cssVariables": true,
11 |     "prefix": ""
12 |   },
13 |   "iconLibrary": "lucide",
14 |   "aliases": {
15 |     "components": "@/components",
16 |     "utils": "@/lib/utils",
17 |     "ui": "@/components/ui",
18 |     "lib": "@/lib",
19 |     "hooks": "@/hooks"
20 |   },
21 |   "registries": {}
22 | }
23 | 


--------------------------------------------------------------------------------
/ASRBackend/docker-compose.cloud.yml:
--------------------------------------------------------------------------------
 1 | version: '3.8'
 2 | 
 3 | services:
 4 |   asr-backend:
 5 |     build:
 6 |       context: .
 7 |       dockerfile: Dockerfile.cloud
 8 |     container_name: hearsight-asr-backend-cloud
 9 |     restart: unless-stopped
10 |     env_file: .env
11 |     ports:
12 |       - "${ASR_BACKEND_PORT:-8003}:8003"
13 |     environment:
14 |       - ASR_MODE=cloud
15 |     healthcheck:
16 |       test: ["CMD", "curl", "-f", "http://localhost:8003/health"]
17 |       interval: 30s
18 |       timeout: 10s
19 |       retries: 3
20 |       start_period: 40s


--------------------------------------------------------------------------------
/ASRBackend/docker-compose.local.yml:
--------------------------------------------------------------------------------
 1 | version: '3.8'
 2 | 
 3 | services:
 4 |   asr-backend:
 5 |     build:
 6 |       context: .
 7 |       dockerfile: Dockerfile.local
 8 |     container_name: hearsight-asr-backend-local
 9 |     restart: unless-stopped
10 |     env_file: .env
11 |     ports:
12 |       - "${ASR_BACKEND_PORT:-8003}:8003"
13 |     environment:
14 |       - ASR_MODE=local
15 |     healthcheck:
16 |       test: ["CMD", "curl", "-f", "http://localhost:8003/health"]
17 |       interval: 30s
18 |       timeout: 10s
19 |       retries: 3
20 |       start_period: 40s


--------------------------------------------------------------------------------
/frontend/docker-compose.yml:
--------------------------------------------------------------------------------
 1 | version: '3.8'
 2 | 
 3 | services:
 4 |   frontend:
 5 |     build:
 6 |       context: .
 7 |       dockerfile: Dockerfile
 8 |     container_name: hearsight-frontend
 9 |     restart: unless-stopped
10 |     ports:
11 |       - "${FRONTEND_PORT:-10000}:5173"
12 |     environment:
13 |       - BACKEND_HOST=host.docker.internal
14 |       - BACKEND_PORT=9999
15 |       - USE_DOCKER=false
16 |       - VITE_BACKEND_URL=http://host.docker.internal:9999
17 |       - VITE_USE_DOCKER=false
18 |     volumes:
19 |       - .:/app
20 |       - /app/node_modules


--------------------------------------------------------------------------------
/backend/common_interfaces.py:
--------------------------------------------------------------------------------
 1 | # -*- coding: utf-8 -*-
 2 | """通用接口定义"""
 3 | 
 4 | from __future__ import annotations
 5 | 
 6 | from dataclasses import dataclass
 7 | from typing import Optional
 8 | 
 9 | 
@dataclass
class DownloadResult:
    """Result of a media download.

    Every field has a default, so an instance created with no arguments
    represents an unsuccessful result (``success`` is ``False``).
    """
    video_path: Optional[str] = None
    audio_path: Optional[str] = None
    title: Optional[str] = None
    duration: Optional[float] = None
    media_type: Optional[str] = None  # "video", "audio", or "both"
    success: bool = False
    error_message: Optional[str] = None


--------------------------------------------------------------------------------
/frontend/src/services/progressService.ts:
--------------------------------------------------------------------------------
 1 | export const getDownloadProgress = async (jobId: number) => {
 2 |   const response = await fetch(`/api/progress/download/${jobId}`)
 3 |   
 4 |   if (!response.ok) {
 5 |     throw new Error(`获取下载进度失败：${response.status}`)
 6 |   }
 7 |   
 8 |   return response.json()
 9 | }
10 | 
/**
 * Fetch the task progress of a job from `/api/progress/task/{jobId}`.
 *
 * Resolves with the parsed JSON body; throws an Error carrying the HTTP status
 * when the response is not OK.
 */
export const getTaskProgress = async (jobId: number) => {
  const res = await fetch(`/api/progress/task/${jobId}`)

  if (res.ok) {
    return res.json()
  }

  throw new Error(`获取任务进度失败：${res.status}`)
}


--------------------------------------------------------------------------------
/frontend/src/types/global.d.ts:
--------------------------------------------------------------------------------
 1 | /// <reference types="react" />
 2 | /// <reference types="react-dom" />
 3 | 
 4 | declare module '*.css' {
 5 |   const content: Record<string, string>;
 6 |   export default content;
 7 | }
 8 | 
 9 | declare module '*.module.css' {
10 |   const classes: Record<string, string>;
11 |   export default classes;
12 | }
13 | 
14 | declare module '*.scss' {
15 |   const content: Record<string, string>;
16 |   export default content;
17 | }
18 | 
19 | declare module '*.module.scss' {
20 |   const classes: Record<string, string>;
21 |   export default classes;
22 | }


--------------------------------------------------------------------------------
/frontend/src/components/RightPanel/hooks/useSegmentHandlers.ts:
--------------------------------------------------------------------------------
 1 | import { useCallback } from 'react'
 2 | import type { Segment } from '../../../types'
 3 | 
/**
 * Hook that builds the click handler for a transcript segment.
 *
 * @param onActiveSegmentChange called with the clicked segment's `index`
 * @param onSeekTo called with the clicked segment's `start_time`
 * @returns an object exposing the memoized `handleSegmentClick`
 */
export const useSegmentHandlers = (
  onActiveSegmentChange: (index: number) => void,
  onSeekTo: (timeMs: number) => void
) => {
  const handleSegmentClick = useCallback(
    (segment: Segment) => {
      onActiveSegmentChange(segment.index)
      // NOTE(review): the callback parameter is named `timeMs`; confirm that
      // `segment.start_time` is expressed in milliseconds.
      onSeekTo(segment.start_time)
    },
    // Re-created only when one of the two callbacks changes identity.
    [onActiveSegmentChange, onSeekTo]
  )

  return { handleSegmentClick }
}
18 | 


--------------------------------------------------------------------------------
/docs/mermaid图汇集/前端架构设计文档-转写结果显示流程图.md:
--------------------------------------------------------------------------------
 1 | # 前端架构设计文档 - 转写结果显示流程图
 2 | 
 3 | ```mermaid
 4 | sequenceDiagram
 5 |     participant User
 6 |     participant TranscriptTab
 7 |     participant TranscriptService
 8 |     participant Backend
 9 |     participant Store
10 |     
11 |     User->>TranscriptTab: 打开转写Tab
12 |     TranscriptTab->>TranscriptService: 请求转写数据
13 |     TranscriptService->>Backend: GET /transcript/{id}
14 |     Backend-->>TranscriptService: 返回转写数据
15 |     TranscriptService->>Store: 更新转写状态
16 |     Store-->>TranscriptTab: 提供转写数据
17 |     TranscriptTab-->>User: 显示转写结果
18 | ```
19 | 


--------------------------------------------------------------------------------
/backend/utils/typing_defs.py:
--------------------------------------------------------------------------------
 1 | # -*- coding: utf-8 -*-
 2 | """
 3 | 集中类型定义，避免各处重复：
 4 | - Segment: 语音转写后的句级片段（不含 spk_id）
 5 | - SummaryItem: 汇总输出项
 6 | """
 7 | from __future__ import annotations
 8 | 
 9 | from typing import Dict, Optional
10 | from typing_extensions import TypedDict
11 | 
12 | 
class Segment(TypedDict, total=False):
    """Sentence-level segment of a speech transcription (no ``spk_id`` key).

    ``total=False`` makes every key optional.
    """
    index: int
    sentence: str
    start_time: float
    end_time: float
    translation: Optional[Dict[str, str]]
19 | 
20 | 
class SummaryItem(TypedDict):
    """One summary output item: a topic, its summary text and a time range.

    All four keys are required.
    """
    topic: str
    summary: str
    start_time: float
    end_time: float
26 | 


--------------------------------------------------------------------------------
/ASRBackend/requirements-local.txt:
--------------------------------------------------------------------------------
 1 | # ASR Backend - 本地版本
 2 | # 包含完整的本地模型支持，包括 torch 等大型依赖
 3 | # 本地版本可以作为备选方案支持云端版本
 4 | 
 5 | fastapi
 6 | uvicorn
 7 | pydantic
 8 | pydantic-settings
 9 | python-multipart
10 | pytest
11 | pytest-asyncio
12 | httpx
13 | requests
14 | 
15 | # 本地 ASR 模型依赖
16 | funasr>=0.4.0
17 | 
18 | # Supabase 支持
19 | supabase
20 | 
21 | # 深度学习框架 - 在 Dockerfile 中单独安装 CUDA 版本
22 | # torch==2.1.0+cu118
23 | # torchaudio==2.1.0+cu118
24 | 
25 | # 语言检测
26 | langdetect
27 | textblob
28 | 
29 | # 可选：云端 API 支持（如果想在本地版本中也支持云端作为备选）
30 | dashscope
31 | 
32 | # 环境变量支持
33 | python-dotenv>=1.0.0
34 | 


--------------------------------------------------------------------------------
/docs/mermaid图汇集/前端拖拽侧边栏功能设计文档-拖拽流程图.md:
--------------------------------------------------------------------------------
 1 | # 前端拖拽侧边栏功能设计文档 - 拖拽流程图
 2 | 
 3 | ```mermaid
 4 | sequenceDiagram
 5 |     participant U as 用户
 6 |     participant H as ResizableHandle
 7 |     participant P as ResizablePanel
 8 |     participant S as LayoutStore
 9 | 
10 |     U->>H: 鼠标按下 (pointerdown)
11 |     H->>P: 开始调整 (onResizeStart)
12 |     P->>S: 更新状态 (setPanelSize)
13 |     U->>H: 鼠标移动 (pointermove)
14 |     H->>P: 实时调整 (onResize)
15 |     P->>P: 应用约束 (minSize, maxSize)
16 |     P->>S: 同步状态
17 |     U->>H: 鼠标释放 (pointerup)
18 |     H->>P: 结束调整 (onResizeEnd)
19 |     P->>S: 保存到 localStorage (saveLayout)
20 | ```
21 | 


--------------------------------------------------------------------------------
/backend/routers/chat/__init__.py:
--------------------------------------------------------------------------------
 1 | # -*- coding: utf-8 -*-
 2 | """聊天路由模块"""
 3 | 
 4 | from fastapi import APIRouter
 5 | 
 6 | from .summarize_router import router as summarize_router
 7 | from .chat_conversation_router import router as chat_conversation_router
 8 | from .messages_router import router as messages_router
 9 | from .chat_session_router import router as chat_session_router
10 | 
11 | 
# Aggregate router for the chat package; everything mounted on it is tagged "chat".
router = APIRouter(tags=["chat"])

# Include all sub-routers
router.include_router(summarize_router)
router.include_router(chat_conversation_router)
router.include_router(messages_router)
router.include_router(chat_session_router)


--------------------------------------------------------------------------------
/backend/main.py:
--------------------------------------------------------------------------------
 1 | # -*- coding: utf-8 -*-
 2 | """HearSight 应用入口"""
 3 | 
 4 | from __future__ import annotations
 5 | 
 6 | import sys
 7 | import os
 8 | 
# Add the project root (the parent of this file's directory) to sys.path so
# that ``backend.*`` imports resolve.
sys.path.insert(0, os.path.dirname(os.path.dirname(__file__)))

from backend.app import create_app
from backend.startup import initialize_app, get_backend_port

# Initialize the application; yields the static directory and the database URL
static_dir, db_url = initialize_app()

# Create the FastAPI application
app = create_app(static_dir, db_url)

if __name__ == "__main__":
    import uvicorn

    port = get_backend_port()
    # NOTE(review): the import string "main:app" requires this file's own
    # directory to be importable as top-level ``main`` — confirm for every
    # launch method in use.
    uvicorn.run("main:app", host="0.0.0.0", port=port, reload=False)
26 | 


--------------------------------------------------------------------------------
/backend/requirements.txt:
--------------------------------------------------------------------------------
 1 | yt-dlp
 2 | # 用于HuggingFace风格分词器（AutoTokenizer）
 3 | modelscope
 4 | # 用于OpenAI tiktoken分词
 5 | tiktoken
 6 | # pytorch安装请去https://pytorch.org/get-started/locally/
 7 | # Windows：pip install torch torchvision torchaudio --index-url https://download.pytorch.org/whl/cu126
 8 | 
 9 | fastapi
10 | uvicorn
11 | PyYAML
12 | psycopg2-binary
13 | python-dotenv
14 | 
15 | # 异步任务队列
16 | celery
17 | redis
18 | 
19 | pillow
20 | # 用于重试机制，处理超时和限速
21 | tenacity
22 | aiohttp
23 | requests
24 | openai
25 | litellm
26 | 
27 | # 向量数据库
28 | chromadb
29 | 
30 | # ReAct 相关依赖
31 | fastmcp
32 | 
33 | black
34 | isort
35 | pylint
36 | pydantic_settings
37 | python-multipart


--------------------------------------------------------------------------------
/backend/tests/test_asr_sentence_segments.py:
--------------------------------------------------------------------------------
 1 | # -*- coding: utf-8 -*-
 2 | import os, sys
 3 | from pathlib import Path
 4 | 
# Three levels up from this file; the ``example_tests`` directory beneath it is
# put first on sys.path so that ``asr_sentence_segments`` can be imported.
project_root = Path(__file__).parent.parent.parent
sys.path.insert(0, str(project_root / "example_tests"))

import json
from asr_sentence_segments import process

if __name__ == "__main__":
    # audio_path = r"C:\Users\ke\Documents\projects\python_projects\HearSight\backend\tests\datas\大语言模型进化论：从"听懂指令"到"学会思考"，AI如何与人类对齐？.m4a"
    # NOTE(review): hard-coded absolute path from one developer machine; the
    # script only runs where this exact file exists.
    audio_path = r"C:\Users\ke\Documents\projects\python_projects\HearSight\backend\tests\datas\test.mp4"
    out = process(audio_path)
    # Pretty-print the result as JSON, keeping non-ASCII characters readable.
    print(json.dumps(out, ensure_ascii=False, indent=2))


--------------------------------------------------------------------------------
/docs/mermaid图汇集/知识库使用设计文档-使用流程图.md:
--------------------------------------------------------------------------------
 1 | # 知识库使用设计文档 - 使用流程图
 2 | 
 3 | ```mermaid
 4 | sequenceDiagram
 5 |     participant U as 用户
 6 |     participant R as 路由
 7 |     participant C as ChatService
 8 |     participant K as KnowledgeBaseService
 9 |     participant DB as ChromaDB
10 |     participant LLM as LLM API
11 | 
12 |     U->>R: 发送问答请求
13 |     R->>C: 调用 chat_with_segments
14 |     C->>C: 检查 token 数
15 |     alt token 超过阈值
16 |         C->>K: 检索相似内容
17 |         K->>DB: 查询向量
18 |         DB-->>K: 返回结果
19 |         K-->>C: 返回相关 segments
20 |     end
21 |     C->>C: 构建提示词
22 |     C->>LLM: 调用 LLM
23 |     LLM-->>C: 返回回答
24 |     C-->>R: 返回结果
25 |     R-->>U: 返回回答
26 | ```
27 | 


--------------------------------------------------------------------------------
/frontend/src/services/downloadService.ts:
--------------------------------------------------------------------------------
 1 | export const startDownload = async (
 2 |   url: string,
 3 |   jobId: number,
 4 |   sessdata?: string,
 5 |   playlist?: boolean,
 6 |   quality?: string
 7 | ) => {
 8 |   const response = await fetch('/api/download', {
 9 |     method: 'POST',
10 |     headers: { 'Content-Type': 'application/json' },
11 |     body: JSON.stringify({
12 |       url,
13 |       job_id: jobId,
14 |       sessdata: sessdata || '',
15 |       playlist: playlist || false,
16 |       quality: quality || 'best',
17 |       workers: 1,
18 |     })
19 |   })
20 |   
21 |   if (!response.ok) {
22 |     throw new Error(`启动下载失败：${response.status}`)
23 |   }
24 |   
25 |   return response.json()
26 | }


--------------------------------------------------------------------------------
/backend/routers/__init__.py:
--------------------------------------------------------------------------------
 1 | # -*- coding: utf-8 -*-
 2 | """路由模块统一入口"""
 3 | 
 4 | from .chat import router as chat_router
 5 | from .download_router import router as download_router
 6 | from .progress_router import router as progress_router
 7 | from .thumbnail_router import router as thumbnail_router
 8 | from .transcript_router import router as transcript_router
 9 | from .translate_router import router as translate_router
10 | from .upload_router import router as upload_router
11 | 
12 | __all__ = [
13 |     "download_router",
14 |     "transcript_router",
15 |     "chat_router",
16 |     "thumbnail_router",
17 |     "progress_router",
18 |     "translate_router",
19 |     "upload_router",
20 | ]
21 | 


--------------------------------------------------------------------------------
/frontend/src/hooks/docs/设计说明.md:
--------------------------------------------------------------------------------
 1 | # Hooks 设计说明
 2 | 
 3 | ## 概述
 4 | 
 5 | hooks目录包含HearSight前端应用的所有自定义React hooks，提供状态管理和副作用处理的逻辑复用。
 6 | 
 7 | ## 主要Hooks
 8 | 
 9 | ### useDataLoader
10 | 数据加载和管理hook，负责：
11 | - 转录记录列表获取
12 | - 转录详情加载
13 | - 视频/音频资源管理
14 | - 任务状态同步
15 | 
16 | ### useUrlHandler
17 | URL参数处理hook，功能：
18 | - 页面URL解析
19 | - 参数状态同步
20 | - 路由导航管理
21 | 
22 | ### useVideoSync
23 | 视频同步hook，负责：
24 | - 视频播放状态管理
25 | - 时间轴同步
26 | - 字幕定位控制
27 | 
28 | ## 设计原则
29 | 
30 | 1. **单一职责**: 每个hook专注特定功能
31 | 2. **组合性**: hooks可灵活组合使用
32 | 3. **状态隔离**: 避免不必要的状态耦合
33 | 4. **性能优化**: 使用useCallback和useMemo优化重渲染
34 | 
35 | ## 使用模式
36 | 
37 | - 通过自定义hooks封装复杂逻辑
38 | - 返回状态和操作函数的元组
39 | - 支持TypeScript类型安全


--------------------------------------------------------------------------------
/backend/tests/test_audio2text.py:
--------------------------------------------------------------------------------
 1 | import os
 2 | import sys
 3 | import json
 4 | 
# Put the backend directory (the parent of this tests directory) on the import
# path so the script can be run directly from the tests directory.
sys.path.append(os.path.dirname(os.path.dirname(__file__)))

from audio2text.audio2text import audio_to_text

# NOTE(review): hard-coded absolute path from one developer machine.
audio_path = r"C:\Users\ke\Documents\projects\python_projects\HearSight\backend\tests\datas\大语言模型进化论：从“听懂指令”到“学会思考”，AI如何与人类对齐？.m4a"
text, segments = audio_to_text(audio_path, return_segments=True)
print(text)
# Save the returned segments as JSON under ./results (relative to the current
# working directory), creating the directory if needed.
output_dir = "results"
os.makedirs(output_dir, exist_ok=True)
with open(os.path.join(output_dir, 'test_audio2text.json'), 'w', encoding='utf-8') as f:
    json.dump(segments, f, ensure_ascii=False, indent=2)
18 | 


--------------------------------------------------------------------------------
/backend/schemas.py:
--------------------------------------------------------------------------------
 1 | # -*- coding: utf-8 -*-
 2 | """
 3 | 转录和总结相关的数据模式定义
 4 | - Segment: 语音转写后的句级片段
 5 | - SummaryItem: 总结项目
 6 | """
 7 | from __future__ import annotations
 8 | 
 9 | from typing import Dict, Optional
10 | from typing_extensions import TypedDict
11 | 
12 | 
class Segment(TypedDict, total=False):
    """Sentence-level segment of a speech transcription, with speaker information.

    ``total=False`` makes every key optional.
    """
    index: int
    sentence: str
    start_time: float
    end_time: float
    spk_id: Optional[str]  # speaker ID, may be None
    translation: Optional[Dict[str, str]]
21 | 
22 | 
class SummaryItem(TypedDict):
    """Summary item: a topic title, its content and the time range it covers."""
    topic: str
    summary: str
    start_time: float
    end_time: float
29 | 


--------------------------------------------------------------------------------
/frontend/src/components/ui/label.tsx:
--------------------------------------------------------------------------------
 1 | "use client"
 2 | 
 3 | import * as React from "react"
 4 | import * as LabelPrimitive from "@radix-ui/react-label"
 5 | 
 6 | import { cn } from "@/lib/utils"
 7 | 
/**
 * Styled label built on Radix UI's `LabelPrimitive.Root`.
 *
 * Accepts the same props as `LabelPrimitive.Root`. The caller's `className` is
 * merged with the default classes through `cn`, and all remaining props are
 * forwarded unchanged. The element is tagged with `data-slot="label"`.
 */
function Label({
  className,
  ...props
}: React.ComponentProps<typeof LabelPrimitive.Root>) {
  return (
    <LabelPrimitive.Root
      data-slot="label"
      className={cn(
        "flex items-center gap-2 text-sm leading-none font-medium select-none group-data-[disabled=true]:pointer-events-none group-data-[disabled=true]:opacity-50 peer-disabled:cursor-not-allowed peer-disabled:opacity-50",
        className
      )}
      {...props}
    />
  )
}
23 | 
24 | export { Label }
25 | 


--------------------------------------------------------------------------------
/ASRBackend/supabase_utils/test_supabase_upload.py:
--------------------------------------------------------------------------------
 1 | """测试 Supabase 文件上传功能"""
 2 | 
 3 | from supabase_utils.supabase_upload import upload_file_to_supabase
 4 | import os
 5 | 
 6 | def test_upload():
 7 |     """测试上传文件"""
 8 |     # 使用 example_tests 目录下的 test.txt
 9 |     file_path = os.path.join(os.path.dirname(__file__), "example_tests", "test.txt")
10 | 
11 |     if not os.path.exists(file_path):
12 |         print(f"测试文件不存在: {file_path}")
13 |         return
14 | 
15 |     print(f"开始上传文件: {file_path}")
16 |     success, result = upload_file_to_supabase(file_path)
17 |     if success:
18 |         print(f"上传成功: {result}")
19 |     else:
20 |         print(f"上传失败: {result}")
21 | 
22 | if __name__ == "__main__":
23 |     test_upload()
24 | 


--------------------------------------------------------------------------------
/frontend/src/components/LeftPanel/docs/组件设计说明.md:
--------------------------------------------------------------------------------
 1 | # 左侧面板组件设计说明
 2 | 
 3 | ## 概述
 4 | 
 5 | 左侧面板负责展示和管理HearSight应用中的转录记录和处理任务，提供列表视图和状态管理功能。
 6 | 
 7 | ## 组件结构
 8 | 
 9 | ### LeftPanel.tsx
10 | 
11 | 主容器组件，负责：
12 | 
13 | - 标签页布局管理
14 | - 数据传递给子组件
15 | - 响应用户交互事件
16 | 
17 | ### 标签页组件
18 | 
19 | - **ProcessedTab.tsx**: 已处理转录列表展示和选择
20 | - **TasksTab.tsx**: 任务进度和状态展示
21 | 
22 | ## 设计原则
23 | 
24 | 1. **列表展示**: 使用卡片式布局清晰展示各项内容
25 | 2. **状态指示**: 通过颜色和图标直观显示处理状态
26 | 3. **交互友好**: 支持点击加载、刷新等操作
27 | 4. **性能优化**: 虚拟滚动处理大量数据
28 | 
29 | ## 数据流
30 | 
31 | - 父组件提供transcripts和jobs数据
32 | - 子组件通过回调函数触发数据更新
33 | - 状态变化通过事件冒泡传递
34 | 
35 | ## 功能特性
36 | 
37 | - **转录管理**: 展示所有已处理的转录，支持选择和加载
38 | - **任务监控**: 实时显示处理任务的进度和状态
39 | - **状态同步**: 与右侧面板保持数据一致性
40 | 


--------------------------------------------------------------------------------
/ASRBackend/asr_functions/utils/language_utils.py:
--------------------------------------------------------------------------------
 1 | """语言检测工具模块"""
 2 | 
 3 | import unicodedata
 4 | 
 5 | 
 6 | def detect_language(text: str) -> str:
 7 |     """检测文本主要语言，支持中文和英文。
 8 | 
 9 |     如果中文字符比例 > 70%，返回 'zh'，否则返回 'en'。
10 |     """
11 |     if not text:
12 |         return "en"
13 | 
14 |     chinese_count = 0
15 |     total_count = 0
16 |     for char in text:
17 |         if char.isspace():
18 |             continue
19 |         total_count += 1
20 |         if unicodedata.category(char).startswith("Lo") or "\u4e00" <= char <= "\u9fff":
21 |             chinese_count += 1
22 | 
23 |     if total_count == 0:
24 |         return "en"
25 | 
26 |     chinese_ratio = chinese_count / total_count
27 |     return "zh" if chinese_ratio > 0.7 else "en"
28 | 


--------------------------------------------------------------------------------
/frontend/src/HomePage/docs/组件设计说明.md:
--------------------------------------------------------------------------------
 1 | # HomePage 组件设计说明
 2 | 
 3 | ## 概述
 4 | 
 5 | HomePage是HearSight应用的首页组件，提供产品介绍、功能展示和快速入口功能。
 6 | 
 7 | ## 组件结构
 8 | 
 9 | ### HomePage.tsx
10 | 
11 | 主页面组件，包含：
12 | 
13 | - 顶部导航栏
14 | - 产品介绍区域
15 | - 功能特性展示
16 | - URL输入表单
17 | - 快速开始按钮
18 | 
19 | ## 设计特点
20 | 
21 | 1. **渐变背景**: 使用渐变色营造视觉吸引力
22 | 2. **响应式布局**: 支持不同屏幕尺寸的自适应
23 | 3. **交互友好**: 清晰的CTA（行动召唤）按钮
24 | 4. **信息层次**: 通过卡片和图标组织内容
25 | 
26 | ## 功能特性
27 | 
28 | - **产品介绍**: 展示HearSight的核心功能
29 | - **URL输入**: 支持直接输入视频链接快速开始
30 | - **导航引导**: 提供进入应用的便捷路径
31 | - **特性展示**: 通过图标和描述突出产品优势
32 | 
33 | ## 用户流程
34 | 
35 | 1. 访问首页查看产品介绍
36 | 2. 输入视频URL或点击进入应用
37 | 3. 跳转到主应用界面开始使用
38 | 
39 | ## 技术实现
40 | 
41 | - 使用React Router进行页面导航
42 | - 表单处理用户输入
43 | - 本地存储管理待处理URL
44 | 


--------------------------------------------------------------------------------
/frontend/tsconfig.node.json:
--------------------------------------------------------------------------------
 1 | {
 2 |   "compilerOptions": {
 3 |     "tsBuildInfoFile": "./node_modules/.tmp/tsconfig.node.tsbuildinfo",
 4 |     "target": "ES2023",
 5 |     "lib": ["ES2023"],
 6 |     "module": "ESNext",
 7 |     "types": ["node"],
 8 |     "skipLibCheck": true,
 9 | 
10 |     /* Bundler mode */
11 |     "moduleResolution": "bundler",
12 |     "allowImportingTsExtensions": true,
13 |     "verbatimModuleSyntax": true,
14 |     "moduleDetection": "force",
15 |     "noEmit": true,
16 | 
17 |     /* Linting */
18 |     "strict": true,
19 |     "noUnusedLocals": true,
20 |     "noUnusedParameters": true,
21 |     "erasableSyntaxOnly": true,
22 |     "noFallthroughCasesInSwitch": true,
23 |     "noUncheckedSideEffectImports": true
24 |   },
25 |   "include": ["vite.config.ts"]
26 | }
27 | 


--------------------------------------------------------------------------------
/backend/ReAct/__init__.py:
--------------------------------------------------------------------------------
 1 | # ReAct 核心模块
 2 | 
 3 | from .action_parser import parse_response
 4 | from .actions import ActionExecutor
 5 | from .base_agent import BaseAgent
 6 | from .chat_agent import ChatAgent
 7 | from .models import AgentResult, StreamCallback, ToolCallable, TraceStep
 8 | from .react_loop import ReactLoop
 9 | from .tool_manager import ToolManager
10 | from .utils import create_tool_wrapper, normalize_input
11 | 
# Public API of the package. Every name listed here must be bound in this
# module (imported above); a name that is not makes
# ``from backend.ReAct import *`` raise AttributeError.
__all__ = [
    "parse_response",
    "ActionExecutor",
    "BaseAgent",
    "ChatAgent",
    "AgentResult",
    "StreamCallback",
    "ToolCallable",
    "TraceStep",
    "ReactLoop",
    "ToolManager",
    "create_tool_wrapper",
    "normalize_input",
]
27 | 


--------------------------------------------------------------------------------
/backend/tests/test_download_vedio.py:
--------------------------------------------------------------------------------
 1 | import os
 2 | import sys
# Make sure the project root is on the import path so this script can be run
# directly from the tests directory (the top-level ``backend`` package becomes
# importable).
# __file__ = .../backend/tests/test_download_vedio.py
# Three dirname() calls climb up to the project root.
project_root = os.path.abspath(os.path.dirname(os.path.dirname(os.path.dirname(__file__))))
if project_root not in sys.path:
    sys.path.insert(0, project_root)

from backend.utils.vedio_utils.download_video.download_bilibili import download_bilibili



example_url = "https://www.bilibili.com/video/BV1pm8dzXEpA?spm_id_from=333.1007.tianma.2-3-6.click"  # example placeholder
# SESSDATA is read from the BILI_SESSDATA environment variable; empty string when unset.
sess = os.getenv("BILI_SESSDATA", "")
paths = download_bilibili(example_url, out_dir="downloads", playlist=False, sessdata=sess)
print("Saved files:")
for p in paths:
    print(p)


--------------------------------------------------------------------------------
/frontend/src/HomePage/docs/主页组件设计说明.md:
--------------------------------------------------------------------------------
 1 | # 主页组件设计说明
 2 | 
 3 | ## 概述
 4 | 
 5 | HomePage组件是HearSight应用的入口页面，提供整体布局框架和核心功能导航。它整合了文件上传、工作区管理和全局状态控制等功能。
 6 | 
 7 | ## 组件结构
 8 | 
 9 | ### HomePage.tsx
10 | 主页面组件，负责：
11 | - 整体页面布局
12 | - 核心功能模块协调
13 | - 全局状态管理
14 | - 路由初始化
15 | 
16 | ## 主要功能模块
17 | 
18 | ### 文件上传模块
19 | - 支持拖拽上传
20 | - 文件格式校验
21 | - 上传进度显示
22 | - 错误处理机制
23 | 
24 | ### 工作区管理
25 | - 当前工作项目展示
26 | - 工作区切换功能
27 | - 历史记录管理
28 | 
29 | ### 导航控制
30 | - 左右面板切换
31 | - 页面路由跳转
32 | - 用户操作引导
33 | 
34 | ## 设计原则
35 | 
36 | 1. **直观性**: 界面布局清晰，功能一目了然
37 | 2. **易用性**: 操作流程简洁，降低学习成本
38 | 3. **响应式**: 适配不同设备屏幕尺寸
39 | 4. **可扩展**: 便于后续功能模块添加
40 | 
41 | ## 数据流
42 | 
43 | - 通过上下文(Context)管理全局状态
44 | - 各子组件通过props接收数据和回调函数
45 | - 用户操作通过事件机制向上传递
46 | 
47 | ## 性能优化
48 | 
49 | - 组件懒加载
50 | - 虚拟滚动处理大量数据
51 | - 防抖节流优化频繁操作
52 | - 记忆化计算避免重复渲染


--------------------------------------------------------------------------------
/frontend/src/App.css:
--------------------------------------------------------------------------------
 1 | #root {
 2 |   max-width: 1280px;
 3 |   margin: 0 auto;
 4 |   padding: 2rem;
 5 |   text-align: center;
 6 | }
 7 | 
 8 | .logo {
 9 |   height: 6em;
10 |   padding: 1.5em;
11 |   will-change: filter;
12 |   transition: filter 300ms;
13 | }
14 | .logo:hover {
15 |   filter: drop-shadow(0 0 2em #646cffaa);
16 | }
17 | .logo.react:hover {
18 |   filter: drop-shadow(0 0 2em #61dafbaa);
19 | }
20 | 
21 | @keyframes logo-spin {
22 |   from {
23 |     transform: rotate(0deg);
24 |   }
25 |   to {
26 |     transform: rotate(360deg);
27 |   }
28 | }
29 | 
30 | @media (prefers-reduced-motion: no-preference) {
31 |   a:nth-of-type(2) .logo {
32 |     animation: logo-spin infinite 20s linear;
33 |   }
34 | }
35 | 
36 | .card {
37 |   padding: 2em;
38 | }
39 | 
40 | .read-the-docs {
41 |   color: #888;
42 | }
43 | 


--------------------------------------------------------------------------------
/frontend/src/components/ui/separator.tsx:
--------------------------------------------------------------------------------
 1 | import * as React from "react"
 2 | import * as SeparatorPrimitive from "@radix-ui/react-separator"
 3 | 
 4 | import { cn } from "@/lib/utils"
 5 | 
 6 | function Separator({
 7 |   className,
 8 |   orientation = "horizontal",
 9 |   decorative = true,
10 |   ...props
11 | }: React.ComponentProps<typeof SeparatorPrimitive.Root>) {
12 |   return (
13 |     <SeparatorPrimitive.Root
14 |       data-slot="separator"
15 |       decorative={decorative}
16 |       orientation={orientation}
17 |       className={cn(
18 |         "bg-border shrink-0 data-[orientation=horizontal]:h-px data-[orientation=horizontal]:w-full data-[orientation=vertical]:h-full data-[orientation=vertical]:w-px",
19 |         className
20 |       )}
21 |       {...props}
22 |     />
23 |   )
24 | }
25 | 
26 | export { Separator }
27 | 


--------------------------------------------------------------------------------
/frontend/src/components/ui/textarea.tsx:
--------------------------------------------------------------------------------
 1 | import * as React from "react"
 2 | 
 3 | import { cn } from "@/lib/utils"
 4 | 
 5 | function Textarea({ className, ...props }: React.ComponentProps<"textarea">) {
 6 |   return (
 7 |     <textarea
 8 |       data-slot="textarea"
 9 |       className={cn(
10 |         "border-input placeholder:text-muted-foreground focus-visible:border-ring focus-visible:ring-ring/50 aria-invalid:ring-destructive/20 dark:aria-invalid:ring-destructive/40 aria-invalid:border-destructive dark:bg-input/30 flex min-h-16 w-full rounded-md border bg-transparent px-3 py-2 text-base shadow-xs transition-[color,box-shadow] outline-none focus-visible:ring-[3px] disabled:cursor-not-allowed disabled:opacity-50 md:text-sm",
11 |         className
12 |       )}
13 |       {...props}
14 |     />
15 |   )
16 | }
17 | 
18 | export { Textarea }
19 | 


--------------------------------------------------------------------------------
/frontend/src/lib/docs/工具库设计说明.md:
--------------------------------------------------------------------------------
 1 | # 工具库设计说明
 2 | 
 3 | ## 概述
 4 | 
 5 | 工具库包含HearSight前端应用中的通用工具函数和辅助方法，提供常用的实用功能，减少重复代码编写。
 6 | 
 7 | ## 主要工具函数
 8 | 
 9 | ### cn (类名合并函数)
10 | 基于 clsx 和 tailwind-merge 实现的类名合并工具，用于处理条件类名和 Tailwind CSS 冲突。
11 | 
12 | 功能特点：
13 | - 支持多个类名参数传入
14 | - 自动处理类名冲突
15 | - 条件性类名应用
16 | - Tailwind CSS 类名优化
17 | 
18 | 使用示例：
19 | ```typescript
20 | import { cn } from "@/lib/utils"
21 | 
22 | // 基本使用
23 | const className = cn("text-red-500", "bg-blue-500")
24 | 
25 | // 条件使用
26 | const className = cn("text-red-500", isActive && "bg-blue-500")
27 | 
28 | // 类名冲突处理
29 | const className = cn("p-2 p-4") // 结果为 "p-4"
30 | ```
31 | 
32 | ## 设计原则
33 | 
34 | 1. **轻量级**: 工具函数尽可能简单小巧，避免引入大型依赖
35 | 2. **通用性**: 提供的功能应该具有广泛适用性
36 | 3. **类型安全**: 全面支持 TypeScript 类型检查
37 | 4. **无副作用**: 工具函数应该是纯函数，不产生副作用
38 | 
39 | ## 扩展性
40 | 
41 | 可以根据项目需求添加更多通用工具函数，建议按照功能类别组织，保持工具库的整洁和易维护性。


--------------------------------------------------------------------------------
/docs/测试示例.md:
--------------------------------------------------------------------------------
 1 | ## 问答使用的示例
 2 | 
 3 | 1. 介绍下Obsidian的地图插件和记忆插件。
 4 | 2. 介绍下视频提到的插件是什么。
 5 | 
 6 | 使用到的视频：
 7 | 
 8 | 1. <https://www.bilibili.com/video/BV19aUpBzEVH/?spm_id_from=333.1007.tianma.4-1-11.click>
 9 | 2. <https://www.bilibili.com/video/BV1yqsCzVEJ3/?spm_id_from=333.337.search-card.all.click>
10 | 
11 | ## youtube 下载使用示例
12 | 
13 | 1. <https://www.youtube.com/watch?v=A6ZgS0vGsl8>
14 | 2. <https://www.youtube.com/watch?v=bBC-nXj3Ng4>
15 | 
16 | ## 英文的视频
17 | 
18 | 1. <https://www.bilibili.com/video/BV1sm421W7vC?spm_id_from=333.788.videopod.sections&vd_source=325d9b8b91626b0afd2ef63a99caf970>
19 | 2. <https://www.bilibili.com/video/BV11c411B77U/?spm_id_from=333.337.search-card.all.click&vd_source=325d9b8b91626b0afd2ef63a99caf970>
20 | 
21 | ## xiaoyuzhou 音频下载使用示例
22 | 
23 | 1. <https://www.xiaoyuzhoufm.com/episode/6912fcf821e6d1bd34f78257?s=eyJ1IjogIjY2MTU0MjQ1ZWRjZTY3MTA0YTFiNTUxMiJ9>
24 | 


--------------------------------------------------------------------------------
/frontend/src/assets/docs/资源管理说明.md:
--------------------------------------------------------------------------------
 1 | # 静态资源管理说明
 2 | 
 3 | ## 概述
 4 | 
 5 | assets目录用于存放HearSight前端应用的静态资源文件，包括图标、图片、字体等非代码资源。
 6 | 
 7 | ## 资源类型
 8 | 
 9 | ### 图标资源
10 | - SVG图标文件
11 | - 应用Logo
12 | - 第三方图标
13 | 
14 | ### 图片资源
15 | - 装饰性图片
16 | - 示例图片
17 | - 背景图片
18 | 
19 | ### 其他资源
20 | - 字体文件
21 | - 音频文件
22 | - 视频文件
23 | 
24 | ## 使用方式
25 | 
26 | 在组件中引用静态资源：
27 | 
28 | ```typescript
29 | import ReactLogo from '@/assets/react.svg'
30 | 
31 | // 在JSX中使用
32 | <img src={ReactLogo} alt="React Logo" />
33 | ```
34 | 
35 | 或者通过CSS背景图片方式使用：
36 | 
37 | ```css
38 | .background {
39 |   background-image: url('@/assets/background.jpg');
40 | }
41 | ```
42 | 
43 | ## 最佳实践
44 | 
45 | 1. **命名规范**: 使用有意义的英文命名，避免使用特殊字符
46 | 2. **文件格式**: 优先使用现代Web格式（如WebP图片）
47 | 3. **文件大小**: 注意控制资源文件大小，必要时进行压缩优化
48 | 4. **版本管理**: 将重要资源纳入版本控制系统
49 | 
50 | ## 优化策略
51 | 
52 | - 使用适当的图片压缩工具减小文件体积
53 | - 对于大文件考虑CDN分发
54 | - 利用Vite内置的静态资源处理能力处理静态资源（本项目使用Vite构建，而非Webpack）
55 | - 实施懒加载策略提升初始加载速度


--------------------------------------------------------------------------------
/backend/tests/test_SenseVoiceSmall2.py:
--------------------------------------------------------------------------------
 1 | import json
 2 | import requests
 3 | from dotenv import load_dotenv
 4 | import os
 5 | import subprocess
 6 | 
 7 | load_dotenv()
 8 | 
 9 | 
10 | 
11 | token = os.getenv("OPENAI_API_KEY")
12 | 
13 | # 使用requests库发送POST请求到SiliconFlow API进行音频转录
14 | url = "https://api.siliconflow.cn/v1/audio/transcriptions"
15 | headers = {
16 |     "Authorization": f"Bearer {token}"
17 | }
18 | wav_path = r"C:\Users\ke\Documents\projects\python_projects\HearSight\tests\converted_audio.wav"
19 | 
20 | 
21 | files = {
22 |     "file": open(wav_path, "rb")
23 | }
24 | data = {
25 |     # "model": "FunAudioLLM/SenseVoiceSmall"
26 |     "model": "TeleAI/TeleSpeechASR"
27 | }
28 | 
29 | response = requests.post(url, headers=headers, files=files, data=data)
30 | 
31 | print(response.json())
32 | # 写入json文件里面
33 | with open("output.json", "w") as f:
34 |     json.dump(response.json(), f)


--------------------------------------------------------------------------------
/frontend/src/components/MarkdownRenderer.tsx:
--------------------------------------------------------------------------------
 1 | import { useMemo } from "react"
 2 | import MarkdownIt from "markdown-it"
 3 | 
 4 | interface MarkdownRendererProps {
 5 |   readonly children: string
 6 |   readonly className?: string
 7 | }
 8 | 
/**
 * Renders a Markdown string as HTML inside a `<div>`.
 *
 * The MarkdownIt instance is created once per component instance, and the
 * rendered HTML is recomputed only when `children` changes. An empty or
 * falsy `children` renders as an empty string.
 */
export default function MarkdownRenderer({ children, className }: Readonly<MarkdownRendererProps>) {
  const md = useMemo(() => {
    return new MarkdownIt({
      // NOTE(review): `html: true` lets raw HTML in the Markdown source pass
      // through to the output. Together with `dangerouslySetInnerHTML` below,
      // any markup contained in `children` is injected into the DOM. If
      // `children` can carry untrusted text (user input, model output,
      // transcripts), disable this option or sanitize the result -- confirm
      // against the callers.
      html: true,
      // Turn URL-like text into links.
      linkify: true,
      // Enable typographic replacements and smart quotes.
      typographer: true,
      // Convert single newlines inside paragraphs into <br>.
      breaks: true,
    })
  }, [])

  const htmlContent = useMemo(() => {
    return md.render(children || "")
  }, [md, children])

  // The rendered HTML string is inserted as-is; see the review note on `html: true`.
  return (
    <div
      className={className}
      dangerouslySetInnerHTML={{ __html: htmlContent }}
      style={{
        lineHeight: "1.6",
        wordBreak: "break-word",
      }}
    />
  )
}
34 | 


--------------------------------------------------------------------------------
/frontend/src/features/app/docs/App组件设计说明.md:
--------------------------------------------------------------------------------
 1 | # App组件设计说明
 2 | 
 3 | ## 概述
 4 | 
 5 | AppPage组件是HearSight应用的主要容器组件，负责整合整个应用的核心功能模块，包括上传对话框、主工作区布局以及各功能面板的协调管理。
 6 | 
 7 | ## 组件结构
 8 | 
 9 | ### AppPage.tsx
10 | 应用主容器组件，负责：
11 | - 整体应用布局管理
12 | - 核心功能模块协调
13 | - 全局状态初始化
14 | - 用户会话管理
15 | 
16 | ### 子组件
17 | 
18 | #### HeaderBar.tsx
19 | 应用顶部导航栏，提供：
20 | - 应用Logo展示
21 | - 全局操作按钮（如设置、帮助等）
22 | - 用户信息显示
23 | 
24 | #### UploadDialog.tsx
25 | 文件上传对话框，实现：
26 | - 文件选择和上传
27 | - 上传进度跟踪
28 | - 错误提示和重试机制
29 | 
30 | #### AppLayout.tsx (新)
31 | 工作区布局组件，负责：
32 | - 基于 shadcn/ui resizable 的拖拽面板布局
33 | - 面板尺寸调整和持久化
34 | - 响应式布局适配
35 | - 状态管理和动画效果
36 | 
37 | ## 设计原则
38 | 
39 | 1. **模块化**: 各功能模块独立封装，便于维护和扩展
40 | 2. **状态驱动**: 通过状态管理驱动UI变化
41 | 3. **用户体验**: 注重操作流畅性和界面友好性
42 | 4. **可维护性**: 代码结构清晰，注释完整
43 | 
44 | ## 数据流
45 | 
46 | - 使用React Context进行全局状态管理
47 | - 组件间通过props传递数据和回调函数
48 | - 异步操作通过自定义hooks处理
49 | 
50 | ## 扩展性
51 | 
52 | - 遵循组件化设计，易于添加新功能模块
53 | - 通过配置方式支持功能定制
54 | - 预留扩展点便于第三方集成


--------------------------------------------------------------------------------
/frontend/src/components/RightPanel/docs/组件设计说明.md:
--------------------------------------------------------------------------------
 1 | # 右侧面板组件设计说明
 2 | 
 3 | ## 概述
 4 | 
 5 | 右侧面板是HearSight应用的核心交互区域，提供多标签页式的功能展示，包括转录文本、摘要、聊天问答等功能。
 6 | 
 7 | ## 组件结构
 8 | 
 9 | ### RightPanel.tsx
10 | 
11 | 主容器组件，负责：
12 | 
13 | - 标签页切换逻辑
14 | - 状态管理（当前活跃标签）
15 | - 子组件渲染
16 | 
17 | ### 标签页组件
18 | 
19 | - **TranscriptTab.tsx**: 转录文本展示
20 | - **SummariesTab.tsx**: 摘要内容展示
21 | - **ChatView.tsx**: 聊天问答界面
22 | - **SegmentsTab.tsx**: 片段管理
23 | 
24 | ### 辅助组件
25 | 
26 | - **TabToolbar.tsx**: 标签页工具栏
27 | - **LanguageSwitcher.tsx**: 语言切换
28 | - **SearchDialog.tsx**: 搜索对话框
29 | - **TranslateDialog.tsx**: 翻译对话框
30 | 
31 | ## 设计原则
32 | 
33 | 1. **模块化**: 每个标签页独立组件，便于维护和扩展
34 | 2. **状态隔离**: 各组件状态独立，避免耦合
35 | 3. **响应式**: 支持不同屏幕尺寸的自适应布局
36 | 4. **用户友好**: 清晰的视觉层次和交互反馈
37 | 
38 | ## 数据流
39 | 
40 | - 父组件传递必要props（segments、transcriptId等）
41 | - 子组件通过回调函数与父组件通信
42 | - 使用React hooks管理局部状态
43 | 
44 | ## 扩展性
45 | 
46 | 预留接口支持新增标签页，遵循现有模式即可快速集成。
47 | 


--------------------------------------------------------------------------------
/backend/ReAct/models.py:
--------------------------------------------------------------------------------
 1 | """ReAct 核心数据模型定义"""
 2 | 
 3 | from dataclasses import dataclass
 4 | from typing import Any, Awaitable, Callable, Dict, List, Optional
 5 | 
 6 | # 类型别名
 7 | ToolCallable = Callable[[str], Awaitable[str]]
 8 | StreamCallback = Callable[[Dict[str, Any]], Awaitable[None]]
 9 | 
10 | 
@dataclass
class TraceStep:
    """One step of a ReAct reasoning trace.

    Attributes:
        step: Ordinal number of this step.
        thought: Content of the "Thought" part.
        action: Name of the action that was executed
            (e.g. ``generate_sql``, ``execute_query``); ``None`` by default.
        action_input: Input passed to the action; ``None`` by default.
        observation: Result of executing the action; ``None`` by default.
        raw_response: Complete output of the LLM; empty string by default.
    """

    step: int
    thought: str
    action: Optional[str] = None
    action_input: Optional[Dict[str, Any]] = None
    observation: Optional[str] = None
    raw_response: str = ""
23 | 
24 | 
@dataclass
class AgentResult:
    """Outcome of an agent run.

    Attributes:
        final_answer: The final answer.
        trace: List of reasoning steps.
        messages: Message history.
        error: Error information; ``None`` by default.
    """

    final_answer: str
    trace: List[TraceStep]
    messages: List[Dict[str, str]]
    error: Optional[str] = None
33 | 


--------------------------------------------------------------------------------
/frontend/tsconfig.app.json:
--------------------------------------------------------------------------------
 1 | {
 2 |   "compilerOptions": {
 3 |     "tsBuildInfoFile": "./node_modules/.tmp/tsconfig.app.tsbuildinfo",
 4 |     "target": "ES2022",
 5 |     "useDefineForClassFields": true,
 6 |     "lib": ["ES2022", "DOM", "DOM.Iterable"],
 7 |     "module": "ESNext",
 8 |     "types": ["vite/client"],
 9 |     "skipLibCheck": true,
10 | 
11 |     /* Bundler mode */
12 |     "moduleResolution": "bundler",
13 |     "allowImportingTsExtensions": true,
14 |     "verbatimModuleSyntax": true,
15 |     "moduleDetection": "force",
16 |     "noEmit": true,
17 |     "jsx": "react-jsx",
18 | 
19 |     /* Linting */
20 |     "strict": true,
21 |     "noUnusedLocals": true,
22 |     "noUnusedParameters": true,
23 |     "erasableSyntaxOnly": true,
24 |     "noFallthroughCasesInSwitch": true,
25 |     "noUncheckedSideEffectImports": true,
26 | 
27 |     /* Path Mapping */
28 |     "baseUrl": ".",
29 |     "paths": {
30 |       "@/*": ["./src/*"]
31 |     }
32 |   },
33 |   "include": ["src"]
34 | }
35 | 


--------------------------------------------------------------------------------
/frontend/src/components/RightPanel/hooks/useTranslationLanguages.ts:
--------------------------------------------------------------------------------
 1 | import { useEffect } from "react"
 2 | import type { Segment } from "@/types"
 3 | 
 4 | interface UseTranslationLanguagesParams {
 5 |   readonly segments: Segment[]
 6 |   readonly addLanguage: (code: string) => void
 7 | }
 8 | 
 9 | export const useTranslationLanguages = ({ segments, addLanguage }: UseTranslationLanguagesParams) => {
10 |   useEffect(() => {
11 |     if (!segments.length) {
12 |       return
13 |     }
14 |     const collected = new Set<string>()
15 |     segments.forEach((segment) => {
16 |       const { translation } = segment
17 |       if (!translation || typeof translation !== "object") {
18 |         return
19 |       }
20 |       Object.entries(translation).forEach(([language, text]) => {
21 |         if (typeof text === "string" && text.trim() && language.trim()) {
22 |           collected.add(language)
23 |         }
24 |       })
25 |     })
26 |     collected.forEach(addLanguage)
27 |   }, [segments, addLanguage])
28 | }
29 | 


--------------------------------------------------------------------------------
/ASRBackend/Dockerfile.cloud:
--------------------------------------------------------------------------------
 1 | # 云端版本 Dockerfile
 2 | # 轻量级，仅支持阿里云 DashScope API
 3 | # 构建大小：约 400MB（非常轻量）
 4 | 
 5 | FROM python:3.10-slim
 6 | 
 7 | WORKDIR /app
 8 | 
 9 | # 设置运行模式为云端
10 | ENV ASR_MODE=cloud
11 | ENV PYTHONPATH=/app
12 | 
13 | # 配置 pip 使用阿里云源
14 | RUN pip config set global.index-url https://mirrors.aliyun.com/pypi/simple && \
15 |     pip config set global.trusted-host mirrors.aliyun.com
16 | 
17 | # 复制项目文件
18 | COPY . .
19 | 
20 | # 安装系统依赖
21 | RUN apt-get update && apt-get install -y --no-install-recommends \
22 |     curl \
23 |     ffmpeg \
24 |     ca-certificates \
25 |     && rm -rf /var/lib/apt/lists/*
26 | 
27 | # 安装 Python 依赖
28 | RUN pip install -r requirements-cloud.txt
29 | 
30 | # 创建必要的目录
31 | RUN mkdir -p cache
32 | 
33 | # 暴露端口
34 | EXPOSE 8003
35 | 
36 | # 健康检查
37 | HEALTHCHECK --interval=30s --timeout=10s --start-period=40s --retries=3 \
38 |     CMD curl -f http://localhost:8003/health || exit 1
39 | 
40 | # 启动应用
41 | CMD ["uvicorn", "main:app", "--host", "0.0.0.0", "--port", "8003"]
42 | 


--------------------------------------------------------------------------------
/frontend/src/features/app/components/UploadDialog.tsx:
--------------------------------------------------------------------------------
 1 | import { Dialog, DialogContent, DialogHeader, DialogTitle } from "@/components/ui/dialog"
 2 | import FileUploader from "@/components/FileUploader"
 3 | 
 4 | interface UploadDialogProps {
 5 |   readonly open: boolean
 6 |   readonly onOpenChange: (open: boolean) => void
 7 |   readonly onUploadSuccess: (data: { static_url: string; is_audio: boolean; placeholder_url?: string }) => void
 8 |   readonly onUploadError: (message: string) => void
 9 | }
10 | 
/**
 * Modal dialog that hosts the file uploader.
 *
 * Visibility is fully controlled by the parent through `open` and
 * `onOpenChange`; the upload success and error callbacks are handed
 * straight to `FileUploader`.
 */
function UploadDialog(props: UploadDialogProps) {
  const { open, onOpenChange, onUploadSuccess, onUploadError } = props

  const uploader = <FileUploader onUploadSuccess={onUploadSuccess} onUploadError={onUploadError} />

  return (
    <Dialog open={open} onOpenChange={onOpenChange}>
      <DialogContent className="sm:max-w-[500px]">
        <DialogHeader>
          <DialogTitle>上传视频或音频文件</DialogTitle>
        </DialogHeader>
        {uploader}
      </DialogContent>
    </Dialog>
  )
}
23 | 
24 | export default UploadDialog
25 | 


--------------------------------------------------------------------------------
/frontend/eslint.config.js:
--------------------------------------------------------------------------------
 1 | import js from '@eslint/js'
 2 | import globals from 'globals'
 3 | import reactHooks from 'eslint-plugin-react-hooks'
 4 | import reactRefresh from 'eslint-plugin-react-refresh'
 5 | import jsxA11y from 'eslint-plugin-jsx-a11y'
 6 | import tseslint from 'typescript-eslint'
 7 | import { defineConfig, globalIgnores } from 'eslint/config'
 8 | 
 9 | export default defineConfig([
10 |   globalIgnores(['dist']),
11 |   {
12 |     files: ['**/*.{ts,tsx}'],
13 |     extends: [
14 |       js.configs.recommended,
15 |       tseslint.configs.recommended,
16 |       reactHooks.configs['recommended-latest'],
17 |       reactRefresh.configs.vite,
18 |     ],
19 |     languageOptions: {
20 |       ecmaVersion: 2020,
21 |       globals: globals.browser,
22 |     },
23 |     plugins: {
24 |       'jsx-a11y': jsxA11y,
25 |     },
26 |     rules: {
27 |       'react-refresh/only-export-components': 'off',
28 |       'jsx-a11y/click-events-have-key-events': 'off',
29 |       'jsx-a11y/no-static-element-interactions': 'off',
30 |     },
31 |   },
32 | ])
33 | 


--------------------------------------------------------------------------------
/frontend/src/components/ui/input.tsx:
--------------------------------------------------------------------------------
 1 | import * as React from "react"
 2 | 
 3 | import { cn } from "@/lib/utils"
 4 | 
 5 | function Input({ className, type, ...props }: React.ComponentProps<"input">) {
 6 |   return (
 7 |     <input
 8 |       type={type}
 9 |       data-slot="input"
10 |       className={cn(
11 |         "file:text-foreground placeholder:text-muted-foreground selection:bg-primary selection:text-primary-foreground dark:bg-input/30 border-input h-9 w-full min-w-0 rounded-md border bg-transparent px-3 py-1 text-base shadow-xs transition-[color,box-shadow] outline-none file:inline-flex file:h-7 file:border-0 file:bg-transparent file:text-sm file:font-medium disabled:pointer-events-none disabled:cursor-not-allowed disabled:opacity-50 md:text-sm",
12 |         "focus-visible:border-ring focus-visible:ring-ring/50 focus-visible:ring-[3px]",
13 |         "aria-invalid:ring-destructive/20 dark:aria-invalid:ring-destructive/40 aria-invalid:border-destructive",
14 |         className
15 |       )}
16 |       {...props}
17 |     />
18 |   )
19 | }
20 | 
21 | export { Input }
22 | 


--------------------------------------------------------------------------------
/frontend/vite.config.ts:
--------------------------------------------------------------------------------
 1 | import path from "path"
 2 | import tailwindcss from "@tailwindcss/vite"
 3 | import react from "@vitejs/plugin-react"
 4 | import { defineConfig } from "vite"
 5 | 
// The backend host/port can be configured through environment variables, to
// cover both local runs (localhost) and docker runs (service name).
// NOTE(review): Boolean() of any non-empty string is true, so USE_DOCKER=false
// or DOCKER=0 still selects the docker host -- confirm this is intended.
const isDocker = Boolean(process.env.USE_DOCKER || process.env.DOCKER)
// An explicit BACKEND_HOST wins; otherwise "backend" in docker, "localhost" locally.
const backendHost = process.env.BACKEND_HOST || (isDocker ? "backend" : "localhost")
// Falls back to 9999 when BACKEND_PORT is unset or empty; a non-numeric value yields NaN.
const backendPort = Number(process.env.BACKEND_PORT || 9999)
// Proxy target used for the /api and /static routes below.
const backendTarget = `http://${backendHost}:${backendPort}`
11 | 
12 | export default defineConfig({
13 |   plugins: [
14 |     react({
15 |       babel: {
16 |         plugins: [["babel-plugin-react-compiler"]],
17 |       },
18 |     }),
19 |     tailwindcss(),
20 |   ],
21 |   resolve: {
22 |     alias: {
23 |       "@": path.resolve(__dirname, "./src"),
24 |     },
25 |   },
26 |   server: {
27 |     proxy: {
28 |       "/api": {
29 |         target: backendTarget,
30 |         changeOrigin: true,
31 |       },
32 |       "/static": {
33 |         target: backendTarget,
34 |         changeOrigin: true,
35 |       },
36 |     },
37 |   },
38 | })
39 | 


--------------------------------------------------------------------------------
/backend/queues/tasks/progress_utils.py:
--------------------------------------------------------------------------------
 1 | # -*- coding: utf-8 -*-
 2 | """进度更新工具函数"""
 3 | 
 4 | import json
 5 | from typing import Dict, Any
 6 | 
 7 | 
 8 | def update_task_progress(set_task_progress_func, redis_client, job_id: int, progress_info: Dict[str, Any]) -> bool:
 9 |     """统一的进度更新函数，包含fallback逻辑"""
10 |     ok = set_task_progress_func(
11 |         job_id,
12 |         progress_info,
13 |     )
14 |     if not ok:
15 |         try:
16 |             redis_client.setex(f"task_progress:{job_id}", 86400, json.dumps(progress_info, ensure_ascii=False))
17 |         except Exception:
18 |             pass
19 |     return ok
20 | 
21 | 
def create_progress_info(job_id: int, status: str, stage: str, progress_percent: int,
                        filename: str = "", message: str = "", **kwargs) -> Dict[str, Any]:
    """Build the progress-information dictionary for a job.

    The result holds the keys ``status``, ``stage``, ``progress_percent``,
    ``filename``, ``message`` and ``job_id`` (in that order), followed by any
    extra keyword arguments.
    """
    info: Dict[str, Any] = dict(
        status=status,
        stage=stage,
        progress_percent=progress_percent,
        filename=filename,
        message=message,
        job_id=job_id,
    )
    # Extra fields supplied by the caller are appended after the fixed keys.
    info.update(kwargs)
    return info


--------------------------------------------------------------------------------
/frontend/src/components/ui/docs/组件设计说明.md:
--------------------------------------------------------------------------------
 1 | # UI 组件库设计说明
 2 | 
 3 | ## 概述
 4 | 
 5 | UI 组件库基于 Radix UI 和 Tailwind CSS 构建，提供了一套可复用的基础 UI 组件。这些组件遵循统一的设计语言，确保整个应用的视觉一致性和用户体验。
 6 | 
 7 | ## 组件列表
 8 | 
 9 | ### Button (按钮)
10 | 提供多种样式和尺寸的按钮组件，支持默认、破坏性、轮廓、次要、幽灵和链接样式。
11 | 
12 | ### Card (卡片)
13 | 用于内容分组展示的容器组件，支持标题、描述和内容区域。
14 | 
15 | ### Dialog (对话框)
16 | 模态对话框组件，用于重要信息提示或用户确认操作。
17 | 
18 | ### DropdownMenu (下拉菜单)
19 | 下拉菜单组件，用于展示一组相关操作选项。
20 | 
21 | ### Form (表单)
22 | 表单相关组件集合，包括字段、标签、控件和验证错误提示。
23 | 
24 | ### Input (输入框)
25 | 文本输入组件，支持各种输入类型和状态样式。
26 | 
27 | ### ScrollArea (滚动区域)
28 | 自定义滚动条容器组件，用于内容超出可视区域时的展示。
29 | 
30 | ### Select (选择器)
31 | 下拉选择组件，用于从预定义选项中选择值。
32 | 
33 | ### Separator (分隔符)
34 | 视觉分隔线组件，用于内容区域的分割。
35 | 
36 | ### Switch (开关)
37 | 布尔值切换组件，用于开启或关闭某项功能。
38 | 
39 | ### Tabs (标签页)
40 | 标签页切换组件，用于组织和切换相关内容视图。
41 | 
42 | ## 设计原则
43 | 
44 | 1. **一致性**: 所有组件遵循统一的设计规范和交互模式
45 | 2. **可访问性**: 支持键盘导航和屏幕阅读器，符合 WCAG 标准
46 | 3. **灵活性**: 组件支持多种变体和自定义属性
47 | 4. **性能**: 轻量级实现，最小化重渲染和 DOM 操作
48 | 
49 | ## 使用方式
50 | 
51 | 所有组件均通过 props 进行配置，支持 className 属性进行样式覆盖。组件使用 Tailwind CSS 类名系统，可以方便地与应用的样式体系集成。
52 | 
53 | ## 扩展性
54 | 
55 | 可以通过修改组件变体定义或创建新的变体来扩展组件样式，同时保持与其他组件的一致性。


--------------------------------------------------------------------------------
/backend/db/transcript_crud.py:
--------------------------------------------------------------------------------
 1 | # -*- coding: utf-8 -*-
 2 | """转写记录 CRUD 操作模块"""
 3 | 
 4 | # 为保持向后兼容性，从各个子模块导入所有函数
 5 | from .transcript_base_crud import (
 6 |     save_transcript,
 7 |     get_transcript_by_id,
 8 |     update_transcript,
 9 |     update_transcript_audio_path,
10 |     delete_transcript,
11 |     get_all_transcript_ids
12 | )
13 | 
14 | from .transcript_summary_crud import (
15 |     save_summaries,
16 |     get_summaries
17 | )
18 | 
19 | from .transcript_translation_crud import (
20 |     save_translations,
21 |     get_translations
22 | )
23 | 
24 | from .chat_message_crud import (
25 |     save_chat_messages,
26 |     get_chat_messages,
27 |     clear_chat_messages
28 | )
29 | 
30 | __all__ = [
31 |     "save_transcript",
32 |     "get_transcript_by_id",
33 |     "update_transcript",
34 |     "update_transcript_audio_path",
35 |     "delete_transcript",
36 |     "get_all_transcript_ids",
37 |     "save_summaries",
38 |     "get_summaries",
39 |     "save_translations",
40 |     "get_translations",
41 |     "save_chat_messages",
42 |     "get_chat_messages",
43 |     "clear_chat_messages"
44 | ]
45 | 


--------------------------------------------------------------------------------
/.dockerignore:
--------------------------------------------------------------------------------
 1 | # Docker build context ignore file
 2 | # Exclude large runtime data, caches and sensitive files from build context.
 3 | 
 4 | # Runtime data / large media / model caches
 5 | app_datas/
 6 | app_datas/download_videos/
 7 | app_datas/*.mp4
 8 | app_datas/*.m4a
 9 | app_datas/*.temp.mp4
10 | model_cache/
11 | torch_cache/
12 | transformers_cache/
13 | 
14 | # Databases and cache files
15 | *.sqlite3
16 | backend/cache/cache.db
17 | 
18 | # Environment / secrets
19 | .env
20 | .env.*
21 | 
22 | # Node / Python dependencies and build artifacts
23 | node_modules/
24 | frontend/node_modules/
25 | dist/
26 | build/
27 | frontend/dist/
28 | 
29 | # Virtualenvs and python cache
30 | .venv/
31 | venv/
32 | env/
33 | __pycache__/
34 | *.pyc
35 | *.pyo
36 | 
37 | # Logs, OS and git metadata
38 | *.log
39 | .DS_Store
40 | Thumbs.db
41 | .git
42 | .gitignore
43 | .github
44 | 
45 | # Test outputs and temporary results
46 | results/
47 | backend/tests/results/
48 | tests/
49 | 
50 | # Note: keep lockfiles (package-lock.json / requirements.txt) and Dockerfile in context
51 | # so builds are reproducible. Do NOT add them to this file.


--------------------------------------------------------------------------------
/docs/mermaid图汇集/ReAct设计文档-类层次结构图.md:
--------------------------------------------------------------------------------
 1 | # ReAct设计文档 - 类层次结构图
 2 | 
 3 | ```mermaid
 4 | classDiagram
 5 |     class BaseAgent {
 6 |         +tools_backend_url
 7 |         +llm_router
 8 |         +llm_model
 9 |         +tool_manager
10 |         +react_loop
11 |         +generate_answer()
12 |         +stream_answer()
13 |     }
14 |     
15 |     class ChatAgent {
16 |         +memory_manager
17 |         +generate_answer()
18 |     }
19 |     
20 |     class ReactLoop {
21 |         +llm_router
22 |         +llm_model
23 |         +tool_manager
24 |         +prompt_builder
25 |         +action_executor
26 |         +run()
27 |     }
28 |     
29 |     class ToolManager {
30 |         +tools_url
31 |         +config_path
32 |         +list_available_tools()
33 |         +get_available_tools()
34 |         +generate_tool_descriptions()
35 |     }
36 |     
37 |     class ActionExecutor {
38 |         +llm_router
39 |         +llm_model
40 |         +execute_action()
41 |     }
42 |     
43 |     BaseAgent <|-- ChatAgent
44 |     BaseAgent --> ReactLoop
45 |     ReactLoop --> ToolManager
46 |     ReactLoop --> ActionExecutor
47 | ```
48 | 


--------------------------------------------------------------------------------
/docs/api_文档导航.md:
--------------------------------------------------------------------------------
 1 | # API 文档导航
 2 | 
 3 | 本项目包含两个主要后端的 API 文档：
 4 | 
 5 | ## Backend API 文档
 6 | 
 7 | HearSight 主后端 API，提供媒体处理、转写、总结、翻译和聊天等功能。
 8 | 
 9 | - **文档位置**: [backend/docs/api.md](../backend/docs/api.md)
10 | - **服务地址**: `http://localhost:9999`
11 | - **交互式文档**: 启动后端服务后访问 `http://localhost:9999/docs`
12 | 
13 | 主要功能模块：
14 | - 媒体下载：从哔哩哔哩等平台下载视频
15 | - 文件上传：上传本地音视频文件
16 | - 转写管理：管理ASR转写结果
17 | - 翻译服务：将转写内容翻译为多种语言
18 | - 摘要生成：基于内容生成结构化摘要
19 | - 聊天交互：基于转写内容进行问答
20 | 
21 | ## ASR Backend API 文档
22 | 
23 | HearSight 语音识别后端 API，提供语音转文本功能。
24 | 
25 | - **文档位置**: [ASRBackend/docs/api.md](../ASRBackend/docs/api.md)
26 | - **服务地址**: `http://localhost:8003`
27 | - **交互式文档**: 启动 ASR 后端服务后访问 `http://localhost:8003/docs`
28 | 
29 | 主要功能模块：
30 | - 音频转文字：将音频文件转换为文本
31 | - URL转录：直接从URL获取音频并转录
32 | - 文件上传转录：上传本地音频文件进行转录
33 | 
34 | ## 快速开始
35 | 
36 | 1. 启动后端服务：`python main.py` (backend/)
37 | 2. 启动 ASR 后端服务：`python main.py` (ASRBackend/)
38 | 3. 访问交互式 API 文档查看详细接口信息
39 | 
40 | ## 相关文档
41 | 
42 | - [Backend 数据库设计](../backend/docs/database_schema.md)
43 | - [Backend 聊天系统设计](../backend/docs/chat_system_design.md)
44 | - [ASR 服务设计](../ASRBackend/docs/ASR_服务设计文档.md)


--------------------------------------------------------------------------------
/backend/tests/test_paraformer_a2t.py:
--------------------------------------------------------------------------------
 1 | import os
 2 | import sys
 3 | import json
 4 | 
# Add the project's backend directory (the parent of tests/) to the import path
# so this script can be run directly from the tests directory.
sys.path.append(os.path.dirname(os.path.dirname(__file__)))

# This import relies on the sys.path adjustment above, so it must stay below it.
from audio2text.paraformer_a2t import paraformer_audio_to_text

# Same data path as the existing tests (change it to your own absolute path as needed).
audio_path = r"C:\Users\ke\Documents\projects\python_projects\HearSight\backend\tests\datas\大语言模型进化论：从“听懂指令”到“学会思考”，AI如何与人类对齐？.m4a"

# Transcribe directly, obtaining the text, segment-level timestamps and a per-second aggregation.
res = paraformer_audio_to_text(audio_path, return_segments=True)

if isinstance(res, tuple) and len(res) == 3:
    text, segments, per_second = res
else:
    # Fallback: older versions return only the text.
    text, segments, per_second = str(res), [], {}

print(text)

# Save the parsed results if needed.
output_dir = "results"
os.makedirs(output_dir, exist_ok=True)

with open(os.path.join(output_dir, 'test_paraformer_a2t_segments.json'), 'w', encoding='utf-8') as f:
    json.dump(segments, f, ensure_ascii=False, indent=2)

with open(os.path.join(output_dir, 'test_paraformer_a2t_per_second.json'), 'w', encoding='utf-8') as f:
    json.dump(per_second, f, ensure_ascii=False, indent=2)
33 | 


--------------------------------------------------------------------------------
/frontend/src/components/RightPanel/LanguageSwitcher.tsx:
--------------------------------------------------------------------------------
 1 | import {
 2 |   Select,
 3 |   SelectContent,
 4 |   SelectItem,
 5 |   SelectTrigger,
 6 |   SelectValue,
 7 | } from "@/components/ui/select"
 8 | 
 9 | interface LanguageSwitcherProps {
10 |   displayLanguage: string
11 |   availableLanguages: string[]
12 |   onLanguageChange: (language: string) => void
13 |   getLanguageName: (code: string) => string
14 | }
15 | 
/**
 * Dropdown for choosing the display language.
 *
 * Blank or empty entries in `availableLanguages` are dropped before
 * rendering. Each remaining code becomes one option, labelled through
 * `getLanguageName`. The selected value is controlled by `displayLanguage`,
 * and changes are reported through `onLanguageChange`.
 */
export default function LanguageSwitcher(props: Readonly<LanguageSwitcherProps>) {
  const { displayLanguage, availableLanguages, onLanguageChange, getLanguageName } = props

  const options = availableLanguages
    .filter((code) => Boolean(code) && code.trim() !== "")
    .map((code) => (
      <SelectItem key={code} value={code}>
        {getLanguageName(code)}
      </SelectItem>
    ))

  return (
    <Select value={displayLanguage} onValueChange={onLanguageChange}>
      <SelectTrigger className="w-28 h-9">
        <SelectValue />
      </SelectTrigger>
      <SelectContent>{options}</SelectContent>
    </Select>
  )
}
39 | 


--------------------------------------------------------------------------------
/example_tests/音频转视频测试.html:
--------------------------------------------------------------------------------
 1 | <!DOCTYPE html>
 2 | <html lang="zh-CN">
 3 | <head>
 4 |     <meta charset="UTF-8">
 5 |     <meta name="viewport" content="width=device-width, initial-scale=1.0">
 6 |     <title>音频播放测试</title>
 7 |     <style>
 8 |         body {
 9 |             font-family: Arial, sans-serif;
10 |             display: flex;
11 |             justify-content: center;
12 |             align-items: center;
13 |             height: 100vh;
14 |             margin: 0;
15 |             background-color: #f0f0f0;
16 |         }
17 |         .container {
18 |             text-align: center;
19 |         }
20 |         video {
21 |             width: 640px;
22 |             height: 360px;
23 |             border: 1px solid #ccc;
24 |         }
25 |     </style>
26 | </head>
27 | <body>
28 |     <div class="container">
29 |         <h1>音频播放测试 - 类视频效果</h1>
30 |         <p>使用图片作为背景，让音频看起来像视频。</p>
31 |         <video controls poster="https://via.placeholder.com/640x360?text=Audio+Background">
32 |             <source src="https://www.voiptroubleshooter.com/open_speech/american/OSR_us_000_0010_8k.wav" type="audio/wav">
33 |             您的浏览器不支持音频播放。
34 |         </video>
35 |     </div>
36 | </body>
37 | </html>
38 | 


--------------------------------------------------------------------------------
/backend/ReAct/fastmcp_client_example.py:
--------------------------------------------------------------------------------
 1 | """FastMCP 客户端示例
 2 | 
 3 | 测试连接到本地 MCP 工具服务器（HTTP 模式）。
 4 | """
 5 | 
 6 | import asyncio
 7 | from fastmcp import Client
 8 | 
 9 | 
async def test_mcp_client():
    """Connect to the local MCP tool server and exercise one tool call.

    Opens a client session against the HTTP endpoint, prints the names of
    the tools the server reports, then calls the ``calculator`` tool and
    prints the result object, its ``data`` attribute and that attribute's
    type. Any exception raised while connecting or calling is printed
    together with its traceback instead of propagating.
    """

    print("测试 MCP 客户端连接...")

    # Client for the locally running MCP server (HTTP mode).
    mcp = Client("http://localhost:8001/mcp")

    try:
        async with mcp:
            print("✓ 连接到 MCP 服务器成功")

            # Ask the server which tools it exposes.
            available = await mcp.list_tools()
            tool_names = [tool.name for tool in available]
            print(f"✓ 可用工具: {tool_names}")

            # Invoke the calculator tool with two float operands.
            print("调用计算器工具...")
            outcome = await mcp.call_tool("calculator", {"a": 2.0, "b": 3.0})
            payload = outcome.data
            print(f"✓ 计算器结果对象: {outcome}")
            print(f"✓ 实际结果: {payload}")
            print(f"✓ 结果类型: {type(payload)}")

    except Exception as exc:
        print(f"✗ 连接或调用失败: {exc}")
        import traceback
        traceback.print_exc()
37 | 
38 | 
# Script entry point. The guard must appear exactly once: the original file
# repeated it, which ran the whole client example twice per invocation.
if __name__ == "__main__":
    asyncio.run(test_mcp_client())
45 | 


--------------------------------------------------------------------------------
/frontend/src/components/RightPanel/hooks/useSearchHandlers.ts:
--------------------------------------------------------------------------------
 1 | import { useCallback } from 'react'
 2 | import type { Segment } from '../../../types'
 3 | 
 4 | export const useSearchHandlers = (segments: Segment[]) => {
 5 |   const performSearch = useCallback(
 6 |     (searchTerm: string, displayLanguage: string = 'original') => {
 7 |       if (!searchTerm.trim()) {
 8 |         return []
 9 |       }
10 |       const term = searchTerm.toLowerCase()
11 | 
12 |       return segments.filter((seg) => {
13 |         let contentToSearch = ''
14 | 
15 |         if (displayLanguage === 'original') {
16 |           contentToSearch = seg.sentence || ''
17 |         } else {
18 |           // 检查translation字段（对象格式）
19 |           if (seg.translation && typeof seg.translation === 'object' && seg.translation[displayLanguage]) {
20 |             contentToSearch = seg.translation[displayLanguage] || ''
21 |           }
22 |           // 如果没有找到翻译内容，回退到原文
23 |           else {
24 |             contentToSearch = seg.sentence || ''
25 |           }
26 |         }
27 | 
28 |         return contentToSearch.toLowerCase().includes(term)
29 |       })
30 |     },
31 |     [segments]
32 |   )
33 | 
34 |   return { performSearch }
35 | }
36 | 


--------------------------------------------------------------------------------
/frontend/src/services/transcriptService.ts:
--------------------------------------------------------------------------------
 1 | import type { 
 2 |   TranscriptsResponse, 
 3 |   TranscriptDetailResponse,
 4 |   Segment
 5 | } from '../types'
 6 | 
 7 | export const fetchTranscripts = async (limit = 50, offset = 0): Promise<TranscriptsResponse> => {
 8 |   const response = await fetch(`/api/transcripts?limit=${limit}&offset=${offset}`)
 9 |   
10 |   if (!response.ok) {
11 |     throw new Error(`获取列表失败：${response.status}`)
12 |   }
13 |   
14 |   return response.json()
15 | }
16 | 
17 | export const fetchTranscriptDetail = async (id: number): Promise<TranscriptDetailResponse> => {
18 |   const response = await fetch(`/api/transcripts/${id}`)
19 |   
20 |   if (!response.ok) {
21 |     throw new Error(`获取详情失败：${response.status}`)
22 |   }
23 |   
24 |   return response.json()
25 | }
26 | 
27 | export const deleteTranscriptComplete = async (transcriptId: number): Promise<{ success: boolean; message: string }> => {
28 |   const response = await fetch(`/api/transcripts/${transcriptId}`, {
29 |     method: 'DELETE',
30 |     headers: {
31 |       'Content-Type': 'application/json'
32 |     }
33 |   })
34 |   
35 |   if (!response.ok) {
36 |     throw new Error(`删除失败：${response.status} - ${response.statusText}`)
37 |   }
38 |   
39 |   return response.json()
40 | }


--------------------------------------------------------------------------------
/backend/media_processing/docs/downloader_factory_design.md:
--------------------------------------------------------------------------------
 1 | # 媒体下载器工厂设计文档
 2 | 
 3 | ## 设计思路
 4 | 
 5 | 媒体下载器工厂采用工厂模式实现，支持根据URL自动识别媒体源类型，并返回对应的下载器实例。该设计旨在提供统一的下载接口，简化不同媒体平台的下载逻辑，同时通过缓存机制提高性能。
 6 | 
 7 | ## 架构概述
 8 | 
 9 | 工厂类 `MediaDownloaderFactory` 作为核心组件，负责媒体源识别、下载器实例管理和统一下载接口。抽象基类 `MediaDownloaderBase` 定义了下载器的标准接口，确保各具体下载器的一致性。
10 | 
11 | ## 支持的媒体源
12 | 
13 | - Bilibili（哔哩哔哩）
14 | - YouTube
15 | - Xiaoyuzhou（小宇宙播客）
16 | 
17 | ## 关键组件
18 | 
19 | ### MediaDownloaderBase
20 | 
21 | 抽象基类，定义 `download` 方法作为下载接口。
22 | 
23 | ### MediaDownloaderFactory
24 | 
25 | 工厂类，包含以下功能：
26 | 
27 | - URL模式匹配识别媒体源
28 | - 下载器实例缓存
29 | - 统一下载接口
30 | - 特定媒体源的便捷下载方法
31 | 
32 | ## 数据流
33 | 
34 | ```mermaid
35 | graph TD
36 |     A[用户提供URL] --> B[MediaDownloaderFactory.download]
37 |     B --> C[_get_source_type: 匹配URL模式]
38 |     C --> D{识别成功?}
39 |     D -->|是| E[_get_downloader: 获取下载器实例]
40 |     D -->|否| F[返回错误结果]
41 |     E --> G[调用具体下载器方法]
42 |     G --> H[返回DownloadResult]
43 | ```
44 | 
45 | ## 与其他模块关系
46 | 
47 | - 依赖 `backend.common_interfaces.DownloadResult` 作为下载结果类型
48 | - 使用具体下载器模块：
49 |   - `audio.download.xiaoyuzhou.xiaoyuzhou_downloader`
50 |   - `video.download.bilibili.bilibili_downloader`
51 |   - `video.download.youtube.youtube_downloader`
52 | - 集成到媒体处理流程中，提供下载服务
53 | 


--------------------------------------------------------------------------------
/backend/.env.example:
--------------------------------------------------------------------------------
 1 | # Example environment variables for HearSight
 2 | 
 3 | # Postgres - change this password for production
 4 | POSTGRES_USER=hearsight
 5 | POSTGRES_PASSWORD=hearsight_pass
 6 | POSTGRES_DB=hearsight
 7 | POSTGRES_PORT=5432
 8 | POSTGRES_HOST=localhost
 9 | 
10 | # Backend / Frontend ports (optional)
11 | BACKEND_PORT=9999
12 | FRONTEND_PORT=10000
13 | 
14 | # LLM 配置
15 | LLM_PROVIDER=openai
16 | LLM_MODEL=deepseek-ai/DeepSeek-V3.2-Exp
17 | LLM_PROVIDER_BASE_URL=https://api.siliconflow.cn/v1
18 | LLM_PROVIDER_API_KEY= # 必须要配置
19 | LLM_CONTEXT_LENGTH=100000
20 | LLM_TPM=80000
21 | LLM_RPM=1000
22 | 
23 | # Embedding 配置
24 | EMBEDDING_PROVIDER=openai
25 | EMBEDDING_PROVIDER_BASE_URL=https://api.siliconflow.cn/v1
26 | EMBEDDING_MODEL=BAAI/bge-m3
27 | EMBEDDING_CONTEXT_LENGTH=8192
28 | EMBEDDING_DIM=1024
29 | EMBEDDING_TPM=500000
30 | EMBEDDING_RPM=2000
31 | 
32 | # ASR Backend Service
33 | ASR_BACKEND_URL=http://localhost:8003
34 | ASR_MODE= # 'local' 或 'cloud'，留空表示自动检测
35 | 
36 | # Downloads directory
37 | DOWNLOADS_DIR= # 默认在 app_datas/download_videos
38 | 
39 | # Celery 配置
40 | CELERY_BROKER_URL=redis://localhost:6379/0
41 | CELERY_RESULT_BACKEND=redis://localhost:6379/1
42 | CELERY_TASK_TIME_LIMIT=3600
43 | CELERY_TASK_SOFT_TIME_LIMIT=3300
44 | CELERY_WORKER_CONCURRENCY=4
45 | CELERY_LOG_LEVEL=info
46 | 


--------------------------------------------------------------------------------
/frontend/src/components/LeftPanel/hooks.tsx:
--------------------------------------------------------------------------------
 1 | import { Loader2, CheckCircle2, XCircle, Clock } from 'lucide-react'
 2 | 
 3 | export const useStatusHelpers = () => {
 4 |   const getStatusIcon = (status: string) => {
 5 |     switch (status) {
 6 |       case 'downloading':
 7 |         return <Loader2 className="h-3 w-3 animate-spin text-blue-600" />
 8 |       case 'processing':
 9 |         return <Loader2 className="h-3 w-3 animate-spin text-purple-600" />
10 |       case 'success':
11 |         return <CheckCircle2 className="h-3 w-3 text-green-600" />
12 |       case 'failed':
13 |         return <XCircle className="h-3 w-3 text-red-600" />
14 |       default:
15 |         return <Clock className="h-3 w-3 text-slate-400" />
16 |     }
17 |   }
18 | 
19 |   const getStatusColor = (status: string) => {
20 |     switch (status) {
21 |       case 'downloading':
22 |         return 'bg-blue-100 text-blue-700'
23 |       case 'processing':
24 |         return 'bg-purple-100 text-purple-700'
25 |       case 'success':
26 |         return 'bg-green-100 text-green-700'
27 |       case 'failed':
28 |         return 'bg-red-100 text-red-700'
29 |       default:
30 |         return 'bg-slate-100 text-slate-600'
31 |     }
32 |   }
33 | 
34 |   return { getStatusIcon, getStatusColor }
35 | }
36 | 


--------------------------------------------------------------------------------
/frontend/src/components/ui/switch.tsx:
--------------------------------------------------------------------------------
 1 | import * as React from "react"
 2 | import * as SwitchPrimitive from "@radix-ui/react-switch"
 3 | 
 4 | import { cn } from "@/lib/utils"
 5 | 
 6 | function Switch({
 7 |   className,
 8 |   ...props
 9 | }: React.ComponentProps<typeof SwitchPrimitive.Root>) {
10 |   return (
11 |     <SwitchPrimitive.Root
12 |       data-slot="switch"
13 |       className={cn(
14 |         "peer data-[state=checked]:bg-primary data-[state=unchecked]:bg-input focus-visible:border-ring focus-visible:ring-ring/50 dark:data-[state=unchecked]:bg-input/80 inline-flex h-[1.15rem] w-8 shrink-0 items-center rounded-full border border-transparent shadow-xs transition-all outline-none focus-visible:ring-[3px] disabled:cursor-not-allowed disabled:opacity-50",
15 |         className
16 |       )}
17 |       {...props}
18 |     >
19 |       <SwitchPrimitive.Thumb
20 |         data-slot="switch-thumb"
21 |         className={cn(
22 |           "bg-background dark:data-[state=unchecked]:bg-foreground dark:data-[state=checked]:bg-primary-foreground pointer-events-none block size-4 rounded-full ring-0 transition-transform data-[state=checked]:translate-x-[calc(100%-2px)] data-[state=unchecked]:translate-x-0"
23 |         )}
24 |       />
25 |     </SwitchPrimitive.Root>
26 |   )
27 | }
28 | 
29 | export { Switch }
30 | 


--------------------------------------------------------------------------------
/backend/ReAct/utils.py:
--------------------------------------------------------------------------------
 1 | import json
 2 | import os
 3 | import re
 4 | from typing import Any, Dict, List, Optional
 5 | 
 6 | from fastmcp import Client
 7 | 
 8 | from .models import ToolCallable
 9 | 
10 | 
def normalize_input(input_data: Any) -> Any:
    """Parse a tool-call input that may be JSON text or a plain string.

    String inputs are stripped, unwrapped from a surrounding Markdown code
    fence (three or more backticks with an optional language tag) and then
    decoded as JSON. Non-string inputs are returned unchanged.

    Args:
        input_data: Raw tool input.

    Returns:
        The decoded JSON value when the (unfenced) text is valid JSON;
        otherwise ``{"input": input_data}`` carrying the original string.
        Non-string inputs are returned as-is.
    """
    if not isinstance(input_data, str):
        return input_data

    s = input_data.strip()
    # Unwrap a block fenced by any number (>= 3) of backticks. The payload
    # capture is non-greedy so the closing fence swallows *all* of its
    # backticks; a greedy capture left one behind for 4+ backtick fences,
    # which made otherwise valid JSON fail to decode.
    m = re.match(r"^`{3,}\s*(?:\w+)?\n?(.*?)\n?`{3,}\s*$", s, re.DOTALL)
    if m:
        s = m.group(1).strip()

    try:
        return json.loads(s)
    except json.JSONDecodeError:
        # Not JSON: hand the untouched original text to the tool.
        return {"input": input_data}
25 | 
26 | 
def create_tool_wrapper(tools_url, tool_name: str) -> ToolCallable:
    """Build an async callable that invokes one named tool on the tool server.

    The returned coroutine function accepts the raw input text, normalises
    it with ``normalize_input``, opens a client session against
    ``tools_url`` and calls ``tool_name`` with the normalised arguments.

    Args:
        tools_url: Address handed to ``Client`` for each invocation.
        tool_name: Name of the tool to call.

    Returns:
        A coroutine function returning ``str(result.data)`` on success, or
        an error message string if anything raises during the call.
    """

    async def tool_wrapper(input_str: str) -> str:
        try:
            arguments = normalize_input(input_str)
            # A fresh client (and session) is created for every call.
            session = Client(tools_url)
            async with session:
                outcome = await session.call_tool(tool_name, arguments)
                return str(outcome.data)
        except Exception as exc:
            # Deliberately best-effort: report the failure as text to the caller.
            return f"调用工具 '{tool_name}' 时发生错误: {str(exc)}"

    return tool_wrapper
41 | 


--------------------------------------------------------------------------------
/backend/llm_test.py:
--------------------------------------------------------------------------------
 1 | """测试LLM调用中连续消息的处理"""
 2 | 
 3 | import asyncio
 4 | import openai
 5 | from config import settings
 6 | 
 7 | async def test_llm_with_consecutive_roles():
 8 |     """测试连续多个user和assistant消息"""
 9 |     # 直接使用openai sdk
10 |     client = openai.OpenAI(api_key=settings.llm_provider_api_key, base_url=settings.llm_provider_base_url)
11 | 
12 |     # 模拟连续相同role的消息
13 |     messages = [
14 |         {"role": "system", "content": "你是一个AI助手"},
15 |         {"role": "user", "content": "什么是人工智能？"},
16 |         {"role": "user", "content": "它的发展历史是什么？"},  # 连续user
17 |         {"role": "assistant", "content": "人工智能是计算机科学的一个分支"},
18 |         {"role": "assistant", "content": "它的发展历史可以追溯到1950年代"},  # 连续assistant
19 |         {"role": "user", "content": "它有哪些应用？"}
20 |     ]
21 | 
22 |     try:
23 |         response = client.chat.completions.create(
24 |             model=settings.llm_model,
25 |             messages=messages,
26 |             temperature=0.3,
27 |             max_tokens=200,
28 |         )
29 |         print("成功调用，没有报错")
30 |         print("响应:", response.choices[0].message.content)
31 |     except Exception as e:
32 |         print("调用失败，报错:", str(e))
33 | 
34 | if __name__ == "__main__":
35 |     print("测试连续相同role的消息:")
36 |     asyncio.run(test_llm_with_consecutive_roles())


--------------------------------------------------------------------------------
/frontend/src/services/thumbnailService.ts:
--------------------------------------------------------------------------------
 1 | export const fetchThumbnail = async (
 2 |   transcriptId: number,
 3 |   startTime: number,
 4 |   endTime: number,
 5 |   width: number = 320
 6 | ): Promise<string> => {
 7 | 
 8 |   const response = await fetch(
 9 |     `/api/thumbnails/${transcriptId}?start_time=${startTime}&end_time=${endTime}&width=${width}`
10 |   )
11 |   
12 |   if (!response.ok) {
13 |     throw new Error(`获取缩略图失败：${response.status}`)
14 |   }
15 |   
16 |   const result = await response.json()
17 |   if (!result.success || !result.data) {
18 |     throw new Error('缩略图数据格式错误')
19 |   }
20 | 
21 |   const data = result.data
22 |   // 验证返回的 data 是否为可用图片，允许 data URL、http(s) 或静态图片路径
23 |   if (typeof data !== 'string') {
24 |     throw new Error('缩略图数据不是字符串')
25 |   }
26 | 
27 |   const isDataUrl = data.startsWith('data:image/')
28 |   const isHttpUrl = data.startsWith('http://') || data.startsWith('https://')
29 |   const isStaticPath = data.startsWith('/static/')
30 |   // 检查静态路径是否为图片扩展名
31 |   const imageExtMatch = /\.(jpg|jpeg|png|webp|gif)$/i.test(data)
32 | 
33 |   if (isDataUrl || isHttpUrl || (isStaticPath && imageExtMatch)) {
34 |     return data
35 |   }
36 | 
37 |   // 不支持的返回格式：记录并抛出错误
38 |   // Unexpected thumbnail data value will be ignored and throw an error at caller
39 |   throw new Error('缩略图数据格式不受支持')
40 | }


--------------------------------------------------------------------------------
/backend/services/docs/embedding_文件名增强设计文档.md:
--------------------------------------------------------------------------------
 1 | # Embedding文件名增强设计文档
 2 | 
 3 | ## 引言
 4 | 
 5 | 当前知识库的embedding环节仅基于文本内容进行向量嵌入，当用户基于文件名提问时，检索效果不佳。本文档提出在embedding时同时加入文件名信息的改进方案，以提升检索准确性。
 6 | 
 7 | ## 当前问题
 8 | 
 9 | 在`knowledge_base_service.py`的`add_transcript`方法中，embedding仅基于chunk_text生成，而chunk_text由句子段拼接而成，不包含文件名信息。当用户查询包含文件名相关内容时，向量相似度可能无法有效匹配，导致检索结果不准确。
10 | 
11 | ## 解决方案
12 | 
13 | 在生成embedding时，将文件名与文本内容结合，形成更丰富的上下文信息。具体做法是在chunk_text前添加文件名描述。
14 | 
15 | ## 实现细节
16 | 
17 | ### 修改位置
18 | - 文件：`backend/services/knowledge_base_service.py`
19 | - 方法：`add_transcript`
20 | 
21 | ### 代码修改建议
22 | 在生成chunk_text时，添加文件名信息：
23 | 
24 | ```python
25 | # 获取文件名（假设metadata中包含filename字段）
26 | filename = metadata.get("filename", "未知文件")
27 | 
28 | # 修改chunk_text生成逻辑
29 | chunk_text = f"文件名：{filename}\n内容：{' '.join([seg['sentence'] for seg in chunk])}"
30 | ```
31 | 
32 | ### 数据来源
33 | 需要确保metadata中包含`filename`字段，可从上传或转写过程中获取。
34 | 
35 | ### 检索优化
36 | 在`search_similar`方法中，可考虑对query进行预处理，如果query包含文件名相关信息，则增强query的权重。
37 | 
38 | ## 流程图
39 | 
40 | ```mermaid
41 | graph TD
42 |     A[接收segments和metadata] --> B[分组为chunks]
43 |     B --> C[获取文件名]
44 |     C --> D[生成增强chunk_text]
45 |     D --> E[计算embedding]
46 |     E --> F[存储到ChromaDB]
47 | ```
48 | 
49 | ## 总结
50 | 
51 | 通过在embedding时加入文件名信息，可以显著提升基于文件名提问的检索效果。该方案实现简单，对现有架构影响最小，值得实施。


--------------------------------------------------------------------------------
/backend/services/docs/多视频聊天服务调试经验.md:
--------------------------------------------------------------------------------
 1 | # 多视频聊天服务调试经验
 2 | 
 3 | ## 问题描述
 4 | 
 5 | 运行 `example_chat_service.py` 时，检索到了相关内容，但最终响应显示"由于您没有提供具体的视频字幕内容，我无法分析这些视频的内容并回答'视频讲了什么'这个问题。"，没有显示检索到的内容。
 6 | 
 7 | ## 原因分析
 8 | 
 9 | 1. 在 `chat_knowledge_service.py` 的 `_perform_knowledge_retrieval` 方法中，调用 `knowledge_base.get_doc_details(doc_id, None)` 获取文档详情。
10 | 
11 | 2. `get_doc_details` 返回的 `sentences` 列表中，每个句子字典缺少 `transcript_id` 字段。
12 | 
13 | 3. 在 `chat_service.py` 的 `_build_multi_video_prompt_body` 方法中，按 `transcript_id` 对句子进行分组，但由于句子中没有 `transcript_id`，`segment.get("transcript_id")` 返回 `None`，导致分组失败，`segments_by_video` 为空。
14 | 
15 | 4. 因此，构建的提示词中"多视频字幕内容"部分为空，LLM 认为没有提供内容，返回了默认的错误消息。
16 | 
17 | ## 解决方案
18 | 
19 | 在 `knowledge_base_service.py` 的 `get_doc_details` 方法中，为每个句子添加 `transcript_id` 字段：
20 | 
21 | ```python
22 | "sentences": [
23 |     {
24 |         "index": s.get("index"),
25 |         "sentence": s.get("sentence"),
26 |         "start_time": s.get("start_time"),
27 |         "end_time": s.get("end_time"),
28 |         "spk_id": s.get("spk_id"),
29 |         "transcript_id": transcript_id,  # 添加此行
30 |     }
31 |     for s in chosen_chunk
32 | ],
33 | ```
34 | 
35 | ## 经验教训
36 | 
37 | - 在数据传递过程中，确保所有必要字段都被正确包含，避免因字段缺失导致的数据处理失败。
38 | 
39 | - 调试复杂系统时，可以添加打印语句来检查中间数据结构，帮助定位问题。
40 | 
41 | - 多层数据处理时，要验证每一步的数据完整性，特别是跨模块的数据传递。
42 | 


--------------------------------------------------------------------------------
/backend/ReAct/test_chat_agent.py:
--------------------------------------------------------------------------------
 1 | """ReAct ChatAgent 测试"""
 2 | 
 3 | import asyncio
 4 | import sys
 5 | import os
 6 | 
 7 | # 添加backend目录到路径
 8 | backend_path = os.path.abspath(os.path.join(os.path.dirname(__file__), '..'))
 9 | parent_path = os.path.abspath(os.path.join(backend_path, '..'))
10 | for path in [backend_path, parent_path]:
11 |     if path not in sys.path:
12 |         sys.path.insert(0, path)
13 | 
14 | from backend.config import settings
15 | from backend.ReAct import ChatAgent
16 | 
17 | 
async def test_chat_agent():
    """Smoke-test ChatAgent by listing its tools and their descriptions.

    Builds an agent from the shared settings, prints the names of the tools
    it can see and the generated tool-description text. Any exception raised
    by those calls is printed rather than propagated.
    """
    # LLM credentials and model name come from settings; the tool server
    # address is fixed for this local test.
    agent = ChatAgent(
        openai_api_key=settings.llm_provider_api_key,
        openai_api_base=settings.llm_provider_base_url,
        openai_api_model=settings.llm_model,
        tools_backend_url="http://localhost:8004/mcp"
    )

    try:
        # Tool listing.
        available_tools = await agent.list_available_tools()
        tool_names = [tool.name for tool in available_tools]
        print(f"可用工具: {tool_names}")

        # Tool description rendering.
        descriptions = await agent.generate_tool_descriptions()
        print(f"工具描述:\n{descriptions}")

        print("ChatAgent初始化成功")

    except Exception as exc:
        print(f"测试失败: {exc}")
42 | 
43 | 
44 | if __name__ == "__main__":
45 |     asyncio.run(test_chat_agent())


--------------------------------------------------------------------------------
/ASRBackend/supabase_utils/docs/supabase_upload_design.md:
--------------------------------------------------------------------------------
 1 | # Supabase 文件上传工具设计文档
 2 | 
 3 | ## 概述
 4 | 
 5 | 我设计了 `supabase_upload.py` 模块，作为 HearSight 后端项目中专门处理文件上传到 Supabase 存储的工具。这个模块封装了 Supabase 客户端的创建、管理员登录和文件上传逻辑，简化了其他模块的使用。
 6 | 
 7 | ## 设计思路
 8 | 
 9 | ### 核心功能
10 | 
11 | - 提供 `upload_file_to_supabase` 函数，支持上传本地文件到 Supabase 存储桶
12 | - 自动处理管理员邮箱登录，确保有权限进行上传操作
13 | - 返回公开访问 URL，便于后续使用
14 | 
15 | ### 配置依赖
16 | 
17 | 模块依赖 `config.py` 中的 Supabase 配置，包括 URL、密钥、存储桶名称等。这确保了配置的集中管理和环境变量覆盖。
18 | 
19 | ### 错误处理
20 | 
21 | - 检查配置完整性
22 | - 验证文件存在性
23 | - 处理登录和上传异常，不中断程序运行
24 | 
25 | ## 架构关系
26 | 
27 | 这个模块与 HearSight 项目其他部分的关系如下：
28 | 
29 | - **与 config.py**：依赖配置获取 Supabase 参数
30 | - **与 ASRBackend**：ASR 服务可能使用此模块上传音频文件进行处理
31 | - **与前端**：上传的文件 URL 可用于前端展示或下载
32 | 
33 | ## 流程图
34 | 
35 | ```mermaid
36 | graph TD
37 |     A[调用 upload_file_to_supabase] --> B[获取 config 配置]
38 |     B --> C[创建 Supabase 客户端]
39 |     C --> D[尝试管理员登录]
40 |     D --> E[检查文件存在]
41 |     E --> F[读取文件内容]
42 |     F --> G[上传到 Supabase]
43 |     G --> H[获取公开 URL]
44 |     H --> I[返回 URL 或 None]
45 | ```
46 | 
47 | ## 使用示例
48 | 
49 | 在其他模块中导入并使用：
50 | 
51 | ```python
52 | from supabase_utils.supabase_upload import upload_file_to_supabase
53 | 
54 | url = upload_file_to_supabase("path/to/file.mp3")
55 | if url:
56 |     print(f"文件上传成功: {url}")
57 | ```
58 | 
59 | 这个设计保持了模块的简洁性和可复用性，同时通过管理员登录确保了上传操作的安全性。
60 | 


--------------------------------------------------------------------------------
/backend/media_processing/upload_handler.py:
--------------------------------------------------------------------------------
 1 | # -*- coding: utf-8 -*-
 2 | """媒体处理适配器 - 提供向后兼容的接口"""
 3 | 
 4 | from __future__ import annotations
 5 | 
 6 | from pathlib import Path
 7 | from typing import Optional
 8 | 
 9 | from backend.common_interfaces import DownloadResult
10 | from .audio.local.upload_handler import process_uploaded_audio, SUPPORTED_AUDIO_FORMATS
11 | from .video.local.upload_handler import process_uploaded_video, SUPPORTED_VIDEO_FORMATS
12 | 
13 | 
def process_uploaded_file(file_path: str, output_dir: str) -> DownloadResult:
    """Dispatch a locally uploaded file to the matching media handler.

    Compatibility entry point: the lower-cased file extension decides
    whether the audio or the video upload handler processes the file.

    Args:
        file_path: Full path of the uploaded file.
        output_dir: Output directory, passed on to the video handler only.

    Returns:
        DownloadResult: The chosen handler's result, or a failed result with
        an explanatory message when the extension is in neither supported
        set.
    """
    extension = Path(file_path).suffix.lower()

    # Audio uploads need no output directory.
    if extension in SUPPORTED_AUDIO_FORMATS:
        return process_uploaded_audio(file_path)

    if extension in SUPPORTED_VIDEO_FORMATS:
        return process_uploaded_video(file_path, output_dir)

    # Neither audio nor video: report the rejected extension and what is supported.
    message = f"不支持的文件格式: {extension}。支持音频格式: {SUPPORTED_AUDIO_FORMATS}，视频格式: {SUPPORTED_VIDEO_FORMATS}"
    return DownloadResult(success=False, error_message=message)
40 | 


--------------------------------------------------------------------------------
/backend/tests/test_SenseVoiceSmall.py:
--------------------------------------------------------------------------------
 1 | import json
 2 | import requests
 3 | from dotenv import load_dotenv
 4 | import os
 5 | import subprocess
 6 | 
 7 | load_dotenv()
 8 | 
 9 | def convert_video_to_wav(input_path, output_path):
10 |     """
11 |     使用ffmpeg将视频转换为WAV音频格式。
12 |     """
13 |     command = [
14 |         "ffmpeg",
15 |         "-i", input_path,
16 |         "-vn",  # 移除视频流
17 |         "-acodec", "pcm_s16le",  # 使用PCM编码
18 |         output_path
19 |     ]
20 |     subprocess.run(command, check=True)
21 | 
22 | token = os.getenv("OPENAI_API_KEY")
23 | 
24 | # 使用requests库发送POST请求到SiliconFlow API进行音频转录
25 | url = "https://api.siliconflow.cn/v1/audio/transcriptions"
26 | headers = {
27 |     "Authorization": f"Bearer {token}"
28 | }
29 | video_path = r"C:\Users\ke\Documents\projects\python_projects\HearSight\app_datas\download_videos\【从零开始学OB】—— 百变强大的Obsidian 社区主题：Blue Topaz.mp4"
30 | wav_path = r"C:\Users\ke\Documents\projects\python_projects\HearSight\tests\converted_audio.wav"
31 | 
32 | # 转换视频为WAV
33 | convert_video_to_wav(video_path, wav_path)
34 | 
data = {
    "model": "FunAudioLLM/SenseVoiceSmall"
}

# Open the converted audio inside a context manager so the file handle is
# closed as soon as the upload finishes; previously it was never closed.
with open(wav_path, "rb") as audio_file:
    files = {
        "file": audio_file
    }
    response = requests.post(url, headers=headers, files=files, data=data)

# Decode the response body once and reuse it for printing and saving.
result = response.json()
print(result)
# Write the transcription result to a JSON file.
with open("output.json", "w") as f:
    json.dump(result, f)


--------------------------------------------------------------------------------
/frontend/src/features/app/docs/滚动条问题解决记录.md:
--------------------------------------------------------------------------------
 1 | # 前端布局滚动条问题解决记录
 2 | 
 3 | ## 问题描述
 4 | 
 5 | 在 HearSight 前端应用中，主控制区（播放器和侧边栏组成的容器）出现了不应该出现的滚动条，导致用户体验不佳。滚动条可能出现在页面级别或容器内部，影响布局的整洁性。
 6 | 
 7 | ## 问题分析
 8 | 
 9 | - 初始怀疑是 `AppLayout` 组件的布局问题，导致容器高度未正确控制。
10 | - `AppPage` 的根容器使用 `h-screen flex flex-col`，但子元素高度管理不当，可能导致内容超出视窗高度。
11 | - `VideoPlayer` 中的视频元素高度设置可能导致内容溢出。
12 | - 响应式适配代码可能引入不必要的复杂性。
13 | 
14 | ## 解决步骤
15 | 
16 | 1. **简化布局结构**：
17 |    - 给 `AppLayout` 组件的根 `div` 添加 `flex-1` 类，确保它占据剩余空间。
18 |    - 删除移动端和平板端的响应式适配逻辑，只保留桌面端布局，减少复杂性。
19 | 
20 | 2. **移除不必要的滚动**：
21 |    - 从 `AppLayout` 的 `centerPanel` 容器中移除 `overflow-auto` 属性，避免内部滚动条。
22 | 
23 | 3. **优化头部布局**：
24 |    - 给 `HeaderBar` 的根元素添加 `flex-shrink-0` 类，确保它在 flex 布局中不被压缩，正确占用固定高度。
25 | 
26 | 4. **调整视频播放器**：
27 |    - 最初将 `VideoPlayer` 中 `video` 元素的 `max-h-[80vh]` 改为 `h-full`，尝试让视频填满容器。
28 |    - 但这导致视频播放控制条（进度条和拖动控制）不可见或被缩放。
29 |    - 最终改回 `max-h-[80vh]`，以保持控制条可见。
30 | 
31 | 5. **防止页面滚动**：
32 |    - 给 `AppPage` 的根容器添加 `overflow-hidden` 属性，隐藏任何超出视窗高度的内容，消除页面滚动条。
33 | 
34 | ## 最终修改文件
35 | 
36 | - `frontend/src/features/app/components/AppLayout.tsx`：添加 `flex-1` 类，简化适配逻辑，移除 `overflow-auto`。
37 | - `frontend/src/features/app/components/HeaderBar.tsx`：添加 `flex-shrink-0` 类。
38 | - `frontend/src/components/VideoPlayer.tsx`：调整视频高度设置。
39 | - `frontend/src/features/app/AppPage.tsx`：添加 `overflow-hidden` 类。
40 | 
41 | ## 结果
42 | 
43 | 通过以上修改，布局高度得到正确控制，滚动条不再出现，视频播放控制条正常显示，用户体验改善。
44 | 


--------------------------------------------------------------------------------
/frontend/src/services/summaryService.ts:
--------------------------------------------------------------------------------
 1 | import type { 
 2 |   SummarizeResponse,
 3 |   Segment,
 4 |   Summary
 5 | } from '../types'
 6 | 
 7 | export const generateSummary = async (segments: Segment[]): Promise<SummarizeResponse> => {
 8 |   const response = await fetch('/api/summarize', {
 9 |     method: 'POST',
10 |     headers: { 'Content-Type': 'application/json' },
11 |     body: JSON.stringify({ segments })
12 |   })
13 |   
14 |   if (!response.ok) {
15 |     throw new Error(`总结失败：${response.status}`)
16 |   }
17 |   
18 |   return response.json()
19 | }
20 | 
21 | export const saveSummaries = async (
22 |   transcriptId: number,
23 |   summaries: Summary[]
24 | ): Promise<{ success: boolean; message: string; saved: boolean; transcript_id: number }> => {
25 |   const response = await fetch(`/api/transcripts/${transcriptId}/summaries`, {
26 |     method: 'POST',
27 |     headers: { 'Content-Type': 'application/json' },
28 |     body: JSON.stringify({ summaries })
29 |   })
30 | 
31 |   if (!response.ok) {
32 |     throw new Error(`保存总结失败：${response.status}`)
33 |   }
34 | 
35 |   return response.json()
36 | }
37 | 
38 | export const getSummaries = async (
39 |   transcriptId: number
40 | ): Promise<{ summaries: Summary[] | null; has_summaries: boolean }> => {
41 |   const response = await fetch(`/api/transcripts/${transcriptId}/summaries`)
42 | 
43 |   if (!response.ok) {
44 |     throw new Error(`获取已保存总结失败：${response.status}`)
45 |   }
46 | 
47 |   return response.json()
48 | }


--------------------------------------------------------------------------------
/backend/media_processing/video/download/youtube/docs/下载问题修复记录.md:
--------------------------------------------------------------------------------
 1 | # YouTube视频下载问题修复记录
 2 | 
 3 | ## 问题描述
 4 | 
 5 | 在使用YouTube下载器下载视频时，下载完成后视频文件无法正常播放。日志显示下载过程正常，但播放器无法打开文件。
 6 | 
 7 | ## 错误记录
 8 | 
 9 | 在运行`python example.py`测试下载时，出现以下错误：
10 | 
11 | ```text
12 | AssertionError
13 | 下载YouTube视频失败:
14 | AssertionError
15 | ```
16 | 
17 | 具体错误发生在yt-dlp的fixup过程中：
18 | 
19 | ```text
20 | File "C:\Users\ke\miniconda3\envs\audio\lib\site-packages\yt_dlp\YoutubeDL.py", line 3566, in fixup
21 |     assert fixup_policy in ('detect_or_warn', None)
22 | AssertionError
23 | ```
24 | 
25 | ## 原因分析
26 | 
27 | 1. 初始配置使用了复杂的format字符串，可能导致文件格式问题
28 | 2. fixup参数设置为True，但yt-dlp期望字符串值'detect_or_warn'或None
29 | 
30 | ## 修复过程
31 | 
32 | 1. 简化format配置：从复杂的`'bestvideo[ext=mp4]+bestaudio[ext=m4a]/best[ext=mp4]'`改为`'best[ext=mp4]/best'`
33 | 2. 修改fixup参数：从`True`改为`'detect_or_warn'`
34 | 3. 移除不必要的merge_output_format参数
35 | 
36 | ## 最终配置
37 | 
38 | ```python
39 | ydl_opts = {
40 |     'outtmpl': os.path.join(temp_dir, '%(title)s.%(ext)s'),
41 |     'format': 'best[ext=mp4]/best',
42 |     'quiet': False,
43 |     'no_warnings': False,
44 |     'socket_timeout': 30,
45 |     'extractor_retries': 3,
46 |     'skip_download': False,
47 |     'fixup': 'detect_or_warn',
48 |     'keepvideo': False,
49 | }
50 | ```
51 | 
52 | ## 测试结果
53 | 
54 | - 下载成功，进度100%
55 | - 生成的MP4文件可以正常播放
56 | - 解决了之前的AssertionError问题
57 | 
58 | ## 总结
59 | 
60 | 通过简化yt-dlp配置并正确设置fixup参数，成功修复了YouTube视频下载无法播放的问题。建议在配置yt-dlp时优先使用简单可靠的选项，避免复杂的合并和修复逻辑。
61 | 


--------------------------------------------------------------------------------
/lefthook.yml:
--------------------------------------------------------------------------------
 1 | # Pre-commit hooks.
 2 | # Every command receives its files through lefthook's {staged_files} template, which
 3 | # expands to the staged files that match the command's glob. lefthook skips a command
 4 | # when no staged file matches, so no tool is ever started with an empty file list.
 5 | pre-commit:
 6 |   commands:
 7 |     frontend-format:
 8 |       # `root` runs the command inside frontend/ and restricts the files to that directory.
 9 |       root: "frontend/"
10 |       glob: "*.{ts,tsx,js,jsx,json,css,scss,html}"
11 |       run: npx prettier --write {staged_files}
12 |       # Re-stage what prettier rewrote so the commit contains the formatted version.
13 |       stage_fixed: true
14 |       skip: [merge-commit]
15 | 
16 |     frontend-lint:
17 |       root: "frontend/"
18 |       glob: "*.{ts,tsx,js,jsx}"
19 |       run: npm run lint && npx tsc --noEmit
20 |       skip: [merge-commit]
21 | 
22 |     backend-format:
23 |       glob: "backend/**/*.py"
24 |       run: python -m black {staged_files} && python -m isort {staged_files}
25 |       stage_fixed: true
26 |       skip: [merge-commit]
27 | 
28 |     backend-lint:
29 |       glob: "backend/**/*.py"
30 |       # `|| true` keeps pylint advisory: findings are shown but never block the commit.
31 |       run: python -m pylint {staged_files} || true
32 |       skip: [merge-commit]
33 | 
34 |     supabase-format:
35 |       root: "supabase_project/"
36 |       glob: "*.{ts,js}"
37 |       run: npx prettier --write {staged_files}
38 |       stage_fixed: true
39 |       skip: [merge-commit]
40 | 
41 |     supabase-lint:
42 |       root: "supabase_project/"
43 |       glob: "*.{ts,js}"
44 |       # `|| true` keeps eslint advisory, as before.
45 |       run: npx eslint {staged_files} || true
46 |       skip: [merge-commit]


--------------------------------------------------------------------------------
/frontend/src/components/RightPanel/Chat/ChatToolbar.tsx:
--------------------------------------------------------------------------------
 1 | import { Button } from "@/components/ui/button"
 2 | import { Switch } from "@/components/ui/switch"
 3 | import { Label } from "@/components/ui/label"
 4 | import { Trash2 } from "lucide-react"
 5 | 
 6 | /** Props accepted by the chat toolbar. */
 7 | interface ChatToolbarProps {
 8 |   /** Current state of the image-mode switch. */
 9 |   readonly imageModeEnabled: boolean
10 |   /** When true the image-mode switch is disabled and its label is greyed out. */
11 |   readonly isAudio: boolean
12 |   /** Number of chat messages; zero disables the clear button. */
13 |   readonly messagesLength: number
14 |   /** Receives the new switch state. */
15 |   readonly onImageModeChange: (enabled: boolean) => void
16 |   /** Invoked when the clear button is clicked. */
17 |   readonly onClearChat: () => void
18 | }
19 | 
20 | /**
21 |  * Toolbar row for the chat panel: an image-mode switch with its label on the
22 |  * left and a "clear conversation" button on the right.
23 |  */
24 | export default function ChatToolbar(props: ChatToolbarProps) {
25 |   const { imageModeEnabled, isAudio, messagesLength, onImageModeChange, onClearChat } = props
26 | 
27 |   // The label is dimmed and carries an explanatory tooltip while the switch is disabled.
28 |   const labelClassName = `text-sm cursor-pointer ${isAudio ? 'text-slate-400' : ''}`
29 |   const labelTitle = isAudio ? '音频文件不支持图文展示' : ''
30 |   const nothingToClear = messagesLength === 0
31 | 
32 |   return (
33 |     <div className="flex justify-between items-center">
34 |       <div className="flex items-center gap-2">
35 |         <Switch
36 |           id="image-mode"
37 |           checked={imageModeEnabled}
38 |           onCheckedChange={onImageModeChange}
39 |           disabled={isAudio}
40 |         />
41 |         <Label htmlFor="image-mode" className={labelClassName} title={labelTitle}>
42 |           图文展示
43 |         </Label>
44 |       </div>
45 |       <Button
46 |         variant="ghost"
47 |         size="sm"
48 |         onClick={onClearChat}
49 |         disabled={nothingToClear}
50 |         className="gap-1"
51 |       >
52 |         <Trash2 className="h-3 w-3" />
53 |         清空对话
54 |       </Button>
55 |     </div>
56 |   )
57 | }


--------------------------------------------------------------------------------
/backend/ReAct/tools_server.py:
--------------------------------------------------------------------------------
 1 | """本地工具 MCP 服务器
 2 | 
 3 | 将本地工具函数暴露为 MCP (Model Context Protocol) 工具服务。
 4 | """
 5 | 
 6 | import sys
 7 | import os
 8 | 
 9 | # Add the backend directory to sys.path
10 | backend_path = os.path.dirname(os.path.dirname(os.path.abspath(__file__)))
11 | print("backend_path:", backend_path)
12 | sys.path.insert(0, backend_path)
13 | print("sys.path:", sys.path[:3])  # only print the first three entries
14 | 
15 | try:
16 |     import backend  # NOTE(review): backend_path is the backend/ directory itself; this import presumably needs its parent on sys.path - confirm
17 |     print("backend imported successfully")
18 | except ImportError as e:
19 |     print("import backend failed:", e)
20 | 
21 | from fastmcp import FastMCP
22 | 
23 | # 导入本地工具
24 | # from tools.calculator import add_numbers  # 示例，已移除
25 | from backend.tools.retrieval_tool import retrieval_tool
26 | 
27 | # 创建 MCP 服务器
28 | mcp = FastMCP("LocalTools")
29 | 
30 | 
31 | @mcp.tool()
32 | async def knowledge_retrieval(question: str, transcript_ids) -> str:
33 |     """知识库检索工具，从指定转录中检索相关内容。
34 | 
35 |     参数:
36 |         question: 用户问题，字符串类型，用于描述需要检索的具体内容
37 |         transcript_ids: 转录ID，可以是以下格式：
38 |             - 单个整数：如 123，表示从单个视频文件中检索
39 |             - 整数列表：如 [123, 456, 789]，表示从多个视频文件中同时检索
40 |             - 字符串格式的列表：如 "[123, 456]"，会被自动解析为列表
41 | 
42 |     返回:
43 |         压缩后的关键信息字符串，包含检索到的相关内容、时间戳和文件名信息
44 | 
45 |     示例:
46 |         knowledge_retrieval("什么是机器学习？", [1, 2, 3])
47 |         knowledge_retrieval("插件功能介绍", 5)
48 |     """
49 |     result = await retrieval_tool.retrieve_knowledge(question, transcript_ids)
50 |     return result
51 | 
52 | 
53 | if __name__ == "__main__":
54 |     # 启动 MCP 服务器，使用 HTTP 传输
55 |     mcp.run(transport="http", port=8004)


--------------------------------------------------------------------------------
/backend/queues/tasks/knowledge_base_stage.py:
--------------------------------------------------------------------------------
 1 | # -*- coding: utf-8 -*-
 2 | """知识库处理阶段"""
 3 | 
 4 | import logging
 5 | from typing import List, Dict, Any, Tuple
 6 | 
 7 | from backend.services.knowledge_base_service import knowledge_base
 8 | from backend.schemas import Segment
 9 | 
10 | logger = logging.getLogger(__name__)
11 | 
12 | 
13 | def handle_knowledge_base_stage(
14 |     job_id: int,
15 |     transcript_id: int,
16 |     segments: List[Dict[str, Any]]
17 | ) -> None:
18 |     """处理知识库添加阶段"""
19 |     try:
20 |         # 知识库只存 minimal metadata：transcript_id 与 chunk_index（自动添加）
21 |         metadata = {"transcript_id": transcript_id}
22 | 
23 |         knowledge_base.add_transcript(
24 |             segments=segments,
25 |             metadata=metadata
26 |         )
27 |         logger.info(f"转写句子段已添加到知识库: job_id={job_id}")
28 |     except Exception as e:
29 |         logger.error(f"添加转写句子段到知识库失败: {str(e)}")
30 |         raise
31 | 
32 | 
33 | def handle_knowledge_retrieval_stage(
34 |     question: str,
35 |     transcript_id: int
36 | ) -> Tuple[List[Segment], str]:
37 |     """处理知识库检索阶段"""
38 |     try:
39 |         from backend.services.chat_knowledge_service import ChatKnowledgeService
40 |         service = ChatKnowledgeService()
41 |         relevant_segments, filename = service._perform_knowledge_retrieval(question, transcript_id)
42 |         logger.info(f"知识库检索完成: transcript_id={transcript_id}, 检索到 {len(relevant_segments)} 个片段")
43 |         return relevant_segments, filename
44 |     except Exception as e:
45 |         logger.error(f"知识库检索失败: {str(e)}")
46 |         raise


--------------------------------------------------------------------------------
/ASRBackend/Dockerfile.local:
--------------------------------------------------------------------------------
 1 | # Local-mode Dockerfile
 2 | # Bundles full local-model support, including torch and other large dependencies.
 3 | # Image size: roughly 3-5 GB, depending on the torch build.
 4 | 
 5 | FROM python:3.10-slim
 6 | 
 7 | WORKDIR /app
 8 | 
 9 | # Run the service in local mode
10 | ENV ASR_MODE=local
11 | ENV PYTHONPATH=/app
12 | 
13 | # Point pip at the Tsinghua mirror
14 | RUN pip3 config set global.index-url https://pypi.tuna.tsinghua.edu.cn/simple
15 | 
16 | # Point apt at the Tsinghua mirror to speed up package installation
17 | RUN sed -i 's|deb.debian.org|mirrors.tuna.tsinghua.edu.cn|g' /etc/apt/sources.list.d/debian.sources
18 | 
19 | # Install system dependencies
20 | RUN apt-get update && apt-get install -y --no-install-recommends \
21 |     build-essential \
22 |     git \
23 |     curl \
24 |     ffmpeg \
25 |     ca-certificates \
26 |     && rm -rf /var/lib/apt/lists/*
27 | 
28 | # Install Python dependencies
29 | RUN pip3 install --upgrade pip
30 | 
31 | # Copy only the requirements files before installing, so that editing application
32 | # code no longer invalidates the cached (multi-GB) dependency layers below.
33 | # NOTE(review): assumes requirements-local.txt references nothing outside requirements*.txt - confirm.
34 | COPY requirements*.txt ./
35 | 
36 | # Install the requirements first (Tsinghua mirror), then the large PyTorch wheels (official PyTorch index)
37 | RUN pip3 install -r requirements-local.txt
38 | 
39 | # Install PyTorch, torchvision and torchaudio from the CUDA 11.8 index, with --no-cache-dir and a trusted host.
40 | # On machines without a GPU the "+cpu" builds are a smaller download; keep cu118 when a GPU is needed
41 | # and consider pinning exact versions.
42 | RUN pip3 install --no-cache-dir --prefer-binary \
43 |     torch torchvision torchaudio \
44 |     --index-url https://download.pytorch.org/whl/cu118 \
45 |     --trusted-host download.pytorch.org
46 | 
47 | # Copy the project files last: source changes only rebuild the layers from here on
48 | COPY . .
49 | 
50 | # Create required directories
51 | RUN mkdir -p cache
52 | 
53 | # Expose the service port
54 | EXPOSE 8003
55 | 
56 | # Health check
57 | HEALTHCHECK --interval=30s --timeout=10s --start-period=40s --retries=3 \
58 |     CMD curl -f http://localhost:8003/health || exit 1
59 | 
60 | # Start the application
61 | CMD ["uvicorn", "main:app", "--host", "0.0.0.0", "--port", "8003"]


--------------------------------------------------------------------------------
/backend/routers/chat/summarize_router.py:
--------------------------------------------------------------------------------
 1 | # -*- coding: utf-8 -*-
 2 | """总结相关的路由"""
 3 | 
 4 | import os
 5 | from fastapi import APIRouter, HTTPException, Request
 6 | 
 7 | from backend.text_process.summarize import summarize_segments
 8 | from .models import SummarizeRequest, SummarizeResponse
 9 | 
10 | 
11 | router = APIRouter()
12 | 
13 | 
14 | @router.post("/summarize")
15 | def api_summarize(payload: SummarizeRequest, request: Request) -> SummarizeResponse:
16 |     """基于句级片段一次性生成总结。
17 | 
18 |         请求 body 字段：
19 |             - segments: List[Segment] （必需）
20 |             - api_key/base_url/model: 可选（若未提供则从 config 或环境变量读取）
21 | 
22 |     返回：{"summaries": List[SummaryItem]}
23 |     """
24 |     segments = payload.get("segments")
25 |     if not segments or not isinstance(segments, list):
26 |         raise HTTPException(status_code=400, detail="segments (list) is required")
27 | 
28 |     # 从配置或环境读取 CHAT_MAX_WINDOWS（优先级：环境变量 -> 默认 1000000）
29 |     chat_max = int(
30 |         os.environ.get("CHAT_MAX_WINDOWS") or 1000000
31 |     )
32 | 
33 |     try:
34 |         summaries = summarize_segments(
35 |             segments=segments,
36 |             chat_max_windows=chat_max,
37 |             max_tokens=4096,
38 |         )
39 |     except ValueError as e:
40 |         # 例如 token 超限等可预期的错误，返回 400
41 |         raise HTTPException(status_code=400, detail=str(e))
42 |     except Exception as e:
43 |         # 其他未知错误返回 500
44 |         raise HTTPException(status_code=500, detail=f"summarization failed: {e}")
45 | 
46 |     # 返回直接的 list[SummaryItem] 以简化前端处理
47 |     return {"summaries": summaries}


--------------------------------------------------------------------------------
/backend/services/embedding_litellm_example.py:
--------------------------------------------------------------------------------
 1 | # -*- coding: utf-8 -*-
 2 | """litellm embedding 示例测试"""
 3 | 
 4 | import sys
 5 | import os
 6 | import asyncio
 7 | 
 8 | # 添加项目根目录到 sys.path
 9 | sys.path.insert(0, os.path.abspath(os.path.join(os.path.dirname(__file__), '..', '..')))
10 | 
11 | # 加载环境变量
12 | from dotenv import load_dotenv
13 | load_dotenv(os.path.join(os.path.dirname(__file__), '..', '.env'))
14 | 
15 | import litellm
16 | from litellm import Router
17 | from backend.config import settings
18 | from backend.startup import get_embedding_router
19 | 
20 | async def test_litellm_embedding():
21 |     """测试 litellm embedding 调用"""
22 | 
23 |     router = get_embedding_router()
24 | 
25 |     input_text = "请总结这个视频的内容"
26 | 
27 |     print("=== 测试 litellm Router embedding 调用 ===")
28 |     print(f"模型: {settings.embedding_model}")
29 |     print(f"base_url: {settings.embedding_provider_base_url}")
30 | 
31 |     try:
32 |         response = await router.aembedding(
33 |             model=settings.embedding_model,
34 |             input=input_text
35 |         )
36 | 
37 |         print("embedding 响应成功:")
38 |         print(f"response type: {type(response)}")
39 |         print(f"data type: {type(response.data)}")
40 |         print(f"data[0] type: {type(response.data[0])}")
41 |         embedding_vector = response.data[0]['embedding']
42 |         print(f"向量维度: {len(embedding_vector)}")
43 |         print(f"前5个值: {embedding_vector[:5]}")
44 | 
45 |     except Exception as e:
46 |         print(f"调用失败: {e}")
47 | 
48 | if __name__ == "__main__":
49 |     asyncio.run(test_litellm_embedding())
50 | 


--------------------------------------------------------------------------------
/backend/text_process/docs/ChatGPT翻译技巧说明.md:
--------------------------------------------------------------------------------
 1 | # ChatGPT翻译技巧说明
 2 | 
 3 | ## 概述
 4 | 
 5 | 本文会介绍一种使用ChatGPT进行高质量翻译的方法，特别适用于长文翻译，以避免信息丢失和不自然的问题。核心是“直译 + 意译”的两步翻译法，借鉴了“Step-by-Step”提示词理念。
 6 | 
 7 | ## 基本理念
 8 | 
 9 | 这个方法类似于让ChatGPT一步步思考，通过分步处理大幅改善翻译结果。研究显示，在提示词中加入“让我们一步一步的思考”能提升生成质量。
10 | 
11 | ## 两步翻译法（适用于GPT-4）
12 | 
13 | ### 提示词示例
14 | 
15 | 你是一位精通简体中文的专业翻译，曾参与《纽约时报》和《经济学人》中文版的翻译工作，因此对于新闻和时事文章的翻译有深入的理解。我希望你能帮我将以下英文新闻段落翻译成中文，风格与上述杂志的中文版相似。
16 | 
17 | 规则：
18 | 
19 | - 翻译时要准确传达新闻事实和背景。
20 | - 保留特定的英文术语或名字，并在其前后加上空格，例如："中 UN 文"。
21 | - 分成两次翻译，并且打印每一次结果：
22 | 
23 | 1. 根据新闻内容直译，不要遗漏任何信息
24 | 2. 根据第一次直译的结果重新意译，遵守原意的前提下让内容更通俗易懂，符合中文表达习惯
25 | 
26 | 本条消息只需要回复OK，接下来的消息我将会给你发送完整内容，收到后请按照上面的规则打印两次翻译结果。
27 | 
28 | ### 效果对比
29 | 
30 | 优化前：直接翻译，可能丢失信息或不自然。
31 | 优化后：通过直译+意译，翻译更准确、自然。
32 | 
33 | ## 两步翻译法（适用于GPT-3.5）
34 | 
35 | 由于GPT-3.5能力较弱，需拆分成两步执行。
36 | 
37 | ### 第一步：直译提示词
38 | 
39 | 你是一位精通简体中文的专业翻译，曾参与《纽约时报》和《经济学人》中文版的翻译工作，因此对于新闻和时事文章的翻译有深入的理解。我希望你能帮我将以下英文新闻段落翻译成中文，风格与上述杂志的中文版相似。
40 | 
41 | 规则：
42 | 
43 | - 翻译时要准确传达新闻事实和背景。
44 | - 保留特定的英文术语或名字，并在其前后加上空格，例如："中 UN 文"。
45 | - 根据新闻内容直译，不要遗漏任何信息。
46 | 
47 | 英文原文：
48 | { 英文原文 }
49 | 
50 | 直译结果：
51 | 
52 | ### 第二步：意译提示词
53 | 
54 | 你是一位专业中文翻译，擅长对翻译结果进行二次修改和润色成通俗易懂的中文，我希望你能帮我将以下英文视频的中文翻译结果重新意译和润色。
55 | 
56 | 规则：
57 | 
58 | - 这些字幕涉及机器学习或 AI 等专业知识，翻译时请注意术语的准确性
59 | - 保留特定的英文术语、数字或名字，并在其前后加上空格，例如："生成式 AI 产品"，"不超过 10 秒"。
60 | - 基于直译结果重新意译，意译时务必对照原始英文，不要添加也不要遗漏内容，并让翻译结果通俗易懂，符合中文表达习惯
61 | 
62 | 英文原文：
63 | { 英文原文 }
64 | 
65 | 直译结果：
66 | { 第一步直译的结果 }
67 | 
68 | 意译和润色后：
69 | 
70 | ## 总结
71 | 
72 | 这种方法能有效提升翻译质量，尤其在处理复杂内容时。通过分步提示词，我们能让模型更可靠地输出结果。虽然GPT-3.5需额外步骤，但效果依然不错。
73 | 


--------------------------------------------------------------------------------
/backend/tests/test_summarize.py:
--------------------------------------------------------------------------------
 1 | # -*- coding: utf-8 -*-
 2 | """
 3 | 最小可运行示例：
 4 | - 构造少量句级段落（不含 spk_id），调用 summarize_once 并打印结果；
 5 | - 读取 config.yaml 中的 chat.{api_key, base_url, model} 作为调用所需配置；
 6 | - 若缺少必要配置，则直接打印提示并退出（不做 try/except）。
 7 | 
 8 | 遵循项目约定：
 9 | - 交流与注释中文；
10 | - 直接调用，避免 argparse/unittest；
11 | - 最小测试原则。
12 | """
13 | from __future__ import annotations
14 | 
15 | import json
16 | import os
17 | import sys
18 | from typing import List, Dict, Any
19 | 
20 | # 兼容在 backend/tests 目录下直接运行：将项目根目录加入 sys.path
21 | _THIS_DIR = os.path.dirname(__file__)
22 | _PROJECT_ROOT = os.path.abspath(os.path.join(_THIS_DIR, "..", ".."))
23 | if _PROJECT_ROOT not in sys.path:
24 |     sys.path.insert(0, _PROJECT_ROOT)
25 | 
26 | from backend.services.summarize_service import summarize_once  # noqa: E402
27 | 
28 | 
29 | def run() -> None:
30 |     # 构造最小段落列表
31 |     segments: List[Dict[str, Any]] = [
32 |         {"index": 1, "sentence": "你好，世界！这是一次最小总结测试。", "start_time": 0.0, "end_time": 1.2},
33 |         {"index": 2, "sentence": "我们希望输出主题与简要总结。", "start_time": 1.2, "end_time": 2.8},
34 |     ]
35 | 
36 |     # 执行一次总结
37 |     items = summarize_once(segments)
38 | 
39 |     # 输出与保存
40 |     print("summarize result:")
41 |     print(json.dumps(items, ensure_ascii=False, indent=2))
42 | 
43 |     out_dir = os.path.join(_THIS_DIR, "results")
44 |     os.makedirs(out_dir, exist_ok=True)
45 |     out_path = os.path.join(out_dir, "summarize_result.json")
46 |     with open(out_path, "w", encoding="utf-8") as f:
47 |         json.dump(items, f, ensure_ascii=False, indent=2)
48 |     print("保存路径:", out_path)
49 | 
50 | 
51 | if __name__ == "__main__":
52 |     run()
53 | 


--------------------------------------------------------------------------------
/backend/media_processing/video/download/youtube/example.py:
--------------------------------------------------------------------------------
 1 | """
 2 | YouTube视频下载示例脚本
 3 | 使用youtube_downloader模块下载指定的YouTube视频
 4 | """
 5 | 
 6 | import os
 7 | import sys
 8 | 
 9 | # 添加backend到路径，确保绝对导入正常工作
10 | backend_dir = os.path.dirname(os.path.dirname(os.path.dirname(os.path.dirname(os.path.dirname(os.path.abspath(__file__))))))
11 | project_root = os.path.dirname(backend_dir)
12 | if project_root not in sys.path:
13 |     sys.path.insert(0, project_root)
14 | 
15 | try:
16 |     from backend.media_processing.video.download.youtube.youtube_downloader import download_youtube_video
17 | except ImportError:
18 |     try:
19 |         from .youtube_downloader import download_youtube_video
20 |     except ImportError:
21 |         raise ImportError("无法导入必要的模块，请检查项目结构")
22 | 
23 | def progress_callback(progress_info):
24 |     """
25 |     进度回调函数
26 |     """
27 |     status = progress_info.get('status', 'unknown')
28 |     progress_percent = progress_info.get('progress_percent', 0)
29 |     print(f"下载状态: {status}, 进度: {progress_percent:.2f}%")
30 | 
31 | if __name__ == "__main__":
32 |     # Test URL
33 |     test_url = "https://www.youtube.com/watch?v=A6ZgS0vGsl8"
34 | 
35 |     print("开始下载YouTube视频...")
36 |     result = download_youtube_video(test_url, progress_callback=progress_callback)
37 | 
38 |     if result.success:
39 |         print("下载成功!")
40 |         print(f"标题: {result.title}")
41 |         print(f"视频路径: {result.video_path}")
42 |         print(f"音频路径: {result.audio_path}")
43 |         print(f"时长: {result.duration}秒")
44 |         print(f"媒体类型: {result.media_type}")
45 |     else:
46 |         print(f"下载失败: {result.error_message}")
47 | 


--------------------------------------------------------------------------------
/frontend/src/components/RightPanel/Chat/MessageInput.tsx:
--------------------------------------------------------------------------------
 1 | import type { KeyboardEvent } from "react"
 2 | import { Button } from "@/components/ui/button"
 3 | import { Textarea } from "@/components/ui/textarea"
 4 | import { Send, Loader2 } from "lucide-react"
 5 | 
 6 | /** Props accepted by the chat message input. */
 7 | interface MessageInputProps {
 8 |   /** Current text of the textarea (controlled). */
 9 |   readonly inputValue: string
10 |   /** True while a message is being sent; switches the button to its spinner state. */
11 |   readonly loading: boolean
12 |   /** Disables sending, both through the button and through the Enter key. */
13 |   readonly disabled: boolean
14 |   /** Receives the new textarea text on every change. */
15 |   readonly onInputChange: (value: string) => void
16 |   /** Invoked to send the current message. */
17 |   readonly onSend: () => void
18 | }
19 | 
20 | /**
21 |  * Textarea plus send button for the chat panel.
22 |  * Enter sends the message; Shift+Enter inserts a newline.
23 |  */
24 | export default function MessageInput({
25 |   inputValue,
26 |   loading,
27 |   disabled,
28 |   onInputChange,
29 |   onSend,
30 | }: MessageInputProps) {
31 |   const handleKeyPress = (e: KeyboardEvent<HTMLTextAreaElement>) => {
32 |     if (e.key !== 'Enter' || e.shiftKey) {
33 |       return
34 |     }
35 |     // While an IME composition is active (e.g. Chinese input), Enter confirms the
36 |     // candidate text; it must not send the message or be swallowed here.
37 |     if (e.nativeEvent.isComposing) {
38 |       return
39 |     }
40 |     e.preventDefault()
41 |     // Honour `disabled` on the keyboard path too, so Enter cannot bypass the disabled button.
42 |     if (!disabled && !loading && inputValue.trim()) {
43 |       onSend()
44 |     }
45 |   }
46 | 
47 |   return (
48 |     <>
49 |       <Textarea
50 |         value={inputValue}
51 |         onChange={(e) => onInputChange(e.target.value)}
52 |         onKeyDown={handleKeyPress}
53 |         placeholder="请输入您的问题..."
54 |         className="min-h-[80px] resize-none"
55 |       />
56 |       <Button
57 |         onClick={onSend}
58 |         disabled={disabled}
59 |         className="w-full gap-2"
60 |       >
61 |         {loading ? (
62 |           <>
63 |             <Loader2 className="h-4 w-4 animate-spin" />
64 |             发送中
65 |           </>
66 |         ) : (
67 |           <>
68 |             <Send className="h-4 w-4" />
69 |             发送
70 |           </>
71 |         )}
72 |       </Button>
73 |     </>
74 |   )
75 | }


--------------------------------------------------------------------------------
/backend/tests/test_multi_platform_downloader.py:
--------------------------------------------------------------------------------
 1 | import sys
 2 | import os
 3 | 
 4 | # 添加项目根目录到路径，以便导入模块
 5 | sys.path.insert(0, os.path.dirname(os.path.dirname(__file__)))
 6 | 
 7 | from backend.media_processing import MediaDownloaderFactory
 8 | 
 9 | def test_multi_platform_downloader():
10 |     """测试多平台下载器"""
11 |     # 测试URL列表
12 |     test_urls = [
13 |         "https://www.bilibili.com/video/BV12CWrzqELo/?spm_id_from=333.1007.tianma.6-4-22.click",  # B站视频
14 |         "https://www.youtube.com/watch?v=wjZofJX0v4M",  # YouTube视频
15 |         "https://www.xiaoyuzhoufm.com/episode/68f7034122654730207b940c"  # 小宇宙播客
16 |     ]
17 | 
18 |     # 创建测试下载目录
19 |     test_out_dir = os.path.join(os.path.dirname(__file__), 'test_downloads')
20 |     if not os.path.exists(test_out_dir):
21 |         os.makedirs(test_out_dir)
22 | 
23 |     factory = MediaDownloaderFactory(output_dir=test_out_dir)
24 | 
25 |     for url in test_urls:
26 |         print(f"测试下载: {url}")
27 |         try:
28 |             result = factory.download(url)
29 |             if result.success:
30 |                 file_path = result.video_path or result.audio_path
31 |                 print(f"下载成功，文件: {file_path}")
32 |                 # 检查文件是否存在
33 |                 if file_path and os.path.exists(file_path):
34 |                     print(f"文件验证成功: {file_path}")
35 |                 else:
36 |                     print(f"文件不存在: {file_path}")
37 |             else:
38 |                 print(f"下载失败: {result.error_message}")
39 |         except Exception as e:
40 |             print(f"下载异常: {e}")
41 |         print("-" * 50)
42 | 
43 | if __name__ == "__main__":
44 |     test_multi_platform_downloader()


--------------------------------------------------------------------------------
/backend/ReAct/test_chat_request.py:
--------------------------------------------------------------------------------
 1 | """测试ChatAgent的功能"""
 2 | 
 3 | import asyncio
 4 | import sys
 5 | import os
 6 | 
 7 | # 添加backend目录到路径
 8 | backend_path = os.path.abspath(os.path.join(os.path.dirname(__file__), '..'))
 9 | if backend_path not in sys.path:
10 |     sys.path.insert(0, backend_path)
11 | 
12 | from backend.config import settings
13 | from ReAct.chat_agent import ChatAgent
14 | 
15 | async def test_chat_agent():
16 |     """测试ChatAgent的generate_answer方法"""
17 |     # 创建ChatAgent实例
18 |     chat_agent = ChatAgent(
19 |         openai_api_key=settings.llm_provider_api_key,
20 |         openai_api_base=settings.llm_provider_base_url,
21 |         openai_api_model=settings.llm_model,
22 |         tools_backend_url="http://localhost:8004/mcp"
23 |     )
24 | 
25 |     question = "我们目前的视频里面都有介绍了多少Obsidian插件？"
26 |     transcript_ids = [5, 4, 3, 2, 1]
27 | 
28 |     try:
29 |         # 调用generate_answer，传入transcript_ids
30 |         result = await chat_agent.generate_answer(question, allowed_tools=None, transcript_ids=transcript_ids)
31 | 
32 |         print("ChatAgent调用成功")
33 |         print("最终答案:", result.final_answer)
34 |         print("推理步骤数量:", len(result.trace))
35 |         print("消息历史长度:", len(result.messages))
36 | 
37 |         # 打印推理轨迹
38 |         for step in result.trace:
39 |             print(f"步骤 {step.step}: {step.thought}")
40 |             if step.action:
41 |                 print(f"  动作: {step.action}")
42 |             if step.observation:
43 |                 print(f"  观察: {step.observation}")
44 | 
45 |     except Exception as e:
46 |         print("ChatAgent调用失败:", str(e))
47 | 
48 | if __name__ == "__main__":
49 |     print("测试ChatAgent功能:")
50 |     asyncio.run(test_chat_agent())


--------------------------------------------------------------------------------
/ASRBackend/asr_functions/docs/asr_sentence_segments设计文档.md:
--------------------------------------------------------------------------------
 1 | # asr_sentence_segments 模块设计文档
 2 | 
 3 | ## 概述
 4 | 
 5 | `asr_sentence_segments.py` 是一个用于本地语音识别（ASR）的核心处理模块，基于 FunASR 库实现。该模块负责处理音频文件并将其转换为带时间戳的文本片段，支持本地文件和远程 URL 地址的音频处理。
 6 | 
 7 | ## 功能特性
 8 | 
 9 | - 支持本地音频文件和远程 URL 音频处理
10 | - 自动下载远程音频文件至临时目录
11 | - 使用 FunASR 进行语音识别
12 | - 支持说话人分离
13 | - 支持句子合并和规范化处理
14 | - 自动清理临时文件
15 | 
16 | ## 输入输出说明
17 | 
18 | ### 主要入口函数
19 | 
20 | ```python
21 | def process(
22 |     audio_path: str,
23 |     merge_sentences: bool = True,
24 |     merge_short_sentences: bool = True,
25 |     batch_size_s: int = 300,
26 |     hotword: str = "Obsidian"
27 | ) -> List[Dict]
28 | ```
29 | 
30 | #### 参数说明
31 | 
32 | - `audio_path`: 音频文件路径或 URL 地址
33 | - `merge_sentences`: 是否合并句子（默认: True）
34 | - `merge_short_sentences`: 是否合并短句（默认: True）
35 | - `batch_size_s`: 批处理大小（秒）（默认: 300）
36 | - `hotword`: 热词（默认: "Obsidian"）
37 | 
38 | #### 返回值
39 | 
40 | 返回一个包含识别结果的字典列表，每个字典包含以下字段：
41 | 
42 | - `spk_id`: 说话人 ID
43 | - `sentence`: 识别的文本内容
44 | - `start_time`: 句子开始时间（毫秒）
45 | - `end_time`: 句子结束时间（毫秒）
46 | 
47 | ### 辅助函数
48 | 
49 | #### `is_url(path: str) -> bool`
50 | 
51 | 判断给定路径是否为 URL。
52 | 
53 | #### `download_audio(url: str, max_size: int = 100 * 1024 * 1024) -> str`
54 | 
55 | 下载远程音频文件到本地临时目录。
56 | 
57 | #### `get_model()`
58 | 
59 | 获取或加载 ASR 模型（单例模式）。
60 | 
61 | ## 工作流程
62 | 
63 | 1. 判断输入路径是本地文件还是 URL
64 | 2. 如果是 URL，则下载到临时文件
65 | 3. 加载 ASR 模型（如果尚未加载）
66 | 4. 使用 FunASR 处理音频文件
67 | 5. 解析并规范化识别结果
68 | 6. 清理临时文件
69 | 7. 返回标准化的结果列表
70 | 
71 | ## 依赖项
72 | 
73 | - funasr: 主要的 ASR 处理库
74 | - requests: 用于下载远程音频文件
75 | - segment_normalizer: 用于结果规范化处理
76 | 
77 | ## 异常处理
78 | 
79 | 模块会在以下情况下抛出异常或返回空列表：
80 | - 音频文件不存在或无法访问
81 | - 模型加载失败
82 | - ASR 处理过程中发生错误
83 | - 网络连接问题导致下载失败
84 | 
85 | 所有异常都会被捕获并记录日志，函数通常会返回空列表而不是传播异常。


--------------------------------------------------------------------------------
/backend/tests/test_stream_translate.py:
--------------------------------------------------------------------------------
 1 | #!/usr/bin/env python
 2 | # -*- coding: utf-8 -*-
 3 | """
 4 | 测试流式翻译功能
 5 | """
 6 | import sys
 7 | import os
 8 | 
 9 | project_root = os.path.dirname(os.path.abspath(__file__))
10 | sys.path.insert(0, project_root)
11 | 
12 | import asyncio
13 | from config import settings
14 | from backend.text_process.translate import translate_segments_async
15 | 
16 | async def main():
17 |     # 测试数据
18 |     segments = [
19 |         {"index": 0, "sentence": "Hello, this is an English video.", "start_time": 0.0, "end_time": 3.5},
20 |         {"index": 1, "sentence": "We will discuss important topics today.", "start_time": 3.5, "end_time": 7.2},
21 |         {"index": 2, "sentence": "Let's start with the first point.", "start_time": 7.2, "end_time": 10.0},
22 |     ]
23 |     
24 |     def progress_callback(translated_count: int, total: int):
25 |         progress = (translated_count / total * 100) if total > 0 else 0
26 |         print(f"进度: {translated_count}/{total} ({progress:.1f}%)")
27 |     
28 |     print("开始流式翻译测试...")
29 |     print("=" * 60)
30 |     
31 |     result = await translate_segments_async(
32 |         segments,
33 |         target_language="zh",
34 |         max_tokens=4096,
35 |         source_lang_name="English",
36 |         target_lang_name="Chinese",
37 |         progress_callback=progress_callback,
38 |     )
39 |     
40 |     print("=" * 60)
41 |     print("\n翻译完成结果:")
42 |     for seg in result:
43 |         trans = seg.get("translation", {})
44 |         zh_trans = trans.get("zh", "无翻译") if isinstance(trans, dict) else "无翻译"
45 |         print(f"[{seg['index']}] {seg['sentence']}")
46 |         print(f"    -> {zh_trans}")
47 | 
48 | if __name__ == "__main__":
49 |     asyncio.run(main())
50 | 


--------------------------------------------------------------------------------
/backend/tests/download_media_file.py:
--------------------------------------------------------------------------------
 1 | import yt_dlp
 2 | import sys
 3 | import os
 4 | import argparse
 5 | 
 6 | def download_media(url, output_dir='downloads', list_formats=False):
 7 |     """
 8 |     使用 yt-dlp 下载媒体文件（视频或音频）
 9 |     支持 B站、YouTube、小宇宙播客等平台
10 |     """
11 |     if not os.path.exists(output_dir):
12 |         os.makedirs(output_dir)
13 | 
14 |     ydl_opts = {
15 |         'outtmpl': os.path.join(output_dir, '%(title)s.%(ext)s'),
16 |         'format': 'bestvideo[ext=mp4]+bestaudio[ext=m4a]/best[ext=mp4]/best',  # 优先 mp4 格式，避免会员限制
17 |     }
18 | 
19 |     # if list_formats:
20 |     #     ydl_opts['listformats'] = True
21 |     #     ydl_opts['simulate'] = True  # 模拟模式，只列出格式，不下载
22 | 
23 |     try:
24 |         with yt_dlp.YoutubeDL(ydl_opts) as ydl:
25 |             if list_formats:
26 |                 print(f"列出格式: {url}")
27 |             else:
28 |                 print(f"开始下载: {url}")
29 |             ydl.download([url])
30 |             if not list_formats:
31 |                 print("下载完成！")
32 |     except Exception as e:
33 |         print(f"操作失败: {e}")
34 | 
35 | def main():
36 |     parser = argparse.ArgumentParser(description="下载媒体文件")
37 |     parser.add_argument('url', help='媒体URL')
38 |     parser.add_argument('--list-formats', '-F', action='store_true', help='列出可用格式')
39 | 
40 |     args = parser.parse_args()
41 | 
42 |     download_media(args.url, list_formats=args.list_formats)
43 | 
44 | if __name__ == "__main__":
45 |     main()
46 | """
47 | ```bash
48 | # 下载 B站视频
49 | python main.py https://www.bilibili.com/video/BV12CWrzqELo/?spm_id_from=333.1007.tianma.6-4-22.click
50 | 
51 | # 下载 YouTube 视频
52 | python main.py https://www.youtube.com/watch?v=wjZofJX0v4M
53 | 
54 | # 下载小宇宙播客
55 | python main.py https://www.xiaoyuzhoufm.com/episode/68f7034122654730207b940c
56 | ```
57 | 
58 | """


--------------------------------------------------------------------------------
/frontend/src/components/docs/ProgressCard设计说明.md:
--------------------------------------------------------------------------------
  1 | # ProgressCard 进度卡片组件设计说明
  2 | 
  3 | ## 概述
  4 | 
  5 | ProgressCard 组件负责展示 HearSight 应用中文件处理任务的进度信息，以卡片形式显示任务状态、进度和相关信息。该组件是用户了解文件处理状态的核心展示模块。
  6 | 
  7 | ## 组件结构
  8 | 
  9 | ### ProgressCard.tsx
 10 | 
 11 | 主容器组件，负责：
 12 | 
 13 | - 展示文件处理进度信息
 14 | - 显示任务状态和阶段
 15 | - 格式化数据展示（文件大小、处理时间等）
 16 | 
 17 | ### 核心功能模块
 18 | 
 19 | - **进度展示**: 显示任务处理进度
 20 | - **状态指示**: 显示任务当前状态
 21 | - **信息格式化**: 格式化文件大小和处理时间
 22 | - **阶段标识**: 显示任务处理阶段
 23 | 
 24 | ## 设计原则
 25 | 
 26 | ### 1. 信息清晰
 27 | - 清晰展示任务处理状态
 28 | - 直观显示进度信息
 29 | - 提供阶段标识和描述
 30 | 
 31 | ### 2. 数据格式化
 32 | - 文件大小自动格式化为合适的单位
 33 | - 处理时间自动格式化为可读格式
 34 | - 数值信息以用户友好的方式展示
 35 | 
 36 | ### 3. 状态可视化
 37 | - 使用图标区分不同状态
 38 | - 通过颜色区分成功、失败、进行中等状态
 39 | - 提供实时进度更新
 40 | 
 41 | ## 功能详解
 42 | 
 43 | ### 进度展示
 44 | 
 45 | - 显示任务处理进度百分比
 46 | - 提供进度条可视化展示
 47 | - 实时更新进度信息
 48 | 
 49 | ### 状态指示
 50 | 
 51 | - 使用图标表示不同状态（进行中、成功、失败等）
 52 | - 通过颜色区分状态类型
 53 | - 提供状态文本描述
 54 | 
 55 | ### 信息格式化
 56 | 
 57 | - 文件大小格式化（B, KB, MB, GB）
 58 | - 处理时间格式化（秒、分、小时）
 59 | - 数值信息自动四舍五入和单位转换
 60 | 
 61 | ### 阶段标识
 62 | 
 63 | - 显示任务处理阶段（等待、上传、下载、ASR、处理等）
 64 | - 提供阶段中文描述
 65 | - 根据阶段调整界面展示
 66 | 
 67 | ## 数据流设计
 68 | 
 69 | ### Props 接口
 70 | 
 71 | - filename: 文件名
 72 | - progress: 进度信息对象
 73 | 
 74 | ### 内部状态
 75 | 
 76 | - 使用 useEffect 监听进度变化
 77 | - 格式化函数处理数据展示
 78 | 
 79 | ### 进度数据结构
 80 | 
 81 | - stage: 处理阶段
 82 | - status: 任务状态
 83 | - progress: 进度百分比
 84 | - total_size: 总大小
 85 | - downloaded_size: 已下载大小
 86 | - duration: 处理时长
 87 | 
 88 | ## 用户交互设计
 89 | 
 90 | ### 信息展示
 91 | 
 92 | - 卡片式布局展示任务信息
 93 | - 清晰的状态图标和颜色标识
 94 | - 进度条可视化展示进度
 95 | - 格式化的数据信息展示
 96 | 
 97 | ### 状态标识
 98 | 
 99 | - 进行中：旋转加载图标
100 | - 成功：绿色勾号图标
101 | - 失败：红色叉号图标
102 | - 默认：灰色加载图标
103 | 
104 | ## 无障碍设计
105 | 
106 | ### 屏幕阅读器支持
107 | - 提供适当的 ARIA 标签
108 | - 为状态图标添加描述性文本
109 | - 确保界面元素语义化


--------------------------------------------------------------------------------
/backend/queues/worker_launcher.py:
--------------------------------------------------------------------------------
 1 | # -*- coding: utf-8 -*-
 2 | """Celery worker启动脚本"""
 3 | 
 4 | from __future__ import annotations
 5 | 
 6 | import os
 7 | import sys
 8 | from pathlib import Path
 9 | 
10 | # 获取项目根目录
11 | if os.path.exists("/app"):
12 |     project_root = "/app"
13 | else:
14 |     project_root = str(Path(__file__).parent.parent.parent)
15 | 
16 | # 添加项目根目录到 sys.path
17 | if project_root not in sys.path:
18 |     sys.path.insert(0, project_root)
19 | 
20 | # 加载环境变量
21 | from dotenv import load_dotenv
22 | env_file = Path(project_root) / "backend" / ".env"
23 | if env_file.exists():
24 |     load_dotenv(env_file)
25 | 
26 | # 导入Celery应用
27 | from backend.config import create_celery_app
28 | 
def main():
    """Initialise the LLM/embedding routers and run a Celery worker in-process.

    Worker settings come from the environment:
    ``CELERY_WORKER_CONCURRENCY`` (default ``"4"``, converted with ``int``)
    and ``CELERY_LOG_LEVEL`` (default ``"info"``).
    """
    # Imported lazily so the routers are only pulled in when a worker starts.
    from backend.startup import initialize_llm_router, initialize_embedding_router

    initialize_llm_router()
    initialize_embedding_router()

    worker_count = int(os.getenv("CELERY_WORKER_CONCURRENCY", "4"))
    log_level = os.getenv("CELERY_LOG_LEVEL", "info")
    print(f"启动Celery worker，并发数: {worker_count}, 日志级别: {log_level}")

    from backend.config import settings

    # The broker URL is echoed from the environment, the result backend from settings.
    broker_url = os.getenv('CELERY_BROKER_URL', 'redis://localhost:6379/0')
    print(f"Broker: {broker_url}")
    result_backend = getattr(settings, 'celery_result_backend', 'redis://localhost:6379/1')
    print(f"Result backend: {result_backend}")

    celery_app = create_celery_app()

    # NOTE(review): with "--pool solo" tasks presumably run inline in the worker
    # process, so "--concurrency" may have no effect — confirm against Celery docs.
    worker_argv = [
        "worker",
        "--loglevel", log_level,
        "--concurrency", str(worker_count),
        "--pool", "solo",
        "--time-limit", "3600",
        "--soft-time-limit", "3300",
    ]
    celery_app.worker_main(worker_argv)
55 | 
56 | if __name__ == "__main__":
57 |     main()
58 | 


--------------------------------------------------------------------------------
/ASRBackend/.env.example:
--------------------------------------------------------------------------------
 1 | # ASR Backend 环境变量示例文件
 2 | # 复制此文件为 .env，然后根据需要修改配置
 3 | 
 4 | # ========== 基本配置 ==========
 5 | # 应用名称
 6 | APP_NAME=HearSight ASR Backend
 7 | 
 8 | # 调试模式
 9 | DEBUG=true
10 | 
11 | # 服务端口
12 | PORT=8003
13 | 
14 | # CORS 配置
15 | CORS_ORIGINS_STR=http://localhost:5173,http://localhost:8080,http://localhost:8000
16 | CORS_ALLOW_CREDENTIALS=true
17 | CORS_ALLOW_METHODS_STR=*
18 | CORS_ALLOW_HEADERS_STR=*
19 | 
20 | # ========== 运行模式选择 ==========
21 | # 可选值：local（本地模式）或 cloud（云端模式）
22 | # local: 使用 FunASR 本地模型，支持文件上传和 URL
23 | # cloud: 使用阿里云 DashScope API，仅支持 URL，轻量级部署
24 | ASR_MODE=local
25 | 
26 | # ========== 本地模式配置 ==========
27 | # 仅在 ASR_MODE=local 时生效
28 | 
29 | # FunASR 模型配置
30 | LOCAL_MODEL_NAME=paraformer-zh
31 | LOCAL_MODEL_REVISION=v2.0.4
32 | 
33 | # VAD（语音活动检测）模型
34 | LOCAL_VAD_MODEL=fsmn-vad
35 | LOCAL_VAD_MODEL_REVISION=v2.0.4
36 | 
37 | # 标点符号模型
38 | LOCAL_PUNC_MODEL=ct-punc-c
39 | LOCAL_PUNC_MODEL_REVISION=v2.0.4
40 | 
41 | # 说话人识别模型
42 | LOCAL_SPK_MODEL=cam++
43 | 
44 | # ========== 云端模式配置 ==========
45 | # 仅在 ASR_MODE=cloud 时生效
46 | 
47 | # 阿里云 DashScope API Key
48 | # 从 https://dashscope.console.aliyun.com/ 获取
49 | DASHSCOPE_API_KEY=your-api-key-here
50 | 
51 | # DashScope 模型
52 | DASHSCOPE_MODEL=paraformer-v2
53 | 
54 | # 语言提示（用于多语言识别）
55 | DASHSCOPE_LANGUAGE_HINTS=zh,en
56 | 
57 | # ========== Supabase 配置 ==========
58 | # 用于云端模式的文件上传存储
59 | # 从 https://supabase.com/ 创建项目后获取
60 | 
61 | # Supabase 项目 URL
62 | SUPABASE_URL=https://your-project.supabase.co
63 | 
64 | # Supabase API Key (anon public key)
65 | SUPABASE_KEY=your-anon-key-here
66 | 
67 | # Supabase 存储桶名称
68 | SUPABASE_BUCKET_NAME=test-public
69 | 
70 | # 上传文件的文件夹名称
71 | SUPABASE_FOLDER_NAME=asr
72 | 
73 | # 管理员邮箱（可选，用于登录）
74 | SUPABASE_ADMIN_EMAIL=your-admin-email@example.com
75 | 
76 | # 管理员密码（可选，用于登录）
77 | SUPABASE_ADMIN_PASSWORD=your-admin-password
78 | 


--------------------------------------------------------------------------------
/backend/services/example_chat_service.py:
--------------------------------------------------------------------------------
 1 | # -*- coding: utf-8 -*-
 2 | """chat_service 示例测试"""
 3 | 
 4 | import sys
 5 | import os
 6 | import warnings
 7 | 
 8 | # 忽略 litellm 的异步客户端清理警告
 9 | warnings.filterwarnings("ignore", message="coroutine 'close_litellm_async_clients' was never awaited")
10 | 
11 | # 加载环境变量
12 | from dotenv import load_dotenv
13 | load_dotenv(os.path.join(os.path.dirname(__file__), '..', '.env'))
14 | 
15 | # 添加项目根目录到 sys.path
16 | sys.path.insert(0, os.path.abspath(os.path.join(os.path.dirname(__file__), '..', '..')))
17 | 
18 | from backend.services.knowledge_base_service import knowledge_base
19 | from backend.services.chat_service import chat_service
20 | 
def test_chat_service():
    """Exercise the streaming chat call against every transcript in the knowledge base.

    Chunks are echoed as they arrive and the joined response is printed at the
    end. Any exception is caught and reported instead of propagating.
    """
    question = "视频讲了什么？"
    print(f"问题: {question}")

    # Retrieve over every transcript the knowledge base currently knows about.
    transcript_ids = knowledge_base.get_transcript_ids()
    print(f"知识库中的 transcript_ids: {transcript_ids}")

    received_chunks = []

    def _on_chunk(chunk: str):
        # Echo immediately and keep a copy for the final summary.
        print(chunk, end="", flush=True)
        received_chunks.append(chunk)

    try:
        stream = chat_service.chat_with_transcripts_stream(
            question=question,
            transcript_ids=transcript_ids,
            stream_callback=_on_chunk,
        )
        # The generator must be drained for the call to execute, even though
        # the callback already receives the output.
        for _ in stream:
            pass
    except Exception as e:
        print(f"测试失败: {e}")
    else:
        print("=== 测试完成 ===")
        print(f"完整响应: {''.join(received_chunks)}")
61 | 
62 | if __name__ == "__main__":
63 |     test_chat_service()
64 | 


--------------------------------------------------------------------------------
/frontend/src/components/ui/scroll-area.tsx:
--------------------------------------------------------------------------------
 1 | import * as React from "react"
 2 | import * as ScrollAreaPrimitive from "@radix-ui/react-scroll-area"
 3 | 
 4 | import { cn } from "@/lib/utils"
 5 | 
 6 | function ScrollArea({
 7 |   className,
 8 |   children,
 9 |   ...props
10 | }: React.ComponentProps<typeof ScrollAreaPrimitive.Root>) {
11 |   return (
12 |     <ScrollAreaPrimitive.Root
13 |       data-slot="scroll-area"
14 |       className={cn("relative", className)}
15 |       {...props}
16 |     >
17 |       <ScrollAreaPrimitive.Viewport
18 |         data-slot="scroll-area-viewport"
19 |         className="focus-visible:ring-ring/50 size-full rounded-[inherit] transition-[color,box-shadow] outline-none focus-visible:ring-[3px] focus-visible:outline-1"
20 |       >
21 |         {children}
22 |       </ScrollAreaPrimitive.Viewport>
23 |       <ScrollBar />
24 |       <ScrollAreaPrimitive.Corner />
25 |     </ScrollAreaPrimitive.Root>
26 |   )
27 | }
28 | 
// Scrollbar track + thumb for ScrollArea; orientation defaults to "vertical"
// and selects the sizing/border classes applied to the track.
function ScrollBar({
  className,
  orientation = "vertical",
  ...barProps
}: React.ComponentProps<typeof ScrollAreaPrimitive.ScrollAreaScrollbar>) {
  const isVertical = orientation === "vertical"
  const isHorizontal = orientation === "horizontal"
  const trackClassName = cn(
    "flex touch-none p-px transition-colors select-none",
    isVertical && "h-full w-2.5 border-l border-l-transparent",
    isHorizontal && "h-2.5 flex-col border-t border-t-transparent",
    className
  )

  return (
    <ScrollAreaPrimitive.ScrollAreaScrollbar
      data-slot="scroll-area-scrollbar"
      orientation={orientation}
      className={trackClassName}
      {...barProps}
    >
      <ScrollAreaPrimitive.ScrollAreaThumb
        data-slot="scroll-area-thumb"
        className="bg-border relative flex-1 rounded-full"
      />
    </ScrollAreaPrimitive.ScrollAreaScrollbar>
  )
}
55 | 
56 | export { ScrollArea, ScrollBar }
57 | 


--------------------------------------------------------------------------------
/backend/Dockerfile:
--------------------------------------------------------------------------------
 1 | FROM python:3.10-slim
 2 | 
 3 | # 环境变量与镜像运行行为
 4 | ENV PYTHONDONTWRITEBYTECODE=1 \
 5 |     PYTHONUNBUFFERED=1 \
 6 |     # 默认使用阿里云 PyPI 镜像加速大部分包安装
 7 |     PIP_INDEX_URL=https://mirrors.aliyun.com/pypi/simple \
 8 |     PIP_TRUSTED_HOST=mirrors.aliyun.com
 9 | 
10 | WORKDIR /app
11 | 
# Install system dependencies (ffmpeg/libsndfile are required for audio processing).
# Point apt at the TUNA mirror first. Recent Debian-based images keep their sources
# in /etc/apt/sources.list.d/debian.sources (deb822) rather than /etc/apt/sources.list,
# and Debian source entries have no trailing slash after ".../debian", so rewrite
# whichever file exists and match the URL prefix only (this also covers
# ".../debian-security").
# Each step is grouped explicitly: "update" and "install" each get one fallback
# attempt, and a failure of both aborts the build instead of falling through a
# flat &&/|| chain.
RUN set -eu; \
    for src in /etc/apt/sources.list /etc/apt/sources.list.d/debian.sources; do \
        if [ -f "$src" ]; then \
            sed -i \
                -e 's|http://archive.ubuntu.com/ubuntu|https://mirrors.tuna.tsinghua.edu.cn/ubuntu|g' \
                -e 's|http://security.ubuntu.com/ubuntu|https://mirrors.tuna.tsinghua.edu.cn/ubuntu|g' \
                -e 's|http://deb.debian.org/debian|https://mirrors.tuna.tsinghua.edu.cn/debian|g' \
                "$src"; \
        fi; \
    done; \
    pkgs="ca-certificates apt-transport-https build-essential git curl ffmpeg libsndfile1"; \
    apt-get -o Acquire::Retries=3 update \
        || apt-get -o Acquire::Retries=3 update --fix-missing; \
    apt-get -o Acquire::Retries=3 install -y --no-install-recommends $pkgs \
        || apt-get install -y --no-install-recommends --fix-missing $pkgs; \
    rm -rf /var/lib/apt/lists/*
26 | 
27 | # 复制 requirements 并安装依赖
28 | COPY backend/requirements.txt /app/requirements.txt
29 | 
30 | RUN python -m pip install --upgrade pip \
31 |     && python -m pip install -r /app/requirements.txt
32 | 
33 | # 复制源码
34 | COPY . /app
35 | 
36 | # 确保持久化目录存在
37 | RUN mkdir -p /app/app_datas/download_videos \
38 |     /app/app_datas/model_cache \
39 |     /app/app_datas/torch_cache \
40 |     /app/app_datas/transformers_cache \
41 |     /app/app_datas/xdg_cache
42 | 
43 | EXPOSE 8000
44 | 
45 | # 启动时使用运行时的 PORT 环境变量（默认 8000）
46 | CMD ["sh", "-c", "uvicorn backend.main:app --host 0.0.0.0 --port ${PORT:-8000}"]
47 | 


--------------------------------------------------------------------------------
/backend/ReAct/action_parser.py:
--------------------------------------------------------------------------------
 1 | """Action 解析工具 - 统一处理 ReAct 格式的动作解析"""
 2 | 
 3 | import json
 4 | import re
 5 | from typing import Any, Dict, Optional, Tuple
 6 | 
 7 | 
 8 | def parse_response(
 9 |     text: str,
10 | ) -> Tuple[str, Optional[str], Optional[Dict[str, Any]], Optional[str]]:
11 |     """
12 |     从 LLM 输出解析完整的 ReAct 响应
13 | 
14 |     解析格式：
15 |     Thought: 思考内容
16 |     Action: 动作名称
17 |     Action Input: {"key": "value"}
18 |     Final Answer: 最终答案
19 | 
20 |     参数:
21 |         text: LLM 输出的文本
22 | 
23 |     返回:
24 |         Tuple[thought, action_name, action_input, final_answer]
25 |         - thought: 思考内容
26 |         - action_name: 动作名称
27 |         - action_input: 动作输入字典
28 |         - final_answer: 最终答案
29 |     """
30 |     lines = text.splitlines()
31 | 
32 |     # 提取 Thought
33 |     thought_line = next((l for l in lines if l.startswith("Thought:")), None)
34 |     thought = (
35 |         thought_line.replace("Thought:", "").strip() if thought_line else ""
36 |     )
37 | 
38 |     # 提取 Action
39 |     action_line = next((l for l in lines if l.startswith("Action:")), None)
40 |     action_name = None
41 |     if action_line:
42 |         action_name = action_line.replace("Action:", "").strip()
43 | 
44 |     # 提取 Action Input
45 |     action_input = None
46 |     if action_name:
47 |         input_line = next(
48 |             (l for l in lines if l.startswith("Action Input:")), None
49 |         )
50 |         if input_line:
51 |             try:
52 |                 action_input = json.loads(
53 |                     input_line.replace("Action Input:", "").strip()
54 |                 )
55 |             except json.JSONDecodeError:
56 |                 action_input = {}
57 | 
58 |     # 提取 Final Answer
59 |     final_answer = None
60 |     if "Final Answer:" in text:
61 |         final_answer = text.split("Final Answer:", 1)[-1].strip()
62 | 
63 |     return thought, action_name, action_input, final_answer
64 | 


--------------------------------------------------------------------------------
/frontend/src/components/RightPanel/hooks/useLanguageSwitch.ts:
--------------------------------------------------------------------------------
 1 | import { useState, useCallback } from 'react'
 2 | 
 3 | const AVAILABLE_LANGUAGES = [
 4 |   { code: 'original', name: '原文', nameEn: 'Original' },
 5 |   { code: 'zh', name: '中文', nameEn: 'Chinese' },
 6 |   { code: 'en', name: '英文', nameEn: 'English' },
 7 |   { code: 'ja', name: '日文', nameEn: 'Japanese' },
 8 |   { code: 'ko', name: '韩文', nameEn: 'Korean' },
 9 |   { code: 'es', name: '西班牙文', nameEn: 'Spanish' },
10 |   { code: 'fr', name: '法文', nameEn: 'French' },
11 |   { code: 'de', name: '德文', nameEn: 'German' },
12 |   { code: 'ru', name: '俄文', nameEn: 'Russian' },
13 | ]
14 | 
// Hook tracking which transcript language is displayed and which languages
// have been made available so far (seeded with the original language).
export const useLanguageSwitch = (originalLanguage: string = 'original') => {
  const [displayLanguage, setDisplayLanguage] = useState(originalLanguage)
  const [availableLanguages, setAvailableLanguages] = useState<string[]>([originalLanguage])

  // Register a language code once; empty or whitespace-only codes are ignored.
  const addLanguage = useCallback((languageCode: string) => {
    const isBlank = !languageCode || languageCode.trim() === ''
    if (isBlank) {
      return
    }
    setAvailableLanguages(known =>
      known.includes(languageCode) ? known : [...known, languageCode]
    )
  }, [])

  // Only switch to languages that have already been registered.
  const switchLanguage = useCallback((languageCode: string) => {
    if (!availableLanguages.includes(languageCode)) {
      return
    }
    setDisplayLanguage(languageCode)
  }, [availableLanguages])

  // Map a code to its display name, falling back to the raw code when unknown.
  const getLanguageName = useCallback((code: string) => {
    const match = AVAILABLE_LANGUAGES.find(entry => entry.code === code)
    if (match) {
      return match.name
    }
    return code
  }, [])

  // Hide blank entries (e.g. a blank originalLanguage) from consumers.
  const validLanguages = availableLanguages.filter(code => code && code.trim() !== '')

  return {
    displayLanguage,
    availableLanguages: validLanguages,
    allLanguages: AVAILABLE_LANGUAGES,
    addLanguage,
    switchLanguage,
    getLanguageName,
  }
}
53 | 


--------------------------------------------------------------------------------
/frontend/src/components/docs/VideoPlayer设计说明.md:
--------------------------------------------------------------------------------
  1 | # VideoPlayer 视频播放器组件设计说明
  2 | 
  3 | ## 概述
  4 | 
  5 | VideoPlayer 组件负责在 HearSight 应用中播放视频和音频内容，支持多种媒体格式并提供统一的播放界面。该组件是用户查看和播放媒体文件的核心展示模块。
  6 | 
  7 | ## 组件结构
  8 | 
  9 | ### VideoPlayer.tsx
 10 | 
 11 | 主容器组件，负责：
 12 | 
 13 | - 视频/音频内容的播放展示
 14 | - 媒体类型识别和界面适配
 15 | - 播放控制和状态管理
 16 | 
 17 | ### 核心功能模块
 18 | 
 19 | - **媒体播放**: 支持视频和音频播放
 20 | - **类型识别**: 自动识别媒体类型并调整界面
 21 | - **播放控制**: 提供标准播放控制功能
 22 | - **状态展示**: 显示播放器状态和可用性
 23 | 
 24 | ## 设计原则
 25 | 
 26 | ### 1. 多媒体支持
 27 | - 支持视频和音频文件播放
 28 | - 自动识别媒体类型
 29 | - 根据类型调整界面展示
 30 | 
 31 | ### 2. 用户体验
 32 | - 提供直观的播放界面
 33 | - 支持标准播放控制（播放、暂停、音量等）
 34 | - 显示播放器状态信息
 35 | 
 36 | ### 3. 响应式设计
 37 | - 适配不同屏幕尺寸
 38 | - 保持良好的观看体验
 39 | - 支持全屏播放
 40 | 
 41 | ## 功能详解
 42 | 
 43 | ### 媒体播放
 44 | 
 45 | - 支持 HTML5 视频播放器功能
 46 | - 提供标准播放控制（播放、暂停、进度条、音量等）
 47 | - 支持全屏播放
 48 | - 支持预加载元数据
 49 | 
 50 | ### 类型识别
 51 | 
 52 | - 自动识别媒体类型（视频或音频）
 53 | - 根据文件扩展名判断类型
 54 | - 视频格式包括：mp4, avi, mov, mkv, flv, wmv, webm
 55 | - 音频格式包括：mp3, wav, m4a, aac, flac, ogg, wma
 56 | 
 57 | ### 界面适配
 58 | 
 59 | - 视频文件显示标准视频播放器
 60 | - 音频文件显示专门的音频播放界面
 61 | - 音频播放器提供视觉化的背景和图标
 62 | - 根据媒体类型调整播放器标题
 63 | 
 64 | ### 状态展示
 65 | 
 66 | - 显示播放器标题（视频播放器/音频播放器）
 67 | - 显示"可播放"状态指示
 68 | - 无媒体文件时显示占位符界面
 69 | - 提供操作引导信息
 70 | 
 71 | ## 数据流设计
 72 | 
 73 | ### Props 接口
 74 | 
 75 | - videoSrc: 视频源URL
 76 | - mediaType: 媒体类型（video/audio）
 77 | - loading: 加载状态
 78 | 
 79 | ### 内部状态
 80 | 
 81 | - 使用 React.forwardRef 传递视频元素引用
 82 | - 通过 CSS 控制界面展示效果
 83 | 
 84 | ### 媒体数据结构
 85 | 
 86 | - src: 媒体文件URL
 87 | - type: 媒体类型
 88 | - controls: 控制条显示
 89 | - preload: 预加载策略
 90 | 
 91 | ## 用户交互设计
 92 | 
 93 | ### 播放控制
 94 | 
 95 | - 标准视频播放控制条
 96 | - 支持播放/暂停
 97 | - 支持音量控制
 98 | - 支持进度条拖拽
 99 | - 支持全屏播放
100 | 
101 | ### 界面展示
102 | 
103 | - 视频文件显示完整播放界面
104 | - 音频文件显示专门设计的音频界面
105 | - 无文件时显示占位符和操作引导
106 | - 状态指示器显示播放器可用性
107 | 
108 | ## 无障碍设计
109 | 
110 | ### 键盘导航
111 | - 支持标准播放器键盘控制
112 | - 支持 Tab 键在控制元素间导航
113 | - 支持空格键播放/暂停
114 | 
115 | ### 屏幕阅读器支持
116 | - 提供适当的 ARIA 标签
117 | - 为播放器添加描述性文本
118 | - 确保界面元素语义化


--------------------------------------------------------------------------------
/ASRBackend/supabase_utils/example_tests/test_upload_file.py:
--------------------------------------------------------------------------------
 1 | """最小化 Supabase 文件上传示例
 2 | 
 3 | 基于 config.py 配置，使用 test.txt 文件进行测试。
 4 | """
 5 | 
 6 | import sys
 7 | import os
 8 | 
 9 | from supabase import create_client
10 | from config import settings
11 | 
def upload_file_example():
    """Upload the local ``test.txt`` to the configured Supabase bucket.

    Reads connection details from ``settings``, optionally signs in with the
    admin credentials, uploads the file under ``<folder>/test.txt`` and prints
    the public URL. Missing configuration, a missing file, and upload errors
    are reported with a message instead of raising.
    """
    supabase_url = settings.supabase_url
    supabase_key = settings.supabase_key
    bucket = settings.supabase_bucket_name
    folder = settings.supabase_folder_name

    if not (supabase_url and supabase_key):
        print("Supabase 配置缺失")
        return

    client = create_client(supabase_url, supabase_key)

    # Optional admin sign-in; on failure the client keeps using the anon key.
    if settings.supabase_admin_email and settings.supabase_admin_password:
        credentials = {
            "email": settings.supabase_admin_email,
            "password": settings.supabase_admin_password
        }
        try:
            client.auth.sign_in_with_password(credentials)
            print("Supabase 管理员登录成功")
        except Exception as e:
            print(f"Supabase 管理员登录失败: {e}")

    # The path is relative to the current working directory.
    file_path = "test.txt"
    if not os.path.exists(file_path):
        print(f"文件不存在: {file_path}")
        return

    with open(file_path, 'rb') as f:
        file_data = f.read()

    remote_path = f"{folder}/test.txt"
    try:
        storage = client.storage
        storage.from_(bucket).upload(
            path=remote_path,
            file=file_data,
            file_options={"content-type": "text/plain"}
        )
        public_url = client.storage.from_(bucket).get_public_url(remote_path)
        print(f"上传成功，公开 URL: {public_url}")
    except Exception as e:
        print(f"上传失败: {e}")
61 | 
62 | if __name__ == "__main__":
63 |     upload_file_example()
64 | 


--------------------------------------------------------------------------------
/frontend/src/components/LeftPanel/LeftPanel.tsx:
--------------------------------------------------------------------------------
 1 | import { Card, CardContent } from '@/components/ui/card'
 2 | import { Tabs, TabsContent, TabsList, TabsTrigger } from '@/components/ui/tabs'
 3 | import type { TranscriptMeta, JobItem } from '../../types'
 4 | import ProcessedTab from './ProcessedTab'
 5 | import TasksTab from './TasksTab'
 6 | 
 7 | interface LeftPanelProps {
 8 |   readonly transcripts: TranscriptMeta[]
 9 |   readonly jobs: JobItem[]
10 |   readonly activeTranscriptId: number | null
11 |   readonly onLoadTranscript: (id: number) => void
12 |   readonly onTranscriptsUpdate: () => void
13 | }
14 | 
15 | function LeftPanel({
16 |   transcripts,
17 |   jobs,
18 |   activeTranscriptId,
19 |   onLoadTranscript,
20 |   onTranscriptsUpdate,
21 | }: LeftPanelProps) {
22 | 
23 |   return (
24 |     <div className="h-full flex flex-col">
25 |       <Card className="h-full flex flex-col">
26 |         <CardContent className="flex-1 p-0 min-h-0">
27 |           <Tabs defaultValue="processed" className="h-full flex flex-col">
28 |             <TabsList className="w-full rounded-none border-b">
29 |               <TabsTrigger value="processed" className="flex-1">已处理</TabsTrigger>
30 |               <TabsTrigger value="tasks" className="flex-1">任务</TabsTrigger>
31 |             </TabsList>
32 |             
33 |             <TabsContent value="processed" className="flex-1 m-0 p-3 overflow-hidden">
34 |               <ProcessedTab
35 |                 transcripts={transcripts}
36 |                 activeTranscriptId={activeTranscriptId}
37 |                 onLoadTranscript={onLoadTranscript}
38 |                 onTranscriptsUpdate={onTranscriptsUpdate}
39 |               />
40 |             </TabsContent>
41 | 
42 |             <TabsContent value="tasks" className="flex-1 m-0 p-3 overflow-hidden">
43 |               <TasksTab jobs={jobs} />
44 |             </TabsContent>
45 |           </Tabs>
46 |         </CardContent>
47 |       </Card>
48 |     </div>
49 |   )
50 | }
51 | 
52 | export default LeftPanel
53 | 


--------------------------------------------------------------------------------
/backend/ReAct/chat_prompt_builder.py:
--------------------------------------------------------------------------------
 1 | """Chat Agent 提示词生成器"""
 2 | 
 3 | from textwrap import dedent
 4 | from typing import Sequence
 5 | 
 6 | 
 7 | def build_chat_agent_system_prompt(
 8 |     action_names: Sequence[str],
 9 |     tool_description: str = "",
10 |     transcript_ids: Sequence[int] = None,
11 | ) -> str:
12 |     """
13 |     构建 Chat Agent 系统提示词
14 | 
15 |     参数:
16 |         action_names: 可用的动作名称列表
17 |         tool_description: 工具描述文本
18 |         transcript_ids: 用户选择的转录ID列表
19 | 
20 |     返回:
21 |         系统提示词文本
22 |     """
23 |     # 构建文件信息
24 |     file_info = ""
25 |     if transcript_ids:
26 |         file_info = f"""
27 |     # 用户选择的视频文件
28 |     用户选择了以下视频文件进行问答（ID: {', '.join(map(str, transcript_ids))}）。
29 |     请基于这些文件的内容回答问题，不要使用其他外部知识。
30 |     """
31 |     
32 |     prompt = f"""
33 |     你是一个智能聊天助手，可以通过调用外部工具来回答用户关于视频转录内容的问题。
34 | 
35 |     {tool_description}
36 |     {file_info}
37 | 
38 |     # 对话规则
39 |     1. 保持友好的对话语气
40 |     2. 基于检索到的内容准确回答问题
41 |     3. 如果没有找到相关信息，诚实地说明
42 |     4. 可以进行多轮对话，记住上下文
43 | 
44 |     # 推理策略
45 |     1. 先理解用户的问题，确定需要什么信息
46 |     2. 如果需要从视频内容中查找信息，使用knowledge_retrieval工具
47 |     3. 每次调用工具时，精确描述需要查找的内容
48 |     4. 基于工具返回的结果进行分析和总结
49 |     5. 如果信息不够完整，可以再次调用工具获取更多信息
50 |     6. 最终回答要基于检索到的具体内容，包括时间戳信息
51 | 
52 |     # 工具使用指南
53 | 
54 |     - knowledge_retrieval: 从视频转录中检索相关内容
55 |       - 输入: {{"question": "具体的问题描述", "transcript_id": 123}}
56 |       - 输出: 压缩后的关键信息，包含时间戳和相关内容
57 |       - 注意: 每次调用时要具体描述需要查找的内容，不要过于宽泛
58 | 
59 |     # 示例对话
60 | 
61 |     问题：视频中提到了什么技术？
62 |     Thought: 用户询问视频中的技术内容，我需要调用检索工具来获取相关信息。
63 |     Action: knowledge_retrieval
64 |     Action Input: {{"question": "视频中提到的技术", "transcript_id": 123}}
65 | 
66 |     Observation: 来源文件: video.mp4
67 | 
68 |     压缩总结:
69 |     视频中主要讨论了人工智能和大数据技术。人工智能在医疗领域可以帮助医生诊断疾病[121540.00-145440.00]。大数据技术可以帮助企业做出更好的决策[145440.00-169340.00]。
70 | 
71 |     Thought: 我已经从检索结果中获得了相关技术信息，可以总结回答用户了。
72 |     Final Answer: 视频中主要提到了人工智能和大数据技术。人工智能在医疗领域可以帮助医生诊断疾病，大数据技术可以帮助企业做出更好的决策。
73 |     """
74 |     return dedent(prompt).strip()


--------------------------------------------------------------------------------
/ASRBackend/tests/test_asr_segments.py:
--------------------------------------------------------------------------------
 1 | """测试 ASR 句子分段功能"""
 2 | 
 3 | import asyncio
 4 | import os
 5 | import sys
 6 | from pathlib import Path
 7 | 
 8 | # 添加项目根目录到路径，以便导入模块
 9 | project_root = Path(__file__).parent.parent.parent
10 | sys.path.insert(0, str(project_root))
11 | 
12 | import pytest
13 | 
14 | from ASRBackend.asr_functions.asr_sentence_segments import process
15 | from ASRBackend.asr_functions.utils import detect_language
16 | from ASRBackend.config import settings
17 | from ASRBackend.services.asr_service import ASRService
18 | 
19 | # 测试数据路径
20 | TEST_DATA_DIR = Path(__file__).parent / "test_datas"
21 | TEST_AUDIO_FILE = TEST_DATA_DIR / "test.mp4"
22 | 
23 | 
@pytest.mark.asyncio
async def test_transcribe_audio_url():
    """Transcribe a remote test file by URL and check the shape of the result.

    Only the result structure is validated; the detailed field checks run
    only when the service reports ``status == "success"``, since the call may
    not succeed in every environment.
    """
    test_url = "https://sbp-7fgelj2azms1xag5.supabase.opentrust.net/storage/v1/object/public/test-public/asr/test.mp4"

    result = await ASRService.transcribe_audio_from_url(test_url)

    # Fields that must always be present.
    assert isinstance(result, dict)
    for key in ("filename", "status"):
        assert key in result

    # A non-success status is tolerated; nothing more to verify in that case.
    if result["status"] != "success":
        return

    for key in ("text", "language", "segments"):
        assert key in result
    assert isinstance(result["text"], str)
    assert result["language"] in ["zh", "en"]
    assert isinstance(result["segments"], list)

    segments = result["segments"]
    if segments:
        first_segment = segments[0]
        for key in ("index", "spk_id", "sentence", "start_time", "end_time"):
            assert key in first_segment
54 | 
55 | 
def test_test_data_exists():
    """Check that the test data directory and the sample media file exist."""
    data_dir_present = TEST_DATA_DIR.exists()
    assert data_dir_present, f"测试数据目录不存在: {TEST_DATA_DIR}"

    audio_file_present = TEST_AUDIO_FILE.exists()
    assert audio_file_present, f"测试音频文件不存在: {TEST_AUDIO_FILE}"
60 | 


--------------------------------------------------------------------------------
/backend/media_processing/video/download/youtube/test_login_handler.py:
--------------------------------------------------------------------------------
 1 | # -*- coding: utf-8 -*-
 2 | """测试youtube cookie处理"""
 3 | 
 4 | import sys
 5 | import os
 6 | 
 7 | backend_dir = os.path.dirname(os.path.dirname(os.path.dirname(os.path.dirname(os.path.dirname(os.path.abspath(__file__))))))
 8 | project_root = os.path.dirname(backend_dir)
 9 | if project_root not in sys.path:
10 |     sys.path.insert(0, project_root)
11 | 
12 | from backend.media_processing.video.download.youtube.youtube_login_handler import (
13 |     login_and_get_cookies_sync,
14 |     load_cookies,
15 |     save_cookies,
16 | )
17 | 
def test_login_and_get_cookies():
    """Run the interactive (non-headless) login and print the cookies obtained.

    Returns True when cookies were obtained, False otherwise.
    """
    print("开始测试登录和获取cookie...")
    cookies = login_and_get_cookies_sync(headless=False)

    if not cookies:
        print("✗ 获取cookie失败")
        return False

    print(f"✓ 成功获取cookie，共{len(cookies)}个")
    print("Cookie内容:")
    for cookie in cookies:
        name = cookie.get('name')
        value = cookie.get('value')
        # Long values are truncated to their first 50 characters.
        if len(str(value)) > 50:
            print(f"  {name}: {value[:50]}...")
        else:
            print(f"  {name}: {value}")
    return True
34 | 
def test_load_cookies():
    """Load previously saved cookies and print them.

    Returns True when saved cookies were found, False otherwise.
    """
    print("\n开始测试加载保存的cookie...")
    cookies = load_cookies()

    if not cookies:
        print("✗ 未找到保存的cookie")
        return False

    print(f"✓ 成功加载cookie，共{len(cookies)}个")
    print("Cookie内容:")
    for cookie in cookies:
        name = cookie.get('name')
        value = cookie.get('value')
        # Long values are truncated to their first 50 characters.
        if len(str(value)) > 50:
            print(f"  {name}: {value[:50]}...")
        else:
            print(f"  {name}: {value}")
    return True
51 | 
if __name__ == "__main__":
    # Manual entry point: reuse saved cookies when present, otherwise log in.
    separator = "=" * 60
    print(separator)
    print("YouTube Cookie处理测试")
    print(separator)

    has_saved_cookies = test_load_cookies()
    if not has_saved_cookies:
        test_login_and_get_cookies()
    else:
        print("\n已有保存的cookie，无需重新登录")

    print("\n" + separator)
    print("测试完成")
    print(separator)
65 | 


--------------------------------------------------------------------------------
/frontend/package.json:
--------------------------------------------------------------------------------
 1 | {
 2 |   "name": "frontend",
 3 |   "private": true,
 4 |   "version": "0.0.0",
 5 |   "type": "module",
 6 |   "scripts": {
 7 |     "dev": "vite",
 8 |     "build": "tsc -b && vite build",
 9 |     "lint": "eslint .",
10 |     "preview": "vite preview"
11 |   },
12 |   "dependencies": {
13 |     "@hookform/resolvers": "^5.2.2",
14 |     "@radix-ui/react-dialog": "^1.1.15",
15 |     "@radix-ui/react-dropdown-menu": "^2.1.16",
16 |     "@radix-ui/react-label": "^2.1.7",
17 |     "@radix-ui/react-scroll-area": "^1.2.10",
18 |     "@radix-ui/react-select": "^2.2.6",
19 |     "@radix-ui/react-separator": "^1.1.7",
20 |     "@radix-ui/react-slot": "^1.2.3",
21 |     "@radix-ui/react-switch": "^1.2.6",
22 |     "@radix-ui/react-tabs": "^1.1.13",
23 |     "@types/markdown-it": "^14.1.2",
24 |     "class-variance-authority": "^0.7.1",
25 |     "clsx": "^2.1.1",
26 |     "framer-motion": "^12.23.24",
27 |     "lucide-react": "^0.546.0",
28 |     "markdown-it": "^14.1.0",
29 |     "react": "^19.1.1",
30 |     "react-dom": "^19.1.1",
31 |     "react-hook-form": "^7.65.0",
32 |     "react-resizable-panels": "^3.0.6",
33 |     "react-router-dom": "^7.9.4",
34 |     "sonner": "^2.0.7",
35 |     "tailwind-merge": "^3.3.1",
36 |     "zod": "^4.1.12",
37 |     "zustand": "^5.0.8"
38 |   },
39 |   "devDependencies": {
40 |     "@eslint/js": "^9.36.0",
41 |     "@tailwindcss/vite": "^4.1.14",
42 |     "@types/node": "^24.6.0",
43 |     "@types/react": "^19.1.16",
44 |     "@types/react-dom": "^19.1.9",
45 |     "@vitejs/plugin-react": "^5.0.4",
46 |     "autoprefixer": "^10.4.21",
47 |     "babel-plugin-react-compiler": "^19.1.0-rc.3",
48 |     "eslint": "^9.36.0",
49 |     "eslint-plugin-jsx-a11y": "^6.10.2",
50 |     "eslint-plugin-react-hooks": "^5.2.0",
51 |     "eslint-plugin-react-refresh": "^0.4.22",
52 |     "globals": "^16.4.0",
53 |     "postcss": "^8.5.6",
54 |     "tailwindcss": "^4.1.14",
55 |     "tw-animate-css": "^1.4.0",
56 |     "typescript": "~5.9.3",
57 |     "typescript-eslint": "^8.45.0",
58 |     "vite": "npm:rolldown-vite@7.1.14"
59 |   },
60 |   "overrides": {
61 |     "vite": "npm:rolldown-vite@7.1.14"
62 |   }
63 | }
64 | 


--------------------------------------------------------------------------------
/backend/media_processing/video/download/bilibili/test_login_handler.py:
--------------------------------------------------------------------------------
 1 | # -*- coding: utf-8 -*-
 2 | """测试bilibili cookie处理"""
 3 | 
 4 | import sys
 5 | import os
 6 | 
 7 | backend_dir = os.path.dirname(os.path.dirname(os.path.dirname(os.path.dirname(os.path.dirname(os.path.abspath(__file__))))))
 8 | project_root = os.path.dirname(backend_dir)
 9 | if project_root not in sys.path:
10 |     sys.path.insert(0, project_root)
11 | 
12 | from backend.media_processing.video.download.bilibili.bilibili_login_handler import (
13 |     login_and_get_cookies_sync,
14 |     load_cookies,
15 |     save_cookies,
16 | )
17 | 
def test_login_and_get_cookies():
    """Run the interactive (non-headless) login and print the cookies obtained.

    Returns True when cookies were obtained, False otherwise.
    """
    print("开始测试登录和获取cookie...")
    cookies = login_and_get_cookies_sync(headless=False)

    if not cookies:
        print("✗ 获取cookie失败")
        return False

    print(f"✓ 成功获取cookie，共{len(cookies)}个")
    print("Cookie内容:")
    for cookie in cookies:
        name = cookie.get('name')
        value = cookie.get('value')
        # Long values are truncated to their first 50 characters.
        if len(str(value)) > 50:
            print(f"  {name}: {value[:50]}...")
        else:
            print(f"  {name}: {value}")
    return True
34 | 
def test_load_cookies():
    """Load previously saved cookies via ``load_cookies`` and print them.

    Returns:
        True when ``load_cookies`` returned a non-empty result, False otherwise.
    """
    print("\n开始测试加载保存的cookie...")
    cookies = load_cookies()

    # Guard clause: bail out early when no saved cookies were found.
    if not cookies:
        print("✗ 未找到保存的cookie")
        return False

    print(f"✓ 成功加载cookie，共{len(cookies)}个")
    print("Cookie内容:")
    for cookie in cookies:
        name = cookie.get('name')
        value = cookie.get('value')
        # Long values are truncated to their first 50 characters for display.
        if len(str(value)) > 50:
            line = f"  {name}: {value[:50]}..."
        else:
            line = f"  {name}: {value}"
        print(line)
    return True
51 | 
if __name__ == "__main__":
    # Manual entry point: reuse saved cookies when available, otherwise run
    # the interactive login flow.
    separator = "=" * 60

    print(separator)
    print("Bilibili Cookie处理测试")
    print(separator)

    has_saved_cookies = test_load_cookies()
    if has_saved_cookies:
        print("\n已有保存的cookie，无需重新登录")
    else:
        test_login_and_get_cookies()

    print("\n" + separator)
    print("测试完成")
    print(separator)
65 | 


--------------------------------------------------------------------------------
/frontend/src/components/docs/FileUploader设计说明.md:
--------------------------------------------------------------------------------
  1 | # FileUploader 文件上传组件设计说明
  2 | 
  3 | ## 概述
  4 | 
  5 | FileUploader 组件负责处理 HearSight 应用中的本地文件上传功能，支持视频和音频文件的拖拽上传和选择上传。该组件是用户上传本地媒体文件的核心交互模块。
  6 | 
  7 | ## 组件结构
  8 | 
  9 | ### FileUploader.tsx
 10 | 
 11 | 主容器组件，负责：
 12 | 
 13 | - 文件选择和拖拽上传
 14 | - 文件类型验证
 15 | - 文件上传过程管理
 16 | - 上传结果处理
 17 | 
 18 | ### 核心功能模块
 19 | 
 20 | - **文件选择**: 支持点击选择文件上传
 21 | - **拖拽上传**: 支持拖拽文件到指定区域上传
 22 | - **文件验证**: 验证文件类型是否为支持的音视频格式
 23 | - **上传管理**: 管理上传过程和状态
 24 | 
 25 | ## 设计原则
 26 | 
 27 | ### 1. 用户友好
 28 | - 提供直观的上传界面
 29 | - 支持多种上传方式（拖拽、点击）
 30 | - 显示上传进度和状态
 31 | - 提供清晰的操作反馈
 32 | 
 33 | ### 2. 文件类型支持
 34 | - 支持常见视频格式（mp4, avi, mov, mkv等）
 35 | - 支持常见音频格式（mp3, wav, m4a, aac等）
 36 | - 自动识别文件类型并验证
 37 | 
 38 | ### 3. 状态管理
 39 | - 管理文件选择状态
 40 | - 管理上传过程状态
 41 | - 处理上传结果回调
 42 | 
 43 | ## 功能详解
 44 | 
 45 | ### 文件选择
 46 | 
 47 | - 点击上传区域选择文件
 48 | - 支持单个文件选择
 49 | - 自动验证文件类型
 50 | - 不支持的文件格式会显示错误提示
 51 | 
 52 | ### 拖拽上传
 53 | 
 54 | - 支持将文件拖拽到上传区域
 55 | - 拖拽过程中有视觉反馈
 56 | - 放下文件后自动开始处理
 57 | - 同样进行文件类型验证
 58 | 
 59 | ### 文件验证
 60 | 
 61 | - 检查文件扩展名是否为支持的音视频格式
 62 | - 视频格式包括：mp4, avi, mov, mkv, flv, wmv, webm
 63 | - 音频格式包括：mp3, wav, m4a, aac, flac, ogg, wma
 64 | - 不支持的格式会拒绝上传并提示用户
 65 | 
 66 | ### 上传管理
 67 | 
 68 | - 显示上传进度条
 69 | - 显示上传过程中的加载状态
 70 | - 上传完成后触发成功回调
 71 | - 上传失败时触发错误回调
 72 | 
 73 | ## 数据流设计
 74 | 
 75 | ### Props 接口
 76 | 
 77 | - onUploadSuccess: 上传成功回调
 78 | - onUploadError: 上传错误回调
 79 | 
 80 | ### 内部状态
 81 | 
 82 | - file: 当前选择的文件
 83 | - uploading: 上传状态
 84 | - dragActive: 拖拽状态
 85 | 
 86 | ### 上传结果数据结构
 87 | 
 88 | - static_url: 文件访问URL
 89 | - is_audio: 是否为音频文件
 90 | - placeholder_url: 占位图片URL（可选）
 91 | - job_id: 任务ID（可选）
 92 | 
 93 | ## 用户交互设计
 94 | 
 95 | ### 上传区域
 96 | 
 97 | - 显示上传提示文字和图标
 98 | - 支持点击和拖拽两种交互方式
 99 | - 拖拽时有视觉状态变化
100 | 
101 | ### 文件信息展示
102 | 
103 | - 显示已选择文件的名称
104 | - 显示文件大小
105 | - 提供取消选择功能
106 | 
107 | ### 操作按钮
108 | 
109 | - **选择文件按钮**: 点击选择文件
110 | - **取消按钮**: 取消已选择的文件
111 | - **上传按钮**: 开始上传文件
112 | 
113 | ## 无障碍设计
114 | 
115 | ### 键盘导航
116 | - 支持 Tab 键在元素间导航
117 | - 支持 Enter 键激活按钮
118 | - 支持 Space 键激活文件选择
119 | 
120 | ### 屏幕阅读器支持
121 | - 提供适当的 ARIA 标签
122 | - 为操作按钮添加描述性文本
123 | - 确保界面元素语义化


--------------------------------------------------------------------------------
/frontend/src/components/RightPanel/Chat/docs/消息列表排版设计说明.md:
--------------------------------------------------------------------------------
 1 | # 消息列表排版设计说明
 2 | 
 3 | ## 概述
 4 | 
 5 | 消息列表组件 (`MessageList.tsx`) 负责显示问答对话中的所有消息，包括用户消息和 AI 回复。该组件采用现代化的聊天界面设计，注重可读性和交互性。
 6 | 
 7 | ## 整体布局
 8 | 
 9 | - **容器结构**: 使用 `ScrollArea` 实现可滚动区域，防止消息过多时界面溢出
10 | - **消息排列**: 消息按时间顺序垂直排列，每个消息占一行
11 | - **响应式设计**: 支持不同屏幕尺寸的自适应布局
12 | 
13 | ## 消息样式设计
14 | 
15 | ### 用户消息
16 | 
17 | - **位置**: 右侧对齐 (`justify-end`)
18 | - **背景**: 蓝色纯色背景、白色文字 (`bg-blue-500 text-white`)
19 | - **圆角**: 标准圆角设计
20 | - **内容**: 直接显示用户输入的文本
21 | 
22 | ### AI 消息
23 | 
24 | - **位置**: 左侧对齐 (`justify-start`)
25 | - **背景**: 浅灰色背景 (`bg-slate-100 text-slate-900`)
26 | - **圆角**: 标准圆角设计
27 | - **内容**: 智能解析并渲染，包括文本、时间戳链接和缩略图
28 | 
29 | ## 时间戳处理
30 | 
31 | ### 解析规则
32 | 
33 | - **单视频格式**: `[开始时间-结束时间]` (如 `[92000.00-113620.00]`)
34 | - **多视频格式**: `[视频名 开始时间-结束时间]` (如 `[example.mp4 92000.00-113620.00]`)
35 | - **正则表达式**: `/(\[.*?\s*\d+(?:\.\d+)?-\d+(?:\.\d+)?\])/g`
36 | 
37 | ### 显示设计
38 | 
39 | - **换行显示**: 时间戳部分单独占一行，不与文本挤压
40 | - **垂直布局**: 缩略图在上，时间戳按钮在下
41 | - **样式**: 蓝色链接样式，hover 效果
42 | - **时间格式**: 转换为可读的时间格式显示
43 | 
44 | ## 缩略图显示
45 | 
46 | ### 触发条件
47 | 
48 | - 仅在 `imageModeEnabled` 为 `true` 时显示
49 | - 自动从后端获取对应时间的视频截图
50 | 
51 | ### 布局设计
52 | 
53 | - **位置**: 位于时间戳上方
54 | - **尺寸**: 最大宽度 320px，高度 96px (h-24)
55 | - **样式**: 圆角边框，hover 透明度变化
56 | - **加载状态**: 显示加载动画直到图片加载完成
57 | - **点击交互**: 点击打开大图预览对话框
58 | 
59 | ### 缓存机制
60 | 
61 | - 使用时间戳作为缓存键: `${startTimeSec}-${endTimeSec}`
62 | - 避免重复加载相同时间的缩略图
63 | 
64 | ## 复制功能
65 | 
66 | ### 按钮设计
67 | 
68 | - **位置**: 每条消息右上角
69 | - **样式**: 幽灵按钮样式，hover 显示
70 | - **图标**: 使用 Copy 图标
71 | - **反馈**: 成功复制时显示 toast 提示
72 | 
73 | ## 响应式适配
74 | 
75 | - **移动端**: 消息宽度自适应，缩略图最大宽度调整
76 | - **桌面端**: 保持固定最大宽度，优化阅读体验
77 | 
78 | ## 交互设计
79 | 
80 | - **时间戳点击**: 跳转到对应视频时间点
81 | - **多视频支持**: 自动识别视频名并切换到对应视频
82 | - **图片预览**: 点击缩略图打开模态对话框
83 | - **加载状态**: 显示加载动画，提升用户体验
84 | 
85 | ## 性能优化
86 | 
87 | - **滚动优化**: 使用 ScrollArea 作为滚动容器承载大量消息（注意：ScrollArea 本身不做列表虚拟化，消息量极大时需另行引入虚拟滚动）
88 | - **图片懒加载**: 仅在需要时加载缩略图
89 | - **缓存策略**: 本地缓存已加载的缩略图，避免重复请求
90 | 
91 | ## 无障碍设计
92 | 
93 | - **语义化标签**: 使用适当的 ARIA 标签
94 | - **键盘导航**: 支持键盘操作时间戳链接
95 | - **屏幕阅读器**: 提供描述性文本和标签
96 | 


--------------------------------------------------------------------------------
/frontend/src/components/RightPanel/docs/翻译对话框设计说明.md:
--------------------------------------------------------------------------------
  1 | # TranslateDialog 翻译对话框设计说明
  2 | 
  3 | ## 概述
  4 | 
  5 | TranslateDialog 组件负责处理 HearSight 应用中的文本翻译功能，提供语言检测、翻译选项和翻译过程管理。该组件是多语言支持功能的核心交互模块，帮助用户将转写内容翻译为目标语言。
  6 | 
  7 | ## 组件结构
  8 | 
  9 | ### TranslateDialog.tsx
 10 | 
 11 | 主容器组件，负责：
 12 | 
 13 | - 语言检测和推荐
 14 | - 翻译选项的选择和配置
 15 | - 翻译过程的启动和管理
 16 | - 用户交互事件的处理
 17 | 
 18 | ### 核心功能模块
 19 | 
 20 | - **语言检测**: 自动检测原文语言并推荐目标语言
 21 | - **翻译选项**: 提供常用语言选择和翻译控制
 22 | - **翻译过程**: 管理翻译任务的启动和状态
 23 | - **用户确认**: 支持强制重新翻译选项
 24 | 
 25 | ## 设计原则
 26 | 
 27 | ### 1. 智能化
 28 | - 自动检测原文语言
 29 | - 智能推荐目标语言
 30 | - 提供翻译质量保障
 31 | 
 32 | ### 2. 用户友好
 33 | - 简洁直观的操作界面
 34 | - 清晰的状态反馈
 35 | - 灵活的翻译控制选项
 36 | 
 37 | ### 3. 可靠性
 38 | - 处理翻译过程中的错误
 39 | - 提供翻译进度反馈
 40 | - 支持翻译任务取消
 41 | 
 42 | ## 功能详解
 43 | 
 44 | ### 语言检测
 45 | 
 46 | - 组件打开时自动检测原文语言
 47 | - 使用内置语言检测算法分析文本
 48 | - 根据检测结果推荐目标语言
 49 | - 显示检测过程中的加载状态
 50 | 
 51 | ### 语言选择
 52 | 
 53 | - 提供常用语言选项（中文、英文等）
 54 | - 显示语言的详细名称
 55 | - 支持手动选择目标语言
 56 | - 保存用户选择偏好
 57 | 
 58 | ### 翻译控制
 59 | 
 60 | - 支持强制重新翻译选项
 61 | - 检测已有翻译内容并提示用户
 62 | - 提供翻译过程中的状态反馈
 63 | - 支持翻译任务取消操作
 64 | 
 65 | ### 用户确认
 66 | 
 67 | - 检测到已有翻译时提示用户确认
 68 | - 提供继续翻译或强制重新翻译选项
 69 | - 确认后启动翻译任务
 70 | 
 71 | ## 数据流设计
 72 | 
 73 | ### Props 接口
 74 | 
 75 | - open: 对话框打开状态
 76 | - onOpenChange: 对话框状态变更回调
 77 | - transcriptId: 转写ID
 78 | - segments: 转写片段数据
 79 | - onStartTranslate: 启动翻译回调
 80 | - isTranslating: 翻译进行中状态
 81 | 
 82 | ### 内部状态
 83 | 
 84 | - step: 当前步骤（检测中、选择中）
 85 | - detection: 语言检测结果
 86 | - targetLanguage: 目标语言
 87 | - loading: 加载状态
 88 | - error: 错误信息
 89 | - forceRetranslate: 强制重新翻译状态
 90 | 
 91 | ### 语言选项数据结构
 92 | 
 93 | - code: 语言代码
 94 | - name: 语言名称
 95 | 
 96 | ## 用户交互设计
 97 | 
 98 | ### 对话框流程
 99 | 
100 | - **第一步**: 语言检测阶段，显示检测进度
101 | - **第二步**: 语言选择阶段，展示推荐语言和选项
102 | - **操作按钮**: 提供取消和确认按钮
103 | 
104 | ### 语言选项
105 | 
106 | - 使用下拉选择器展示语言选项
107 | - 显示语言的完整名称
108 | - 支持快速选择常用语言
109 | 
110 | ### 强制翻译选项
111 | 
112 | - 提供开关选项控制是否强制重新翻译
113 | - 检测到已有翻译时，默认提示用户确认后再继续
114 | 
115 | ## 无障碍设计
116 | 
117 | ### 键盘导航
118 | - 支持 Tab 键在元素间导航
119 | - 支持 Enter 键确认操作
120 | - 支持 Escape 键关闭对话框
121 | 
122 | ### 屏幕阅读器支持
123 | - 提供适当的 ARIA 标签
124 | - 为操作元素添加描述性文本
125 | - 确保界面元素语义化


--------------------------------------------------------------------------------
/backend/db/transcript_summary_crud.py:
--------------------------------------------------------------------------------
 1 | # -*- coding: utf-8 -*-
 2 | """转写记录总结 CRUD 操作模块"""
 3 | from __future__ import annotations
 4 | 
 5 | import json
 6 | from typing import Any, Dict, List, Optional
 7 | 
 8 | import psycopg2
 9 | from psycopg2.extras import RealDictCursor
10 | 
11 | from .conn_utils import connect_db
12 | 
13 | 
def save_summaries(
    db_url: Optional[str], transcript_id: int, summaries: List[Dict[str, Any]]
) -> bool:
    """Save the summaries of one transcript to the database.

    The list is serialised to JSON text (non-ASCII characters are kept as-is)
    and written to ``transcripts.summaries_json``; ``updated_at`` is set to
    ``NOW()`` in the same statement.

    Args:
        db_url: Database connection URL, passed to ``connect_db``.
        transcript_id: ID of the transcript row to update.
        summaries: Summary entries to store.

    Returns:
        True when the UPDATE affected at least one row, otherwise False.

    Raises:
        TypeError: If ``summaries`` contains values that cannot be encoded
            as JSON.
    """
    # Serialise before opening the connection: if the payload is not
    # JSON-encodable we fail here, without leaving a connection open that
    # the try/finally below would never get a chance to close.
    data = json.dumps(summaries, ensure_ascii=False)
    conn = connect_db(db_url)
    try:
        # ``with conn`` wraps the statement in a transaction (commit on
        # success, rollback on error); it does not close the connection,
        # hence the explicit close in ``finally``.
        with conn:
            with conn.cursor() as cur:
                cur.execute(
                    """
                    UPDATE transcripts 
                    SET summaries_json = %s, updated_at = NOW()
                    WHERE id = %s
                    """,
                    (data, transcript_id),
                )
                return cur.rowcount > 0
    finally:
        conn.close()
43 | 
44 | 
def get_summaries(
    db_url: Optional[str], transcript_id: int
) -> Optional[List[Dict[str, Any]]]:
    """Fetch the stored summaries of one transcript.

    Args:
        db_url: Database connection URL, passed to ``connect_db``.
        transcript_id: ID of the transcript row to read (coerced with ``int``).

    Returns:
        The decoded ``summaries_json`` value, or None when the row does not
        exist, the column is empty, or the stored value cannot be decoded.
    """
    query = "SELECT summaries_json FROM transcripts WHERE id = %s"
    conn = connect_db(db_url)
    try:
        with conn:
            with conn.cursor(cursor_factory=RealDictCursor) as cur:
                cur.execute(query, (int(transcript_id),))
                record = cur.fetchone()
                stored = record.get("summaries_json") if record else None
                if not stored:
                    return None
                # Any decoding problem is treated as "no summaries".
                try:
                    return json.loads(stored)
                except Exception:
                    return None
    finally:
        conn.close()


--------------------------------------------------------------------------------
/backend/tests/test_download_media.py:
--------------------------------------------------------------------------------
 1 | import os
 2 | import pytest
 3 | from download_media_file import download_media
 4 | 
 5 | @pytest.mark.parametrize("platform, url", [
 6 |     ("youtube", "https://www.youtube.com/watch?v=dQw4w9WgXcQ"),
 7 |     ("bilibili", "https://www.bilibili.com/video/BV1yFs3zJEhd"),
 8 |     ("xiaoyuzhou", "https://www.xiaoyuzhoufm.com/episode/68f7034122654730207b940c"),
 9 | ])
10 | def test_multi_platform_download(platform, url):
11 |     """Test download functionality for multiple platforms"""
12 |     test_dir = f"test_downloads_{platform}"
13 |     try:
14 |         # Execute download
15 |         download_media(url, output_dir=test_dir)
16 |         
17 |         # Verify files were created
18 |         assert os.path.exists(test_dir), f"Download directory not created for {platform}"
19 |         files = os.listdir(test_dir)
20 |         assert len(files) > 0, f"No files downloaded for {platform}"
21 |         assert any(f.endswith(('.mp4', '.mp3', '.m4a')) for f in files), f"No media files found for {platform}"
22 |     finally:
23 |         # Cleanup
24 |         if os.path.exists(test_dir):
25 |             for file in os.listdir(test_dir):
26 |                 os.remove(os.path.join(test_dir, file))
27 |             os.rmdir(test_dir)
28 | 
29 | 
def test_url_validation():
    """Check that ``download_media`` rejects invalid URLs with an error message.

    For every invalid URL the call must raise, and the raised exception must
    carry a non-empty message.
    """
    # Local import so this block does not depend on the file's import header.
    import shutil

    invalid_urls = [
        "example.com/video",  # Missing protocol
        "https://unsupportedplatform.com/video",  # Unsupported platform
        "",  # Empty URL
    ]

    test_dir = "test_invalid_urls"
    for url in invalid_urls:
        try:
            download_media(url, output_dir=test_dir)
        except Exception as e:
            # Verify it's a meaningful error
            assert str(e), f"No error message for invalid URL: {url}"
        else:
            # The failure must be raised outside the ``try`` body: an
            # ``assert False`` inside it is itself an Exception and would be
            # swallowed by the handler above, so the test could never fail.
            raise AssertionError(f"Expected error for invalid URL: {url}")
        finally:
            # Remove whatever the call created, including nested directories.
            shutil.rmtree(test_dir, ignore_errors=True)
52 | 


--------------------------------------------------------------------------------
/backend/media_processing/audio/local/upload_handler.py:
--------------------------------------------------------------------------------
 1 | # -*- coding: utf-8 -*-
 2 | """本地上传音频处理"""
 3 | 
 4 | from __future__ import annotations
 5 | 
 6 | import logging
 7 | import os
 8 | from pathlib import Path
 9 | from typing import Optional
10 | 
11 | from backend.common_interfaces import DownloadResult
12 | 
13 | logger = logging.getLogger(__name__)
14 | 
15 | SUPPORTED_AUDIO_FORMATS = {".mp3", ".wav", ".m4a", ".aac", ".flac", ".ogg", ".wma"}
16 | 
17 | 
def process_uploaded_audio(file_path: str) -> DownloadResult:
    """Validate a locally uploaded audio file and wrap it in a DownloadResult.

    Checks, in order: the path exists, its lower-cased extension is one of
    ``SUPPORTED_AUDIO_FORMATS``, and the file can be opened for reading.

    Args:
        file_path: Full path of the uploaded file.

    Returns:
        DownloadResult: On success, carries ``audio_path`` (the input path),
        ``title`` (the file name without extension) and ``media_type="audio"``.
        On any failure ``success`` is False and ``error_message`` explains
        why; no exception is propagated to the caller.
    """
    try:
        file_path_obj = Path(file_path)
        if not file_path_obj.exists():
            return DownloadResult(
                success=False,
                error_message=f"文件不存在: {file_path}"
            )

        file_ext = file_path_obj.suffix.lower()

        # Reject anything that is not a known audio extension.
        if file_ext not in SUPPORTED_AUDIO_FORMATS:
            # Sort the set so the message lists the formats in a stable
            # order instead of depending on set iteration order.
            supported = ', '.join(sorted(SUPPORTED_AUDIO_FORMATS))
            return DownloadResult(
                success=False,
                error_message=f"不支持的音频格式: {file_ext}。支持格式: {supported}"
            )

        # Make sure the file can actually be opened, and grab its size for
        # the log line below. The handle itself is not needed.
        try:
            with open(file_path, "rb"):
                file_size = os.path.getsize(file_path)
        except OSError as e:
            return DownloadResult(
                success=False,
                error_message=f"无法打开音频文件: {str(e)}"
            )

        logger.info(f"上传音频文件: {file_path} (格式: {file_ext}, 大小: {file_size} 字节)")
        return DownloadResult(
            audio_path=file_path,
            title=file_path_obj.stem,
            media_type="audio",
            success=True
        )

    except Exception as e:
        # Last-resort boundary: report the failure as a result object.
        logger.error(f"处理上传音频文件异常: {e}", exc_info=True)
        return DownloadResult(
            success=False,
            error_message=f"处理文件失败: {str(e)}"
        )
70 | 


--------------------------------------------------------------------------------
/backend/docker-compose.yml:
--------------------------------------------------------------------------------
 1 | version: '3.8'
 2 | 
 3 | services:
  # API service, built from backend/Dockerfile with the repository root
  # (one level above this file) as build context.
  backend:
    build:
      context: ..
      dockerfile: backend/Dockerfile
    container_name: hearsight-backend
    restart: unless-stopped
    # Start only after these services report healthy.
    # NOTE(review): postgres, redis and asr-backend are not defined in this
    # file - presumably they come from another compose file; confirm.
    depends_on:
      postgres:
        condition: service_healthy
      redis:
        condition: service_healthy
      asr-backend:
        condition: service_healthy
    env_file: .env
    volumes:
      # Shared data directory; the cache paths below all live inside it.
      - ../app_datas:/app/app_datas
    ports:
      # Host port defaults to 9999 when BACKEND_PORT is unset; the container
      # side is 8000, matching PORT below.
      - "${BACKEND_PORT:-9999}:8000"
    environment:
      - PORT=8000
      - POSTGRES_HOST=postgres
      - MS_MODEL_CACHE=/app/app_datas/model_cache
      - TORCH_HOME=/app/app_datas/torch_cache
      - HF_HOME=/app/app_datas/transformers_cache
      - XDG_CACHE_HOME=/app/app_datas/xdg_cache
      - FRONTEND_HOST=localhost
      - ASR_BACKEND_URL=http://asr-backend:8003
      # Redis database 0 is the Celery broker, database 1 the result backend.
      - CELERY_BROKER_URL=redis://redis:6379/0
      - CELERY_RESULT_BACKEND=redis://redis:6379/1
    deploy: {}
    # NOTE(review): the service-level `gpus` key needs a Docker Compose
    # version that supports it - confirm against the deployed version.
    gpus: all
35 | 
  # Celery worker, built from the same Dockerfile and build context as the
  # backend service but started with the worker launcher command below.
  celery-worker:
    build:
      context: ..
      dockerfile: backend/Dockerfile
    container_name: hearsight-celery-worker
    restart: unless-stopped
    # Start only after these services report healthy.
    # NOTE(review): postgres, redis and asr-backend are not defined in this
    # file - presumably they come from another compose file; confirm.
    depends_on:
      postgres:
        condition: service_healthy
      redis:
        condition: service_healthy
      asr-backend:
        condition: service_healthy
    env_file: .env
    volumes:
      # Same shared data directory as the backend service.
      - ../app_datas:/app/app_datas
    working_dir: /app
    environment:
      - POSTGRES_HOST=postgres
      - MS_MODEL_CACHE=/app/app_datas/model_cache
      - TORCH_HOME=/app/app_datas/torch_cache
      - HF_HOME=/app/app_datas/transformers_cache
      - XDG_CACHE_HOME=/app/app_datas/xdg_cache
      # Celery configuration
      - CELERY_BROKER_URL=redis://redis:6379/0
      - CELERY_RESULT_BACKEND=redis://redis:6379/1
      - CELERY_WORKER_CONCURRENCY=2
      - CELERY_LOG_LEVEL=info
      # ASRBackend service address
      - ASR_BACKEND_URL=http://asr-backend:8003
    # Runs the launcher module as a script, relative to working_dir (/app).
    command: python -m backend.queues.worker_launcher
    deploy: {}
    # NOTE(review): the service-level `gpus` key needs a Docker Compose
    # version that supports it - confirm against the deployed version.
    gpus: all


--------------------------------------------------------------------------------
/backend/app.py:
--------------------------------------------------------------------------------
 1 | # -*- coding: utf-8 -*-
 2 | """FastAPI 应用工厂"""
 3 | 
 4 | from __future__ import annotations
 5 | 
 6 | import os
 7 | from pathlib import Path
 8 | 
 9 | from fastapi import FastAPI
10 | from fastapi.middleware.cors import CORSMiddleware
11 | from fastapi.staticfiles import StaticFiles
12 | 
13 | from backend.routers import (
14 |     chat_router,
15 |     download_router,
16 |     progress_router,
17 |     thumbnail_router,
18 |     transcript_router,
19 |     translate_router,
20 |     upload_router,
21 | )
22 | 
23 | 
def create_app(static_dir: Path, db_url: str | None) -> FastAPI:
    """Create and configure the HearSight FastAPI application.

    Sets up CORS, mounts ``static_dir`` under ``/static``, stores
    ``static_dir`` and ``db_url`` on ``app.state`` and registers every router
    under the ``/api`` prefix.

    Args:
        static_dir: Directory served as static files (where videos are stored).
        db_url: Database connection string, or None.

    Returns:
        The configured FastAPI instance.
    """
    app = FastAPI(title="HearSight API")

    # CORS origins: a comma-separated ALLOW_ORIGINS list takes precedence;
    # otherwise a single origin is built from FRONTEND_HOST / FRONTEND_PORT;
    # when neither is fully configured every origin is allowed.
    configured_origins = os.environ.get("ALLOW_ORIGINS")
    if configured_origins:
        allow_origins = [
            origin.strip()
            for origin in configured_origins.split(",")
            if origin.strip()
        ]
    else:
        frontend_host = os.environ.get("FRONTEND_HOST")
        frontend_port = os.environ.get("FRONTEND_PORT")
        if frontend_host and frontend_port:
            allow_origins = [f"http://{frontend_host}:{frontend_port}"]
        else:
            allow_origins = ["*"]

    # NOTE(review): the wildcard fallback is combined with
    # allow_credentials=True; confirm this is intended, since credentialed
    # requests from arbitrary origins are usually not what you want.
    app.add_middleware(
        CORSMiddleware,
        allow_origins=allow_origins,
        allow_credentials=True,
        allow_methods=["*"],
        allow_headers=["*"],
    )

    # Serve static files and expose shared settings through app.state.
    app.mount("/static", StaticFiles(directory=str(static_dir)), name="static")
    app.state.static_dir = static_dir
    app.state.db_url = db_url

    # Register all routers under /api, in the same order as before.
    api_routers = (
        download_router,
        transcript_router,
        chat_router,
        thumbnail_router,
        progress_router,
        translate_router,
        upload_router,
    )
    for router in api_routers:
        app.include_router(router, prefix="/api")

    return app
69 | 


--------------------------------------------------------------------------------
/backend/services/upload_service.py:
--------------------------------------------------------------------------------
 1 | # -*- coding: utf-8 -*-
 2 | """
 3 | 上传相关服务函数：
 4 | - create_audio_placeholder: 为音频文件创建占位符图片
 5 | - get_unique_filename: 在目录下生成不冲突的唯一文件名
 6 | """
 7 | from __future__ import annotations
 8 | 
 9 | import logging
10 | from pathlib import Path
11 | from typing import Optional
12 | 
13 | from PIL import Image, ImageDraw, ImageFont
14 | 
15 | logger = logging.getLogger(__name__)
16 | 
17 | 
def create_audio_placeholder(output_path: Path, duration: float = 0) -> None:
    """Render a 1280x720 placeholder image for an audio file and save it.

    The image has a dark background with the title "Audio File" and the
    caption "Playing audio content" centred horizontally.

    Args:
        output_path: Where the image is written.
        duration: Not used by the current implementation.
    """
    canvas_w, canvas_h = 1280, 720
    image = Image.new("RGB", (canvas_w, canvas_h), color=(45, 55, 72))
    pen = ImageDraw.Draw(image)

    # Fall back to Pillow's built-in font for both sizes when Arial cannot
    # be loaded.
    try:
        title_font = ImageFont.truetype("arial.ttf", 60)
        caption_font = ImageFont.truetype("arial.ttf", 30)
    except Exception:
        title_font = ImageFont.load_default()
        caption_font = ImageFont.load_default()

    # Title: centred horizontally, placed 50px above the vertical centre.
    title = "Audio File"
    left, top, right, bottom = pen.textbbox((0, 0), title, font=title_font)
    title_w = right - left
    title_h = bottom - top
    title_x = (canvas_w - title_w) / 2
    title_y = (canvas_h - title_h) / 2 - 50
    pen.text((title_x, title_y), title, fill=(203, 213, 225), font=title_font)

    # Caption: centred horizontally, 30px below the title.
    caption = "Playing audio content"
    caption_box = pen.textbbox((0, 0), caption, font=caption_font)
    caption_w = caption_box[2] - caption_box[0]
    caption_x = (canvas_w - caption_w) / 2
    caption_y = title_y + title_h + 30
    pen.text((caption_x, caption_y), caption, fill=(148, 163, 184), font=caption_font)

    image.save(output_path)
    logger.info(f"创建音频占位符图片: {output_path}")
49 | 
50 | 
def get_unique_filename(directory: Path, filename: str) -> str:
    """Return a file name that does not clash with an entry in *directory*.

    When ``filename`` is free it is returned unchanged. Otherwise a numeric
    suffix is inserted before the extension (``name-1.ext``, ``name-2.ext``,
    ...) and the first free candidate is returned.

    Args:
        directory: Directory the name must be unique within.
        filename: Desired file name.

    Returns:
        A file name (not a full path) that does not currently exist in
        ``directory``.
    """
    if not (directory / filename).exists():
        return filename

    requested = Path(filename)
    base, suffix = requested.stem, requested.suffix

    # Probe name-1, name-2, ... until an unused name is found.
    attempt = 1
    candidate = f"{base}-{attempt}{suffix}"
    while (directory / candidate).exists():
        attempt += 1
        candidate = f"{base}-{attempt}{suffix}"
    return candidate
67 | 


--------------------------------------------------------------------------------
/backend/db/transcript_translation_crud.py:
--------------------------------------------------------------------------------
 1 | # -*- coding: utf-8 -*-
 2 | """转写记录翻译 CRUD 操作模块"""
 3 | from __future__ import annotations
 4 | 
 5 | import json
 6 | from typing import Any, Dict, List, Optional
 7 | 
 8 | import psycopg2
 9 | from psycopg2.extras import RealDictCursor
10 | 
11 | from .conn_utils import connect_db
12 | 
13 | 
def save_translations(
    db_url: Optional[str],
    transcript_id: int,
    translations: Dict[str, List[Dict[str, Any]]],
) -> bool:
    """Save the translations of one transcript to the database.

    The dictionary is serialised to JSON text (non-ASCII characters are kept
    as-is) and written to ``transcripts.translations_json``; ``updated_at``
    is set to ``NOW()`` in the same statement.

    Args:
        db_url: Database connection URL, passed to ``connect_db``.
        transcript_id: ID of the transcript row to update.
        translations: Translation results keyed by language code.

    Returns:
        True when the UPDATE affected at least one row, otherwise False.

    Raises:
        TypeError: If ``translations`` contains values that cannot be encoded
            as JSON.
    """
    # Serialise before opening the connection: if the payload is not
    # JSON-encodable we fail here, without leaving a connection open that
    # the try/finally below would never get a chance to close.
    data = json.dumps(translations, ensure_ascii=False)
    conn = connect_db(db_url)
    try:
        # ``with conn`` wraps the statement in a transaction (commit on
        # success, rollback on error); it does not close the connection,
        # hence the explicit close in ``finally``.
        with conn:
            with conn.cursor() as cur:
                cur.execute(
                    """
                    UPDATE transcripts 
                    SET translations_json = %s, updated_at = NOW()
                    WHERE id = %s
                    """,
                    (data, transcript_id),
                )
                return cur.rowcount > 0
    finally:
        conn.close()
45 | 
46 | 
def get_translations(
    db_url: Optional[str], transcript_id: int
) -> Optional[Dict[str, List[Dict[str, Any]]]]:
    """Fetch the stored translations of one transcript.

    Args:
        db_url: Database connection URL, passed to ``connect_db``.
        transcript_id: ID of the transcript row to read (coerced with ``int``).

    Returns:
        The decoded ``translations_json`` value, or None when the row does
        not exist, the column is empty, or the stored value cannot be decoded.
    """
    query = "SELECT translations_json FROM transcripts WHERE id = %s"
    conn = connect_db(db_url)
    try:
        with conn:
            with conn.cursor(cursor_factory=RealDictCursor) as cur:
                cur.execute(query, (int(transcript_id),))
                record = cur.fetchone()
                stored = record.get("translations_json") if record else None
                if not stored:
                    return None
                # Any decoding problem is treated as "no translations".
                try:
                    return json.loads(stored)
                except Exception:
                    return None
    finally:
        conn.close()


--------------------------------------------------------------------------------
/frontend/src/components/ui/resizable.tsx:
--------------------------------------------------------------------------------
 1 | import * as React from "react"
 2 | import { GripVerticalIcon } from "lucide-react"
 3 | import * as ResizablePrimitive from "react-resizable-panels"
 4 | 
 5 | import { cn } from "@/lib/utils"
 6 | 
 7 | function ResizablePanelGroup({
 8 |   className,
 9 |   ...props
10 | }: React.ComponentProps<typeof ResizablePrimitive.PanelGroup>) {
11 |   return (
12 |     <ResizablePrimitive.PanelGroup
13 |       data-slot="resizable-panel-group"
14 |       className={cn(
15 |         "flex h-full w-full data-[panel-group-direction=vertical]:flex-col",
16 |         className
17 |       )}
18 |       {...props}
19 |     />
20 |   )
21 | }
22 | 
// Pass-through wrapper around ResizablePrimitive.Panel that only adds the
// data-slot marker.
function ResizablePanel(
  props: React.ComponentProps<typeof ResizablePrimitive.Panel>
) {
  return <ResizablePrimitive.Panel data-slot="resizable-panel" {...props} />
}
28 | 
// Drag handle placed between panels. When `withHandle` is truthy a small
// grip icon is rendered inside the handle; all remaining props are
// forwarded to ResizablePrimitive.PanelResizeHandle.
function ResizableHandle(
  props: React.ComponentProps<typeof ResizablePrimitive.PanelResizeHandle> & {
    withHandle?: boolean
  }
) {
  const { withHandle, className, ...rest } = props

  const handleClassName = cn(
    "bg-border focus-visible:ring-ring relative flex w-px items-center justify-center after:absolute after:inset-y-0 after:left-1/2 after:w-1 after:-translate-x-1/2 focus-visible:ring-1 focus-visible:ring-offset-1 focus-visible:outline-hidden data-[panel-group-direction=vertical]:h-px data-[panel-group-direction=vertical]:w-full data-[panel-group-direction=vertical]:after:left-0 data-[panel-group-direction=vertical]:after:h-1 data-[panel-group-direction=vertical]:after:w-full data-[panel-group-direction=vertical]:after:translate-x-0 data-[panel-group-direction=vertical]:after:-translate-y-1/2 [&[data-panel-group-direction=vertical]>div]:rotate-90",
    className
  )

  // Optional visible grip.
  const grip = withHandle && (
    <div className="bg-border z-10 flex h-4 w-3 items-center justify-center rounded-xs border">
      <GripVerticalIcon className="size-2.5" />
    </div>
  )

  return (
    <ResizablePrimitive.PanelResizeHandle
      data-slot="resizable-handle"
      className={handleClassName}
      {...rest}
    >
      {grip}
    </ResizablePrimitive.PanelResizeHandle>
  )
}
53 | 
54 | export { ResizablePanelGroup, ResizablePanel, ResizableHandle }
55 | 


--------------------------------------------------------------------------------
/backend/services/chat_knowledge_service.py:
--------------------------------------------------------------------------------
 1 | # -*- coding: utf-8 -*-
 2 | """聊天知识检索服务模块"""
 3 | 
 4 | import os
 5 | from typing import List, Dict, Optional, Tuple
 6 | 
 7 | from backend.schemas import Segment
 8 | from backend.services.knowledge_base_service import knowledge_base
 9 | from backend.db.transcript_crud import get_transcript_by_id
10 | 
11 | 
class ChatKnowledgeService:
    """Knowledge-retrieval helpers for the chat feature."""

    def _perform_knowledge_retrieval(self, question: str, transcript_id: int) -> tuple[List[Segment], str]:
        """
        Retrieve the segments relevant to a question for one transcript.

        Queries the knowledge base for the 5 most similar entries restricted
        to ``transcript_id`` and concatenates the ``sentences`` of every
        matching document, in the order the search results are returned.
        NOTE(review): no sorting by segment index happens here; confirm
        whether callers expect index order.

        Args:
            question: The user's question.
            transcript_id: ID of the transcript to search within.

        Returns:
            A ``(segments, filename)`` tuple. ``filename`` is the base name
            of the transcript's video path, else of its audio path, else the
            placeholder "未知文件".
        """
        # Run the search directly rather than dispatching a task from
        # inside a task.
        hits = knowledge_base.search_similar(query=question, n_results=5, transcript_ids=[transcript_id])

        collected = []
        for hit in hits:
            doc_id = hit.get("doc_id")
            if not doc_id:
                continue
            # db_url is passed as None for now; it still needs to be
            # threaded through from the caller.
            details = knowledge_base.get_doc_details(doc_id, None)
            if not details:
                continue
            sentences = details.get("sentences")
            if sentences:
                collected.extend(sentences)

        # Resolve the source file name, preferring the video path.
        transcript = get_transcript_by_id(None, transcript_id)
        if not transcript:
            return collected, "未知文件"

        media_path = transcript.get("video_path") or transcript.get("audio_path")
        filename = os.path.basename(media_path) if media_path else "未知文件"
        return collected, filename

    def _count_tokens_for_segments(self, segments: List[Segment]) -> int:
        """
        Return the total token count of the given segments.

        Delegates to ``count_segments_tokens``.

        Args:
            segments: Sentence segments to count.

        Returns:
            The total number of tokens.
        """
        # Imported at call time rather than at module import.
        from backend.utils.token_utils.calculate_tokens import count_segments_tokens

        total_tokens = count_segments_tokens(segments)
        return total_tokens


--------------------------------------------------------------------------------
/frontend/src/components/ui/tabs.tsx:
--------------------------------------------------------------------------------
 1 | "use client"
 2 | 
 3 | import * as React from "react"
 4 | import * as TabsPrimitive from "@radix-ui/react-tabs"
 5 | 
 6 | import { cn } from "@/lib/utils"
 7 | 
 8 | function Tabs({
 9 |   className,
10 |   ...props
11 | }: React.ComponentProps<typeof TabsPrimitive.Root>) {
12 |   return (
13 |     <TabsPrimitive.Root
14 |       data-slot="tabs"
15 |       className={cn("flex flex-col gap-2", className)}
16 |       {...props}
17 |     />
18 |   )
19 | }
20 | 
// Styled strip that holds the tab triggers; wraps TabsPrimitive.List and
// forwards all other props.
function TabsList(props: React.ComponentProps<typeof TabsPrimitive.List>) {
  const { className, ...rest } = props
  const listClassName = cn(
    "bg-muted text-muted-foreground inline-flex h-9 w-fit items-center justify-center rounded-lg p-[3px]",
    className
  )

  return (
    <TabsPrimitive.List
      data-slot="tabs-list"
      className={listClassName}
      {...rest}
    />
  )
}
36 | 
// A single tab button; wraps TabsPrimitive.Trigger with the active, focus
// and disabled styling and forwards all other props.
function TabsTrigger(
  props: React.ComponentProps<typeof TabsPrimitive.Trigger>
) {
  const { className, ...rest } = props
  const triggerClassName = cn(
    "data-[state=active]:bg-background dark:data-[state=active]:text-foreground focus-visible:border-ring focus-visible:ring-ring/50 focus-visible:outline-ring dark:data-[state=active]:border-input dark:data-[state=active]:bg-input/30 text-foreground dark:text-muted-foreground inline-flex h-[calc(100%-1px)] flex-1 items-center justify-center gap-1.5 rounded-md border border-transparent px-2 py-1 text-sm font-medium whitespace-nowrap transition-[color,box-shadow] focus-visible:ring-[3px] focus-visible:outline-1 disabled:pointer-events-none disabled:opacity-50 data-[state=active]:shadow-sm [&_svg]:pointer-events-none [&_svg]:shrink-0 [&_svg:not([class*='size-'])]:size-4",
    className
  )

  return (
    <TabsPrimitive.Trigger
      data-slot="tabs-trigger"
      className={triggerClassName}
      {...rest}
    />
  )
}
52 | 
/**
 * Styled wrapper around the Radix `Tabs.Content` primitive (the panel shown
 * for a tab). Tagged with `data-slot="tabs-content"`; all other props are
 * forwarded as-is.
 */
function TabsContent(props: React.ComponentProps<typeof TabsPrimitive.Content>) {
  const { className, ...rest } = props
  const contentClassName = cn("flex-1 outline-none", className)

  return <TabsPrimitive.Content data-slot="tabs-content" className={contentClassName} {...rest} />
}
65 | 
66 | export { Tabs, TabsList, TabsTrigger, TabsContent }
67 | 


--------------------------------------------------------------------------------
/frontend/src/hooks/useVideoSync.ts:
--------------------------------------------------------------------------------
 1 | import { useEffect, useRef } from 'react'
 2 | import { seekVideoTo } from '../utils'
 3 | import type { Segment } from '../types'
 4 | 
/** Inputs of the `useVideoSync` hook. */
interface UseVideoSyncProps {
  /** Transcript segments; their `start_time` / `end_time` are compared with the playhead in milliseconds. */
  segments: Segment[]
  /** When true, the element of a newly active segment is scrolled into view. */
  autoScroll: boolean
  /** Container that is searched for the `[data-seg-index="…"]` element of the active segment. */
  segScrollRef: React.RefObject<HTMLDivElement | null>
  /** Receives the active segment's `index`, or `null` when no segment covers the playhead. */
  setActiveSegIndex: (index: number | null) => void
  /** The video element whose `timeupdate` events drive the synchronisation. */
  videoRef: React.RefObject<HTMLVideoElement | null>
}
12 | 
/**
 * Keeps the transcript in step with video playback.
 *
 * On every `timeupdate` event the hook looks for the first segment whose
 * `[start_time, end_time)` window (milliseconds) contains the playhead. When
 * the active segment changes it reports the new index through
 * `setActiveSegIndex` and, if `autoScroll` is on, scrolls the matching
 * `[data-seg-index]` element inside `segScrollRef` into view.
 *
 * @returns `handleSeekTo(timeMs)`, which delegates to `seekVideoTo` with the
 *          current video element.
 */
export const useVideoSync = ({
  segments,
  autoScroll,
  segScrollRef,
  setActiveSegIndex,
  videoRef
}: UseVideoSyncProps) => {
  // Last index that was reported, so unchanged ticks do not trigger state updates.
  const lastReportedIndex = useRef<number | null>(null)

  useEffect(() => {
    const video = videoRef.current
    if (!video) return

    const handleTimeUpdate = () => {
      const playheadMs = (video.currentTime || 0) * 1000

      // First segment whose half-open time window contains the playhead.
      const hit = segments.find((seg) => {
        const startMs = Number(seg.start_time) || 0
        const endMs = Number(seg.end_time) || 0
        return playheadMs >= startMs && playheadMs < endMs
      })
      const nextIndex: number | null = hit ? hit.index : null

      if (lastReportedIndex.current === nextIndex) return
      lastReportedIndex.current = nextIndex
      setActiveSegIndex(nextIndex)

      const container = segScrollRef.current
      if (!autoScroll || nextIndex == null || !container) return

      const target = container.querySelector(`[data-seg-index="${nextIndex}"]`)
      if (!target) return

      try {
        target.scrollIntoView({ behavior: 'smooth', block: 'center' })
      } catch {
        // ignore scroll errors
      }
    }

    video.addEventListener('timeupdate', handleTimeUpdate)
    return () => video.removeEventListener('timeupdate', handleTimeUpdate)
  }, [segments, autoScroll, segScrollRef, setActiveSegIndex, videoRef])

  const handleSeekTo = (timeMs: number) => {
    seekVideoTo(videoRef.current, timeMs)
  }

  return { handleSeekTo }
}
69 | 


--------------------------------------------------------------------------------
/backend/ReAct/actions.py:
--------------------------------------------------------------------------------
 1 | """动作执行模块"""
 2 | 
 3 | import json
 4 | from typing import Any, Dict
 5 | 
 6 | from .models import ToolCallable
 7 | 
 8 | 
 9 | class ActionExecutor:
10 |     """执行各个动作"""
11 | 
12 |     def __init__(self, llm_router: Any, llm_model: str):
13 |         self.llm_router = llm_router
14 |         self.llm_model = llm_model
15 | 
16 |     async def execute_action(
17 |         self,
18 |         action_name: str,
19 |         action_input: Dict[str, Any],
20 |         available_tools: Dict[str, ToolCallable],
21 |     ) -> str:
22 |         """
23 |         执行指定的动作
24 | 
25 |         参数:
26 |             action_name: 动作名称
27 |             action_input: 动作输入参数
28 |             available_tools: 可用工具字典
29 | 
30 |         返回:
31 |             动作执行结果
32 |         """
33 |         # 内部动作 - 不需要外部工具，直接处理
34 |         if action_name == "finish":
35 |             return await self._action_finish(action_input)
36 | 
37 |         # 外部工具 - 检查是否在可用工具字典中
38 |         if action_name not in available_tools:
39 |             return f"未知工具: {action_name}"
40 | 
41 |         # 执行外部工具
42 |         return await self._execute_tool(
43 |             action_name, action_input, available_tools
44 |         )
45 | 
46 |     async def _execute_tool(
47 |         self,
48 |         tool_name: str,
49 |         action_input: Dict[str, Any],
50 |         available_tools: Dict[str, ToolCallable],
51 |     ) -> str:
52 |         """
53 |         执行外部工具
54 | 
55 |         参数:
56 |             tool_name: 工具名称
57 |             action_input: 动作输入参数
58 |             available_tools: 可用工具字典
59 |         """
60 |         try:
61 |             # 将输入参数序列化为 JSON 字符串传递给工具
62 |             result = await available_tools[tool_name](json.dumps(action_input))
63 | 
64 |             if not result:
65 |                 return "工具执行未返回结果"
66 | 
67 |             return result
68 |         except Exception as e:
69 |             return f"工具执行失败: {str(e)}"
70 | 
71 |     async def _action_finish(self, action_input: Dict[str, Any]) -> str:
72 |         """
73 |         结束动作 - 返回最终答案
74 | 
75 |         参数:
76 |             action_input: 包含 'answer' 的字典
77 |         """
78 |         answer = action_input.get("answer", "")
79 |         return answer
80 | 


--------------------------------------------------------------------------------
/backend/test_litellm.py:
--------------------------------------------------------------------------------
 1 | """测试litellm的使用效果"""
 2 | 
 3 | import asyncio
 4 | import sys
 5 | import os
 6 | 
 7 | # 将 backend 的上级目录（项目根目录）加入 sys.path，使 `backend.*` 可以被导入
 8 | backend_path = os.path.abspath(os.path.join(os.path.dirname(__file__), '..'))
 9 | if backend_path not in sys.path:
10 |     sys.path.insert(0, backend_path)
11 | 
12 | from backend.startup import get_llm_router
13 | from backend.config import settings
14 | 
async def test_litellm_completion():
    """Call the router's non-streaming ``completion`` once and print the reply.

    Any exception raised while building or sending the request is caught and
    printed instead of propagating.
    """
    router = get_llm_router()

    conversation = [
        {"role": "system", "content": "你是一个AI助手"},
        {"role": "user", "content": "什么是人工智能？"},
    ]

    try:
        request_kwargs = dict(
            model=settings.llm_model,
            messages=conversation,
            temperature=0.3,
            max_tokens=200,
        )
        reply = router.completion(**request_kwargs)
        print("非流式调用成功")
        print("响应:", reply.choices[0].message.content)
    except Exception as exc:
        print("非流式调用失败:", str(exc))
35 | 
async def test_litellm_streaming():
    """Call the router's ``completion`` with ``stream=True`` and echo the chunks.

    Each non-empty delta is printed as it arrives and accumulated; the full
    text is printed at the end. Any exception is caught and printed.
    """
    router = get_llm_router()

    conversation = [
        {"role": "system", "content": "你是一个AI助手"},
        {"role": "user", "content": "简单介绍一下机器学习"},
    ]

    try:
        request_kwargs = dict(
            model=settings.llm_model,
            messages=conversation,
            temperature=0.3,
            max_tokens=200,
            stream=True,
        )
        stream = router.completion(**request_kwargs)
        print("流式调用开始")

        collected = ""
        for chunk in stream:
            # Chunks without text content are skipped.
            if not chunk.choices[0].delta.content:
                continue
            piece = chunk.choices[0].delta.content
            print(piece, end="", flush=True)
            collected += piece

        print("\n流式调用完成")
        print("完整响应:", collected)
    except Exception as exc:
        print("流式调用失败:", str(exc))
64 | 
if __name__ == "__main__":
    # Run both scenarios in order, each in its own event loop.
    print("测试litellm completion方法")
    for heading, scenario in (
        ("\n1. 非流式调用:", test_litellm_completion),
        ("\n2. 流式调用:", test_litellm_streaming),
    ):
        print(heading)
        asyncio.run(scenario())


--------------------------------------------------------------------------------
/ASRBackend/main.py:
--------------------------------------------------------------------------------
 1 | """HearSight ASR Backend
 2 | 
 3 | 基于 FastAPI 构建的语音识别后端服务，提供音频转文本功能。
 4 | """
 5 | 
 6 | from __future__ import annotations
 7 | 
 8 | from fastapi import FastAPI
 9 | from fastapi.middleware.cors import CORSMiddleware
10 | 
11 | from config import settings
12 | from routers.asr_router import router as asr_router
13 | 
# Application instance; the title comes from the settings object.
app = FastAPI(title=settings.app_name)

# Configure CORS.
# NOTE(review): every origin, method and header is allowed while credentials
# are enabled as well — confirm this permissive policy is intended outside
# development.
app.add_middleware(
    CORSMiddleware,
    allow_origins=["*"],
    allow_credentials=True,
    allow_methods=["*"],
    allow_headers=["*"],
)

# Register the ASR routes.
app.include_router(asr_router)
27 | 
28 | 
@app.get("/health")
async def health_check():
    """健康检查接口，返回服务状态和运行模式"""
    # The docstring is intentionally left untouched: FastAPI publishes a route
    # function's docstring as the operation description, so it is runtime text.
    payload = {"status": "healthy", "service": "ASR Backend"}
    payload["mode"] = settings.asr_mode
    return payload
37 | 
38 | 
def main():
    """Validate the configuration and print the startup banner.

    Prints the service name, the details of the active ASR mode (local or
    cloud), the debug flag and the address the server will listen on.
    Exits the process with status 1 when ``settings.validate_config()``
    raises ``ValueError``. This function does not start the server itself.
    """
    separator = "=" * 60

    print(separator)
    print("HearSight ASR Backend")
    print(separator)

    # Validate the configuration before printing anything derived from it.
    try:
        settings.validate_config()
    except ValueError as e:
        print(f"❌ 配置错误: {e}")
        import sys

        sys.exit(1)

    # Describe the active run mode.
    if settings.is_local_mode():
        print("✓ 运行模式: 本地 (local)")
        print(f"  - 模型: {settings.local_model_name}")
        print(f"  - VAD 模型: {settings.local_vad_model}")
        print(f"  - 标点模型: {settings.local_punc_model}")
        print("  - 支持: 文件上传、URL")
        print("  - 特点: 完全离线，但需要大量空间和计算资源")
    else:
        print("✓ 运行模式: 云端 (cloud)")
        print("  - 提供商: DashScope (阿里云)")
        print(f"  - 模型: {settings.dashscope_model}")
        print(f"  - 语言提示: {settings.dashscope_language_hints}")
        print("  - 支持: URL 转录")
        print("  - 特点: 轻量级，支持多语言")

    print(f"✓ 调试模式: {'开启' if settings.debug else '关闭'}")
    print(separator)
    print(f"启动服务器: http://0.0.0.0:{settings.port}")
    print(separator)
75 | 
76 | 
if __name__ == "__main__":
    main()
    import uvicorn

    # Auto-reload starts a file watcher and is a development aid, so it follows
    # the debug flag instead of being switched on unconditionally.
    uvicorn.run(
        "main:app",
        host="0.0.0.0",
        port=settings.port,
        reload=bool(settings.debug),
    )
82 | 


--------------------------------------------------------------------------------
/backend/text_process/translate_batch_service.py:
--------------------------------------------------------------------------------
 1 | # -*- coding: utf-8 -*-
 2 | """
 3 | 翻译分批处理模块：处理翻译分批逻辑。
 4 | """
 5 | 
 6 | from __future__ import annotations
 7 | 
 8 | from typing import List
 9 | 
10 | from backend.schemas import Segment
11 | from backend.utils.token_utils.calculate_tokens import OpenAITokenCalculator
12 | 
13 | 
def _split_segments_by_output_tokens(
    segments: List[Segment],
    max_tokens: int = 4096,
    target_batch_size: int = 10,
) -> List[List[Segment]]:
    """Group segments into translation batches bounded by count and tokens.

    Segments are first cut into consecutive chunks of ``target_batch_size``.
    A chunk whose estimated output exceeds ``max_tokens`` is then split
    greedily, in order, into smaller sub-batches that each stay within the
    limit. A single segment whose own estimate exceeds ``max_tokens`` still
    forms a batch by itself.

    The output estimate for one segment is
    ``int(token_count(sentence) * 1.5) + 20``.

    Args:
        segments: Segments to batch, in transcript order.
        max_tokens: Upper bound on the estimated output tokens per batch.
        target_batch_size: Maximum number of segments per batch before the
            token check is applied.

    Returns:
        The batches in original order; an empty list for empty input.

    Raises:
        ValueError: If ``target_batch_size`` is smaller than 1.
    """
    if not segments:
        return []
    if target_batch_size < 1:
        raise ValueError("target_batch_size must be at least 1")

    calculator = OpenAITokenCalculator()

    def estimate_output_tokens(seg: Segment) -> int:
        # `or ""` also covers a segment whose sentence is present but None.
        sentence = seg.get("sentence") or ""
        return int(calculator.count_tokens(sentence) * 1.5) + 20

    final_batches: List[List[Segment]] = []

    for start in range(0, len(segments), target_batch_size):
        batch = segments[start : start + target_batch_size]
        # Tokenise each sentence once; the estimates serve both the total
        # check and the optional split below.
        estimates = [estimate_output_tokens(seg) for seg in batch]

        if sum(estimates) <= max_tokens:
            final_batches.append(batch)
            continue

        # Over the limit: pack segments greedily into sub-batches.
        current_sub_batch: List[Segment] = []
        current_tokens = 0
        for seg, estimated_tokens in zip(batch, estimates):
            if current_sub_batch and current_tokens + estimated_tokens > max_tokens:
                final_batches.append(current_sub_batch)
                current_sub_batch = [seg]
                current_tokens = estimated_tokens
            else:
                current_sub_batch.append(seg)
                current_tokens += estimated_tokens

        if current_sub_batch:
            final_batches.append(current_sub_batch)

    return final_batches


--------------------------------------------------------------------------------
/frontend/src/components/ui/button.tsx:
--------------------------------------------------------------------------------
 1 | import * as React from "react"
 2 | import { Slot } from "@radix-ui/react-slot"
 3 | import { cva, type VariantProps } from "class-variance-authority"
 4 | 
 5 | import { cn } from "@/lib/utils"
 6 | 
// Class-name generator for buttons, built with `cva`.
// The first argument holds the classes shared by every button; the second
// declares the selectable `variant` and `size` options and their defaults.
const buttonVariants = cva(
  "inline-flex items-center justify-center gap-2 whitespace-nowrap rounded-md text-sm font-medium transition-all disabled:pointer-events-none disabled:opacity-50 [&_svg]:pointer-events-none [&_svg:not([class*='size-'])]:size-4 shrink-0 [&_svg]:shrink-0 outline-none focus-visible:border-ring focus-visible:ring-ring/50 focus-visible:ring-[3px] aria-invalid:ring-destructive/20 dark:aria-invalid:ring-destructive/40 aria-invalid:border-destructive",
  {
    variants: {
      // Visual treatment of the button.
      variant: {
        default: "bg-primary text-primary-foreground hover:bg-primary/90",
        destructive:
          "bg-destructive text-white hover:bg-destructive/90 focus-visible:ring-destructive/20 dark:focus-visible:ring-destructive/40 dark:bg-destructive/60",
        outline:
          "border bg-background shadow-xs hover:bg-accent hover:text-accent-foreground dark:bg-input/30 dark:border-input dark:hover:bg-input/50",
        secondary:
          "bg-secondary text-secondary-foreground hover:bg-secondary/80",
        ghost:
          "hover:bg-accent hover:text-accent-foreground dark:hover:bg-accent/50",
        link: "text-primary underline-offset-4 hover:underline",
      },
      // Height and padding; the `icon*` sizes are square.
      size: {
        default: "h-9 px-4 py-2 has-[>svg]:px-3",
        sm: "h-8 rounded-md gap-1.5 px-3 has-[>svg]:px-2.5",
        lg: "h-10 rounded-md px-6 has-[>svg]:px-4",
        icon: "size-9",
        "icon-sm": "size-8",
        "icon-lg": "size-10",
      },
    },
    // Used when the caller passes no `variant` / `size`.
    defaultVariants: {
      variant: "default",
      size: "default",
    },
  }
)
38 | 
/**
 * Button component styled through `buttonVariants`.
 *
 * With `asChild` set, the Radix `Slot` is rendered instead of a native
 * `<button>`. `variant`, `size` and `className` feed the class-name
 * generator; every other prop is forwarded to the rendered element.
 */
function Button(
  props: React.ComponentProps<"button"> &
    VariantProps<typeof buttonVariants> & {
      asChild?: boolean
    }
) {
  const { className, variant, size, asChild = false, ...rest } = props
  const Comp = asChild ? Slot : "button"
  const buttonClassName = cn(buttonVariants({ variant, size, className }))

  return <Comp data-slot="button" className={buttonClassName} {...rest} />
}
59 | 
60 | export { Button, buttonVariants }
61 | 


--------------------------------------------------------------------------------
/frontend/src/components/ui/card.tsx:
--------------------------------------------------------------------------------
 1 | import * as React from "react"
 2 | 
 3 | import { cn } from "@/lib/utils"
 4 | 
 5 | function Card({ className, ...props }: React.ComponentProps<"div">) {
 6 |   return (
 7 |     <div
 8 |       data-slot="card"
 9 |       className={cn(
10 |         "bg-card text-card-foreground flex flex-col gap-6 rounded-xl border py-6 shadow-sm",
11 |         className
12 |       )}
13 |       {...props}
14 |     />
15 |   )
16 | }
17 | 
/**
 * Card header: a grid `<div>` tagged `data-slot="card-header"`. Extra props
 * are forwarded to the `<div>`.
 */
function CardHeader(props: React.ComponentProps<"div">) {
  const { className, ...rest } = props
  const headerClassName = cn(
    "@container/card-header grid auto-rows-min grid-rows-[auto_auto] items-start gap-2 px-6 has-data-[slot=card-action]:grid-cols-[1fr_auto] [.border-b]:pb-6",
    className
  )

  return <div data-slot="card-header" className={headerClassName} {...rest} />
}
30 | 
/**
 * Card title: a `<div>` tagged `data-slot="card-title"` with title
 * typography. Extra props are forwarded to the `<div>`.
 */
function CardTitle(props: React.ComponentProps<"div">) {
  const { className, ...rest } = props
  const titleClassName = cn("leading-none font-semibold", className)

  return <div data-slot="card-title" className={titleClassName} {...rest} />
}
40 | 
/**
 * Card description: a `<div>` tagged `data-slot="card-description"` with
 * muted, small text. Extra props are forwarded to the `<div>`.
 */
function CardDescription(props: React.ComponentProps<"div">) {
  const { className, ...rest } = props
  const descriptionClassName = cn("text-muted-foreground text-sm", className)

  return <div data-slot="card-description" className={descriptionClassName} {...rest} />
}
50 | 
/**
 * Card action area: a `<div>` tagged `data-slot="card-action"`, placed in the
 * second grid column. Extra props are forwarded to the `<div>`.
 */
function CardAction(props: React.ComponentProps<"div">) {
  const { className, ...rest } = props
  const actionClassName = cn(
    "col-start-2 row-span-2 row-start-1 self-start justify-self-end",
    className
  )

  return <div data-slot="card-action" className={actionClassName} {...rest} />
}
63 | 
/**
 * Card body: a `<div>` tagged `data-slot="card-content"` with horizontal
 * padding. Extra props are forwarded to the `<div>`.
 */
function CardContent(props: React.ComponentProps<"div">) {
  const { className, ...rest } = props
  const contentClassName = cn("px-6", className)

  return <div data-slot="card-content" className={contentClassName} {...rest} />
}
73 | 
/**
 * Card footer: a flex `<div>` tagged `data-slot="card-footer"`. Extra props
 * are forwarded to the `<div>`.
 */
function CardFooter(props: React.ComponentProps<"div">) {
  const { className, ...rest } = props
  const footerClassName = cn("flex items-center px-6 [.border-t]:pt-6", className)

  return <div data-slot="card-footer" className={footerClassName} {...rest} />
}
83 | 
84 | export {
85 |   Card,
86 |   CardHeader,
87 |   CardFooter,
88 |   CardTitle,
89 |   CardAction,
90 |   CardDescription,
91 |   CardContent,
92 | }
93 | 


--------------------------------------------------------------------------------
/frontend/src/features/app/docs/文稿Tab页滚动问题解决记录.md:
--------------------------------------------------------------------------------
 1 | # 文稿Tab页滚动问题解决记录
 2 | 
 3 | ## 问题描述
 4 | 
 5 | 在 HearSight 前端应用中，右侧边栏有两个tab页：字幕分句和文稿。当用户在文稿tab页中滚动内容或点击字幕跳转视频时，会导致整个页面向上滚动，部分内容被header遮挡，影响用户体验。而字幕分句tab页则没有这个问题。
 6 | 
 7 | ## 问题分析
 8 | 
 9 | ### 根本原因
10 | 1. **组件实现差异**：
11 |    - `SegmentsTab` 使用 `ScrollArea` 组件，提供独立的滚动容器
12 |    - `TranscriptTab` 最初使用普通的 `div` + `overflow-y-auto`，滚动行为未被正确隔离
13 | 
14 | 2. **滚动事件冒泡**：
15 |    - 当 `TranscriptTab` 内容超出容器高度时，滚动事件会冒泡到父级容器
16 |    - 导致整个 `AppPage` 容器滚动，内容被header遮挡
17 | 
18 | 3. **自动滚动逻辑问题**：
19 |    - `RightPanel` 中的自动滚动使用 `scrollIntoView()` 方法
 20 |    - 该方法会滚动所有可滚动的祖先容器（而不仅仅是最近的一个），因此可能连带触发页面级别的滚动
21 | 
22 | ## 解决步骤
23 | 
24 | ### 1. 统一滚动组件实现
25 | 将 `TranscriptTab` 改为使用 `ScrollArea` 组件，与 `SegmentsTab` 保持一致：
26 | 
27 | ```tsx
28 | // 修改前
29 | <div ref={ref} className="h-full overflow-y-auto">
30 | 
31 | // 修改后
32 | <ScrollArea ref={ref} className="h-full overflow-hidden">
33 | ```
34 | 
35 | ### 2. 修改自动滚动逻辑
36 | 替换 `scrollIntoView()` 为手动滚动计算：
37 | 
38 | ```tsx
39 | // 修改前
40 | element?.scrollIntoView({ behavior: "smooth", block: "center" })
41 | 
42 | // 修改后
43 | const scrollContainer = transcriptScrollRef.current?.querySelector('[data-radix-scroll-area-viewport]') as HTMLElement
44 | if (element && scrollContainer) {
45 |   const elementRect = element.getBoundingClientRect()
46 |   const containerRect = scrollContainer.getBoundingClientRect()
47 |   const scrollTop = scrollContainer.scrollTop
48 |   const elementTop = elementRect.top - containerRect.top + scrollTop
49 |   const containerHeight = scrollContainer.clientHeight
50 |   const targetScrollTop = elementTop - containerHeight / 2 + element.offsetHeight / 2
51 | 
52 |   scrollContainer.scrollTo({
53 |     top: targetScrollTop,
54 |     behavior: 'smooth'
55 |   })
56 | }
57 | ```
58 | 
59 | ### 3. 更新相关函数
60 | 同样修改 `useScrollHandlers` 中的 `centerActiveSegment` 函数，确保一致性。
61 | 
62 | ## 技术细节
63 | 
64 | ### ScrollArea 组件优势
65 | - 提供独立的滚动上下文，防止滚动事件冒泡
66 | - 内置的滚动条样式和行为
67 | - 支持自定义滚动行为和样式
68 | 
69 | ### 手动滚动计算原理
70 | 1. 获取目标元素和滚动容器的边界矩形
71 | 2. 计算元素相对于滚动容器的位置
72 | 3. 计算目标滚动位置（元素居中显示）
73 | 4. 使用 `scrollTo()` 方法进行平滑滚动
74 | 
75 | ## 修改文件
76 | 
77 | - `frontend/src/components/RightPanel/TranscriptTab.tsx`：改为使用 ScrollArea 组件
78 | - `frontend/src/components/RightPanel/RightPanel.tsx`：修改自动滚动逻辑
79 | - `frontend/src/components/RightPanel/hooks/useScrollHandlers.ts`：更新 centerActiveSegment 函数
80 | 
81 | 


--------------------------------------------------------------------------------
/frontend/src/components/RightPanel/TranscriptTab.tsx:
--------------------------------------------------------------------------------
 1 | import { forwardRef } from "react"
 2 | import { ScrollArea } from "@/components/ui/scroll-area"
 3 | import type { Segment } from "../../types"
 4 | 
/** Props of the `TranscriptTab` component. */
interface TranscriptTabProps {
  /** Segments rendered, in order, as one continuous run of inline text. */
  readonly segments: Segment[]
  /** `index` of the segment to highlight, or `null` for no highlight. */
  readonly activeSegIndex: number | null
  /** Called with the segment the user clicked. */
  readonly onSegmentClick: (segment: Segment) => void
  /**
   * `'original'` (the default) shows `segment.sentence`; any other value is
   * used as a key into `segment.translation`, falling back to the sentence.
   */
  readonly displayLanguage?: string
}
11 | 
/**
 * Transcript view: renders every segment as inline text inside a
 * `ScrollArea`, highlights the active segment and reports clicks.
 *
 * The forwarded `ref` is attached to the `ScrollArea`. Each segment element
 * carries `data-seg-index` so callers can locate it in the DOM.
 */
const TranscriptTab = forwardRef<HTMLDivElement, TranscriptTabProps>(
  ({ segments, activeSegIndex, onSegmentClick, displayLanguage = 'original' }, ref) => {
    // Text shown for a segment: the translation for `displayLanguage` when one
    // exists, otherwise the original sentence (or a placeholder when empty).
    const getDisplayText = (segment: Segment) => {
      const original = segment.sentence || "(空)"
      if (displayLanguage === 'original') {
        return original
      }
      return segment.translation?.[displayLanguage] || original
    }

    return (
      <ScrollArea ref={ref} className="h-full">
        {segments.length === 0 ? (
          <div className="flex items-center justify-center h-40 text-sm text-slate-500">
            暂无内容
          </div>
        ) : (
          <div className="p-4 pb-8 text-base leading-7 text-slate-800 text-left min-h-full">
            {segments.map((seg) => {
              const isActive = activeSegIndex === seg.index
              const displayText = getDisplayText(seg)

              return (
                // A <span> is used instead of a <button> so sentences flow as
                // continuous text and only wrap at the container edge. To keep
                // it accessible the span is exposed as a button: it is
                // focusable and reacts to Enter / Space like a native one.
                <span
                  key={seg.index}
                  data-seg-index={seg.index}
                  role="button"
                  tabIndex={0}
                  onClick={() => onSegmentClick(seg)}
                  onKeyDown={(event) => {
                    if (event.key === "Enter" || event.key === " ") {
                      // Stops Space from scrolling the surrounding container.
                      event.preventDefault()
                      onSegmentClick(seg)
                    }
                  }}
                  className={`px-0.5 py-0.5 mr-1 rounded cursor-pointer transition-colors duration-200 break-words ${
                    isActive ? "bg-blue-100 text-slate-900 shadow-inner" : "hover:bg-blue-50"
                  }`}
                >
                  {displayText}
                </span>
              )
            })}
          </div>
        )}
      </ScrollArea>
    )
  }
)
60 | 
61 | TranscriptTab.displayName = "TranscriptTab"
62 | 
63 | export default TranscriptTab
64 | 


--------------------------------------------------------------------------------
/backend/ReAct/example.py:
--------------------------------------------------------------------------------
 1 | """ReAct 示例代码
 2 | 
 3 | 使用 config.py 中的配置来测试 ReAct 代理。
 4 | """
 5 | 
 6 | import asyncio
 7 | import os
 8 | import subprocess
 9 | import sys
10 | 
11 | # 添加项目根目录到路径，以便导入
12 | sys.path.insert(0, os.path.dirname(os.path.dirname(__file__)))
13 | 
14 | from config import settings
15 | from ReAct import BaseAgent
16 | 
17 | 
async def main():
    """Run one ReAct reasoning round against a locally started tool server.

    Reads the LLM settings from ``config.settings``, starts
    ``tools_server.py`` as a child process, asks the agent a fixed arithmetic
    question restricted to the ``calculator`` tool, prints the final answer
    and the reasoning trace, and always shuts the child process down again.
    Returns early, without starting the server, when no API key is
    configured.
    """
    # Configuration, with fallbacks for the base URL and model name.
    api_key = settings.llm_provider_api_key
    api_base = settings.llm_provider_base_url or "https://api.openai.com/v1"
    model = settings.llm_model or "gpt-3.5-turbo"

    if not api_key:
        print("请在 config.py 或环境变量中设置 llm_provider_api_key。")
        return

    print("启动本地工具 MCP 服务器...")

    # Start the tool server in the background with the current interpreter.
    server_script = os.path.join(os.path.dirname(__file__), "tools_server.py")
    server_process = subprocess.Popen([sys.executable, server_script])

    try:
        # Give the server time to start. Waiting inside the `try` guarantees
        # the child is stopped even if the wait itself is interrupted.
        await asyncio.sleep(2)

        print("创建 ReAct 代理...")

        # Agent connected to the local MCP server.
        agent = BaseAgent(
            openai_api_key=api_key,
            openai_api_base=api_base,
            openai_api_model=model,
            tools_backend_url="http://localhost:8001",  # local MCP server address
            config_path=None,
        )

        question = "计算 2 + 3 的结果。"

        print(f"执行推理: {question}")

        result = await agent.react_loop.run(question, allowed_tools=["calculator"])

        print(f"最终答案: {result.final_answer}")
        print("推理轨迹:")
        for step in result.trace:
            print(f"  步骤 {step.step}: {step.thought}")
            if step.action:
                print(f"    动作: {step.action}")
            if step.action_input:
                print(f"    输入: {step.action_input}")
            if step.observation:
                print(f"    观察: {step.observation}")

    finally:
        print("停止工具服务器...")
        server_process.terminate()
        try:
            # Bounded wait so a server that ignores terminate() cannot hang
            # this script forever.
            server_process.wait(timeout=5)
        except subprocess.TimeoutExpired:
            server_process.kill()
            server_process.wait()

    print("测试完成。")


if __name__ == "__main__":
    asyncio.run(main())


--------------------------------------------------------------------------------
/frontend/src/utils/language-detector.ts:
--------------------------------------------------------------------------------
/** Result returned by `detectLanguage`. */
export interface LanguageDetectionResult {
  /** Dominant language: Chinese, English, or neither. */
  primary_language: 'zh' | 'en' | 'other'
  /** Share of Chinese reported by `detectLanguage`, between 0 and 1. */
  chinese_ratio: number
  /** True only for the Chinese-dominant result, whose suggestion is phrased as a question. */
  need_confirmation: boolean
  /** Message describing the detection outcome and the proposed next step. */
  suggestion: string
}
 7 | 
/** Minimal segment shape used by this module; only `sentence` is read. */
interface Segment {
  /** Text of the segment; may be absent. */
  sentence?: string
  /** Any other fields are accepted and ignored here. */
  [key: string]: unknown
}
12 | 
/**
 * Measure how a text splits between Chinese characters and ASCII letters.
 *
 * Characters in U+4E00–U+9FFF count as Chinese and `A–Z` / `a–z` count as
 * English; everything else (digits, punctuation, whitespace, other scripts)
 * is ignored.
 *
 * @returns `[chineseRatio, englishRatio]` relative to the counted characters,
 *          or `[0, 0]` when the text contains none of them.
 */
function getLanguageRatio(text: string): [number, number] {
  const isChinese = (code: number) => code >= 0x4e00 && code <= 0x9fff
  const isAsciiLetter = (code: number) =>
    (code >= 97 && code <= 122) || (code >= 65 && code <= 90)

  const counts = { chinese: 0, english: 0 }
  for (const char of text) {
    const code = char.charCodeAt(0)
    if (isChinese(code)) {
      counts.chinese += 1
    } else if (isAsciiLetter(code)) {
      counts.english += 1
    }
  }

  const counted = counts.chinese + counts.english
  return counted === 0
    ? [0, 0]
    : [counts.chinese / counted, counts.english / counted]
}
33 | 
/**
 * Detect the dominant language of a list of transcript segments.
 *
 * Each segment's sentence is scored with `getLanguageRatio`, and the
 * per-segment ratios are averaged. Segments that cannot be scored (an empty
 * sentence, or one without any Chinese character or ASCII letter) are left
 * out of the average, so blank or number-only segments no longer dilute the
 * result.
 *
 * - average Chinese ratio >= 0.8 → `'zh'` (asks for confirmation)
 * - average English ratio >= 0.8 → `'en'`
 * - otherwise → `'other'`
 *
 * `chinese_ratio` is always the measured average Chinese ratio.
 *
 * @param segments Segments to inspect; a missing or empty list yields
 *                 `'other'` with a "no segments" suggestion.
 */
export function detectLanguage(segments: Segment[]): LanguageDetectionResult {
  if (!segments || segments.length === 0) {
    return {
      primary_language: 'other',
      chinese_ratio: 0,
      need_confirmation: false,
      suggestion: '没有可用分句'
    }
  }

  let totalChinese = 0
  let totalEnglish = 0
  // Number of segments that contributed a score.
  let scoredSegments = 0

  for (const seg of segments) {
    const sentence = seg.sentence || ''
    if (!sentence) continue

    const [chineseRatio, englishRatio] = getLanguageRatio(sentence)
    // No countable characters at all: the segment is in neither language.
    if (chineseRatio === 0 && englishRatio === 0) continue

    totalChinese += chineseRatio
    totalEnglish += englishRatio
    scoredSegments++
  }

  const avgChinese = scoredSegments > 0 ? totalChinese / scoredSegments : 0
  const avgEnglish = scoredSegments > 0 ? totalEnglish / scoredSegments : 0

  if (avgChinese >= 0.8) {
    return {
      primary_language: 'zh',
      chinese_ratio: avgChinese,
      need_confirmation: true,
      suggestion: '检测到主要为中文，是否翻译成英文？'
    }
  }

  if (avgEnglish >= 0.8) {
    return {
      primary_language: 'en',
      chinese_ratio: avgChinese,
      need_confirmation: false,
      suggestion: '检测到主要为英文，将自动翻译成中文'
    }
  }

  return {
    primary_language: 'other',
    chinese_ratio: avgChinese,
    need_confirmation: false,
    suggestion: '检测到混合语言，建议手动翻译'
  }
}
85 | 


--------------------------------------------------------------------------------
/ASRBackend/asr_functions/docs/dashscope_paraformer_v2_transcription设计文档.md:
--------------------------------------------------------------------------------
  1 | # dashscope_paraformer_v2_transcription 模块设计文档
  2 | 
  3 | ## 概述
  4 | 
  5 | `dashscope_paraformer_v2_transcription.py` 是一个用于云端语音识别（ASR）的模块，基于阿里云 DashScope API 的 Paraformer-v2 模型实现。该模块提供异步音频转录功能，支持多语言识别，并能处理长时间音频文件。
  6 | 
  7 | ## 功能特性
  8 | 
  9 | - 基于阿里云 DashScope API 实现云端语音识别
 10 | - 支持异步转录处理，适合处理大文件
 11 | - 支持多语言识别
 12 | - 自动处理任务状态轮询
 13 | - 结果规范化处理
 14 | - 详细的错误处理机制
 15 | 
 16 | ## 输入输出说明
 17 | 
 18 | ### 主要入口函数
 19 | 
 20 | ```python
 21 | def transcribe_audio_from_url(
 22 |     url: str,
 23 |     model: str = MODEL_NAME,
 24 |     language_hints: Optional[List[str]] = None,
 25 |     timeout: int = 600,
 26 | ) -> Optional[Dict]
 27 | ```
 28 | 
 29 | #### 参数说明
 30 | 
 31 | - `url`: 音频文件的 URL 地址
 32 | - `model`: 使用的转录模型，默认为 "paraformer-v2"
 33 | - `language_hints`: 语言提示列表；签名中的默认值为 `None`，未传入时使用 ["zh", "en"]
 34 | - `timeout`: 等待超时时间（秒），默认为 600 秒
 35 | 
 36 | #### 返回值
 37 | 
 38 | 返回一个包含识别结果的字典，结构如下：
 39 | 
 40 | 成功时：
 41 | ```json
 42 | {
 43 |   "filename": "原始文件名",
 44 |   "text": "完整的识别文本",
 45 |   "language": "检测到的语言",
 46 |   "segments": [
 47 |     {
 48 |       "spk_id": "说话人ID",
 49 |       "sentence": "识别的句子",
 50 |       "start_time": 起始时间（毫秒）,
 51 |       "end_time": 结束时间（毫秒）
 52 |     }
 53 |   ],
 54 |   "status": "success",
 55 |   "task_id": "任务ID"
 56 | }
 57 | ```
 58 | 
 59 | 失败时：
 60 | ```json
 61 | {
 62 |   "status": "error",
 63 |   "error": "错误描述",
 64 |   "...": "其他错误相关信息"
 65 | }
 66 | ```
 67 | 
 68 | ### 辅助函数
 69 | 
 70 | #### `initialize_dashscope_client(api_key: str) -> None`
 71 | 
 72 | 初始化 DashScope 客户端，设置 API 密钥。
 73 | 
 74 | #### `async_transcribe_audio(file_urls: List[str], model: str = MODEL_NAME, language_hints: Optional[List[str]] = None) -> Optional[str]`
 75 | 
 76 | 提交异步转录任务，返回任务 ID。
 77 | 
 78 | #### `get_transcription_status(task_id: str, wait_timeout: int = 0) -> Optional[Dict]`
 79 | 
 80 | 获取转录任务的状态。
 81 | 
 82 | #### `_parse_transcription_result(result: Dict) -> Optional[List[Dict]]`
 83 | 
 84 | 解析 DashScope 转录结果为标准格式。
 85 | 
 86 | ## 工作流程
 87 | 
 88 | 1. 初始化 DashScope 客户端
 89 | 2. 提交异步转录任务
 90 | 3. 轮询任务状态直到完成或超时
 91 | 4. 获取转录结果
 92 | 5. 解析并规范化识别结果
 93 | 6. 检测文本语言
 94 | 7. 返回标准化的结果
 95 | 
 96 | ## 依赖项
 97 | 
 98 | - dashscope: 阿里云 DashScope SDK
 99 | - requests: 用于获取转录结果
100 | - segment_normalizer: 用于结果规范化处理
101 | - utils: 用于语言检测
102 | 
103 | ## 异常处理
104 | 
105 | 模块具有完善的异常处理机制：
106 | 
107 | - 网络异常处理
108 | - API 调用失败处理
109 | - 超时处理
110 | - 结果解析异常处理
111 | - 任务失败状态处理
112 | 
113 | 所有异常都会被捕获并转化为标准错误响应格式返回。


--------------------------------------------------------------------------------