├── backend ├── db │ ├── __init__.py │ ├── job_store.py │ ├── transcript_crud.py │ ├── transcript_summary_crud.py │ └── transcript_translation_crud.py ├── ReAct │ ├── tools │ │ └── __init__.py │ ├── chat_tools_config.json │ ├── __init__.py │ ├── models.py │ ├── fastmcp_client_example.py │ ├── utils.py │ ├── test_chat_agent.py │ ├── tools_server.py │ ├── test_chat_request.py │ ├── action_parser.py │ ├── chat_prompt_builder.py │ ├── actions.py │ └── example.py ├── queues │ ├── __init__.py │ ├── tasks.py │ ├── __main__.py │ ├── tasks │ │ ├── __init__.py │ │ ├── progress_utils.py │ │ └── knowledge_base_stage.py │ └── worker_launcher.py ├── utils │ ├── __init__.py │ └── typing_defs.py ├── __init__.py ├── services │ ├── __init__.py │ ├── docs │ │ ├── embedding_文件名增强设计文档.md │ │ └── 多视频聊天服务调试经验.md │ ├── embedding_litellm_example.py │ ├── example_chat_service.py │ ├── upload_service.py │ └── chat_knowledge_service.py ├── text_process │ ├── __init__.py │ ├── translate │ │ └── __init__.py │ ├── docs │ │ ├── 翻译服务性能测试报告.md │ │ └── ChatGPT翻译技巧说明.md │ └── translate_batch_service.py ├── media_processing │ ├── audio │ │ ├── __init__.py │ │ ├── download │ │ │ ├── __init__.py │ │ │ └── xiaoyuzhou │ │ │ │ └── __init__.py │ │ └── local │ │ │ ├── __init__.py │ │ │ └── upload_handler.py │ ├── video │ │ ├── __init__.py │ │ ├── download │ │ │ ├── __init__.py │ │ │ ├── bilibili │ │ │ │ ├── __init__.py │ │ │ │ └── test_login_handler.py │ │ │ └── youtube │ │ │ │ ├── __init__.py │ │ │ │ ├── docs │ │ │ │ └── 下载问题修复记录.md │ │ │ │ ├── example.py │ │ │ │ └── test_login_handler.py │ │ └── local │ │ │ └── __init__.py │ ├── __init__.py │ ├── docs │ │ └── downloader_factory_design.md │ └── upload_handler.py ├── common_interfaces.py ├── routers │ ├── chat │ │ ├── __init__.py │ │ └── summarize_router.py │ └── __init__.py ├── main.py ├── requirements.txt ├── tests │ ├── test_asr_sentence_segments.py │ ├── test_audio2text.py │ ├── test_download_vedio.py │ ├── test_SenseVoiceSmall2.py │ ├── 
test_paraformer_a2t.py │ ├── test_SenseVoiceSmall.py │ ├── test_summarize.py │ ├── test_multi_platform_downloader.py │ ├── test_stream_translate.py │ ├── download_media_file.py │ └── test_download_media.py ├── schemas.py ├── .env.example ├── llm_test.py ├── Dockerfile ├── docker-compose.yml ├── app.py └── test_litellm.py ├── ASRBackend ├── tests │ ├── __init__.py │ └── test_asr_segments.py ├── supabase_utils │ ├── example_tests │ │ ├── test.txt │ │ └── test_upload_file.py │ ├── test_supabase_upload.py │ └── docs │ │ └── supabase_upload_design.md ├── __init__.py ├── .dockerignore ├── services │ └── __init__.py ├── routers │ └── __init__.py ├── asr_functions │ ├── utils │ │ ├── __init__.py │ │ └── language_utils.py │ ├── __init__.py │ └── docs │ │ ├── asr_sentence_segments设计文档.md │ │ └── dashscope_paraformer_v2_transcription设计文档.md ├── .gitignore ├── requirements-cloud.txt ├── docker-compose.cloud.yml ├── docker-compose.local.yml ├── requirements-local.txt ├── Dockerfile.cloud ├── Dockerfile.local ├── .env.example └── main.py ├── frontend ├── src │ ├── components │ │ ├── LeftPanel │ │ │ ├── index.ts │ │ │ ├── docs │ │ │ │ └── 组件设计说明.md │ │ │ ├── hooks.tsx │ │ │ └── LeftPanel.tsx │ │ ├── RightPanel │ │ │ ├── index.ts │ │ │ ├── types.ts │ │ │ ├── Chat │ │ │ │ ├── index.ts │ │ │ │ ├── ChatToolbar.tsx │ │ │ │ ├── MessageInput.tsx │ │ │ │ └── docs │ │ │ │ │ └── 消息列表排版设计说明.md │ │ │ ├── hooks │ │ │ │ ├── index.ts │ │ │ │ ├── useSegmentHandlers.ts │ │ │ │ ├── useTranslationLanguages.ts │ │ │ │ ├── useSearchHandlers.ts │ │ │ │ └── useLanguageSwitch.ts │ │ │ ├── docs │ │ │ │ ├── 组件设计说明.md │ │ │ │ └── 翻译对话框设计说明.md │ │ │ ├── LanguageSwitcher.tsx │ │ │ └── TranscriptTab.tsx │ │ ├── ui │ │ │ ├── label.tsx │ │ │ ├── separator.tsx │ │ │ ├── textarea.tsx │ │ │ ├── input.tsx │ │ │ ├── docs │ │ │ │ └── 组件设计说明.md │ │ │ ├── switch.tsx │ │ │ ├── scroll-area.tsx │ │ │ ├── resizable.tsx │ │ │ ├── tabs.tsx │ │ │ ├── button.tsx │ │ │ └── card.tsx │ │ ├── MarkdownRenderer.tsx │ │ └── docs │ │ 
│ ├── ProgressCard设计说明.md │ │ │ ├── VideoPlayer设计说明.md │ │ │ └── FileUploader设计说明.md │ ├── App.tsx │ ├── hooks │ │ ├── index.ts │ │ ├── docs │ │ │ └── 设计说明.md │ │ └── useVideoSync.ts │ ├── lib │ │ ├── utils.ts │ │ └── docs │ │ │ └── 工具库设计说明.md │ ├── utils │ │ ├── pendingUrl.ts │ │ ├── message.ts │ │ └── language-detector.ts │ ├── types │ │ ├── react-markdown.d.ts │ │ └── global.d.ts │ ├── main.tsx │ ├── routes.tsx │ ├── services │ │ ├── progressService.ts │ │ ├── downloadService.ts │ │ ├── transcriptService.ts │ │ ├── thumbnailService.ts │ │ └── summaryService.ts │ ├── HomePage │ │ └── docs │ │ │ ├── 组件设计说明.md │ │ │ └── 主页组件设计说明.md │ ├── App.css │ ├── assets │ │ └── docs │ │ │ └── 资源管理说明.md │ └── features │ │ └── app │ │ ├── docs │ │ ├── App组件设计说明.md │ │ ├── 滚动条问题解决记录.md │ │ └── 文稿Tab页滚动问题解决记录.md │ │ └── components │ │ └── UploadDialog.tsx ├── public │ ├── logo.png │ ├── human.png │ └── chatbot.png ├── tsconfig.json ├── .gitignore ├── Dockerfile ├── index.html ├── components.json ├── docker-compose.yml ├── tsconfig.node.json ├── tsconfig.app.json ├── eslint.config.js ├── vite.config.ts └── package.json ├── example_tests ├── __init__.py └── 音频转视频测试.html ├── docs ├── mermaid图汇集 │ ├── ARM设备直接构建流程图.md │ ├── ARM设备交叉构建流程图.md │ ├── ReAct设计文档-记忆管理机制图.md │ ├── embedding文件名增强设计文档-流程图.md │ ├── 翻译服务设计文档-翻译流程图.md │ ├── ASR_Backend启动流程概览图.md │ ├── ASR_MODE选择模式图.md │ ├── 知识库结构设计文档-整体架构图.md │ ├── ReAct设计文档-工作流程图.md │ ├── 知识库结构设计文档-数据流图.md │ ├── ReAct设计文档-工具管理流程图.md │ ├── 前端架构设计文档-文件上传流程图.md │ ├── 前端架构设计文档-聊天交互流程图.md │ ├── 前端架构设计文档-转写结果显示流程图.md │ ├── 前端拖拽侧边栏功能设计文档-拖拽流程图.md │ ├── 知识库使用设计文档-使用流程图.md │ └── ReAct设计文档-类层次结构图.md ├── 测试示例.md └── api_文档导航.md ├── main.ipynb ├── .dockerignore └── lefthook.yml /backend/db/__init__.py: -------------------------------------------------------------------------------- 1 | # db package -------------------------------------------------------------------------------- /backend/ReAct/tools/__init__.py: 
-------------------------------------------------------------------------------- 1 | # tools 工具模块 -------------------------------------------------------------------------------- /ASRBackend/tests/__init__.py: -------------------------------------------------------------------------------- 1 | """测试模块初始化""" 2 | -------------------------------------------------------------------------------- /ASRBackend/supabase_utils/example_tests/test.txt: -------------------------------------------------------------------------------- 1 | 测试使用的文件。 -------------------------------------------------------------------------------- /backend/ReAct/chat_tools_config.json: -------------------------------------------------------------------------------- 1 | ["knowledge_retrieval"] -------------------------------------------------------------------------------- /backend/queues/__init__.py: -------------------------------------------------------------------------------- 1 | # -*- coding: utf-8 -*- 2 | """任务队列模块""" 3 | -------------------------------------------------------------------------------- /backend/utils/__init__.py: -------------------------------------------------------------------------------- 1 | # -*- coding: utf-8 -*- 2 | # utils package 3 | -------------------------------------------------------------------------------- /ASRBackend/__init__.py: -------------------------------------------------------------------------------- 1 | """ASRBackend 模块初始化""" 2 | __version__ = "0.1.0" 3 | -------------------------------------------------------------------------------- /backend/__init__.py: -------------------------------------------------------------------------------- 1 | # Make backend a regular package for reliable imports. 
2 | -------------------------------------------------------------------------------- /backend/services/__init__.py: -------------------------------------------------------------------------------- 1 | # -*- coding: utf-8 -*- 2 | # services package 3 | -------------------------------------------------------------------------------- /backend/text_process/__init__.py: -------------------------------------------------------------------------------- 1 | # -*- coding: utf-8 -*- 2 | # text_process package 3 | -------------------------------------------------------------------------------- /frontend/src/components/LeftPanel/index.ts: -------------------------------------------------------------------------------- 1 | export { default } from "./LeftPanel" 2 | -------------------------------------------------------------------------------- /frontend/src/components/RightPanel/index.ts: -------------------------------------------------------------------------------- 1 | export { default } from "./RightPanel" 2 | -------------------------------------------------------------------------------- /ASRBackend/.dockerignore: -------------------------------------------------------------------------------- 1 | __pycache__/ 2 | *.py[cod] 3 | datas/ 4 | .env 5 | results/ 6 | -------------------------------------------------------------------------------- /ASRBackend/services/__init__.py: -------------------------------------------------------------------------------- 1 | """ASR Backend 服务模块 2 | 3 | 包含业务逻辑服务层的实现。 4 | """ 5 | -------------------------------------------------------------------------------- /example_tests/__init__.py: -------------------------------------------------------------------------------- 1 | # -*- coding: utf-8 -*- 2 | """ 3 | example_tests 包初始化文件 4 | """ -------------------------------------------------------------------------------- /ASRBackend/routers/__init__.py: -------------------------------------------------------------------------------- 1 | """ASR Backend 
路由模块 2 | 3 | 包含所有 API 路由的定义和注册。 4 | """ 5 | -------------------------------------------------------------------------------- /backend/queues/tasks.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/li-xiu-qi/HearSight/HEAD/backend/queues/tasks.py -------------------------------------------------------------------------------- /frontend/public/logo.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/li-xiu-qi/HearSight/HEAD/frontend/public/logo.png -------------------------------------------------------------------------------- /frontend/public/human.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/li-xiu-qi/HearSight/HEAD/frontend/public/human.png -------------------------------------------------------------------------------- /backend/media_processing/audio/__init__.py: -------------------------------------------------------------------------------- 1 | # -*- coding: utf-8 -*- 2 | """音频处理模块""" 3 | 4 | __all__ = [] 5 | -------------------------------------------------------------------------------- /backend/media_processing/video/__init__.py: -------------------------------------------------------------------------------- 1 | # -*- coding: utf-8 -*- 2 | """视频处理模块""" 3 | 4 | __all__ = [] 5 | -------------------------------------------------------------------------------- /frontend/public/chatbot.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/li-xiu-qi/HearSight/HEAD/frontend/public/chatbot.png -------------------------------------------------------------------------------- /backend/media_processing/audio/download/__init__.py: -------------------------------------------------------------------------------- 1 | # -*- coding: utf-8 -*- 2 | """音频下载模块""" 3 | 4 | __all__ = [] 5 | 
-------------------------------------------------------------------------------- /backend/media_processing/video/download/__init__.py: -------------------------------------------------------------------------------- 1 | # -*- coding: utf-8 -*- 2 | """视频下载模块""" 3 | 4 | __all__ = [] 5 | -------------------------------------------------------------------------------- /ASRBackend/asr_functions/utils/__init__.py: -------------------------------------------------------------------------------- 1 | """语言检测工具模块""" 2 | 3 | from .language_utils import detect_language 4 | 5 | __all__ = ["detect_language"] 6 | -------------------------------------------------------------------------------- /frontend/src/App.tsx: -------------------------------------------------------------------------------- 1 | import AppPage from "@/features/app/AppPage" 2 | 3 | function App() { 4 | return 5 | } 6 | 7 | export default App 8 | -------------------------------------------------------------------------------- /frontend/src/hooks/index.ts: -------------------------------------------------------------------------------- 1 | export { useUrlHandler } from './useUrlHandler' 2 | export { useDataLoader } from './useDataLoader' 3 | export { useVideoSync } from './useVideoSync' 4 | -------------------------------------------------------------------------------- /backend/queues/__main__.py: -------------------------------------------------------------------------------- 1 | # -*- coding: utf-8 -*- 2 | """Celery worker模块入口""" 3 | 4 | from backend.queues.worker_launcher import main 5 | 6 | if __name__ == "__main__": 7 | main() 8 | -------------------------------------------------------------------------------- /backend/media_processing/audio/local/__init__.py: -------------------------------------------------------------------------------- 1 | # -*- coding: utf-8 -*- 2 | """本地音频处理模块""" 3 | 4 | from .upload_handler import process_uploaded_audio 5 | 6 | __all__ = ['process_uploaded_audio'] 7 | 
-------------------------------------------------------------------------------- /backend/media_processing/video/local/__init__.py: -------------------------------------------------------------------------------- 1 | # -*- coding: utf-8 -*- 2 | """本地视频处理模块""" 3 | 4 | from .upload_handler import process_uploaded_video 5 | 6 | __all__ = ['process_uploaded_video'] 7 | -------------------------------------------------------------------------------- /frontend/src/lib/utils.ts: -------------------------------------------------------------------------------- 1 | import { clsx, type ClassValue } from "clsx" 2 | import { twMerge } from "tailwind-merge" 3 | 4 | export function cn(...inputs: ClassValue[]) { 5 | return twMerge(clsx(inputs)) 6 | } 7 | -------------------------------------------------------------------------------- /backend/text_process/translate/__init__.py: -------------------------------------------------------------------------------- 1 | # -*- coding: utf-8 -*- 2 | """ 3 | 翻译模块:支持分批翻译、上下文感知、JSON格式化输出。 4 | """ 5 | from .core import translate_segments_async 6 | 7 | __all__ = ["translate_segments_async"] 8 | -------------------------------------------------------------------------------- /docs/mermaid图汇集/ARM设备直接构建流程图.md: -------------------------------------------------------------------------------- 1 | # ARM设备直接构建流程图 2 | 3 | ```mermaid 4 | flowchart TD 5 | A[在ARM设备上安装Docker] --> B[传输项目代码] 6 | B --> C[配置环境变量] 7 | C --> D[运行docker-compose build] 8 | D --> E[运行docker-compose up] 9 | ``` 10 | -------------------------------------------------------------------------------- /backend/queues/tasks/__init__.py: -------------------------------------------------------------------------------- 1 | # -*- coding: utf-8 -*- 2 | """Celery异步任务定义 - 模块化版本""" 3 | 4 | # 导入分解后的任务模块 5 | from .process_job_task import process_job_task, knowledge_retrieval_task 6 | 7 | __all__ = ["process_job_task", "knowledge_retrieval_task"] 
-------------------------------------------------------------------------------- /docs/mermaid图汇集/ARM设备交叉构建流程图.md: -------------------------------------------------------------------------------- 1 | # ARM设备交叉构建流程图 2 | 3 | ```mermaid 4 | flowchart TD 5 | A[启用Docker Buildx] --> B[检查Dockerfile兼容性] 6 | B --> C[构建多架构镜像] 7 | C --> D[保存镜像为文件] 8 | D --> E[传输到ARM设备] 9 | E --> F[加载并运行镜像] 10 | ``` 11 | -------------------------------------------------------------------------------- /docs/mermaid图汇集/ReAct设计文档-记忆管理机制图.md: -------------------------------------------------------------------------------- 1 | # ReAct设计文档 - 记忆管理机制图 2 | 3 | ```mermaid 4 | graph TD 5 | A[新消息] --> B{是否需要总结} 6 | B -->|是| C[生成总结] 7 | B -->|否| D[添加到缓冲区] 8 | C --> E[压缩消息缓冲区] 9 | E --> F[更新上下文] 10 | D --> F 11 | F --> G[继续对话] 12 | ``` 13 | -------------------------------------------------------------------------------- /docs/mermaid图汇集/embedding文件名增强设计文档-流程图.md: -------------------------------------------------------------------------------- 1 | # embedding文件名增强设计文档 - 流程图 2 | 3 | ```mermaid 4 | graph TD 5 | A[接收segments和metadata] --> B[分组为chunks] 6 | B --> C[获取文件名] 7 | C --> D[生成增强chunk_text] 8 | D --> E[计算embedding] 9 | E --> F[存储到ChromaDB] 10 | ``` 11 | -------------------------------------------------------------------------------- /backend/media_processing/video/download/bilibili/__init__.py: -------------------------------------------------------------------------------- 1 | # -*- coding: utf-8 -*- 2 | """b站视频下载模块""" 3 | 4 | from .bilibili_downloader import BilibiliDownloader, get_downloader, download_bilibili_video 5 | 6 | __all__ = ['BilibiliDownloader', 'get_downloader', 'download_bilibili_video'] 7 | -------------------------------------------------------------------------------- /backend/media_processing/video/download/youtube/__init__.py: -------------------------------------------------------------------------------- 1 | # -*- coding: utf-8 -*- 2 | """YouTube视频下载模块""" 3 | 4 | from 
.youtube_downloader import YoutubeDownloader, get_downloader, download_youtube_video 5 | 6 | __all__ = ['YoutubeDownloader', 'get_downloader', 'download_youtube_video'] 7 | -------------------------------------------------------------------------------- /frontend/src/components/RightPanel/types.ts: -------------------------------------------------------------------------------- 1 | export interface TranslateProgress { 2 | status: 'idle' | 'detecting' | 'translating' | 'done' | 'error' 3 | progress: number 4 | message: string 5 | detectionInfo?: string 6 | targetLanguage?: string 7 | newTranscriptId?: number 8 | } 9 | -------------------------------------------------------------------------------- /frontend/tsconfig.json: -------------------------------------------------------------------------------- 1 | { 2 | "files": [], 3 | "references": [ 4 | { "path": "./tsconfig.app.json" }, 5 | { "path": "./tsconfig.node.json" } 6 | ], 7 | "compilerOptions": { 8 | "baseUrl": ".", 9 | "paths": { 10 | "@/*": ["./src/*"] 11 | } 12 | } 13 | } 14 | -------------------------------------------------------------------------------- /backend/db/job_store.py: -------------------------------------------------------------------------------- 1 | # -*- coding: utf-8 -*- 2 | """任务存储模块,统一导出所有任务相关函数""" 3 | 4 | from .job_base_store import create_job, get_job 5 | from .job_status_store import update_job_status, update_job_celery_task_id 6 | from .job_result_store import finish_job_success, finish_job_failed, update_job_result -------------------------------------------------------------------------------- /frontend/src/utils/pendingUrl.ts: -------------------------------------------------------------------------------- 1 | let pendingUrl: string | null = null 2 | 3 | export const setPendingUrl = (url: string | null) => { 4 | pendingUrl = url 5 | } 6 | 7 | export const getPendingUrl = () => { 8 | const url = pendingUrl 9 | pendingUrl = null 10 | return url 11 | } 
-------------------------------------------------------------------------------- /main.ipynb: -------------------------------------------------------------------------------- 1 | { 2 | "cells": [ 3 | { 4 | "cell_type": "markdown", 5 | "id": "0c9365e3", 6 | "metadata": {}, 7 | "source": [] 8 | } 9 | ], 10 | "metadata": { 11 | "language_info": { 12 | "name": "python" 13 | } 14 | }, 15 | "nbformat": 4, 16 | "nbformat_minor": 5 17 | } 18 | -------------------------------------------------------------------------------- /backend/media_processing/audio/download/xiaoyuzhou/__init__.py: -------------------------------------------------------------------------------- 1 | # -*- coding: utf-8 -*- 2 | """小宇宙下载模块""" 3 | 4 | from .xiaoyuzhou_downloader import XiaoyuzhouDownloader, get_downloader, download_xiaoyuzhou_episode 5 | 6 | __all__ = ['XiaoyuzhouDownloader', 'get_downloader', 'download_xiaoyuzhou_episode'] 7 | -------------------------------------------------------------------------------- /docs/mermaid图汇集/翻译服务设计文档-翻译流程图.md: -------------------------------------------------------------------------------- 1 | # 翻译服务设计文档 - 翻译流程图 2 | 3 | ```mermaid 4 | graph TD 5 | A[输入句子] --> B[语言验证] 6 | B --> C[分批处理] 7 | C --> D[第一步:直译] 8 | D --> E[解析直译结果] 9 | E --> F[第二步:意译] 10 | F --> G[解析意译结果] 11 | G --> H[质量检查] 12 | H --> I[结果整合] 13 | I --> J[输出翻译] 14 | ``` 15 | -------------------------------------------------------------------------------- /backend/text_process/docs/翻译服务性能测试报告.md: -------------------------------------------------------------------------------- 1 | # 翻译服务性能测试报告 2 | 3 | ## 测试概述 4 | 5 | 两步翻译系统,使用DeepSeek-V3.2-Exp模型,基于translate目录下的example.py进行测试。 6 | 7 | ## 测试结果 8 | 9 | - 英文→中文:3句,耗时约11秒,平均3.7秒/句 10 | - 中文→英文:3句,耗时约10秒,平均3.3秒/句 11 | - 性能测试:5句,耗时13.16秒,平均2.63秒/句 12 | 13 | ## 结论 14 | 15 | 两步翻译系统性能良好,平均2.5-3.7秒/句,质量高,适合使用。 16 | -------------------------------------------------------------------------------- /docs/mermaid图汇集/ASR_Backend启动流程概览图.md: 
-------------------------------------------------------------------------------- 1 | # ASR Backend 启动流程概览图 2 | 3 | ```mermaid 4 | flowchart TD 5 | A[前置准备] --> B{选择运行模式} 6 | B -->|云端模式| C[获取API Key] 7 | B -->|本地模式| D[配置本地环境] 8 | C --> E[配置环境变量] 9 | D --> E 10 | E --> F[安装依赖] 11 | F --> G[启动服务] 12 | G --> H[验证服务] 13 | H --> I[开始使用] 14 | ``` 15 | -------------------------------------------------------------------------------- /docs/mermaid图汇集/ASR_MODE选择模式图.md: -------------------------------------------------------------------------------- 1 | # ASR_MODE 选择模式图 2 | 3 | ```mermaid 4 | graph TD 5 | A[ASR_MODE] --> B{选择模式} 6 | B -->|cloud| C[云端模式] 7 | B -->|local| D[本地模式] 8 | 9 | C --> E[轻量级部署] 10 | C --> F[需要API Key] 11 | C --> G[支持多语言] 12 | 13 | D --> H[完全离线] 14 | D --> I[需要GPU] 15 | D --> J[单语言支持] 16 | ``` 17 | -------------------------------------------------------------------------------- /docs/mermaid图汇集/知识库结构设计文档-整体架构图.md: -------------------------------------------------------------------------------- 1 | # 知识库结构设计文档 - 整体架构图 2 | 3 | ```mermaid 4 | graph TB 5 | A[视频转写] --> B[句子段数组] 6 | B --> C[ChatService] 7 | C --> D[KnowledgeBaseService] 8 | D --> E[ChromaDB] 9 | 10 | F[用户查询] --> C 11 | C --> G[LLM API] 12 | G --> H[回答] 13 | 14 | I[转写删除] --> D 15 | D --> J[清理向量数据] 16 | ``` 17 | -------------------------------------------------------------------------------- /frontend/.gitignore: -------------------------------------------------------------------------------- 1 | # Logs 2 | logs 3 | *.log 4 | npm-debug.log* 5 | yarn-debug.log* 6 | yarn-error.log* 7 | pnpm-debug.log* 8 | lerna-debug.log* 9 | 10 | node_modules 11 | dist 12 | dist-ssr 13 | *.local 14 | 15 | # Editor directories and files 16 | .vscode/* 17 | !.vscode/extensions.json 18 | .idea 19 | .DS_Store 20 | *.suo 21 | *.ntvs* 22 | *.njsproj 23 | *.sln 24 | *.sw? 
25 | -------------------------------------------------------------------------------- /frontend/Dockerfile: -------------------------------------------------------------------------------- 1 | FROM node:22-alpine 2 | 3 | WORKDIR /app 4 | 5 | # Copy package files 6 | COPY package*.json ./ 7 | 8 | # Install dependencies 9 | RUN npm ci 10 | 11 | # Copy source code 12 | COPY . . 13 | 14 | # Expose port 5173 for Vite dev server 15 | EXPOSE 5173 16 | 17 | # Start Vite dev server 18 | CMD ["npm", "run", "dev", "--", "--host", "0.0.0.0", "--port", "5173"] -------------------------------------------------------------------------------- /frontend/src/components/RightPanel/Chat/index.ts: -------------------------------------------------------------------------------- 1 | export { default as MessageList } from './MessageList' 2 | export { default as VideoSelector } from './VideoSelector' 3 | export { default as ChatToolbar } from './ChatToolbar' 4 | export { default as MessageInput } from './MessageInput' 5 | export { default as ChatSessionManager } from './ChatSessionManager' 6 | export { default as ChatView } from './ChatView' -------------------------------------------------------------------------------- /docs/mermaid图汇集/ReAct设计文档-工作流程图.md: -------------------------------------------------------------------------------- 1 | # ReAct设计文档 - 工作流程图 2 | 3 | ```mermaid 4 | graph TD 5 | A[用户输入问题] --> B[初始化推理上下文] 6 | B --> C[构建系统提示] 7 | C --> D[进入推理循环] 8 | D --> E[调用LLM生成响应] 9 | E --> F{解析响应} 10 | F --> G[是最终答案?] 
11 | G -->|是| H[返回结果] 12 | G -->|否| I[执行工具] 13 | I --> J[获取观察结果] 14 | J --> K[反馈到LLM] 15 | K --> D 16 | H --> L[结束] 17 | ``` 18 | -------------------------------------------------------------------------------- /frontend/src/types/react-markdown.d.ts: -------------------------------------------------------------------------------- 1 | declare module 'react-markdown' { 2 | import React from 'react'; 3 | 4 | interface ReactMarkdownProps { 5 | children: string; 6 | className?: string; 7 | components?: Record>; 8 | [key: string]: unknown; 9 | } 10 | 11 | const ReactMarkdown: React.FC; 12 | export default ReactMarkdown; 13 | } -------------------------------------------------------------------------------- /docs/mermaid图汇集/知识库结构设计文档-数据流图.md: -------------------------------------------------------------------------------- 1 | # 知识库结构设计文档 - 数据流图 2 | 3 | ```mermaid 4 | flowchart TD 5 | A[输入segments] --> B{transcript_id提供?} 6 | B -->|否| C[计算tokens] 7 | B -->|是| D[从DB获取完整转录稿] 8 | D --> E[计算完整tokens] 9 | E --> F{完整tokens ≤ 阈值?} 10 | F -->|是| G[使用完整转录稿] 11 | F -->|否| H[知识库检索分块] 12 | C --> I[构建提示词] 13 | G --> I 14 | H --> I 15 | I --> J[调用LLM] 16 | J --> K[返回回答] 17 | ``` 18 | -------------------------------------------------------------------------------- /frontend/index.html: -------------------------------------------------------------------------------- 1 | 2 | 3 | 4 | 5 | 6 | 7 | HearSight:智能视频内容分析与理解平台 8 | 9 | 10 |
11 | 12 | 13 | 14 | -------------------------------------------------------------------------------- /ASRBackend/.gitignore: -------------------------------------------------------------------------------- 1 | __pycache__/ 2 | *.py[cod] 3 | *$py.class 4 | *.so 5 | .Python 6 | build/ 7 | develop-eggs/ 8 | dist/ 9 | downloads/ 10 | eggs/ 11 | .eggs/ 12 | lib/ 13 | lib64/ 14 | parts/ 15 | sdist/ 16 | var/ 17 | wheels/ 18 | *.egg-info/ 19 | .installed.cfg 20 | *.egg 21 | .env 22 | .venv 23 | env/ 24 | venv/ 25 | ENV/ 26 | datas/ 27 | results/ 28 | *.log 29 | .pytest_cache/ 30 | .coverage 31 | htmlcov/ 32 | .DS_Store 33 | -------------------------------------------------------------------------------- /ASRBackend/requirements-cloud.txt: -------------------------------------------------------------------------------- 1 | # ASR Backend - 云端版本 2 | # 轻量级依赖,仅支持阿里云 DashScope API 调用 3 | # 不包含任何本地模型和 torch,节省空间 4 | 5 | fastapi 6 | uvicorn 7 | pydantic 8 | pydantic-settings 9 | python-multipart 10 | pytest 11 | pytest-asyncio 12 | httpx 13 | requests 14 | 15 | # 云端 API 支持 16 | dashscope>=1.3.0 17 | 18 | # Supabase 支持 19 | supabase 20 | 21 | # 可选:如果需要保留语言检测功能 22 | langdetect 23 | 24 | # 环境变量支持 25 | python-dotenv>=1.0.0 26 | -------------------------------------------------------------------------------- /frontend/src/main.tsx: -------------------------------------------------------------------------------- 1 | import { StrictMode } from "react" 2 | import { createRoot } from "react-dom/client" 3 | import { Toaster } from "sonner" 4 | import "./index.css" 5 | import Routes from "./routes.tsx" 6 | 7 | createRoot(document.getElementById("root")!).render( 8 | 9 | 10 | 15 | , 16 | ) 17 | -------------------------------------------------------------------------------- /docs/mermaid图汇集/ReAct设计文档-工具管理流程图.md: -------------------------------------------------------------------------------- 1 | # ReAct设计文档 - 工具管理流程图 2 | 3 | ```mermaid 4 | sequenceDiagram 5 | participant A as Agent 6 | 
participant TM as ToolManager 7 | participant C as FastMCP Client 8 | participant T as Tools Server 9 | 10 | A->>TM: 获取可用工具 11 | TM->>C: 连接工具后端 12 | C->>T: 获取工具列表 13 | T-->>C: 工具列表 14 | C-->>TM: 工具列表 15 | TM->>TM: 过滤允许工具 16 | TM->>TM: 生成工具描述 17 | TM-->>A: 可用工具和描述 18 | ``` 19 | -------------------------------------------------------------------------------- /frontend/src/routes.tsx: -------------------------------------------------------------------------------- 1 | import { createBrowserRouter, RouterProvider } from "react-router-dom" 2 | import HomePage from "./HomePage/HomePage" 3 | import App from "./App" 4 | 5 | const router = createBrowserRouter([ 6 | { 7 | path: "/", 8 | element: , 9 | }, 10 | { 11 | path: "/app", 12 | element: , 13 | }, 14 | ]) 15 | 16 | export default function Routes() { 17 | return 18 | } 19 | -------------------------------------------------------------------------------- /frontend/src/components/RightPanel/hooks/index.ts: -------------------------------------------------------------------------------- 1 | export { useScrollHandlers } from './useScrollHandlers' 2 | export { useSegmentHandlers } from './useSegmentHandlers' 3 | export { useSearchHandlers } from './useSearchHandlers' 4 | export { useSummaryHandlers } from './useSummaryHandlers' 5 | export { useTranslateHandlers } from './useTranslateHandlers' 6 | export { useLanguageSwitch } from './useLanguageSwitch' 7 | export { useRightPanelController } from './useRightPanelController' 8 | -------------------------------------------------------------------------------- /docs/mermaid图汇集/前端架构设计文档-文件上传流程图.md: -------------------------------------------------------------------------------- 1 | # 前端架构设计文档 - 文件上传流程图 2 | 3 | ```mermaid 4 | sequenceDiagram 5 | participant User 6 | participant FileUploader 7 | participant UploadService 8 | participant Backend 9 | 10 | User->>FileUploader: 选择文件 11 | FileUploader->>UploadService: 调用上传接口 12 | UploadService->>Backend: 发送文件数据 13 | 
Backend-->>UploadService: 返回上传结果 14 | UploadService-->>FileUploader: 更新上传状态 15 | FileUploader-->>User: 显示上传结果 16 | ``` 17 | -------------------------------------------------------------------------------- /frontend/src/utils/message.ts: -------------------------------------------------------------------------------- 1 | import { toast } from "sonner" 2 | 3 | export const message = { 4 | success: (content: string) => { 5 | toast.success(content, { dismissible: true }) 6 | }, 7 | error: (content: string) => { 8 | toast.error(content, { dismissible: true }) 9 | }, 10 | warning: (content: string) => { 11 | toast.warning(content, { dismissible: true }) 12 | }, 13 | info: (content: string) => { 14 | toast.info(content, { dismissible: true }) 15 | }, 16 | } 17 | -------------------------------------------------------------------------------- /ASRBackend/asr_functions/__init__.py: -------------------------------------------------------------------------------- 1 | """ASR functions package initializer. 2 | 3 | Avoid importing heavy/local-specific modules at package import time so 4 | that submodule-level imports (e.g. asr_functions.asr_sentence_segments) 5 | do not accidentally trigger imports for all implementations (local/cloud). 6 | Modules should import submodules directly, e.g.: 7 | from asr_functions.asr_sentence_segments import process 8 | or rely on providers to import the appropriate submodule lazily. 
9 | """ 10 | 11 | __all__ = [] 12 | -------------------------------------------------------------------------------- /docs/mermaid图汇集/前端架构设计文档-聊天交互流程图.md: -------------------------------------------------------------------------------- 1 | # 前端架构设计文档 - 聊天交互流程图 2 | 3 | ```mermaid 4 | sequenceDiagram 5 | participant User 6 | participant ChatView 7 | participant ChatService 8 | participant Backend 9 | participant Store 10 | 11 | User->>ChatView: 输入消息并发送 12 | ChatView->>ChatService: 发起聊天请求 13 | ChatService->>Backend: POST /chat/stream 14 | Backend-->>ChatService: 流式返回响应 15 | ChatService->>Store: 更新聊天状态 16 | Store-->>ChatView: 提供聊天数据 17 | ChatView-->>User: 显示AI回复 18 | ``` 19 | -------------------------------------------------------------------------------- /backend/media_processing/__init__.py: -------------------------------------------------------------------------------- 1 | # -*- coding: utf-8 -*- 2 | """媒体处理模块""" 3 | 4 | from backend.common_interfaces import DownloadResult 5 | from .upload_handler import process_uploaded_file 6 | from .audio.local import process_uploaded_audio 7 | from .video.local import process_uploaded_video 8 | from .downloader_factory import MediaDownloaderFactory 9 | 10 | __all__ = [ 11 | 'DownloadResult', 12 | 'MediaDownloaderFactory', 13 | 'process_uploaded_file', 14 | 'process_uploaded_audio', 15 | 'process_uploaded_video', 16 | ] 17 | -------------------------------------------------------------------------------- /frontend/components.json: -------------------------------------------------------------------------------- 1 | { 2 | "$schema": "https://ui.shadcn.com/schema.json", 3 | "style": "new-york", 4 | "rsc": false, 5 | "tsx": true, 6 | "tailwind": { 7 | "config": "", 8 | "css": "src/index.css", 9 | "baseColor": "neutral", 10 | "cssVariables": true, 11 | "prefix": "" 12 | }, 13 | "iconLibrary": "lucide", 14 | "aliases": { 15 | "components": "@/components", 16 | "utils": "@/lib/utils", 17 | "ui": "@/components/ui", 18 | "lib": "@/lib", 
19 | "hooks": "@/hooks" 20 | }, 21 | "registries": {} 22 | } 23 | -------------------------------------------------------------------------------- /ASRBackend/docker-compose.cloud.yml: -------------------------------------------------------------------------------- 1 | version: '3.8' 2 | 3 | services: 4 | asr-backend: 5 | build: 6 | context: . 7 | dockerfile: Dockerfile.cloud 8 | container_name: hearsight-asr-backend-cloud 9 | restart: unless-stopped 10 | env_file: .env 11 | ports: 12 | - "${ASR_BACKEND_PORT:-8003}:8003" 13 | environment: 14 | - ASR_MODE=cloud 15 | healthcheck: 16 | test: ["CMD", "curl", "-f", "http://localhost:8003/health"] 17 | interval: 30s 18 | timeout: 10s 19 | retries: 3 20 | start_period: 40s -------------------------------------------------------------------------------- /ASRBackend/docker-compose.local.yml: -------------------------------------------------------------------------------- 1 | version: '3.8' 2 | 3 | services: 4 | asr-backend: 5 | build: 6 | context: . 7 | dockerfile: Dockerfile.local 8 | container_name: hearsight-asr-backend-local 9 | restart: unless-stopped 10 | env_file: .env 11 | ports: 12 | - "${ASR_BACKEND_PORT:-8003}:8003" 13 | environment: 14 | - ASR_MODE=local 15 | healthcheck: 16 | test: ["CMD", "curl", "-f", "http://localhost:8003/health"] 17 | interval: 30s 18 | timeout: 10s 19 | retries: 3 20 | start_period: 40s -------------------------------------------------------------------------------- /frontend/docker-compose.yml: -------------------------------------------------------------------------------- 1 | version: '3.8' 2 | 3 | services: 4 | frontend: 5 | build: 6 | context: . 
@dataclass
class DownloadResult:
    """Result of a media download.

    Every field has a default, so ``DownloadResult()`` is a valid value that
    represents "nothing downloaded" with ``success`` set to ``False``.
    """
    video_path: Optional[str] = None
    audio_path: Optional[str] = None
    title: Optional[str] = None
    duration: Optional[float] = None  # NOTE(review): unit is not shown here (presumably seconds) - confirm
    media_type: Optional[str] = None  # "video", "audio", or "both"
    success: bool = False
    error_message: Optional[str] = None
/**
 * Builds the click handler used for transcript segments.
 *
 * The returned `handleSegmentClick` reports the clicked segment's `index`
 * through `onActiveSegmentChange` and then forwards its `start_time` to
 * `onSeekTo`. The handler is memoised on both callbacks.
 *
 * NOTE(review): `onSeekTo` names its argument `timeMs`; confirm that
 * `Segment.start_time` is expressed in milliseconds.
 */
export const useSegmentHandlers = (
  onActiveSegmentChange: (index: number) => void,
  onSeekTo: (timeMs: number) => void
) => {
  const handleSegmentClick = useCallback(
    ({ index, start_time: startTime }: Segment): void => {
      onActiveSegmentChange(index)
      onSeekTo(startTime)
    },
    [onActiveSegmentChange, onSeekTo]
  )

  return { handleSegmentClick }
}
class Segment(TypedDict, total=False):
    """Sentence-level fragment of a speech transcript (no ``spk_id`` key).

    Declared with ``total=False``, so every key is optional for type checkers.
    """

    index: int
    sentence: str
    start_time: float
    end_time: float
    # NOTE(review): presumably maps a language code to the translated sentence - confirm
    translation: Optional[Dict[str, str]]


class SummaryItem(TypedDict):
    """One summary entry: a topic, its summary text and a start/end time range.

    The TypedDict is total, so all four keys are required.
    """

    topic: str
    summary: str
    start_time: float
    end_time: float
(setPanelSize) 13 | U->>H: 鼠标移动 (pointermove) 14 | H->>P: 实时调整 (onResize) 15 | P->>P: 应用约束 (minSize, maxSize) 16 | P->>S: 同步状态 17 | U->>H: 鼠标释放 (pointerup) 18 | H->>P: 结束调整 (onResizeEnd) 19 | P->>S: 保存到 localStorage (saveLayout) 20 | ``` 21 | -------------------------------------------------------------------------------- /backend/routers/chat/__init__.py: -------------------------------------------------------------------------------- 1 | # -*- coding: utf-8 -*- 2 | """聊天路由模块""" 3 | 4 | from fastapi import APIRouter 5 | 6 | from .summarize_router import router as summarize_router 7 | from .chat_conversation_router import router as chat_conversation_router 8 | from .messages_router import router as messages_router 9 | from .chat_session_router import router as chat_session_router 10 | 11 | 12 | router = APIRouter(tags=["chat"]) 13 | 14 | # 包含所有子路由 15 | router.include_router(summarize_router) 16 | router.include_router(chat_conversation_router) 17 | router.include_router(messages_router) 18 | router.include_router(chat_session_router) -------------------------------------------------------------------------------- /backend/main.py: -------------------------------------------------------------------------------- 1 | # -*- coding: utf-8 -*- 2 | """HearSight 应用入口""" 3 | 4 | from __future__ import annotations 5 | 6 | import sys 7 | import os 8 | 9 | # 添加项目根目录到 sys.path 以支持模块导入 10 | sys.path.insert(0, os.path.dirname(os.path.dirname(__file__))) 11 | 12 | from backend.app import create_app 13 | from backend.startup import initialize_app, get_backend_port 14 | 15 | # 初始化应用 16 | static_dir, db_url = initialize_app() 17 | 18 | # 创建 FastAPI 应用 19 | app = create_app(static_dir, db_url) 20 | 21 | if __name__ == "__main__": 22 | import uvicorn 23 | 24 | port = get_backend_port() 25 | uvicorn.run("main:app", host="0.0.0.0", port=port, reload=False) 26 | -------------------------------------------------------------------------------- /backend/requirements.txt: 
# -*- coding: utf-8 -*-
"""Manual smoke script: run ASR sentence segmentation on one media file.

Usage: ``python test_asr_sentence_segments.py [MEDIA_PATH]``. The result of
``process`` is printed as indented JSON.
"""
import os
import sys
from pathlib import Path

# Three levels up from this file; ``resolve`` keeps it correct for relative __file__.
project_root = Path(__file__).resolve().parent.parent.parent
sys.path.insert(0, str(project_root / "example_tests"))

import json
from asr_sentence_segments import process

# Sample file used when no path is given on the command line.
DEFAULT_AUDIO_PATH = r"C:\Users\ke\Documents\projects\python_projects\HearSight\backend\tests\datas\test.mp4"

if __name__ == "__main__":
    # An optional first argument overrides the developer-specific default path.
    audio_path = sys.argv[1] if len(sys.argv) > 1 else DEFAULT_AUDIO_PATH
    out = process(audio_path)
    print(json.dumps(out, ensure_ascii=False, indent=2))
/**
 * Asks the backend to start a download job.
 *
 * Sends a JSON POST to `/api/download` and resolves with the parsed JSON
 * response body. Falsy optional arguments fall back to defaults:
 * `sessdata` -> '', `playlist` -> false, `quality` -> 'best'.
 * `workers` is always sent as 1.
 *
 * @throws Error carrying the HTTP status when the response is not OK.
 */
export const startDownload = async (
  url: string,
  jobId: number,
  sessdata?: string,
  playlist?: boolean,
  quality?: string
) => {
  const payload = {
    url,
    job_id: jobId,
    sessdata: sessdata || '',
    playlist: playlist || false,
    quality: quality || 'best',
    workers: 1,
  }

  const response = await fetch('/api/download', {
    method: 'POST',
    headers: { 'Content-Type': 'application/json' },
    body: JSON.stringify(payload)
  })

  if (response.ok) {
    return response.json()
  }

  throw new Error(`启动下载失败:${response.status}`)
}
"""Manual smoke script: transcribe a sample audio file and save its segments.

The work lives in ``main()`` behind a ``__main__`` guard so that merely
importing this file (for example during pytest collection of ``test_*.py``)
does not start a transcription or write any files.
"""
import os
import sys
import json

# Make the project's backend directory importable so the script can be run
# directly from the tests directory.
sys.path.append(os.path.dirname(os.path.dirname(__file__)))

from audio2text.audio2text import audio_to_text

AUDIO_PATH = r"C:\Users\ke\Documents\projects\python_projects\HearSight\backend\tests\datas\大语言模型进化论:从“听懂指令”到“学会思考”,AI如何与人类对齐?.m4a"
OUTPUT_DIR = "results"


def main() -> None:
    """Transcribe ``AUDIO_PATH``, print the text and dump the segments as JSON."""
    text, segments = audio_to_text(AUDIO_PATH, return_segments=True)
    print(text)
    # Persist the parsed segments next to the working directory.
    os.makedirs(OUTPUT_DIR, exist_ok=True)
    with open(os.path.join(OUTPUT_DIR, 'test_audio2text.json'), 'w', encoding='utf-8') as f:
        json.dump(segments, f, ensure_ascii=False, indent=2)


if __name__ == "__main__":
    main()
def test_upload():
    """Upload the bundled sample file to Supabase and print the outcome.

    The sample is ``example_tests/test.txt`` next to this module. When it is
    missing, a message is printed and nothing is uploaded.
    """
    here = os.path.dirname(__file__)
    file_path = os.path.join(here, "example_tests", "test.txt")

    # Guard clause: nothing to do without the sample file.
    if not os.path.exists(file_path):
        print(f"测试文件不存在: {file_path}")
        return

    print(f"开始上传文件: {file_path}")
    success, result = upload_file_to_supabase(file_path)
    outcome = "上传成功" if success else "上传失败"
    print(f"{outcome}: {result}")
def detect_language(text: str) -> str:
    """Detect whether *text* is mainly Chinese or English.

    Only letters take part in the vote: whitespace, punctuation, digits and
    symbols are ignored, so a short Chinese sentence with punctuation (for
    example ``"你好,世界!"``) is still recognised as Chinese.

    Args:
        text: The text to classify.

    Returns:
        ``"zh"`` when more than 70% of the letters are Chinese characters,
        otherwise ``"en"``. Empty text, or text without any letters, yields
        ``"en"``.
    """
    if not text:
        return "en"

    chinese_count = 0
    letter_count = 0
    for char in text:
        # NOTE(review): category "Lo" also matches other caseless scripts
        # (kana, Hangul, ...); kept as in the original - confirm it is intended.
        is_chinese = (
            unicodedata.category(char).startswith("Lo")
            or "\u4e00" <= char <= "\u9fff"
        )
        if not (is_chinese or char.isalpha()):
            # Skip non-letters so they do not dilute the ratio.
            continue
        letter_count += 1
        if is_chinese:
            chinese_count += 1

    if letter_count == 0:
        return "en"

    return "zh" if chinese_count / letter_count > 0.7 else "en"
6 | 7 | ## 组件结构 8 | 9 | ### HomePage.tsx 10 | 11 | 主页面组件,包含: 12 | 13 | - 顶部导航栏 14 | - 产品介绍区域 15 | - 功能特性展示 16 | - URL输入表单 17 | - 快速开始按钮 18 | 19 | ## 设计特点 20 | 21 | 1. **渐变背景**: 使用渐变色营造视觉吸引力 22 | 2. **响应式布局**: 支持不同屏幕尺寸的自适应 23 | 3. **交互友好**: 清晰的CTA(行动召唤)按钮 24 | 4. **信息层次**: 通过卡片和图标组织内容 25 | 26 | ## 功能特性 27 | 28 | - **产品介绍**: 展示HearSight的核心功能 29 | - **URL输入**: 支持直接输入视频链接快速开始 30 | - **导航引导**: 提供进入应用的便捷路径 31 | - **特性展示**: 通过图标和描述突出产品优势 32 | 33 | ## 用户流程 34 | 35 | 1. 访问首页查看产品介绍 36 | 2. 输入视频URL或点击进入应用 37 | 3. 跳转到主应用界面开始使用 38 | 39 | ## 技术实现 40 | 41 | - 使用React Router进行页面导航 42 | - 表单处理用户输入 43 | - 本地存储管理待处理URL 44 | -------------------------------------------------------------------------------- /frontend/tsconfig.node.json: -------------------------------------------------------------------------------- 1 | { 2 | "compilerOptions": { 3 | "tsBuildInfoFile": "./node_modules/.tmp/tsconfig.node.tsbuildinfo", 4 | "target": "ES2023", 5 | "lib": ["ES2023"], 6 | "module": "ESNext", 7 | "types": ["node"], 8 | "skipLibCheck": true, 9 | 10 | /* Bundler mode */ 11 | "moduleResolution": "bundler", 12 | "allowImportingTsExtensions": true, 13 | "verbatimModuleSyntax": true, 14 | "moduleDetection": "force", 15 | "noEmit": true, 16 | 17 | /* Linting */ 18 | "strict": true, 19 | "noUnusedLocals": true, 20 | "noUnusedParameters": true, 21 | "erasableSyntaxOnly": true, 22 | "noFallthroughCasesInSwitch": true, 23 | "noUncheckedSideEffectImports": true 24 | }, 25 | "include": ["vite.config.ts"] 26 | } 27 | -------------------------------------------------------------------------------- /backend/ReAct/__init__.py: -------------------------------------------------------------------------------- 1 | # ReAct 核心模块 2 | 3 | from .action_parser import parse_response 4 | from .actions import ActionExecutor 5 | from .base_agent import BaseAgent 6 | from .chat_agent import ChatAgent 7 | from .models import AgentResult, StreamCallback, ToolCallable, TraceStep 8 | from .react_loop import ReactLoop 9 | from 
"""Manual smoke script: download one sample Bilibili video.

The download lives in ``main()`` behind a ``__main__`` guard so that merely
importing this file (for example during pytest collection of ``test_*.py``)
does not trigger a network download.
"""
import os
import sys
# Put the project root on the import path so the top-level ``backend`` package
# can be imported when this file is run directly from the tests directory.
# __file__ = .../backend/tests/test_download_vedio.py
# Three dirname calls walk up to the project root.
project_root = os.path.abspath(os.path.dirname(os.path.dirname(os.path.dirname(__file__))))
if project_root not in sys.path:
    sys.path.insert(0, project_root)

from backend.utils.vedio_utils.download_video.download_bilibili import download_bilibili


EXAMPLE_URL = "https://www.bilibili.com/video/BV1pm8dzXEpA?spm_id_from=333.1007.tianma.2-3-6.click"  # sample placeholder


def main() -> None:
    """Download ``EXAMPLE_URL`` into ``downloads`` and print the saved paths."""
    # The session cookie is optional and read from the environment.
    sess = os.getenv("BILI_SESSDATA", "")
    paths = download_bilibili(EXAMPLE_URL, out_dir="downloads", playlist=False, sessdata=sess)
    print("Saved files:")
    for p in paths:
        print(p)


if __name__ == "__main__":
    main()
页面路由跳转 32 | - 用户操作引导 33 | 34 | ## 设计原则 35 | 36 | 1. **直观性**: 界面布局清晰,功能一目了然 37 | 2. **易用性**: 操作流程简洁,降低学习成本 38 | 3. **响应式**: 适配不同设备屏幕尺寸 39 | 4. **可扩展**: 便于后续功能模块添加 40 | 41 | ## 数据流 42 | 43 | - 通过上下文(Context)管理全局状态 44 | - 各子组件通过props接收数据和回调函数 45 | - 用户操作通过事件机制向上传递 46 | 47 | ## 性能优化 48 | 49 | - 组件懒加载 50 | - 虚拟滚动处理大量数据 51 | - 防抖节流优化频繁操作 52 | - 记忆化计算避免重复渲染 -------------------------------------------------------------------------------- /frontend/src/App.css: -------------------------------------------------------------------------------- 1 | #root { 2 | max-width: 1280px; 3 | margin: 0 auto; 4 | padding: 2rem; 5 | text-align: center; 6 | } 7 | 8 | .logo { 9 | height: 6em; 10 | padding: 1.5em; 11 | will-change: filter; 12 | transition: filter 300ms; 13 | } 14 | .logo:hover { 15 | filter: drop-shadow(0 0 2em #646cffaa); 16 | } 17 | .logo.react:hover { 18 | filter: drop-shadow(0 0 2em #61dafbaa); 19 | } 20 | 21 | @keyframes logo-spin { 22 | from { 23 | transform: rotate(0deg); 24 | } 25 | to { 26 | transform: rotate(360deg); 27 | } 28 | } 29 | 30 | @media (prefers-reduced-motion: no-preference) { 31 | a:nth-of-type(2) .logo { 32 | animation: logo-spin infinite 20s linear; 33 | } 34 | } 35 | 36 | .card { 37 | padding: 2em; 38 | } 39 | 40 | .read-the-docs { 41 | color: #888; 42 | } 43 | -------------------------------------------------------------------------------- /frontend/src/components/ui/separator.tsx: -------------------------------------------------------------------------------- 1 | import * as React from "react" 2 | import * as SeparatorPrimitive from "@radix-ui/react-separator" 3 | 4 | import { cn } from "@/lib/utils" 5 | 6 | function Separator({ 7 | className, 8 | orientation = "horizontal", 9 | decorative = true, 10 | ...props 11 | }: React.ComponentProps) { 12 | return ( 13 | 23 | ) 24 | } 25 | 26 | export { Separator } 27 | -------------------------------------------------------------------------------- /frontend/src/components/ui/textarea.tsx: 
-------------------------------------------------------------------------------- 1 | import * as React from "react" 2 | 3 | import { cn } from "@/lib/utils" 4 | 5 | function Textarea({ className, ...props }: React.ComponentProps<"textarea">) { 6 | return ( 7 |