├── .flake8
├── .gitignore
├── LICENSE
├── Makefile
├── README.md
├── README_EN.md
├── backend
│   ├── .dockerignore
│   ├── .gitignore
│   ├── .pre-commit-config.yaml
│   ├── Dockerfile
│   ├── README.md
│   ├── __init__.py
│   ├── actions
│   │   ├── __init__.py
│   │   ├── asr.py
│   │   ├── dispatcher.py
│   │   ├── llm.py
│   │   └── tos.py
│   ├── constants.py
│   ├── env.py
│   ├── main.py
│   └── requirements.txt
├── docker-compose.yaml
├── docs
│   └── images
│       ├── ai_talk.png
│       ├── cors.png
│       ├── custom_prompt.png
│       ├── details.jpg
│       ├── index.jpg
│       ├── loading.jpg
│       ├── logo.jpeg
│       ├── mindmap.jpg
│       ├── process_flow.jpg
│       ├── readme_header.jpg
│       ├── tos_endpoint.png
│       └── wxgzh
│           ├── aild.png
│           ├── hellogithub.png
│           ├── ryf.png
│           └── yf.png
├── frontend
│   ├── .dockerignore
│   ├── .env.development
│   ├── .env.production
│   ├── .gitignore
│   ├── Dockerfile
│   ├── README.md
│   ├── index.html
│   ├── package.json
│   ├── src
│   │   ├── App.vue
│   │   ├── apis
│   │   │   ├── api.ts
│   │   │   ├── asrService.ts
│   │   │   ├── chatService.ts
│   │   │   ├── http.ts
│   │   │   ├── index.ts
│   │   │   ├── markdownService.ts
│   │   │   ├── types.ts
│   │   │   └── uploadService.ts
│   │   ├── assets
│   │   │   ├── logo.jpeg
│   │   │   ├── logo.png
│   │   │   ├── lottie
│   │   │   │   └── loading.json
│   │   │   ├── user.jpeg
│   │   │   ├── 字幕.svg
│   │   │   ├── 小红书.svg
│   │   │   ├── 微信公众号.svg
│   │   │   ├── 思维导图.svg
│   │   │   ├── 汇总.svg
│   │   │   └── 笔记.svg
│   │   ├── components
│   │   │   ├── AppSidebar.vue
│   │   │   ├── Settings
│   │   │   │   └── Settings.vue
│   │   │   └── VideoToMarkdown
│   │   │       ├── ChatPanel.vue
│   │   │       ├── GeneratedContentPanel.vue
│   │   │       ├── LoadingOverlay.vue
│   │   │       ├── ProcessSteps.vue
│   │   │       ├── TaskDetail.vue
│   │   │       ├── TranscriptionPanel.vue
│   │   │       ├── UploadSection.vue
│   │   │       └── index.vue
│   │   ├── config.js
│   │   ├── constants.js
│   │   ├── main.js
│   │   ├── scroll-fix.css
│   │   ├── style.css
│   │   └── utils
│   │       ├── db.js
│   │       ├── eventBus.js
│   │       ├── ffmpeg.js
│   │       └── md5.js
│   └── vite.config.js
└── variables.env
/.flake8:
--------------------------------------------------------------------------------
1 | [flake8]
2 | ignore = C901, W503, F405, E203
3 | # C901: is too complex
4 | # W503: line break before binary operator
5 | # F405: may be undefined, or defined from star imports
6 | exclude =
7 | *migrations*,
8 | *.pyc,
9 | .git,
10 | .cover,
11 | __pycache__,
12 | */node_modules/*,
13 | */templates_module*,
14 | */bin/*,
15 | local/*,
16 | local_settings.py,
17 | max-line-length = 120
18 | max-complexity = 12
19 | format = pylint
20 | show_source = True
21 | statistics = True
22 | count = True
--------------------------------------------------------------------------------
/.gitignore:
--------------------------------------------------------------------------------
1 | .idea
2 | /backend/.idea/
3 |
4 |
--------------------------------------------------------------------------------
/LICENSE:
--------------------------------------------------------------------------------
1 | MIT License
2 |
3 | Copyright (c) 2025 韩数
4 |
5 | Permission is hereby granted, free of charge, to any person obtaining a copy
6 | of this software and associated documentation files (the "Software"), to deal
7 | in the Software without restriction, including without limitation the rights
8 | to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
9 | copies of the Software, and to permit persons to whom the Software is
10 | furnished to do so, subject to the following conditions:
11 |
12 | The above copyright notice and this permission notice shall be included in all
13 | copies or substantial portions of the Software.
14 |
15 | THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
16 | IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
17 | FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
18 | AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
19 | LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
20 | OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
21 | SOFTWARE.
22 |
--------------------------------------------------------------------------------
/Makefile:
--------------------------------------------------------------------------------
1 | IMAGE_VERSION ?= latest
2 |
3 | PROJECT_NAME := ai-media2doc
4 | MODULES := backend frontend
5 | ROOT_DIR := $(shell dirname $(realpath $(firstword $(MAKEFILE_LIST))))
6 |
7 | .PHONY: help
8 | help:
9 | @echo "Make Targets: "
10 | @echo " docker-image: Build image"
11 | @echo " run: Run project"
12 | @echo " stop: Stop project"
13 |
14 | .PHONY: docker-image
15 | docker-image:
16 | @for module in $(MODULES) ; do \
17 | echo "[docker-image] start to build $(PROJECT_NAME)-$$module."; \
18 | cd $(ROOT_DIR)/$$module/; \
19 | docker build -t $(PROJECT_NAME)-$$module:$(IMAGE_VERSION) .; \
20 | done
21 |
22 | @echo "当前服务配置如下:";
23 | @cat $(ROOT_DIR)/variables.env;
24 | @echo "📣 为确保程序正常运行,请检查:";
25 | @echo "1️⃣ 请按指引(https://github.com/hanshuaikang/AI-Media2Doc/blob/main/backend/README.md)了解如何获取上述配置项。";
26 | @echo "2️⃣ 在项目根目录的 variables.env 文件中填写相应的配置项。";
27 | @echo "3️⃣ 运行 make run 启动项目。";
28 |
29 | .PHONY: run
30 | run:
31 | docker compose up -d
32 |
33 | @echo "🚀 项目已启动,访问地址:http://localhost:5173/";
34 | @echo "💤 停止运行:make stop";
35 |
36 | .PHONY: stop
37 | stop:
38 | docker compose down
39 |
40 | @echo "👋";
41 |
--------------------------------------------------------------------------------
/README.md:
--------------------------------------------------------------------------------
1 |
2 |
3 |
4 |
5 |
6 |
7 |
8 |
9 |
10 |
11 |
12 |
13 |
14 |
15 |
16 |
17 |
18 | ### 📖 简介
19 |
20 | AI 视频图文创作助手是一款 Web 工具, 基于 AI 大模型, 一键将视频和音频转化为各种风格的文档, 无需登录注册, 前后端本地部署,以极低的成本体验 AI 视频/音频转风格文档服务。
21 |
22 | ### ✨ 核心功能
23 |
24 | - ✅ **完全开源**:MIT 协议授权,支持本地部署。
25 | - 🔒 **隐私保护**:无需登录注册,任务记录保存在本地。
26 | - 💻 **前端处理**:采用 ffmpeg wasm 技术,无需本地安装 ffmpeg。
27 | - 🎯 **多种风格支持**:支持小红书/公众号/知识笔记/思维导图/内容总结等多种文档风格。
28 | - 🤖 **AI 对话**:支持针对视频内容进行 AI 二次问答。
29 | - 🎬 **支持字幕导出**: 结果一键导出为字幕文件。
30 | - 🎨 **支持自定义 Prompt**:支持在前端自定义配置 prompt。
31 | - 🐳 **一键部署**:支持 Docker 一键部署。
32 |
33 |
34 | ### 🔜 未来计划
35 |
36 | - 📷 支持智能截取视频关键帧,实现真正的图文并茂
37 | - 🎙️ 音频识别支持使用 fast-whisper 本地大模型处理,进一步降低成本
38 | - 🎨 使用 React 重构前端页面,带来更流畅的体验
39 |
40 |
41 | ### 项目截图
42 |
43 | #### 全新设计的首页, 尽力之作
44 |
45 |
46 |
47 |
48 |
49 | #### 更加简洁的加载页
50 |
51 |
52 |
53 |
54 |
55 | #### 全新设计的结果页, 支持一键导出为字幕。
56 |
57 |
58 |
59 |
60 | #### 支持自定义 prompt
61 |
62 |
63 |
64 |
65 |
66 | #### 可基于视频内容进行 AI 二次对话
67 |
68 |
69 |
70 |
71 |
72 |
73 | ### 📦 docker 本地一键部署
74 |
75 | 1️⃣:镜像构建:
76 |
77 | ```shell
78 | $ make docker-image
79 | ```
80 | 2️⃣:请根据 [后端部署指引 / 配置项说明](https://github.com/hanshuaikang/AI-Media2Doc/blob/main/backend/README.md#%E5%9C%A8%E7%81%AB%E5%B1%B1%E5%BC%95%E6%93%8E%E8%8E%B7%E5%8F%96%E5%AF%B9%E5%BA%94%E7%9A%84%E7%8E%AF%E5%A2%83%E5%8F%98%E9%87%8F%E7%9A%84%E5%80%BC) 完善根目录下的 `variables.env` 文件。
81 |
82 | 3️⃣:修改前端 `.env.development` 中的 `VITE_API_BASE_URL` 为 `http://0.0.0.0:8080`。
83 |
84 | 4️⃣:运行项目
85 |
86 | ```shell
87 | $ make run
88 | ```
89 |
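补充: 第 2️⃣ 步中的 `variables.env` 是一个 KEY=VALUE 格式的环境变量文件(docker-compose.yaml 通过 `env_file` 加载),键名与 backend/env.py 读取的配置一致。下面给出一个占位示例,具体取值请按第 2️⃣ 步的指引在火山引擎控制台获取并替换:

```
ENDPOINT_ID=xxx
ARK_API_KEY=xxx
TOS_ACCESS_KEY=xxx
TOS_SECRET_KEY=xxx
TOS_ENDPOINT=xxx
TOS_REGION=xxx
TOS_BUCKET=xxx
AUC_APP_ID=xxx
AUC_ACCESS_TOKEN=xxx
```
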
90 | ### 👾 开发者的废话
91 |
92 | AI 视频图文创作助手源于我年初的一个想法。作为一个喜欢阅读的人, 我更希望将一些视频内容转化为文字, 方便我进行二次阅读思考和总结记录笔记, 但市面上并没有一个好的工具来实现这个想法, 大多数工具都需要登录和付费。我不太想在互联网上注册过多的账号, 同时也不想将自己想要总结的内容上传至除了云厂商之外的第三方平台, 因此我开发了这个小应用。本项目采用 MIT 协议, 任何人都可以以极低的成本去体验音视频转文本。
93 |
94 |
95 | ### 🔄 处理流程
96 |
97 |
98 |
99 |
100 |
101 | ### 🔧 本地开发指南
102 |
103 | - [后端本地部署](./backend/README.md)
104 | - [前端本地部署](./frontend/README.md)
105 |
106 | ### 📄 许可证
107 |
108 | 本项目采用 [MIT 许可证](./LICENSE)
109 |
110 | ### 🔗 相关链接
111 |
112 | - [volcengine-ai-app-lab](https://github.com/volcengine/ai-app-lab)
113 |
114 | - [throttled-py](https://github.com/ZhuoZhuoCrayon/throttled-py):✨Python 限流库,合理限制、平滑云资源用量。
115 |
116 | ### 🌵 支持开发者
117 | 你可以关注我的小红书:
118 |
119 | [韩数的开发笔记: 致力于分享 Github 上那些好玩、有趣、免费、实用的高质量项目](https://www.xiaohongshu.com/user/profile/5e2992b000000000010064a4)
120 |
121 | **或者请我吃一包辣条** , 作为一个超级喜欢吃辣条的人, 很多晚上维护开源项目都是边吃辣条边写代码的 🤪。如果你有很喜欢吃的辣条, 你可以投喂我吃一包辣条, 5毛一包的也可以, 我将直接开启代码狂暴模式(化身八爪鱼: 🐙)。
122 |
123 | 请留下你要赞助作者的辣条名称 (🤩❤️🔥): 支持卫龙/麻辣王子/博士牛筋/臭干子,禁止投喂(🥵😵😵💫🤧🥴): **缺牙齿**/**霸王丝**等有损本人生命值的辣条。
124 |
125 | 同时请备注你的 github 名称, 方便我整理展示在项目主页。🙌
126 |
127 |
128 | 赞助作者: 我的爱发电主页
129 |
130 |
131 |
132 | ### 🙌 致谢
133 | 感谢这些朋友对本项目做出的贡献:
134 |
135 | | | |
136 | |:-:|:-:|
137 | | crayon | chen_jx |
138 |
139 | 感谢以下自媒体对本项目的关注和转发(以下排名不分先后), 大家感兴趣的话可以关注下他们 ~
140 |
141 | | [HelloGithub](https://hellogithub.com) | 开源AI项目落地 | [阮一峰的网络日志](https://www.ruanyifeng.com/blog/weekly/) | 一飞开源 |
142 | |:----:|:-------------------------------------------------------------:|:---------------------------------------------------------------------------------------------------------------:|:------------------------------------------------------------:|
143 | | | | | |
144 |
145 | 小红书:
146 | [小红书:AI-GitHub](https://www.xiaohongshu.com/user/profile/65c44d1200000000090254fc)
147 |
148 | 推特:
149 | | [Geek](https://x.com/geekbb) | [AIGCLINK](https://x.com/aigclink) | [ilovelife](https://x.com/ilovek8s) | [ahhhhfs](https://x.com/abskoop)
150 |
151 | ### 🌟 Star History
152 |
153 | [](https://www.star-history.com/#hanshuaikang/AI-Media2Doc&Date)
154 |
155 |
--------------------------------------------------------------------------------
/README_EN.md:
--------------------------------------------------------------------------------
1 |
2 |
3 |
4 |
5 | AI Media2Doc Assistant
6 |
7 |
8 |
9 | Based on AI large models, convert videos and audios to various document styles like Xiaohongshu/WeChat Official Account/Knowledge Notes/Mind Maps with one click.
10 |
11 |
12 |
13 |
14 |
15 |
16 |
17 |
18 |
19 |
20 |
21 | [中文文档](./README.md)
22 |
23 | ### 📖 Introduction
24 |
25 | AI Media2Doc Assistant is a web tool based on AI large models that converts videos and audios to various document styles with one click. No login or registration required, with both frontend and backend supporting local deployment. Experience AI video/audio to styled document conversion services at an extremely low cost - I spent just five dollars for a month of development and testing.
26 |
27 | ### ✨ Core Features
28 |
29 | - ✅ **Fully Open Source**: Licensed under MIT, supports local deployment.
30 | - 🔒 **Privacy Protection**: No login or registration required, task records saved locally.
31 | - 💻 **Frontend Processing**: Uses ffmpeg wasm technology, no need to install ffmpeg locally.
32 | - 🎯 **Multiple Style Support**: Supports various document styles like Xiaohongshu/WeChat Official Account/Knowledge Notes/Mind Maps/Content Summaries.
33 | - 🤖 **AI Conversation**: Supports secondary Q&A based on video content.
34 | - 🤖 **Local Deployment Friendly**: With basic development knowledge, you can get it running in no time.
35 | - 🐳 **One-Click Deployment**: Supports one-click deployment with Docker.
36 |
37 | ### 🔜 Future Plans
38 |
39 | - 📷 Support intelligent extraction of video key frames, achieving true integration of text and images
40 | - 🎙️ Support audio recognition using fast-whisper local large model processing to further reduce costs
41 | - 🎨 Completely rebuild the frontend page using React for a smoother experience
42 |
43 | ### 📦 Installation Guide
44 |
45 | 1) Image Build:
46 |
47 | ```shell
48 | $ make docker-image
49 | ```
50 |
51 | 2) Please refer to the [Backend Deployment Guide / Configuration Instructions](https://github.com/hanshuaikang/AI-Media2Doc/blob/main/backend/README.md#%E5%9C%A8%E7%81%AB%E5%B1%B1%E5%BC%95%E6%93%8E%E8%8E%B7%E5%8F%96%E5%AF%B9%E5%BA%94%E7%9A%84%E7%8E%AF%E5%A2%83%E5%8F%98%E9%87%8F%E7%9A%84%E5%80%BC) to complete the `variables.env` file in the root directory.
52 |
53 | 3) Run the Project:
54 |
55 | ```shell
56 | $ make run
57 | ```
58 |
59 | ### 👾 Developer's Note
60 |
61 | The AI Media2Doc Assistant originated from an idea I had at the beginning of the year. As someone who enjoys reading, I prefer to convert video content into text for easier re-reading, thinking, and note-taking. However, I couldn't find a good tool to achieve this - most tools required login and payment. I didn't want to register too many accounts on the internet, nor did I want to upload my content to third-party platforms other than cloud providers. Therefore, I developed this small application under the MIT license, allowing anyone to experience audio/video to text conversion at a minimal cost.
62 |
63 | ### Project Screenshots
64 |
65 | #### Support AI Q&A based on video content
66 |
67 |
68 |
69 |
70 | #### Support mind map generation
71 |
72 | Generated mind maps can be exported to third-party platforms for editing and optimization
73 |
74 |
75 |
76 |
77 | ### 🔄 Processing Flow
78 |
79 |
80 |
81 |
82 |
83 | ### 🔧 Local Development Guide
84 |
85 | - [Backend Local Deployment](./backend/README.md)
86 | - [Frontend Local Deployment](./frontend/README.md)
87 |
88 | ### 📄 License
89 |
90 | This project is licensed under the [MIT License](./LICENSE)
91 |
92 | ### 🔗 Related Links
93 |
94 | - [volcengine-ai-app-lab](https://github.com/volcengine/ai-app-lab)
95 | - [throttled-py](https://github.com/ZhuoZhuoCrayon/throttled-py): ✨Python rate-limiting library, reasonably limits and smooths cloud resource usage.
96 |
97 |
98 | [韩数的开发笔记: 致力于分享 Github 上那些好玩、有趣、免费、实用的高质量项目](https://www.xiaohongshu.com/user/profile/5e2992b000000000010064a4)
99 |
100 | ### 🌟 Star History
101 |
102 | [](https://www.star-history.com/#hanshuaikang/AI-Media2Doc&Date)
--------------------------------------------------------------------------------
/backend/.dockerignore:
--------------------------------------------------------------------------------
1 | .idea
2 | .venv
3 | *.log
4 | Dockerfile
5 | .dockerignore
6 |
--------------------------------------------------------------------------------
/backend/.gitignore:
--------------------------------------------------------------------------------
1 | env.env
2 | trace*
3 | .venv
4 | .idea
5 | .DS_Store
6 | __pycache__
--------------------------------------------------------------------------------
/backend/.pre-commit-config.yaml:
--------------------------------------------------------------------------------
1 | default_stages: [ commit ]
2 | repos:
3 | - repo: https://github.com/pre-commit/pre-commit-hooks
4 | rev: v2.1.0
5 | hooks:
6 | - id: check-merge-conflict
7 | - repo: https://github.com/psf/black
8 | rev: 22.3.0
9 | hooks:
10 | - id: black
11 | language_version: python3
12 | - repo: https://github.com/pycqa/flake8
13 | rev: 5.0.4
14 | hooks:
15 | - id: flake8
16 | language_version: python3
--------------------------------------------------------------------------------
/backend/Dockerfile:
--------------------------------------------------------------------------------
1 | ARG PYTHON_VERSION=3.11
2 |
3 | FROM python:${PYTHON_VERSION}-slim AS base
4 |
5 | ENV LANG=C.UTF-8 \
6 | LC_ALL=C.UTF-8 \
7 | PATH="/.venv/bin:$PATH" \
8 | PYTHONDONTWRITEBYTECODE=1 \
9 | PYTHONUNBUFFERED=1
10 |
11 | RUN set -ex && \
12 | sed -i 's/deb.debian.org/mirrors.cloud.tencent.com/g' /etc/apt/sources.list.d/debian.sources && \
13 | apt update
14 |
15 | # ------------------- Stage 1: Build Stage ------------------------------
16 | FROM base AS builder
17 |
18 | WORKDIR /
19 |
20 | COPY requirements.txt .
21 |
22 | RUN set -ex && \
23 | mkdir ~/.pip && \
24 | echo "[global]" >> ~/.pip/pip.conf && \
25 | echo "index-url=https://mirrors.tencent.com/pypi/simple/" >> ~/.pip/pip.conf
26 |
27 | RUN set -ex && \
28 | python -m venv .venv && \
29 | pip install --upgrade pip && \
30 | pip install -r ./requirements.txt
31 |
32 | # ------------------- Stage 2: Final Stage ------------------------------
33 | FROM base
34 |
35 | WORKDIR /app
36 |
37 | COPY --from=builder /.venv /.venv
38 | ADD ./ ./
39 |
40 | ENV ENDPOINT_ID="xxx" \
41 | ARK_API_KEY="xxx" \
42 | TOS_ACCESS_KEY="xxx" \
43 | TOS_SECRET_KEY="xxx" \
44 | TOS_ENDPOINT="xxx" \
45 | TOS_REGION="xxx" \
46 | TOS_BUCKET="xxx" \
47 | AUC_APP_ID="xxx" \
48 | AUC_ACCESS_TOKEN="xxx"
49 |
50 | EXPOSE 8080
51 |
52 | CMD ["python", "/app/main.py"]
53 |
--------------------------------------------------------------------------------
/backend/README.md:
--------------------------------------------------------------------------------
1 | # 后端部署教程
2 |
3 | 后端依赖于字节跳动火山引擎高代码 SDK [Arkitect](https://github.com/volcengine/ai-app-lab/blob/main/arkitect/README.md)。
4 | 在启动后端服务之前, 需要先安装好 Arkitect 的依赖, 并且申请字节 TOS 对象存储服务以及对应大模型的 API 调用权限。
5 |
6 | **注意 ⚠️: 请至少保证你本地的 Python 版本为 3.9 及以上, 否则可能会出现依赖无法安装, 项目启动失败等问题。**
7 |
8 | ## 1. 安装依赖
9 | ```bash
10 | pip install -r requirements.txt
11 | ```
12 |
13 | ## 2. 配置环境变量
14 |
15 | ```bash
16 | export ENDPOINT_ID=xxxx
17 | export ARK_API_KEY=xxxx
18 | export TOS_ACCESS_KEY=xxxx
19 | export TOS_SECRET_KEY=xxxx
20 | export TOS_ENDPOINT=xxxx
21 | export TOS_REGION=xxxx
22 | export TOS_BUCKET=xxxx
23 | export AUC_APP_ID=xxxx
24 | export AUC_ACCESS_TOKEN=xxxx
25 | ```
26 |
27 |
28 | ## 3. 启动服务
29 | ```bash
30 | python main.py
31 | ```
32 |
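服务启动后,可以先做一次简单的连通性检查。下面是一个示意脚本(非项目自带代码):端口与接口路径取自 main.py 的默认配置,`generate_upload_url` 这个 action 来自 actions/tos.py,文件名 `test.mp3` 仅为演示占位,并且需要先完成上面的环境变量配置:

```python
# 连通性检查示意:假设后端已在本地 8080 端口启动
import requests

BASE_URL = "http://localhost:8080"

# 1. 健康检查,路径来自 main.py 中的 health_check_path
print(requests.get(f"{BASE_URL}/v1/ping").status_code)

# 2. 通过 request-action 请求头调用指定 action(以 generate_upload_url 为例),
#    请求体结构与前端 apis/uploadService.ts 中的调用保持一致
resp = requests.post(
    f"{BASE_URL}/api/v3/bots/chat/completions",
    headers={"request-action": "generate_upload_url"},
    json={"model": "my-bot", "messages": [{"role": "user", "content": "test.mp3"}]},
)
print(resp.status_code, resp.json().get("metadata", {}).get("upload_url"))
```
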
33 | ## 在火山引擎获取对应的环境变量的值
34 | 主要分为三部分: 火山方舟、字节 TOS、音频识别大模型。
35 |
36 | ### 火山方舟
37 | #### ENDPOINT_ID
38 | 登录[方舟控制台](https://console.volcengine.com/ark/region:ark+cn-beijing/endpoint?projectName=default),创建一个推理接入点(Endpoint),推荐使用 Doubao-pro-32k([参考文档](https://www.volcengine.com/docs/82379/1399008#_2-%E5%88%9B%E5%BB%BA%E5%9C%A8%E7%BA%BF%E6%8E%A8%E7%90%86%E6%8E%A5%E5%85%A5%E7%82%B9%EF%BC%88endpoint%EF%BC%89)),你就得到了 `ENDPOINT_ID` 的值。
39 | #### ARK_API_KEY
40 | 在 API Key 管理中创建一个 API Key([参考文档](https://www.volcengine.com/docs/82379/1399008#_3-%E5%88%9B%E5%BB%BAAPIKey)),你就得到了 `ARK_API_KEY` 的值。
41 |
42 | ### 火山引擎对象存储服务
43 | #### 创建 bucket 设置跨域规则
44 | 登录[对象存储控制台](https://console.volcengine.com/tos) 创建一个 bucket, 创建完毕之后进入该 bucket。点击右侧权限管理, 找到跨域访问设置, 新建一条跨域访问规则。
45 |
46 |
47 |
48 | 当然你也可以根据实际情况灵活选择。
49 |
50 | #### TOS_ENDPOINT
51 | 点击 **桶列表** -> **点进去你创建的那个 bucket** -> **点击概览** -> **眼睛往下看**
52 |
53 | 你会看到一个访问域名, TOS_ENDPOINT 的值就是红框框里面那个,不同的区域 TOS_ENDPOINT 的值可能不一样。
54 |
55 |
56 |
57 |
58 |
59 | #### TOS_BUCKET
60 | `TOS_BUCKET` 的值就是你创建的 bucket 的名称。
61 |
62 | #### TOS_REGION
63 | `TOS_REGION` 的值就是你创建的 bucket 的区域, 例如 `cn-beijing`。
64 |
65 |
66 | #### TOS_ACCESS_KEY 和 TOS_SECRET_KEY
67 | 进入 [IAM控制台](https://console.volcengine.com/iam/keymanage) 创建一个访问密钥,
68 | 你就得到了 `TOS_ACCESS_KEY` 和 `TOS_SECRET_KEY` 的值。
69 |
70 |
71 | ### 音频识别大模型
72 | 登录[录音文件识别大模型控制台](https://console.volcengine.com/speech/service), 点击右侧录音文件识别大模型, 创建一个应用, 你就得到了 `AUC_APP_ID` 和 `AUC_ACCESS_TOKEN` 的值。
73 | #### AUC_APP_ID
74 | `AUC_APP_ID` 的值就是你创建的应用的 ID。
75 |
76 | #### AUC_ACCESS_TOKEN
77 | `AUC_ACCESS_TOKEN` 的值就是你创建的应用的 Access Token。
78 |
79 |
80 |
81 |
82 |
83 |
84 |
85 |
--------------------------------------------------------------------------------
/backend/__init__.py:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/hanshuaikang/AI-Media2Doc/ccea52404d64505fb6940bb1836a251a1d676267/backend/__init__.py
--------------------------------------------------------------------------------
/backend/actions/__init__.py:
--------------------------------------------------------------------------------
1 | # -*- coding: UTF-8 -*-
2 | from actions.asr import * # noqa
3 | from actions.llm import * # noqa
4 | from actions.tos import * # noqa
5 |
--------------------------------------------------------------------------------
/backend/actions/asr.py:
--------------------------------------------------------------------------------
1 | # -*- coding: UTF-8 -*-
2 | import json
3 | import uuid
4 | import time
5 |
6 | import requests
7 | from arkitect.core.component.llm import ArkChatRequest
8 | from arkitect.core.component.llm.model import ArkChatResponse
9 | from throttled import Throttled, per_sec, MemoryStore
10 |
11 | from constants import VolcengineASRResponseStatusCode, AsrTaskStatus
12 | from .dispatcher import ActionDispatcher
13 |
14 | from actions.tos import generate_download_url
15 | from env import AUC_APP_ID, AUC_ACCESS_TOKEN
16 |
17 | STORE = MemoryStore()
18 |
19 |
20 | @ActionDispatcher.register("submit_asr_task")
21 | async def submit_asr_task(request: ArkChatRequest):
22 | """
23 | 提交一个音频转写任务
24 | :param request: message: filename
25 | :return:
26 | """
27 | submit_url = "https://openspeech.bytedance.com/api/v3/auc/bigmodel/submit"
28 | # 音频文件名
29 | file_name = request.messages[0].content
30 | download_url = generate_download_url(file_name)
31 | # 生成任务 id
32 | task_id = uuid.uuid4().hex
33 |
34 | data = {
35 | "audio": {"format": "mp3", "url": download_url},
36 | "request": {"model_name": "bigmodel", "enable_itn": True},
37 | }
38 |
39 | headers = {
40 | "X-Api-App-Key": AUC_APP_ID,
41 | "X-Api-Access-Key": AUC_ACCESS_TOKEN,
42 | "X-Api-Resource-Id": "volc.bigasr.auc",
43 | "X-Api-Request-Id": task_id,
44 | "X-Api-Sequence": "-1",
45 | }
46 |
47 | # 最大 QPS 限制在 100,避免频繁请求。
48 | with Throttled(key=AUC_APP_ID, store=STORE, quota=per_sec(limit=100, burst=100)):
49 | response = requests.post(submit_url, data=json.dumps(data), headers=headers)
50 |
51 | # 判断任务是否成功
52 | if (
53 | "X-Api-Status-Code" in response.headers
54 | and response.headers["X-Api-Status-Code"] == "20000000"
55 | ):
56 | yield ArkChatResponse(
57 | id="upload_url",
58 | choices=[],
59 | created=int(time.time()),
60 | model="",
61 | object="chat.completion",
62 | usage=None,
63 | bot_usage=None,
64 | metadata={"task_id": task_id},
65 | )
66 | else:
67 | raise Exception(
68 | f"Submit task failed and the response headers " f"are: {response.headers}"
69 | )
70 |
71 |
72 | @ActionDispatcher.register("query_asr_task_status")
73 | async def query_asr_task_status(request: ArkChatRequest):
74 | task_id = request.messages[0].content
75 | headers = {
76 | "X-Api-App-Key": AUC_APP_ID,
77 | "X-Api-Access-Key": AUC_ACCESS_TOKEN,
78 | "X-Api-Resource-Id": "volc.bigasr.auc",
79 | "X-Api-Request-Id": task_id,
80 | }
81 |
82 | query_url = "https://openspeech.bytedance.com/api/v3/auc/bigmodel/query"
83 |
84 | # 最大 QPS 限制在 100,避免频繁请求。
85 | with Throttled(key=AUC_APP_ID, store=STORE, quota=per_sec(limit=100, burst=100)):
86 | response = requests.post(query_url, json.dumps({}), headers=headers)
87 |
88 | if "X-Api-Status-Code" in response.headers:
89 | if (
90 | response.headers["X-Api-Status-Code"]
91 | == VolcengineASRResponseStatusCode.SUCCESS.value
92 | ):
93 |
94 | data = response.json()
95 | utterances = data["result"]["utterances"]
96 | result = [
97 | {
98 | "start_time": utterance["start_time"],
99 | "end_time": utterance["end_time"],
100 | "text": utterance["text"],
101 | }
102 | for utterance in utterances
103 | ]
104 |
105 | yield ArkChatResponse(
106 | id="query_asr_task_status",
107 | choices=[],
108 | created=int(time.time()),
109 | model="",
110 | object="chat.completion",
111 | usage=None,
112 | bot_usage=None,
113 | metadata={
114 | "result": result,
115 | "status": AsrTaskStatus.FINISHED.value,
116 | },
117 | )
118 | elif response.headers["X-Api-Status-Code"] in [
119 | VolcengineASRResponseStatusCode.PENDING.value,
120 | VolcengineASRResponseStatusCode.RUNNING.value,
121 | ]:
122 | yield ArkChatResponse(
123 | id="query_asr_task_status",
124 | choices=[],
125 | created=int(time.time()),
126 | model="",
127 | object="chat.completion",
128 | usage=None,
129 | bot_usage=None,
130 | metadata={"result": None, "status": AsrTaskStatus.RUNNING.value},
131 | )
132 | else:
133 | yield ArkChatResponse(
134 | id="query_asr_task_status",
135 | choices=[],
136 | created=int(time.time()),
137 | model="",
138 | object="chat.completion",
139 | usage=None,
140 | bot_usage=None,
141 | metadata={"result": None, "status": AsrTaskStatus.FAILED.value},
142 | )
143 | else:
144 | raise Exception(
145 | f"Query task failed and the response headers " f"are: {response.headers}"
146 | )
147 |
--------------------------------------------------------------------------------
/backend/actions/dispatcher.py:
--------------------------------------------------------------------------------
1 | # -*- coding: UTF-8 -*-
2 | from typing import Dict, Callable, AsyncIterable
3 | from functools import wraps
4 | from arkitect.core.component.llm.model import Response
5 |
6 |
7 | class ActionDispatcher:
8 | _instance = None
9 | _actions: Dict[str, Callable] = {}
10 |
11 | def __new__(cls):
12 | if cls._instance is None:
13 | cls._instance = super(ActionDispatcher, cls).__new__(cls)
14 | return cls._instance
15 |
16 | @classmethod
17 | def register(cls, action_name: str):
18 | def decorator(func):
19 | cls._actions[action_name] = func
20 |
21 | @wraps(func)
22 | def wrapper(*args, **kwargs):
23 | return func(*args, **kwargs)
24 |
25 | return wrapper
26 |
27 | return decorator
28 |
29 | async def dispatch(self, action_name: str, *args, **kwargs) -> AsyncIterable[Response]:
30 | if action_name not in self._actions:
31 | raise ValueError(f"Action {action_name} not found")
32 | action = self._actions[action_name]
33 | async for response in action(*args, **kwargs):
34 | yield response
35 |
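# 用法示意(仅作说明,非本模块原有代码):被注册的处理函数必须是 async generator。
#
#     @ActionDispatcher.register("my_action")   # "my_action" 为示例名称
#     async def my_action(request):
#         yield ...  # 产出一个或多个 Response
#
# main.py 会读取 HTTP 请求头 request-action,并据此分发:
#
#     async for response in ActionDispatcher().dispatch("my_action", request):
#         ...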
--------------------------------------------------------------------------------
/backend/actions/llm.py:
--------------------------------------------------------------------------------
1 | # -*- coding: UTF-8 -*-
2 |
3 | from arkitect.core.component.llm import ArkChatRequest, BaseChatLanguageModel
4 | from arkitect.core.component.llm.model import ArkChatParameters
5 |
6 | from .dispatcher import ActionDispatcher
7 | from env import ENDPOINT_ID
8 |
9 |
10 | @ActionDispatcher.register("generate_markdown_text")
11 | async def generate_markdown_text(request: ArkChatRequest):
12 | parameters = ArkChatParameters(**request.__dict__)
13 | llm = BaseChatLanguageModel(
14 | endpoint_id=ENDPOINT_ID,
15 | messages=request.messages,
16 | parameters=parameters,
17 | )
18 | if request.stream:
19 | async for resp in llm.astream():
20 | yield resp
21 | else:
22 | yield await llm.arun()
23 |
24 |
25 | @ActionDispatcher.register("default")
26 | async def default_llm_action(request: ArkChatRequest):
27 | parameters = ArkChatParameters(**request.__dict__)
28 | llm = BaseChatLanguageModel(
29 | endpoint_id=ENDPOINT_ID,
30 | messages=request.messages,
31 | parameters=parameters,
32 | )
33 | if request.stream:
34 | async for resp in llm.astream():
35 | yield resp
36 | else:
37 | yield await llm.arun()
38 |
--------------------------------------------------------------------------------
/backend/actions/tos.py:
--------------------------------------------------------------------------------
1 | # -*- coding: UTF-8 -*-
2 | import time
3 | import tos
4 | from arkitect.core.component.llm import ArkChatRequest
5 | from arkitect.core.component.llm.model import ArkChatResponse
6 | from .dispatcher import ActionDispatcher
7 | from env import TOS_ACCESS_KEY, TOS_SECRET_KEY, TOS_REGION, TOS_BUCKET, TOS_ENDPOINT
8 |
9 | @ActionDispatcher.register("generate_upload_url")
10 | async def generate_upload_url(request: ArkChatRequest):
11 | file_name = request.messages[0].content
12 | tos_client = tos.TosClient(tos.Auth(TOS_ACCESS_KEY, TOS_SECRET_KEY, TOS_REGION), TOS_ENDPOINT)
13 | url = tos_client.generate_presigned_url(Method='PUT', Bucket=TOS_BUCKET, Key=file_name, ExpiresIn=3600)
14 |
15 | yield ArkChatResponse(
16 | id="upload_url",
17 | choices=[],
18 | created=int(time.time()),
19 | model="",
20 | object="chat.completion",
21 | usage=None,
22 | bot_usage=None,
23 | metadata={"upload_url": url}
24 | )
25 |
26 | def generate_download_url(file_name: str):
27 | tos_client = tos.TosClient(tos.Auth(TOS_ACCESS_KEY, TOS_SECRET_KEY, TOS_REGION), TOS_ENDPOINT)
28 | return tos_client.generate_presigned_url(Method='GET', Bucket=TOS_BUCKET, Key=file_name, ExpiresIn=3600)
29 |
--------------------------------------------------------------------------------
/backend/constants.py:
--------------------------------------------------------------------------------
1 | import enum
2 |
3 |
4 | class VolcengineASRResponseStatusCode(enum.Enum):
5 | SUCCESS = "20000000"
6 | RUNNING = "20000001"
7 | PENDING = "20000002"
8 |
9 |
10 | class AsrTaskStatus(enum.Enum):
11 | RUNNING = "running"
12 | FINISHED = "finished"
13 | FAILED = "failed"
14 |
--------------------------------------------------------------------------------
/backend/env.py:
--------------------------------------------------------------------------------
1 | # -*- coding: UTF-8 -*-
2 | import os
3 |
4 | ARK_API_KEY = os.getenv("ARK_API_KEY")
5 | TOS_ACCESS_KEY = os.getenv("TOS_ACCESS_KEY")
6 | TOS_SECRET_KEY = os.getenv("TOS_SECRET_KEY")
7 | TOS_ENDPOINT = os.getenv("TOS_ENDPOINT")
8 | TOS_REGION = os.getenv("TOS_REGION")
9 | TOS_BUCKET = os.getenv("TOS_BUCKET")
10 | ENDPOINT_ID = os.getenv("ENDPOINT_ID")
11 | AUC_APP_ID = os.getenv("AUC_APP_ID")
12 | AUC_ACCESS_TOKEN = os.getenv("AUC_ACCESS_TOKEN")
13 |
--------------------------------------------------------------------------------
/backend/main.py:
--------------------------------------------------------------------------------
1 | # -*- coding: UTF-8 -*-
2 | """
3 | 默认llm逻辑
4 | """
5 | import os
6 | from typing import AsyncIterable
7 |
8 |
9 | from arkitect.core.component.llm.model import (
10 | ArkChatRequest,
11 | Response,
12 | )
13 | from arkitect.launcher.local.serve import launch_serve
14 | from arkitect.telemetry.trace import task
15 | from arkitect.utils.context import get_headers
16 | from throttled import MemoryStore
17 |
18 | from actions.dispatcher import ActionDispatcher
19 |
20 |
21 | @task()
22 | async def main(request: ArkChatRequest) -> AsyncIterable[Response]:
23 | dispatcher = ActionDispatcher()
24 | # 通过使用不同的 header 分发到不同的处理逻辑
25 | request_action = get_headers().get("request-action", "default")
26 |
27 | async for response in dispatcher.dispatch(request_action, request):
28 | yield response
29 |
30 |
31 | store = MemoryStore()
32 |
33 | if __name__ == "__main__":
34 | port = os.getenv("_FAAS_RUNTIME_PORT")
35 | launch_serve(
36 | package_path="main",
37 | port=int(port) if port else 8080,
38 | health_check_path="/v1/ping",
39 | endpoint_path="/api/v3/bots/chat/completions",
40 | clients={},
41 | )
42 |
--------------------------------------------------------------------------------
/backend/requirements.txt:
--------------------------------------------------------------------------------
1 | arkitect==0.1.11
2 | opentelemetry-sdk>=1.22.0
3 | opentelemetry-semantic-conventions>=0.43b0
4 | throttled-py==2.0.2
5 |
--------------------------------------------------------------------------------
/docker-compose.yaml:
--------------------------------------------------------------------------------
1 | services:
2 | backend:
3 | image: ai-media2doc-backend:latest
4 | env_file:
5 | - "variables.env"
6 | deploy:
7 | mode: replicated
8 | replicas: 1
9 | ports:
10 | - "8080:8080"
11 |
12 | frontend:
13 | image: ai-media2doc-frontend:latest
14 | deploy:
15 | mode: replicated
16 | replicas: 1
17 | ports:
18 | - "5173:5173"
19 |
--------------------------------------------------------------------------------
/docs/images/ai_talk.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/hanshuaikang/AI-Media2Doc/ccea52404d64505fb6940bb1836a251a1d676267/docs/images/ai_talk.png
--------------------------------------------------------------------------------
/docs/images/cors.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/hanshuaikang/AI-Media2Doc/ccea52404d64505fb6940bb1836a251a1d676267/docs/images/cors.png
--------------------------------------------------------------------------------
/docs/images/custom_prompt.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/hanshuaikang/AI-Media2Doc/ccea52404d64505fb6940bb1836a251a1d676267/docs/images/custom_prompt.png
--------------------------------------------------------------------------------
/docs/images/details.jpg:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/hanshuaikang/AI-Media2Doc/ccea52404d64505fb6940bb1836a251a1d676267/docs/images/details.jpg
--------------------------------------------------------------------------------
/docs/images/index.jpg:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/hanshuaikang/AI-Media2Doc/ccea52404d64505fb6940bb1836a251a1d676267/docs/images/index.jpg
--------------------------------------------------------------------------------
/docs/images/loading.jpg:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/hanshuaikang/AI-Media2Doc/ccea52404d64505fb6940bb1836a251a1d676267/docs/images/loading.jpg
--------------------------------------------------------------------------------
/docs/images/logo.jpeg:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/hanshuaikang/AI-Media2Doc/ccea52404d64505fb6940bb1836a251a1d676267/docs/images/logo.jpeg
--------------------------------------------------------------------------------
/docs/images/mindmap.jpg:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/hanshuaikang/AI-Media2Doc/ccea52404d64505fb6940bb1836a251a1d676267/docs/images/mindmap.jpg
--------------------------------------------------------------------------------
/docs/images/process_flow.jpg:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/hanshuaikang/AI-Media2Doc/ccea52404d64505fb6940bb1836a251a1d676267/docs/images/process_flow.jpg
--------------------------------------------------------------------------------
/docs/images/readme_header.jpg:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/hanshuaikang/AI-Media2Doc/ccea52404d64505fb6940bb1836a251a1d676267/docs/images/readme_header.jpg
--------------------------------------------------------------------------------
/docs/images/tos_endpoint.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/hanshuaikang/AI-Media2Doc/ccea52404d64505fb6940bb1836a251a1d676267/docs/images/tos_endpoint.png
--------------------------------------------------------------------------------
/docs/images/wxgzh/aild.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/hanshuaikang/AI-Media2Doc/ccea52404d64505fb6940bb1836a251a1d676267/docs/images/wxgzh/aild.png
--------------------------------------------------------------------------------
/docs/images/wxgzh/hellogithub.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/hanshuaikang/AI-Media2Doc/ccea52404d64505fb6940bb1836a251a1d676267/docs/images/wxgzh/hellogithub.png
--------------------------------------------------------------------------------
/docs/images/wxgzh/ryf.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/hanshuaikang/AI-Media2Doc/ccea52404d64505fb6940bb1836a251a1d676267/docs/images/wxgzh/ryf.png
--------------------------------------------------------------------------------
/docs/images/wxgzh/yf.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/hanshuaikang/AI-Media2Doc/ccea52404d64505fb6940bb1836a251a1d676267/docs/images/wxgzh/yf.png
--------------------------------------------------------------------------------
/frontend/.dockerignore:
--------------------------------------------------------------------------------
1 | .idea
2 | node_modules
3 | package-lock.json
4 | Dockerfile
5 | .dockerignore
6 |
--------------------------------------------------------------------------------
/frontend/.env.development:
--------------------------------------------------------------------------------
1 | # API 服务基础URL(开发环境)
2 | VITE_API_BASE_URL=http://localhost:8080
3 | # 如果使用 docker 部署请修改为
4 | # VITE_API_BASE_URL=http://0.0.0.0:8080
5 |
6 |
--------------------------------------------------------------------------------
/frontend/.env.production:
--------------------------------------------------------------------------------
1 | # API 服务基础URL(生产环境)
2 | VITE_API_BASE_URL=""
3 |
4 | # 上传服务URL(生产环境)
5 | VITE_UPLOAD_BASE_URL=""
6 |
--------------------------------------------------------------------------------
/frontend/.gitignore:
--------------------------------------------------------------------------------
1 | node_modules
2 | package-lock.json
3 | dist
4 | coverage
5 | .vscode
6 | .idea
7 | *.log
8 | *.env
9 | *.tsbuildinfo
10 | *.tgz
11 | *.zip
12 | *.tar
13 | .vite
14 | .DS_Store
--------------------------------------------------------------------------------
/frontend/Dockerfile:
--------------------------------------------------------------------------------
1 | ARG NODE_VERSION=22
2 |
3 | FROM node:${NODE_VERSION}-slim
4 |
5 | ENV LANG=C.UTF-8 \
6 | LC_ALL=C.UTF-8 \
7 | PATH="/app/node_modules/.bin:$PATH"
8 |
9 | RUN set -ex && \
10 | sed -i 's/deb.debian.org/mirrors.cloud.tencent.com/g' /etc/apt/sources.list.d/debian.sources && \
11 | apt update && \
12 | apt install -y --no-install-recommends \
13 | git \
14 | ca-certificates
15 |
16 | RUN npm config set registry https://mirrors.cloud.tencent.com/npm/
17 |
18 | WORKDIR /app
19 |
20 | COPY package.json ./
21 |
22 | RUN npm install
23 |
24 | COPY . .
25 |
26 | EXPOSE 5173
27 |
28 | CMD ["npm", "run", "dev", "--", "--host", "0.0.0.0"]
29 |
--------------------------------------------------------------------------------
/frontend/README.md:
--------------------------------------------------------------------------------
1 | # 前端环境配置
2 |
3 | ## 1. 安装 Node.js
4 | 要求 node 版本为 20+, 我本地的 node 版本为 `22.0.0`
5 |
6 | ## 2. 安装依赖
7 | ```bash
8 |
9 | npm install
10 | ```
11 |
12 |
13 | ## 3. 启动服务
14 | ```bash
15 | npm run dev
16 | ```
17 |
18 | ## 浏览器访问
19 | 打开浏览器访问 `http://localhost:5173/` 即可。
--------------------------------------------------------------------------------
/frontend/index.html:
--------------------------------------------------------------------------------
1 |
2 |
3 |
4 |
5 |
6 |
7 |
8 |
9 |
10 |
11 |
12 |
13 |
--------------------------------------------------------------------------------
/frontend/package.json:
--------------------------------------------------------------------------------
1 | {
2 | "name": "ai-media2doc-frontend",
3 | "private": true,
4 | "version": "0.1.0",
5 | "type": "module",
6 | "scripts": {
7 | "dev": "vite",
8 | "build": "vite build",
9 | "preview": "vite preview",
10 | "server": "node server/upload-proxy.js"
11 | },
12 | "dependencies": {
13 | "@element-plus/icons-vue": "^2.3.1",
14 | "@ffmpeg/core": "^0.12.10",
15 | "@ffmpeg/ffmpeg": "^0.10.1",
16 | "axios": "^1.6.2",
17 | "body-parser": "^1.20.2",
18 | "cors": "^2.8.5",
19 | "element-plus": "^2.9.7",
20 | "express": "^4.18.2",
21 | "idb": "^8.0.2",
22 | "lottie-web-vue": "^2.0.7",
23 | "markdown-it": "^14.1.0",
24 | "multer": "^1.4.5-lts.1",
25 | "simple-mind-map": "^0.13.1-fix.2",
26 | "spark-md5": "^3.0.2",
27 | "vue": "^3.5.13"
28 | },
29 | "devDependencies": {
30 | "@vitejs/plugin-vue": "^5.2.1",
31 | "vite": "^6.2.0"
32 | }
33 | }
34 |
--------------------------------------------------------------------------------
/frontend/src/App.vue:
--------------------------------------------------------------------------------
1 |
43 |
44 |
45 |
46 |
47 |
48 |
49 |
50 |
51 |
52 |
53 |
54 |
55 |
56 |
57 |
58 |
59 |
60 |
61 |
62 |
63 |
64 |
182 |
--------------------------------------------------------------------------------
/frontend/src/apis/api.ts:
--------------------------------------------------------------------------------
1 | export const uploadFile = async (url: string, file: Blob, onProgress?: (progress: number) => void) => {
2 | const xhr = new XMLHttpRequest()
3 |
4 | return new Promise((resolve, reject) => {
5 | xhr.upload.onprogress = (event) => {
6 | if (event.lengthComputable && onProgress) {
7 | const percentComplete = Math.round((event.loaded / event.total) * 100)
8 | onProgress(percentComplete)
9 | }
10 | }
11 |
12 | xhr.onloadend = () => {
13 | if (xhr.status >= 200 && xhr.status < 300) {
14 | resolve(xhr.response)
15 | } else {
16 | reject(new Error(`Upload failed: ${xhr.status}`))
17 | }
18 | }
19 |
20 | xhr.onerror = () => reject(new Error('Upload failed'))
21 |
22 | xhr.open('PUT', url)
23 | xhr.send(file)
24 | })
25 | }
26 |
27 |
--------------------------------------------------------------------------------
/frontend/src/apis/asrService.ts:
--------------------------------------------------------------------------------
1 | import httpService from './http'
2 | import { API_PATHS } from '../config'
3 | import { AudioTaskResponse, AudioTaskResult, TaskStatus } from './types'
4 |
5 | /**
6 | * 提交音频处理任务
7 | * @param audioFileName 音频文件名
8 | * @returns 任务ID
9 | */
10 | export const submitAsrTask = async (audioFileName: string): Promise<string> => {
11 | try {
12 | const response = await httpService.request({
13 | url: API_PATHS.AUDIO_TASK,
14 | method: 'POST',
15 | headers: {
16 | 'request-action': 'submit_asr_task',
17 | },
18 | data: {
19 | model: 'my-bot',
20 | messages: [
21 | {
22 | role: 'user',
23 | content: audioFileName
24 | }
25 | ]
26 | }
27 | })
28 |
29 | if (response.error) {
30 | throw new Error(response.error)
31 | }
32 |
33 | return response.metadata?.task_id || ''
34 | } catch (error) {
35 | console.error('提交音频任务失败:', error)
36 | throw error
37 | }
38 | }
39 |
40 | /**
41 | * 查询音频处理任务状态
42 | * @param taskId 任务ID
43 | * @returns 任务结果和状态
44 | */
45 | export const queryAsrTask = async (taskId: string): Promise<AudioTaskResult> => {
46 | try {
47 | const response = await httpService.request({
48 | url: API_PATHS.AUDIO_TASK,
49 | method: 'POST',
50 | headers: {
51 | 'request-action': 'query_asr_task_status',
52 | },
53 | data: {
54 | model: 'my-bot',
55 | messages: [
56 | {
57 | role: 'user',
58 | content: taskId
59 | }
60 | ]
61 | }
62 | })
63 |
64 | if (response.error) {
65 | throw new Error(response.error)
66 | }
67 |
68 | return {
69 | text: response.metadata?.result || '',
70 | status: (response.metadata?.status || 'pending') as TaskStatus
71 | }
72 | } catch (error) {
73 | console.error('查询音频任务失败:', error)
74 | throw error
75 | }
76 | }
77 |
78 | /**
79 | * 轮询音频处理任务直到完成
80 | * @param taskId 任务ID
81 | * @param onProgress 进度回调
82 | * @param maxAttempts 最大尝试次数
83 | * @param interval 轮询间隔(ms)
84 | * @returns 处理结果文本
85 | */
86 | export const pollAsrTask = async (
87 | taskId: string,
88 | maxAttempts = 60,
89 | interval = 3000
90 | ): Promise<string> => {
91 | let attempts = 0
92 |
93 | while (attempts < maxAttempts) {
94 | const result = await queryAsrTask(taskId)
95 | console.log('Polling result:', result)
96 |
97 | if (result.status === 'finished') {
98 | return result.text
99 | }
100 |
101 | if (result.status === 'failed') {
102 | throw new Error('音频识别失败')
103 | }
104 |
105 | await new Promise(resolve => setTimeout(resolve, interval))
106 | attempts++
107 | }
108 |
109 | throw new Error('音频识别超时')
110 | }
111 |
--------------------------------------------------------------------------------
/frontend/src/apis/chatService.ts:
--------------------------------------------------------------------------------
1 | import httpService from './http'
2 | import { API_PATHS } from '../config'
3 | import { ChatMessage, ChatResponse } from './types'
4 |
5 | /**
6 | * 发送聊天消息
7 | * @param messages 聊天消息列表
8 | * @returns 助手响应消息
9 | */
10 | export const sendChatMessage = async (messages: ChatMessage[]): Promise<ChatMessage> => {
11 | try {
12 | const response = await httpService.request({
13 | url: API_PATHS.CHAT_COMPLETIONS,
14 | method: 'POST',
15 | data: {
16 | model: 'my-bot',
17 | messages
18 | }
19 | })
20 |
21 | if (response.error) {
22 | throw new Error(response.error)
23 | }
24 |
25 | if (!response.choices?.[0]?.message) {
26 | throw new Error('无效的响应格式')
27 | }
28 |
29 | return response.choices[0].message as ChatMessage
30 | } catch (error) {
31 | console.error('聊天请求失败:', error)
32 | throw error
33 | }
34 | }
35 |
--------------------------------------------------------------------------------
/frontend/src/apis/http.ts:
--------------------------------------------------------------------------------
1 | import axios, { AxiosRequestConfig, AxiosResponse, AxiosInstance } from 'axios'
2 | import { ElMessage } from 'element-plus'
3 | import { API_BASE_URL } from '../config'
4 |
5 | /**
6 | * 统一的API请求错误
7 | */
8 | export class ApiError extends Error {
9 | status: number
10 | data?: any
11 |
12 | constructor(message: string, status: number = 500, data?: any) {
13 | super(message)
14 | this.name = 'ApiError'
15 | this.status = status
16 | this.data = data
17 | }
18 | }
19 |
20 | /**
21 | * 统一的HTTP请求服务
22 | */
23 | class HttpService {
24 | private axiosInstance: AxiosInstance
25 |
26 | constructor(baseURL: string) {
27 | this.axiosInstance = axios.create({
28 | baseURL,
29 | timeout: 240000,
30 | headers: {
31 | 'Content-Type': 'application/json'
32 | }
33 | })
34 |
35 | // 请求拦截器
36 | this.axiosInstance.interceptors.request.use(
37 | config => config,
38 | error => Promise.reject(error)
39 | )
40 |
41 | // 响应拦截器
42 | this.axiosInstance.interceptors.response.use(
43 | response => response.data,
44 | error => {
45 | const message = error.response?.data?.message || error.message || '请求失败'
46 | const status = error.response?.status || 500
47 | const data = error.response?.data
48 |
49 | console.error(`API错误 [${status}]:`, message, data)
50 | ElMessage.error(message)
51 |
52 | return Promise.reject(new ApiError(message, status, data))
53 | }
54 | )
55 | }
56 |
57 | /**
58 | * 发送HTTP请求
59 | * @param config 请求配置
60 | * @returns 响应数据
61 | */
62 | async request(config: AxiosRequestConfig): Promise<any> {
63 | try {
64 | return await this.axiosInstance.request(config)
65 | } catch (error) {
66 | if (error instanceof ApiError) {
67 | throw error
68 | }
69 | throw new ApiError(error.message || '请求失败')
70 | }
71 | }
72 |
73 | /**
74 | * 上传文件(使用XHR以支持进度回调)
75 | * @param url 上传URL
76 | * @param file 文件对象
77 | * @param onProgress 进度回调
78 | */
79 | async uploadFile(url: string, file: Blob, onProgress?: (percent: number) => void): Promise<any> {
80 | return new Promise((resolve, reject) => {
81 | const xhr = new XMLHttpRequest()
82 |
83 | xhr.upload.onprogress = (event) => {
84 | if (event.lengthComputable && onProgress) {
85 | const percent = Math.round((event.loaded / event.total) * 100)
86 | onProgress(percent)
87 | }
88 | }
89 |
90 | xhr.onload = () => {
91 | if (xhr.status >= 200 && xhr.status < 300) {
92 | resolve({ success: true, status: xhr.status })
93 | } else {
94 | reject(new ApiError(`上传失败: ${xhr.status}`, xhr.status))
95 | }
96 | }
97 |
98 | xhr.onerror = () => {
99 | reject(new ApiError('网络错误,上传失败'))
100 | }
101 |
102 | xhr.open('PUT', url)
103 | xhr.send(file)
104 | })
105 | }
106 | }
107 |
108 | // 导出默认的HTTP服务实例
109 | export default new HttpService(API_BASE_URL)
110 |
--------------------------------------------------------------------------------
/frontend/src/apis/index.ts:
--------------------------------------------------------------------------------
1 | import * as audioService from './asrService'
2 | import * as markdownService from './markdownService'
3 | import * as uploadService from './uploadService'
4 | import * as chatService from './chatService'
5 | import httpService from './http'
6 |
7 | // 从各个服务中导出常用函数
8 | export const { submitAsrTask: submitAudioTask, pollAsrTask: pollAudioTask, queryAsrTask: queryAudioTask } = audioService
9 | export const { generateMarkdownText } = markdownService
10 | export const { getAudioUploadUrl, uploadFile } = uploadService
11 | export const { sendChatMessage } = chatService
12 |
13 | // 导出所有服务
14 | export {
15 | audioService,
16 | markdownService,
17 | uploadService,
18 | chatService,
19 | httpService
20 | }
21 |
22 | // 导出类型
23 | export * from './types'
24 |
25 | // 默认导出所有服务的集合
26 | export default {
27 | audio: audioService,
28 | markdown: markdownService,
29 | upload: uploadService,
30 | chat: chatService,
31 | http: httpService
32 | }
33 |
--------------------------------------------------------------------------------
/frontend/src/apis/markdownService.ts:
--------------------------------------------------------------------------------
1 | import httpService from './http'
2 | import { API_PATHS } from '../config'
3 | import { ChatResponse, ContentStyle } from './types'
4 | import { DEFAULT_PROMPTS } from '../constants'
5 |
6 |
7 | // 获取本地自定义 prompt
8 | function getCustomPrompt(style: string): string | undefined {
9 | try {
10 | const str = localStorage.getItem('customPrompts')
11 | if (str) {
12 | const obj = JSON.parse(str)
13 | if (obj && typeof obj[style] === 'string') {
14 | return obj[style]
15 | }
16 | }
17 | } catch {}
18 | return undefined
19 | }
20 |
21 | /**
22 | * 根据文本和内容风格生成最终 prompt
23 | */
24 | function renderPrompt(style: string, text: string): string {
25 | const promptTpl = getCustomPrompt(style) || DEFAULT_PROMPTS[style] || ''
26 | return promptTpl.replace(/\{content\}/g, text)
27 | }
28 |
29 | /**
30 | * 根据文本生成Markdown内容
31 | * @param text 原始文本
32 | * @param contentStyle 内容风格
33 | * @returns 生成的Markdown内容
34 | */
35 | export const generateMarkdownText = async (text: string, contentStyle: string): Promise<string> => {
36 | try {
37 | const prompt = renderPrompt(contentStyle, text)
38 | const response = await httpService.request({
39 | url: API_PATHS.CHAT_COMPLETIONS,
40 | method: 'POST',
41 | headers: {
42 | 'request-action': 'generate_markdown_text',
43 | },
44 | data: {
45 | model: 'my-bot',
46 | messages: [
47 | {
48 | role: 'user',
49 | content: prompt
50 | }
51 | ]
52 | }
53 | })
54 |
55 | if (response.error) {
56 | throw new Error(response.error)
57 | }
58 |
59 | return response.choices[0]?.message?.content || ''
60 | } catch (error) {
61 | console.error('生成Markdown失败:', error)
62 | throw error
63 | }
64 | }
65 |
--------------------------------------------------------------------------------
/frontend/src/apis/types.ts:
--------------------------------------------------------------------------------
1 | /**
2 | * 通用API响应接口
3 | */
4 | export interface ApiResponse<T = any> {
5 | error: string | null;
6 | data?: T;
7 | message?: string;
8 | }
9 |
10 | /**
11 | * Chat API响应格式
12 | */
13 | export interface ChatResponse {
14 | id: string;
15 | choices: {
16 | message: {
17 | role: string;
18 | content: string;
19 | };
20 | }[];
21 | error?: string | null;
22 | metadata?: Record<string, any>;
23 | }
24 |
25 | /**
26 | * 聊天消息接口
27 | */
28 | export interface ChatMessage {
29 | role: 'user' | 'assistant' | 'system';
30 | content: string;
31 | }
32 |
33 | /**
34 | * 音频任务响应接口
35 | */
36 | export interface AudioTaskResponse {
37 | id: string;
38 | error: string | null;
39 | metadata: {
40 | task_id?: string;
41 | result?: string;
42 | status?: string;
43 | upload_url?: string;
44 | };
45 | }
46 |
47 | /**
48 | * 任务状态类型
49 | */
50 | export type TaskStatus = 'pending' | 'processing' | 'finished' | 'failed';
51 |
52 | /**
53 | * 音频任务结果接口
54 | */
55 | export interface AudioTaskResult {
56 | text: string;
57 | status: TaskStatus;
58 | }
59 |
60 | /**
61 | * 内容风格类型
62 | */
63 | export type ContentStyle = 'note' | 'summary' | 'xiaohongshu' | 'wechat' | 'mind';
64 |
65 | /**
66 | * 上传链接响应接口
67 | */
68 | export interface UploadUrlResponse {
69 | upload_url: string;
70 | }
71 |
72 | /**
73 | * 任务记录接口
74 | */
75 | export interface Task {
76 | id?: number;
77 | fileName: string;
78 | md5: string;
79 | transcriptionText: string;
80 | markdownContent: string;
81 | contentStyle: ContentStyle;
82 | createdAt: string;
83 | }
84 |
--------------------------------------------------------------------------------
/frontend/src/apis/uploadService.ts:
--------------------------------------------------------------------------------
1 | import httpService from './http'
2 | import { API_PATHS } from '../config'
3 | import { AudioTaskResponse } from './types'
4 |
5 | /**
6 | * 获取音频文件上传链接
7 | * @param filename 音频文件名
8 | * @returns 上传URL
9 | */
10 | export const getAudioUploadUrl = async (filename: string): Promise<string> => {
11 | try {
12 | const response = await httpService.request({
13 | url: API_PATHS.UPLOAD_URL,
14 | method: 'POST',
15 | headers: {
16 | 'request-action': 'generate_upload_url'
17 | },
18 | data: {
19 | model: 'my-bot',
20 | messages: [
21 | {
22 | role: 'user',
23 | content: filename
24 | }
25 | ]
26 | }
27 | })
28 |
29 | if (response.error) {
30 | throw new Error(response.error)
31 | }
32 |
33 | if (!response.metadata?.upload_url) {
34 | throw new Error('响应中未找到上传链接')
35 | }
36 |
37 | return response.metadata.upload_url
38 | } catch (error) {
39 | console.error('获取上传链接失败:', error)
40 | throw error
41 | }
42 | }
43 |
44 | /**
45 | * 上传文件到预签名URL
46 | * @param uploadUrl 上传链接
47 | * @param file 文件对象
48 | * @param onProgress 进度回调
49 | * @returns 上传结果
50 | */
51 | export const uploadFile = async (
52 | uploadUrl: string,
53 | file: Blob,
54 | onProgress?: (percent: number) => void
55 | ): Promise<{ success: boolean }> => {
56 | try {
57 | console.log('开始上传文件到:', uploadUrl)
58 |
59 | const result = await httpService.uploadFile(uploadUrl, file, onProgress)
60 | return { success: true }
61 | } catch (error) {
62 | console.error('文件上传失败:', error)
63 | throw error
64 | }
65 | }
66 |
--------------------------------------------------------------------------------
/frontend/src/assets/logo.jpeg:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/hanshuaikang/AI-Media2Doc/ccea52404d64505fb6940bb1836a251a1d676267/frontend/src/assets/logo.jpeg
--------------------------------------------------------------------------------
/frontend/src/assets/logo.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/hanshuaikang/AI-Media2Doc/ccea52404d64505fb6940bb1836a251a1d676267/frontend/src/assets/logo.png
--------------------------------------------------------------------------------
/frontend/src/assets/user.jpeg:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/hanshuaikang/AI-Media2Doc/ccea52404d64505fb6940bb1836a251a1d676267/frontend/src/assets/user.jpeg
--------------------------------------------------------------------------------
/frontend/src/assets/字幕.svg:
--------------------------------------------------------------------------------
1 |
--------------------------------------------------------------------------------
/frontend/src/assets/小红书.svg:
--------------------------------------------------------------------------------
1 |
--------------------------------------------------------------------------------
/frontend/src/assets/微信公众号.svg:
--------------------------------------------------------------------------------
1 |
--------------------------------------------------------------------------------
/frontend/src/assets/思维导图.svg:
--------------------------------------------------------------------------------
1 |
--------------------------------------------------------------------------------
/frontend/src/assets/汇总.svg:
--------------------------------------------------------------------------------
1 |
--------------------------------------------------------------------------------
/frontend/src/assets/笔记.svg:
--------------------------------------------------------------------------------
1 |
--------------------------------------------------------------------------------
/frontend/src/components/AppSidebar.vue:
--------------------------------------------------------------------------------
1 |
136 |
137 |
138 |
244 |
245 |
246 |
746 |
747 |
753 |
--------------------------------------------------------------------------------
/frontend/src/components/Settings/Settings.vue:
--------------------------------------------------------------------------------
1 |
2 |
4 |
5 |
12 |
13 |
14 |
15 |
17 |
18 |
{{ item.name }}
19 |
20 |
21 |
22 |
23 | 请勿修改 {content}
以及思维导图的 json 内容,不然可能会导致生成失败。
24 |
25 |
Prompt:
26 |
28 |
29 |
30 | 保存
31 | 已保存!
32 |
33 |
34 |
35 |
AI 视频图文创作助手
36 |
37 | AI 视频图文创作助手是一款 Web 工具, 基于 AI 大模型, 一键将视频和音频转化为各种风格的文档, 无需登录注册, 前后端本地部署,以极低的成本体验 AI 视频/音频转风格文档服务。
38 |
39 |
45 |
52 |
53 |
54 |
55 |
56 |
57 |
58 |
123 |
124 |
331 |
--------------------------------------------------------------------------------
/frontend/src/components/VideoToMarkdown/ChatPanel.vue:
--------------------------------------------------------------------------------
1 |
126 |
127 |
128 |
129 |
145 |
146 |
147 |
148 |
149 |
151 |
152 |
153 |
154 |
155 |
156 |
157 |
{{ msg.content }}
158 |
159 |
AI 助手
160 |
我
161 |
162 |
163 |
164 |
165 |
166 |
167 |
168 |
169 |
170 |
171 |
172 |
173 |
174 |
175 |
176 |
AI 思考中
177 |
178 |
179 |
180 |
181 |
196 |
197 |
198 |
199 |
663 |
--------------------------------------------------------------------------------
/frontend/src/components/VideoToMarkdown/GeneratedContentPanel.vue:
--------------------------------------------------------------------------------
1 |
2 |
3 |
8 |
9 |
10 |
11 |
15 |
16 |
17 |
18 |
19 |
20 |
21 |
22 |
23 |
122 |
123 |
273 |
--------------------------------------------------------------------------------
/frontend/src/components/VideoToMarkdown/LoadingOverlay.vue:
--------------------------------------------------------------------------------
1 |
2 |
3 |
4 |
5 |
6 |       正在智能处理您的内容
7 |
10 |
11 | {{ stepText }}
12 | {{ percent }}%
13 |
14 |
15 | 请勿关闭或者离开此页面
16 |
17 |
18 |
19 |
20 |
36 |
37 |
147 |
--------------------------------------------------------------------------------
/frontend/src/components/VideoToMarkdown/ProcessSteps.vue:
--------------------------------------------------------------------------------
1 |
22 |
23 |
24 |
25 |
26 |
27 |
28 |
29 | 处理进度
30 |
31 |
32 |
33 |
34 |
39 |
40 |
44 |
45 |
46 |
47 |
48 |
49 |
50 |
51 |
52 |
290 |
--------------------------------------------------------------------------------
/frontend/src/components/VideoToMarkdown/TaskDetail.vue:
--------------------------------------------------------------------------------
1 |
24 |
25 |
26 |
27 |
28 |
29 |
30 |
31 |
32 |
33 |
34 |
35 |
36 |
37 |
38 |
39 |
40 |
41 |
42 |
43 |
44 |
45 |
46 |
47 |
48 |
49 |
74 |
75 |
195 |
--------------------------------------------------------------------------------
/frontend/src/components/VideoToMarkdown/TranscriptionPanel.vue:
--------------------------------------------------------------------------------
1 |
2 |
3 |
12 |
13 |
14 |
15 |         {{ formatTime(seg.start_time) }}
16 |         {{ seg.text }}
17 |
18 |
19 |
20 |
21 |       {{ transcription }}
22 |
23 |
24 |
25 |
26 |
27 |
28 |
116 |
117 |
244 |
--------------------------------------------------------------------------------
/frontend/src/components/VideoToMarkdown/UploadSection.vue:
--------------------------------------------------------------------------------
1 |
77 |
78 |
79 |
80 |
81 |
82 |       你好,我是 AI 图文创作助手
83 |       上传你的视频或MP3音频,我会帮你自动转写并生成多种风格的图文内容。
84 |
85 |
86 |
87 |
88 |
89 |
90 |         {{ item.name }}
91 |
92 |
93 |
94 |
95 |
96 |
97 | {{ acceptHint }}
98 |
99 |
100 |
101 |
103 |
104 |
105 |
106 |
107 |
108 |
109 |
110 | {{ ffmpegLoading ? '正在加载 ffmpeg,请稍候...' : '开始上传' }}
111 |
112 |
113 | 支持拖放或点击上传视频或MP3文件
114 | 支持格式:MP4、MOV、AVI、MKV、WebM、MP3,最大 100MB
115 |
116 |
117 |
118 |
119 |
120 |
121 |
122 | 文件名:
123 | {{ props.fileName }}
124 |
125 |
126 | 文件大小:
127 | {{ (props.fileSize / 1024 / 1024).toFixed(2) }} MB
128 |
129 |
130 | 文件MD5:
131 | {{ props.fileMd5 }}
132 |
133 |
134 |
135 |
137 |
139 |
140 | {{ item.name }}
141 |
142 |
143 |
144 |
145 |
146 |
147 |
148 |
149 | 开始处理
150 |
151 |
152 |
153 |
154 |
155 |
156 |
157 | 重新选择文件
158 |
159 |
160 |
161 |
162 |
163 |
164 |
593 |
--------------------------------------------------------------------------------
/frontend/src/components/VideoToMarkdown/index.vue:
--------------------------------------------------------------------------------
1 |
189 |
190 |
191 |
192 |
193 |
194 |
195 |
198 |
199 |
200 |
201 |
202 |
203 |
204 |
205 |
358 |
--------------------------------------------------------------------------------
/frontend/src/config.js:
--------------------------------------------------------------------------------
1 | /**
2 |  * Project-wide configuration file
3 |  * Reads configuration from environment variables
4 | */
5 |
6 | // Base URL of the API service
7 | export const API_BASE_URL = import.meta.env.VITE_API_BASE_URL || 'http://localhost:8080'
8 |
9 | // API paths used by each module
10 | export const API_PATHS = {
11 | CHAT_COMPLETIONS: '/api/v3/bots/chat/completions',
12 |     UPLOAD_URL: '/api/v3/bots/chat/completions', // API path for requesting an upload URL
13 |     AUDIO_TASK: '/api/v3/bots/chat/completions' // API path for audio tasks
14 | }
15 |
16 | export default {
17 | API_BASE_URL,
18 | API_PATHS
19 | }
20 |
--------------------------------------------------------------------------------
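A minimal usage sketch (an editor's example, not part of the repository): the real request wiring lives in src/apis and is not shown here; this only illustrates how API_BASE_URL and a path from API_PATHS compose into a full endpoint URL.

import { API_BASE_URL, API_PATHS } from './config.js'

// With the default base URL this yields
// "http://localhost:8080/api/v3/bots/chat/completions".
const chatUrl = new URL(API_PATHS.CHAT_COMPLETIONS, API_BASE_URL).toString()
console.log(chatUrl)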
/frontend/src/constants.js:
--------------------------------------------------------------------------------
1 | export const DEFAULT_PROMPTS = {
2 | note: `你是一位高效的知识整理专家。请将以下文本内容转化为结构清晰、重点突出的 Markdown 笔记,便于后续复习和查阅。
3 |
4 | {content}
5 |
6 | 请严格按照以下要求输出:
7 | 1. 精简无关内容,仅保留核心知识点。
8 | 2. 使用分点、分级标题、列表等 Markdown 语法组织内容。
9 | 3. 只返回 Markdown 格式内容,不要添加额外说明。`,
10 |
11 | xiaohongshu: `你是一名资深小红书内容创作者,擅长打造高互动爆文。请将下方内容转化为小红书爆文风格,语言亲切有趣,善用 Emoji 和标签,突出亮点,吸引用户互动。
12 |
13 | {content}
14 |
15 | 输出要求:
16 | 1. 适当分段,便于阅读。
17 | 2. 增加实用建议或个人感受。
18 | 3. 只返回小红书爆文内容,不要添加其他说明。`,
19 |
20 | wechat: `你是一位专业的微信公众号编辑,擅长撰写高质量爆文。请将下方内容转化为公众号爆文风格,逻辑清晰、观点鲜明,适当加入案例或数据增强说服力。
21 |
22 | {content}
23 |
24 | 输出要求:
25 | 1. 结构分明,适当使用小标题。
26 | 2. 语言流畅,吸引读者深入阅读。
27 | 3. 只返回公众号爆文内容,不要添加其他说明。`,
28 |
29 | summary: `你是一名智能文本摘要助手,擅长提炼关键信息。请对以下内容进行高度概括,生成简明扼要的摘要,突出核心观点和要点。
30 |
31 | {content}
32 |
33 | 输出要求:
34 | 1. 只保留最重要的信息。
35 | 2. 语言简洁明了。
36 | 3. 只返回摘要内容,不要添加其他说明。`,
37 |
38 | mind: `你是一名思维导图结构化专家。请将下方文本内容转化为 mind-map 框架兼容的 JSON 格式,结构清晰、层级分明,便于可视化展示。
39 |
40 | {content}
41 |
42 | 输出要求:
43 | 1. 精简无关内容,仅保留核心主题和分支。
44 | 2. 只返回 mind-map 兼容的 JSON 数据,不要添加其他说明。
45 | 3. 参考以下格式输出:
46 | {
47 | "data": {
48 |       "text": "根节点",
49 | "expand": true,
50 | "uid": "430afa37-f0b5-4cf3-a270-d15028b413a9",
51 | "richText": true,
52 | "isActive": false
53 | },
54 | "children": [
55 | {
56 | "data": {
57 |         "text": "二级节点",
58 |         "generalization": {
59 |           "text": "概要",
60 | "uid": "aebb0b2a-35fb-4ae6-a346-87706145bce5",
61 | "richText": true,
62 | "expand": true,
63 | "isActive": false
64 | },
65 | "uid": "b11c529a-3944-4c2f-ba6d-0cd2101ba6ab",
66 | "richText": true,
67 | "expand": true,
68 | "isActive": false
69 | },
70 | "children": [
71 | {
72 | "data": {
73 |             "text": "分支主题",
74 | "uid": "52579e9c-5a75-4dd7-b0dd-b67dc2ee38ab",
75 | "richText": true,
76 | "expand": true,
77 | "isActive": false
78 | },
79 | "children": []
80 | },
81 | {
82 | "data": {
83 |             "text": "分支主题",
84 | "uid": "d29ff394-03bd-4cf6-a5fa-a2f368f538d3",
85 | "richText": true,
86 | "expand": true,
87 | "isActive": false
88 | },
89 | "children": []
90 | }
91 | ]
92 | }
93 | ],
94 | "smmVersion": "0.13.1-fix.2"
95 | }
96 | `
97 | }
98 |
--------------------------------------------------------------------------------
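A small sketch of how these templates might be filled in before calling the model. The actual substitution happens in the frontend services (not shown here), so the buildPrompt helper below is hypothetical.

import { DEFAULT_PROMPTS } from './constants.js'

// Replace the {content} placeholder in the selected style's template
// with the transcription text. buildPrompt is illustrative only.
function buildPrompt(style, transcription) {
  const template = DEFAULT_PROMPTS[style] || DEFAULT_PROMPTS.note
  return template.replace('{content}', transcription)
}

const prompt = buildPrompt('xiaohongshu', 'transcribed text of the uploaded video')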
/frontend/src/main.js:
--------------------------------------------------------------------------------
1 | import { createApp } from 'vue'
2 | import './style.css'
3 | import App from './App.vue'
4 | import ElementPlus from 'element-plus'
5 | import 'element-plus/dist/index.css'
6 | import './scroll-fix.css' // import the global scroll-fix styles
7 | import { LottieAnimation } from 'lottie-web-vue'
8 |
9 | const app = createApp(App)
10 |
11 | app.use(ElementPlus)
12 |
13 | app.component('LottieAnimation', LottieAnimation) // register the Lottie animation component globally
14 |
15 | app.mount('#app')
16 |
--------------------------------------------------------------------------------
/frontend/src/scroll-fix.css:
--------------------------------------------------------------------------------
1 | /*
2 |   Global scroll-fix styles
3 |   This file fixes page scrolling issues so that all content is visible at the default zoom level
4 | */
5 |
6 | html,
7 | body {
8 | scroll-behavior: smooth;
9 | overflow-anchor: auto;
10 | }
11 |
12 | #app {
13 | display: flex;
14 | flex-direction: column;
15 | min-height: 100%;
16 | height: auto;
17 | overflow-y: auto !important;
18 | overflow-anchor: auto;
19 | }
20 |
21 | .el-scrollbar,
22 | .el-scrollbar__wrap {
23 | height: auto !important;
24 | max-height: none !important;
25 | overflow-y: visible !important;
26 | }
27 |
28 | ::-webkit-scrollbar {
29 | width: 6px;
30 | height: 6px;
31 | }
32 |
33 | ::-webkit-scrollbar-thumb {
34 | background-color: rgba(144, 147, 153, 0.5);
35 | border-radius: 3px;
36 | }
37 |
38 | ::-webkit-scrollbar-track {
39 | background-color: transparent;
40 | }
--------------------------------------------------------------------------------
/frontend/src/style.css:
--------------------------------------------------------------------------------
1 | :root {
2 | font-family: system-ui, Avenir, Helvetica, Arial, sans-serif;
3 | line-height: 1.5;
4 | font-weight: 400;
5 |
6 | color-scheme: light dark;
7 | color: rgba(255, 255, 255, 0.87);
8 | background-color: #242424;
9 |
10 | font-synthesis: none;
11 | text-rendering: optimizeLegibility;
12 | -webkit-font-smoothing: antialiased;
13 | -moz-osx-font-smoothing: grayscale;
14 | }
15 |
16 | /* Global reset and base styles */
17 | * {
18 | box-sizing: border-box;
19 | margin: 0;
20 | padding: 0;
21 | }
22 |
23 | html,
24 | body {
25 | width: 100%;
26 | height: 100%;
27 | margin: 0;
28 | padding: 0;
29 | overflow-x: hidden;
30 | }
31 |
32 | a {
33 | font-weight: 500;
34 | color: #646cff;
35 | text-decoration: inherit;
36 | }
37 |
38 | a:hover {
39 | color: #535bf2;
40 | }
41 |
42 | body {
43 | margin: 0;
44 | display: flex;
45 | place-items: center;
46 | min-width: 320px;
47 | min-height: 100vh;
48 | }
49 |
50 | h1 {
51 | font-size: 3.2em;
52 | line-height: 1.1;
53 | }
54 |
55 | button {
56 | border-radius: 8px;
57 | border: 1px solid transparent;
58 | padding: 0.6em 1.2em;
59 | font-size: 1em;
60 | font-weight: 500;
61 | font-family: inherit;
62 | background-color: #1a1a1a;
63 | cursor: pointer;
64 | transition: border-color 0.25s;
65 | }
66 |
67 | button:hover {
68 | border-color: #646cff;
69 | }
70 |
71 | button:focus,
72 | button:focus-visible {
73 | outline: 4px auto -webkit-focus-ring-color;
74 | }
75 |
76 | .card {
77 | padding: 2em;
78 | }
79 |
80 | #app {
81 | max-width: 1280px;
82 | margin: 0 auto;
83 | padding: 2rem;
84 | text-align: center;
85 | width: 100%;
86 | min-height: 100vh;
87 | position: relative;
88 | display: flex;
89 | flex-direction: column;
90 | margin-left: 260px;
91 |   /* reserve space for the fixed sidebar */
92 | }
93 |
94 | /* Ensure all containers default to the border-box box model */
95 | div,
96 | section,
97 | article,
98 | aside,
99 | header,
100 | footer,
101 | nav,
102 | main {
103 | box-sizing: border-box;
104 | }
105 |
106 | /* Smooth scrolling */
107 | html {
108 | scroll-behavior: smooth;
109 | }
110 |
111 | @media (prefers-color-scheme: light) {
112 | :root {
113 | color: #213547;
114 | background-color: #ffffff;
115 | }
116 |
117 | a:hover {
118 | color: #747bff;
119 | }
120 |
121 | button {
122 | background-color: #f9f9f9;
123 | }
124 | }
--------------------------------------------------------------------------------
/frontend/src/utils/db.js:
--------------------------------------------------------------------------------
1 | import { openDB, deleteDB } from 'idb'
2 |
3 | const dbName = 'videoTasksDB'
4 | const dbVersion = 1
5 |
6 | let dbInstance = null
7 |
8 | export async function initDB() {
9 | if (dbInstance) return dbInstance
10 |
11 | try {
12 | dbInstance = await openDB(dbName, dbVersion, {
13 | upgrade(db) {
14 |       // Create the base object store
15 | const taskStore = db.createObjectStore('tasks', { keyPath: 'id', autoIncrement: true })
16 |       taskStore.createIndex('md5', 'md5', { unique: false }) // non-unique: the same MD5 may exist with several content styles
17 | taskStore.createIndex('createdAt', 'createdAt', { unique: false })
18 | taskStore.createIndex('fileName', 'fileName', { unique: false })
19 | taskStore.createIndex('contentStyle', 'contentStyle', { unique: false })
20 | taskStore.createIndex('md5_contentStyle', ['md5', 'contentStyle'], { unique: true })
21 | }
22 | })
23 | return dbInstance
24 | } catch (error) {
25 | console.error('数据库初始化失败:', error)
26 | throw error
27 | }
28 | }
29 |
30 | // Helper: serialize transcriptionText
31 | function serializeTranscriptionText(val) {
32 | if (Array.isArray(val)) {
33 | try {
34 | return JSON.stringify(val)
35 | } catch {
36 | return ''
37 | }
38 | }
39 | return val
40 | }
41 |
42 | // Helper: deserialize transcriptionText
43 | function deserializeTranscriptionText(val) {
44 | if (typeof val === 'string') {
45 | try {
46 |       // Check whether the string is a serialized JSON array
47 | const arr = JSON.parse(val)
48 | if (Array.isArray(arr) && arr.length && typeof arr[0] === 'object' && 'text' in arr[0]) {
49 | return arr
50 | }
51 | } catch {
52 |       // Not a JSON string; return the original string as-is
53 | }
54 | }
55 | return val
56 | }
57 |
58 | export async function saveTask(taskData) {
59 | try {
60 | const db = await initDB()
61 | const taskToSave = {
62 | ...taskData,
63 | createdAt: new Date().toISOString(),
64 | contentStyle: taskData.contentStyle,
65 | transcriptionText: serializeTranscriptionText(taskData.transcriptionText)
66 | }
67 | const taskId = await db.add('tasks', taskToSave)
68 |
69 |     // Trim storage so only the 10 most recent records are kept
70 | await cleanupOldTasks(db);
71 |
72 |     return taskId; // return the new task id
73 | } catch (error) {
74 | console.error('保存任务失败:', error)
75 | throw error
76 | }
77 | }
78 |
79 | // Clean up old tasks, keeping only the latest 10 records
80 | async function cleanupOldTasks(db) {
81 | try {
82 | const MAX_TASKS = 10;
83 |     // Fetch all tasks, then sort by creation time, newest first
84 | const allTasks = await db.getAllFromIndex('tasks', 'createdAt');
85 | allTasks.sort((a, b) => new Date(b.createdAt) - new Date(a.createdAt));
86 |
87 |     // If there are more than MAX_TASKS tasks, delete the older ones
88 | if (allTasks.length > MAX_TASKS) {
89 | const tasksToDelete = allTasks.slice(MAX_TASKS);
90 | for (const task of tasksToDelete) {
91 | await db.delete('tasks', task.id);
92 | }
93 | console.log(`已清理 ${tasksToDelete.length} 条旧任务记录,保留最新的 ${MAX_TASKS} 条记录`);
94 | }
95 | } catch (error) {
96 | console.error('清理旧任务失败:', error);
97 | }
98 | }
99 |
100 | // Deserialize a whole array of tasks
101 | function deserializeTasks(tasks) {
102 | return tasks.map(task => ({
103 | ...task,
104 | transcriptionText: deserializeTranscriptionText(task.transcriptionText)
105 | }))
106 | }
107 |
108 | export async function getAllTasks() {
109 | try {
110 | const db = await initDB()
111 | const tasks = await db.getAllFromIndex('tasks', 'createdAt')
112 | return deserializeTasks(tasks)
113 | } catch (error) {
114 | console.error('获取任务列表失败:', error)
115 | return []
116 | }
117 | }
118 |
119 | export async function getTaskByMd5(md5) {
120 | const db = await initDB()
121 | return db.getFromIndex('tasks', 'md5', md5)
122 | }
123 |
124 |
125 | export async function checkTaskExistsByMd5AndStyle(md5, contentStyle) {
126 | try {
127 | const db = await initDB()
128 |     // Try the compound md5 + contentStyle index first
129 | try {
130 | const task = await db.getFromIndex('tasks', 'md5_contentStyle', [md5, contentStyle])
131 |       return !!task // true if a matching task exists, false otherwise
132 | } catch (e) {
133 | console.warn('组合索引查询失败,回退到手动筛选:', e)
134 | const tasks = await db.getAllFromIndex('tasks', 'md5', md5)
135 | return tasks.some(task => task.contentStyle === contentStyle)
136 | }
137 | } catch (error) {
138 | console.error('检查任务失败:', error)
139 | throw error
140 | }
141 | }
142 |
143 | export const checkTaskExistsByMd5 = async (md5) => {
144 | try {
145 | const db = await initDB()
146 | const tasks = await db.getAllFromIndex('tasks', 'md5', md5)
147 | return tasks.length > 0
148 | } catch (error) {
149 | console.error('检查任务失败:', error)
150 | throw error
151 | }
152 | }
153 |
154 | export async function deleteTask(taskId) {
155 | try {
156 | const db = await initDB()
157 | await db.delete('tasks', taskId)
158 | return true
159 | } catch (error) {
160 | console.error('删除任务失败:', error)
161 | throw error
162 | }
163 | }
164 |
165 | export async function resetDatabase() {
166 |   // Close any existing connection first
167 | if (dbInstance) {
168 | dbInstance.close();
169 | dbInstance = null;
170 | }
171 |
172 | try {
173 |     // Delete the entire database
174 | await deleteDB(dbName);
175 | console.log('数据库已删除');
176 |
177 |     // Re-initialize the database
178 | dbInstance = await openDB(dbName, dbVersion, {
179 | upgrade(db) {
180 |         // Create a brand-new object store
181 | const taskStore = db.createObjectStore('tasks', { keyPath: 'id', autoIncrement: true })
182 | taskStore.createIndex('md5', 'md5', { unique: false })
183 | taskStore.createIndex('createdAt', 'createdAt', { unique: false })
184 | taskStore.createIndex('fileName', 'fileName', { unique: false })
185 | taskStore.createIndex('contentStyle', 'contentStyle', { unique: false })
186 | taskStore.createIndex('md5_contentStyle', ['md5', 'contentStyle'], { unique: true })
187 |
188 | console.log('数据库已重建');
189 | }
190 | })
191 |
192 | return true;
193 | } catch (error) {
194 | console.error('数据库重置失败:', error);
195 | return false;
196 | }
197 | }
198 |
199 | export async function getAnyTaskByMd5(md5) {
200 | try {
201 | const db = await initDB()
202 | const tasks = await db.getAllFromIndex('tasks', 'md5', md5)
203 | const result = tasks.length > 0 ? tasks[0] : null
204 | if (result) {
205 | result.transcriptionText = deserializeTranscriptionText(result.transcriptionText)
206 | }
207 | return result
208 | } catch (error) {
209 | console.error('根据MD5获取任务失败:', error)
210 | return null
211 | }
212 | }
213 |
214 | export async function getTaskByID(taskId) {
215 | try {
216 | const db = await initDB()
217 | const task = await db.get('tasks', taskId)
218 | if (task) {
219 | task.transcriptionText = deserializeTranscriptionText(task.transcriptionText)
220 | }
221 | return task
222 | } catch (error) {
223 | console.error('通过ID获取任务失败:', error)
224 | return null
225 | }
226 | }
227 |
--------------------------------------------------------------------------------
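A usage sketch for the IndexedDB helpers above (editor's example). The md5 value and the markdownContent field are illustrative; contentStyle and transcriptionText match the indexes and serialization logic in this module.

import { saveTask, checkTaskExistsByMd5AndStyle, getTaskByID } from './utils/db.js'

async function saveDemoTask() {
  const md5 = 'd41d8cd98f00b204e9800998ecf8427e' // illustrative hash
  if (await checkTaskExistsByMd5AndStyle(md5, 'note')) return

  const taskId = await saveTask({
    md5,
    fileName: 'demo.mp4',
    contentStyle: 'note',
    // Arrays are serialized to JSON before storage and deserialized on read.
    transcriptionText: [{ start_time: 0, text: 'hello world' }],
    markdownContent: '# Demo' // illustrative field, not required by the schema
  })

  return getTaskByID(taskId) // transcriptionText comes back as an array again
}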
/frontend/src/utils/eventBus.js:
--------------------------------------------------------------------------------
1 | import { reactive } from 'vue'
2 |
3 | export const eventBus = reactive({
4 | handlers: {},
5 |
6 | on(event, callback) {
7 | if (!this.handlers[event]) {
8 | this.handlers[event] = []
9 | }
10 | this.handlers[event].push(callback)
11 | },
12 |
13 | emit(event, ...args) {
14 | if (this.handlers[event]) {
15 | this.handlers[event].forEach(callback => callback(...args))
16 | }
17 | },
18 |
19 | off(event, callback) {
20 | if (this.handlers[event]) {
21 | if (callback) {
22 | this.handlers[event] = this.handlers[event].filter(cb => cb !== callback)
23 | } else {
24 | delete this.handlers[event]
25 | }
26 | }
27 | }
28 | })
29 |
--------------------------------------------------------------------------------
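A short usage sketch; the 'task-saved' event name is made up for illustration.

import { eventBus } from './utils/eventBus.js'

// Subscribe, emit, then remove the handler to avoid leaking callbacks.
const onTaskSaved = (taskId) => console.log('task saved:', taskId)
eventBus.on('task-saved', onTaskSaved)
eventBus.emit('task-saved', 42)
eventBus.off('task-saved', onTaskSaved)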
/frontend/src/utils/ffmpeg.js:
--------------------------------------------------------------------------------
1 | let ffmpeg = null
2 | let ffmpegLoaded = false
3 | let ffmpegLoading = false
4 |
5 | export const loadFFmpeg = async () => {
6 | if (ffmpegLoaded) return ffmpeg
7 |
8 | if (ffmpegLoading) {
9 | return new Promise((resolve) => {
10 | const checkLoaded = setInterval(() => {
11 | if (ffmpegLoaded) {
12 | clearInterval(checkLoaded)
13 | resolve(ffmpeg)
14 | }
15 | }, 100)
16 | })
17 | }
18 |
19 | ffmpegLoading = true
20 |
21 | try {
22 | const script = document.createElement('script')
23 | script.src = 'https://unpkg.com/@ffmpeg/ffmpeg@0.10.1/dist/ffmpeg.min.js'
24 | document.head.appendChild(script)
25 |
26 |     await new Promise((resolve, reject) => {
27 |       script.onload = resolve; script.onerror = reject // reject if the CDN script fails to load
28 | })
29 |
30 | ffmpeg = FFmpeg.createFFmpeg({
31 | log: true,
32 | progress: ({ ratio }) => {
33 |         // progress callback (currently unused)
34 | }
35 | })
36 |
37 | await ffmpeg.load()
38 | ffmpegLoaded = true
39 | return ffmpeg
40 |
41 | } catch (error) {
42 | console.error('FFmpeg 加载错误:', error)
43 | throw error
44 | } finally {
45 | ffmpegLoading = false
46 | }
47 | }
48 |
49 | export const extractAudio = async (videoData) => {
50 | try {
51 | ffmpeg.FS('writeFile', 'input_video.mp4', videoData)
52 | await ffmpeg.run('-i', 'input_video.mp4', '-q:a', '0', '-map', 'a', 'output_audio.mp3')
53 | return ffmpeg.FS('readFile', 'output_audio.mp3')
54 | } catch (error) {
55 | console.error('音频提取失败:', error)
56 | throw error
57 | }
58 | }
59 |
--------------------------------------------------------------------------------
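A sketch of the expected call order (editor's example), assuming a File picked by the user: loadFFmpeg must complete before extractAudio writes to the ffmpeg virtual filesystem.

import { loadFFmpeg, extractAudio } from './utils/ffmpeg.js'

async function videoFileToMp3(file) {
  await loadFFmpeg() // loads ffmpeg.wasm from the CDN on first use
  const videoData = new Uint8Array(await file.arrayBuffer())
  const mp3Bytes = await extractAudio(videoData) // Uint8Array of MP3 data
  return new Blob([mp3Bytes], { type: 'audio/mpeg' })
}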
/frontend/src/utils/md5.js:
--------------------------------------------------------------------------------
1 | import SparkMD5 from 'spark-md5'
2 |
3 | /**
4 |  * Compute the MD5 hash of a file or raw buffer
5 |  * @param {File|ArrayBuffer|Uint8Array} file - the file or data to hash
6 |  * @returns {Promise<string>} the MD5 hash as a hex string
7 | */
8 | export const calculateMD5 = async (file) => {
9 | return new Promise((resolve, reject) => {
10 | try {
11 | const blobSlice = File.prototype.slice || File.prototype.mozSlice || File.prototype.webkitSlice
12 | const spark = new SparkMD5.ArrayBuffer()
13 |
14 |       // If the input is an ArrayBuffer or Uint8Array
15 | if (file instanceof ArrayBuffer || file instanceof Uint8Array) {
16 | const buffer = file instanceof Uint8Array ? file.buffer : file
17 | spark.append(buffer)
18 | resolve(spark.end())
19 | return
20 | }
21 |
22 |       // If the input is a File or Blob
23 | const fileReader = new FileReader()
24 |       const chunkSize = 2097152 // read in 2 MB chunks
25 | const chunks = Math.ceil(file.size / chunkSize)
26 | let currentChunk = 0
27 |
28 | fileReader.onload = (e) => {
29 | spark.append(e.target.result)
30 | currentChunk++
31 |
32 | if (currentChunk < chunks) {
33 | loadNext()
34 | } else {
35 | const md5Hash = spark.end()
36 | resolve(md5Hash)
37 | }
38 | }
39 |
40 | fileReader.onerror = (error) => {
41 | reject(error)
42 | }
43 |
44 | const loadNext = () => {
45 | const start = currentChunk * chunkSize
46 | const end = Math.min(start + chunkSize, file.size)
47 | fileReader.readAsArrayBuffer(blobSlice.call(file, start, end))
48 | }
49 |
50 | loadNext()
51 | } catch (error) {
52 | reject(error)
53 | }
54 | })
55 | }
56 |
57 | export default {
58 | calculateMD5
59 | }
60 |
--------------------------------------------------------------------------------
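A usage sketch with a file input (editor's example, assuming the page contains one); calculateMD5 also accepts an ArrayBuffer or Uint8Array directly.

import { calculateMD5 } from './utils/md5.js'

// Hash the file the user picked; the result is a hex string.
const input = document.querySelector('input[type="file"]')
input.addEventListener('change', async () => {
  const file = input.files[0]
  if (!file) return
  console.log(file.name, await calculateMD5(file))
})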
/frontend/vite.config.js:
--------------------------------------------------------------------------------
1 | import { defineConfig, loadEnv } from 'vite'
2 | import vue from '@vitejs/plugin-vue'
3 |
4 | export default defineConfig(({ mode }) => {
5 |   // Load environment variables for the current mode
6 | const env = loadEnv(mode, process.cwd())
7 |
8 | return {
9 | plugins: [vue()],
10 | server: {
11 | headers: {
12 | 'Cross-Origin-Opener-Policy': 'same-origin',
13 | 'Cross-Origin-Embedder-Policy': 'require-corp',
14 | 'Cross-Origin-Resource-Policy': 'same-origin'
15 | },
16 | proxy: {
17 | '/api': {
18 | target: env.VITE_API_BASE_URL,
19 | changeOrigin: true
20 | },
21 | },
22 | cors: true
23 | },
24 | define: {
25 | 'process.env': {}
26 | }
27 | }
28 | })
29 |
--------------------------------------------------------------------------------
/variables.env:
--------------------------------------------------------------------------------
1 | ENDPOINT_ID=xxx
2 | ARK_API_KEY=xxx
3 | TOS_ACCESS_KEY=xxx
4 | TOS_SECRET_KEY=xxx
5 | TOS_ENDPOINT=xxx
6 | TOS_REGION=xxx
7 | TOS_BUCKET=xxx
8 | AUC_APP_ID=xxx
9 | AUC_ACCESS_TOKEN=xxx
10 |
--------------------------------------------------------------------------------