├── .dockerignore ├── .github ├── pull_request_template.md ├── release-drafter.yml └── workflows │ ├── auto-release-generator.yml │ ├── codeReview.yml │ └── discord-release-notification.yml ├── .gitignore ├── Dockerfile ├── LICENSE ├── README-en.md ├── README-ja.md ├── README.md ├── app ├── __init__.py ├── asgi.py ├── config │ ├── __init__.py │ └── config.py ├── controllers │ ├── base.py │ ├── manager │ │ ├── base_manager.py │ │ ├── memory_manager.py │ │ └── redis_manager.py │ ├── ping.py │ ├── v1 │ │ ├── base.py │ │ ├── llm.py │ │ └── video.py │ └── v2 │ │ ├── base.py │ │ └── script.py ├── models │ ├── __init__.py │ ├── const.py │ ├── exception.py │ ├── schema.py │ └── schema_v2.py ├── router.py ├── services │ ├── SDE │ │ ├── prompt.py │ │ └── short_drama_explanation.py │ ├── SDP │ │ ├── generate_script_short.py │ │ └── utils │ │ │ ├── short_schema.py │ │ │ ├── step1_subtitle_analyzer_openai.py │ │ │ ├── step5_merge_script.py │ │ │ └── utils.py │ ├── __init__.py │ ├── audio_merger.py │ ├── clip_video.py │ ├── generate_narration_script.py │ ├── generate_video.py │ ├── llm.py │ ├── material.py │ ├── merger_video.py │ ├── script_service.py │ ├── state.py │ ├── subtitle.py │ ├── subtitle_merger.py │ ├── task.py │ ├── update_script.py │ ├── video.py │ ├── video_service.py │ ├── voice.py │ └── youtube_service.py ├── test │ ├── test_gemini.py │ ├── test_moviepy.py │ ├── test_moviepy_merge.py │ ├── test_moviepy_speed.py │ └── test_qwen.py └── utils │ ├── check_script.py │ ├── ffmpeg_utils.py │ ├── gemini_analyzer.py │ ├── qwenvl_analyzer.py │ ├── script_generator.py │ ├── utils.py │ └── video_processor.py ├── changelog.py ├── config.example.toml ├── docker-compose.yml ├── docker-entrypoint.sh ├── docker └── Dockerfile_MiniCPM ├── docs ├── check-en.png ├── check-zh.png ├── img001-en.png ├── img001-zh.png ├── img002-en.png ├── img002-zh.png ├── img003-en.png ├── img003-zh.png ├── img004-en.png ├── img004-zh.png ├── img005-en.png ├── img005-zh.png ├── img006-en.png ├── 
img006-zh.png ├── img007-en.png ├── img007-zh.png ├── index-en.png ├── index-zh.png └── voice-list.txt ├── main.py ├── project_version ├── release-notes.md ├── requirements.txt ├── resource ├── fonts │ └── fonts_in_here.txt ├── public │ └── index.html ├── scripts │ └── script_in_here.txt ├── songs │ └── song_in_here.txt ├── srt │ └── srt_in_here.txt └── videos │ └── video_in_here.txt ├── video_pipeline.py ├── webui.py ├── webui.txt └── webui ├── __init__.py ├── components ├── __init__.py ├── audio_settings.py ├── basic_settings.py ├── merge_settings.py ├── review_settings.py ├── script_settings.py ├── subtitle_settings.py ├── system_settings.py └── video_settings.py ├── config └── settings.py ├── i18n ├── __init__.py ├── en.json └── zh.json ├── tools ├── base.py ├── generate_script_docu.py ├── generate_script_short.py └── generate_short_summary.py └── utils ├── cache.py ├── file_utils.py ├── merge_video.py └── vision_analyzer.py /.dockerignore: -------------------------------------------------------------------------------- 1 | # Exclude common Python files and directories 2 | venv/ 3 | __pycache__/ 4 | *.pyc 5 | *.pyo 6 | *.pyd 7 | *.pyz 8 | *.pyw 9 | *.pyi 10 | *.egg-info/ 11 | 12 | # Exclude development and local files 13 | .env 14 | .env.* 15 | *.log 16 | *.db 17 | 18 | # Exclude version control system files 19 | .git/ 20 | .gitignore 21 | .svn/ 22 | 23 | storage/ 24 | config.toml 25 | -------------------------------------------------------------------------------- /.github/pull_request_template.md: -------------------------------------------------------------------------------- 1 | ## PR 类型 2 | 请选择一个适当的标签(必选其一): 3 | 4 | - [ ] 破坏性变更 (breaking) 5 | - [ ] 安全修复 (security) 6 | - [ ] 新功能 (feature) 7 | - [ ] Bug修复 (bug) 8 | - [ ] 代码重构 (refactor) 9 | - [ ] 依赖升级 (upgrade) 10 | - [ ] 文档更新 (docs) 11 | - [ ] 翻译相关 (lang-all) 12 | - [ ] 内部改进 (internal) 13 | 14 | ## 描述 15 | 16 | 17 | ## 相关 Issue 18 | 19 | 20 | ## 更改内容 21 | 22 | 23 | - xxx 24 | - xxx 25 | - xxx 26 | 27 | ## 
测试 28 | 29 | 30 | - [ ] 单元测试 31 | - [ ] 集成测试 32 | - [ ] 手动测试 33 | 34 | ## 截图(如果适用) 35 | 36 | 37 | ## 检查清单 38 | 39 | - [ ] 我的代码遵循项目的代码风格 40 | - [ ] 我已经添加了必要的测试 41 | - [ ] 我已经更新了相关文档 42 | - [ ] 我的更改不会引入新的警告 43 | - [ ] PR 标题清晰描述了更改内容 44 | 45 | ## 补充说明 46 | -------------------------------------------------------------------------------- /.github/release-drafter.yml: -------------------------------------------------------------------------------- 1 | name-template: 'v$RESOLVED_VERSION' 2 | tag-template: 'v$RESOLVED_VERSION' 3 | categories: 4 | - title: '🚀 新功能' 5 | labels: 6 | - 'feature' 7 | - 'enhancement' 8 | - title: '🐛 Bug 修复' 9 | labels: 10 | - 'fix' 11 | - 'bug' 12 | - title: '🧰 维护' 13 | labels: 14 | - 'chore' 15 | - 'maintenance' 16 | - title: '📚 文档' 17 | labels: 18 | - 'docs' 19 | - 'documentation' 20 | 21 | change-template: '- $TITLE @$AUTHOR (#$NUMBER)' 22 | 23 | version-resolver: 24 | major: 25 | labels: 26 | - 'major' 27 | - 'breaking' 28 | minor: 29 | labels: 30 | - 'minor' 31 | - 'feature' 32 | patch: 33 | labels: 34 | - 'patch' 35 | - 'fix' 36 | - 'bug' 37 | - 'maintenance' 38 | default: patch 39 | 40 | template: | 41 | ## 更新内容 42 | 43 | $CHANGES 44 | 45 | ## 贡献者 46 | 47 | $CONTRIBUTORS -------------------------------------------------------------------------------- /.github/workflows/auto-release-generator.yml: -------------------------------------------------------------------------------- 1 | name: Auto Release Generator 2 | 3 | on: 4 | push: 5 | branches: 6 | - main 7 | paths: 8 | - 'project_version' # 确保路径准确,不使用通配符 9 | 10 | jobs: 11 | check-version-and-release: 12 | runs-on: ubuntu-latest 13 | permissions: 14 | contents: write # 用于创建 releases 15 | pull-requests: write # 可能需要的额外权限 16 | steps: 17 | - name: Checkout code 18 | uses: actions/checkout@v4 19 | with: 20 | fetch-depth: 0 # 获取完整历史以检查变更 21 | 22 | - name: Debug Environment 23 | run: | 24 | echo "工作目录内容:" 25 | ls -la 26 | echo "project_version 文件内容:" 27 | cat project_version || echo "文件不存在" 28 | 
29 | - name: Check if version changed 30 | id: check-version 31 | run: | 32 | # 获取当前版本号 33 | if [ -f "project_version" ]; then 34 | CURRENT_VERSION=$(cat project_version) 35 | echo "Current version: $CURRENT_VERSION" 36 | 37 | # 获取上一个提交中的版本号 38 | git fetch origin main 39 | if git show HEAD~1:project_version &>/dev/null; then 40 | PREVIOUS_VERSION=$(git show HEAD~1:project_version) 41 | echo "Previous version from commit: $PREVIOUS_VERSION" 42 | 43 | if [[ "$CURRENT_VERSION" != "$PREVIOUS_VERSION" ]]; then 44 | echo "Version changed from $PREVIOUS_VERSION to $CURRENT_VERSION" 45 | echo "version_changed=true" >> $GITHUB_OUTPUT 46 | echo "current_version=$CURRENT_VERSION" >> $GITHUB_OUTPUT 47 | else 48 | echo "Version unchanged" 49 | echo "version_changed=false" >> $GITHUB_OUTPUT 50 | fi 51 | else 52 | echo "Cannot find previous version, assuming first release" 53 | echo "version_changed=true" >> $GITHUB_OUTPUT 54 | echo "current_version=$CURRENT_VERSION" >> $GITHUB_OUTPUT 55 | fi 56 | else 57 | echo "project_version file not found" 58 | echo "version_changed=false" >> $GITHUB_OUTPUT 59 | fi 60 | 61 | - name: Set up Python 62 | if: steps.check-version.outputs.version_changed == 'true' 63 | uses: actions/setup-python@v4 64 | with: 65 | python-version: '3.10' 66 | 67 | - name: Install OpenAI SDK 68 | if: steps.check-version.outputs.version_changed == 'true' 69 | run: pip install openai 70 | 71 | - name: Get commits since last release 72 | if: steps.check-version.outputs.version_changed == 'true' 73 | id: get-commits 74 | run: | 75 | # 直接获取最近10个提交 76 | echo "Getting last 13 commits" 77 | COMMITS=$(git log -13 --pretty=format:"%s") 78 | 79 | echo "Commits to be included in release notes:" 80 | echo "$COMMITS" 81 | 82 | echo "commits<> $GITHUB_OUTPUT 83 | echo "$COMMITS" >> $GITHUB_OUTPUT 84 | echo "EOF" >> $GITHUB_OUTPUT 85 | 86 | - name: Generate release notes with AI 87 | if: steps.check-version.outputs.version_changed == 'true' 88 | id: generate-notes 89 | env: 90 | 
OPENAI_API_KEY: ${{ secrets.OPENAI_API_KEY }} 91 | OPENAI_BASE_URL: https://api.siliconflow.cn/v1 92 | CURRENT_VERSION: ${{ steps.check-version.outputs.current_version }} 93 | run: | 94 | cat > generate_release_notes.py << 'EOF' 95 | import os 96 | import sys 97 | from openai import OpenAI 98 | 99 | # 设置OpenAI客户端 100 | client = OpenAI( 101 | api_key=os.environ.get("OPENAI_API_KEY"), 102 | base_url=os.environ.get("OPENAI_BASE_URL") 103 | ) 104 | 105 | # 获取提交信息和版本号 106 | commits = sys.stdin.read() 107 | version = os.environ.get("CURRENT_VERSION") 108 | 109 | # 调用API生成发布说明 110 | try: 111 | response = client.chat.completions.create( 112 | model="deepseek-ai/DeepSeek-V3", 113 | messages=[ 114 | {"role": "system", "content": "你是一个专业的软件发布说明生成助手。请根据提供的git提交信息,生成一个结构化的发布说明,包括新功能、改进、修复的bug等类别。使用中文回复。"}, 115 | {"role": "user", "content": f"请根据以下git提交信息,生成一个版本{version}的发布说明,内容详细且完整,相似的提交信息不要重复出现: \n\n{commits}"} 116 | ], 117 | temperature=0.7, 118 | ) 119 | release_notes = response.choices[0].message.content 120 | print(f"commits: \n{commits}") 121 | print(f"大模型总结的发布说明: \n{release_notes}") 122 | except Exception as e: 123 | print(f"Error calling OpenAI API: {e}") 124 | release_notes = f"# 版本 {version} 发布\n\n## 更新内容\n\n" 125 | # 简单处理提交信息 126 | for line in commits.strip().split("\n"): 127 | if line: 128 | release_notes += f"- {line}\n" 129 | 130 | # 输出生成的发布说明 131 | print(release_notes) 132 | 133 | # 保存到GitHub输出 134 | with open(os.environ.get("GITHUB_OUTPUT"), "a") as f: 135 | f.write("release_notes< send_discord_notification.py << 'EOF' 30 | import os 31 | import sys 32 | import json 33 | from openai import OpenAI 34 | import requests 35 | from datetime import datetime 36 | from discord_webhook import DiscordWebhook, DiscordEmbed 37 | 38 | # 设置OpenAI客户端 39 | client = OpenAI( 40 | api_key=os.environ.get("OPENAI_API_KEY"), 41 | base_url=os.environ.get("OPENAI_BASE_URL") 42 | ) 43 | 44 | # 获取GitHub release信息 45 | github_token = os.environ.get("GITHUB_TOKEN") 46 | repo = 
os.environ.get("GITHUB_REPOSITORY") 47 | 48 | # 直接从GitHub API获取最新release 49 | headers = {"Authorization": f"token {github_token}"} 50 | response = requests.get(f"https://api.github.com/repos/{repo}/releases/latest", headers=headers) 51 | 52 | if response.status_code != 200: 53 | print(f"Error fetching release info: {response.status_code}") 54 | print(response.text) 55 | sys.exit(1) 56 | 57 | release_info = response.json() 58 | 59 | # 提取需要的信息 60 | release_notes = release_info.get("body", "无发布说明") 61 | version = release_info.get("tag_name", "未知版本") 62 | 63 | # 安全地解析发布日期 64 | published_at = release_info.get("published_at") 65 | if published_at: 66 | try: 67 | release_date = datetime.strptime(published_at, "%Y-%m-%dT%H:%M:%SZ").strftime("%Y年%m月%d日") 68 | except ValueError: 69 | release_date = "未知日期" 70 | else: 71 | release_date = "未知日期" 72 | 73 | # 使用大模型润色发布说明 74 | try: 75 | response = client.chat.completions.create( 76 | model="deepseek-ai/DeepSeek-V3", 77 | messages=[ 78 | {"role": "system", "content": "你是一个专业的软件发布公告优化助手。请优化以下发布说明,使其更加生动、专业,并明确区分新功能、优化内容、修复内容和移除内容等类别。保持原有信息的完整性,同时增强可读性和专业性。使用中文回复。\n\n重要:Discord不支持复杂的Markdown格式,因此请使用简单的格式化:\n1. 使用**粗体**和*斜体*而不是Markdown标题\n2. 使用简单的列表符号(•)而不是Markdown列表\n3. 避免使用#、##等标题格式\n4. 不要使用表格、代码块等复杂格式\n5. 确保段落之间有空行\n6. 
使用简单的分隔符(如 ------)来分隔不同部分"}, 79 | {"role": "user", "content": f"请优化以下版本{version}的发布说明,使其更适合在Discord社区发布。请记住Discord不支持复杂的Markdown格式,所以使用简单的格式化方式:\n\n{release_notes}"} 80 | ], 81 | temperature=0.7, 82 | ) 83 | enhanced_notes = response.choices[0].message.content 84 | print(f"大模型润色后的发布说明: \n{enhanced_notes}") 85 | except Exception as e: 86 | print(f"Error calling OpenAI API: {e}") 87 | enhanced_notes = release_notes # 如果API调用失败,使用原始发布说明 88 | 89 | # 创建Discord消息 90 | webhook_url = os.environ.get("DISCORD_WEBHOOK_URL") 91 | if not webhook_url: 92 | print("Error: DISCORD_WEBHOOK_URL not set") 93 | sys.exit(1) 94 | 95 | webhook = DiscordWebhook(url=webhook_url) 96 | 97 | # 创建嵌入式消息 98 | embed = DiscordEmbed( 99 | title=f"🚀 NarratoAI {version} 发布公告", 100 | description=f"发布日期: {release_date}", 101 | color="5865F2" # Discord蓝色 102 | ) 103 | 104 | # 处理发布说明,确保不超过Discord的字段限制 105 | # Discord字段值限制为1024个字符 106 | MAX_FIELD_LENGTH = 1024 107 | 108 | # 如果内容很短,直接添加 109 | if enhanced_notes and len(enhanced_notes) <= MAX_FIELD_LENGTH: 110 | embed.add_embed_field(name="📋 更新内容", value=enhanced_notes) 111 | elif enhanced_notes: 112 | # 尝试按段落或明显的分隔符分割内容 113 | sections = [] 114 | 115 | # 检查是否有明显的新功能、优化、修复等部分 116 | if "**新增功能**" in enhanced_notes or "**新功能**" in enhanced_notes: 117 | parts = enhanced_notes.split("**新增功能**", 1) 118 | if len(parts) > 1: 119 | intro = parts[0].strip() 120 | if intro: 121 | sections.append(("📋 更新概述", intro)) 122 | 123 | rest = "**新增功能**" + parts[1] 124 | 125 | # 进一步分割剩余部分 126 | feature_end = -1 127 | for marker in ["**优化内容**", "**性能优化**", "**修复内容**", "**bug修复**", "**问题修复**"]: 128 | pos = rest.lower().find(marker.lower()) 129 | if pos != -1 and (feature_end == -1 or pos < feature_end): 130 | feature_end = pos 131 | 132 | if feature_end != -1: 133 | sections.append(("✨ 新增功能", rest[:feature_end].strip())) 134 | rest = rest[feature_end:] 135 | else: 136 | sections.append(("✨ 新增功能", rest.strip())) 137 | rest = "" 138 | 139 | # 继续分割剩余部分 140 | if rest: 141 | optimize_end 
= -1 142 | for marker in ["**修复内容**", "**bug修复**", "**问题修复**"]: 143 | pos = rest.lower().find(marker.lower()) 144 | if pos != -1 and (optimize_end == -1 or pos < optimize_end): 145 | optimize_end = pos 146 | 147 | if optimize_end != -1: 148 | sections.append(("⚡ 优化内容", rest[:optimize_end].strip())) 149 | sections.append(("🔧 修复内容", rest[optimize_end:].strip())) 150 | else: 151 | sections.append(("⚡ 优化内容", rest.strip())) 152 | else: 153 | # 如果没有明显的结构,按长度分割 154 | chunks = [enhanced_notes[i:i+MAX_FIELD_LENGTH] for i in range(0, len(enhanced_notes), MAX_FIELD_LENGTH)] 155 | for i, chunk in enumerate(chunks): 156 | if i == 0: 157 | sections.append(("📋 更新内容", chunk)) 158 | else: 159 | sections.append((f"📋 更新内容(续{i})", chunk)) 160 | 161 | # 添加所有部分到embed 162 | for name, content in sections: 163 | if len(content) > MAX_FIELD_LENGTH: 164 | # 如果单个部分仍然过长,进一步分割 165 | sub_chunks = [content[i:i+MAX_FIELD_LENGTH] for i in range(0, len(content), MAX_FIELD_LENGTH)] 166 | for i, chunk in enumerate(sub_chunks): 167 | if i == 0: 168 | embed.add_embed_field(name=name, value=chunk) 169 | else: 170 | embed.add_embed_field(name=f"{name}(续{i})", value=chunk) 171 | else: 172 | embed.add_embed_field(name=name, value=content) 173 | else: 174 | embed.add_embed_field(name="📋 更新内容", value="无详细更新内容") 175 | 176 | # 添加下载链接 177 | html_url = release_info.get("html_url", "") 178 | if html_url: 179 | embed.add_embed_field(name="📥 下载链接", value=html_url, inline=False) 180 | 181 | # 设置页脚 182 | embed.set_footer(text=f"NarratoAI 团队 • {release_date}") 183 | embed.set_timestamp() 184 | 185 | # 添加嵌入式消息到webhook 186 | webhook.add_embed(embed) 187 | 188 | # 发送消息 189 | response = webhook.execute() 190 | if response: 191 | print(f"Discord notification sent with status code: {response.status_code}") 192 | else: 193 | print("Failed to send Discord notification") 194 | EOF 195 | 196 | # 执行脚本 197 | python send_discord_notification.py -------------------------------------------------------------------------------- 
/.gitignore: -------------------------------------------------------------------------------- 1 | .DS_Store 2 | /config.toml 3 | /storage/ 4 | /.idea/ 5 | /app/services/__pycache__ 6 | /app/__pycache__/ 7 | /app/config/__pycache__/ 8 | /app/models/__pycache__/ 9 | /app/utils/__pycache__/ 10 | /*/__pycache__/* 11 | .vscode 12 | /**/.streamlit 13 | __pycache__ 14 | logs/ 15 | 16 | node_modules 17 | # VuePress 默认临时文件目录 18 | /sites/docs/.vuepress/.temp 19 | # VuePress 默认缓存目录 20 | /sites/docs/.vuepress/.cache 21 | # VuePress 默认构建生成的静态文件目录 22 | /sites/docs/.vuepress/dist 23 | # 模型目录 24 | /models/ 25 | ./models/* 26 | resource/scripts/*.json 27 | resource/videos/*.mp4 28 | resource/songs/*.mp3 29 | resource/songs/*.flac 30 | resource/fonts/*.ttc 31 | resource/fonts/*.ttf 32 | resource/fonts/*.otf 33 | resource/srt/*.srt 34 | app/models/faster-whisper-large-v2/* 35 | app/models/faster-whisper-large-v3/* 36 | app/models/bert/* 37 | -------------------------------------------------------------------------------- /Dockerfile: -------------------------------------------------------------------------------- 1 | # 构建阶段 2 | FROM python:3.10-slim-bullseye as builder 3 | 4 | # 设置工作目录 5 | WORKDIR /build 6 | 7 | # 安装构建依赖 8 | RUN apt-get update && apt-get install -y \ 9 | git \ 10 | git-lfs \ 11 | && rm -rf /var/lib/apt/lists/* 12 | 13 | # 创建虚拟环境 14 | RUN python -m venv /opt/venv 15 | ENV PATH="/opt/venv/bin:$PATH" 16 | 17 | # 首先安装 PyTorch(因为它是最大的依赖) 18 | RUN pip install --no-cache-dir torch torchvision torchaudio 19 | 20 | # 然后安装其他依赖 21 | COPY requirements.txt . 
22 | RUN pip install --no-cache-dir -r requirements.txt 23 | 24 | # 运行阶段 25 | FROM python:3.10-slim-bullseye 26 | 27 | # 设置工作目录 28 | WORKDIR /NarratoAI 29 | 30 | # 从builder阶段复制虚拟环境 31 | COPY --from=builder /opt/venv /opt/venv 32 | ENV PATH="/opt/venv/bin:$PATH" 33 | 34 | # 安装运行时依赖 35 | RUN apt-get update && apt-get install -y \ 36 | imagemagick \ 37 | ffmpeg \ 38 | wget \ 39 | git-lfs \ 40 | && rm -rf /var/lib/apt/lists/* \ 41 | && sed -i '/ 2 |

NarratoAI 😎📽️

3 |

An all-in-one AI-powered tool for film commentary and automated video editing.🎬🎞️

4 | 5 | 6 |

📖 English | 简体中文 | 日本語

7 |
8 | 9 | [//]: # ( harry0703%2FNarratoAI | Trendshift) 10 |
11 |
12 | NarratoAI is an automated video narration tool that provides an all-in-one solution for script writing, automated video editing, voice-over, and subtitle generation, powered by LLM to enhance efficient content creation. 13 |
14 | 15 | [![madewithlove](https://img.shields.io/badge/made_with-%E2%9D%A4-red?style=for-the-badge&labelColor=orange)](https://github.com/linyqh/NarratoAI) 16 | [![GitHub license](https://img.shields.io/github/license/linyqh/NarratoAI?style=for-the-badge)](https://github.com/linyqh/NarratoAI/blob/main/LICENSE) 17 | [![GitHub issues](https://img.shields.io/github/issues/linyqh/NarratoAI?style=for-the-badge)](https://github.com/linyqh/NarratoAI/issues) 18 | [![GitHub stars](https://img.shields.io/github/stars/linyqh/NarratoAI?style=for-the-badge)](https://github.com/linyqh/NarratoAI/stargazers) 19 | 20 | 💬 Join the open source community to get project updates and the latest news. 21 | 22 |

🎉🎉🎉 Official Documentation 🎉🎉🎉

23 |

Home

24 | 25 | ![](docs/index-en.png) 26 | 27 |

Video Review Interface

28 | 29 | ![](docs/check-en.png) 30 | 31 | 32 | 33 | ## Latest News 34 | - 2025.05.11 Released new version 0.6.0, supports **short drama commentary** and optimized editing process 35 | - 2025.03.06 Released new version 0.5.2, supports DeepSeek R1 and DeepSeek V3 models for short drama mixing 36 | - 2024.12.16 Released new version 0.3.9, supports Alibaba Qwen2-VL model for video understanding; supports short drama mixing 37 | - 2024.11.24 Opened Discord community: https://discord.com/invite/V2pbAqqQNb 38 | - 2024.11.11 Migrated open source community, welcome to join! [Join the official community](https://github.com/linyqh/NarratoAI/wiki) 39 | - 2024.11.10 Released official documentation, details refer to [Official Documentation](https://p9mf6rjv3c.feishu.cn/wiki/SP8swLLZki5WRWkhuFvc2CyInDg) 40 | - 2024.11.10 Released new version v0.3.5; optimized video editing process, 41 | 42 | ## Major Benefits 🎉 43 | From now on, fully support DeepSeek model! Register to enjoy 20 million free tokens (worth 14 yuan platform quota), editing a 10-minute video only costs 0.1 yuan! 44 | 45 | 🔥 Quick benefits: 46 | 1️⃣ Click the link to register: https://cloud.siliconflow.cn/i/pyOKqFCV 47 | 2️⃣ Log in with your phone number, **be sure to fill in the invitation code: pyOKqFCV** 48 | 3️⃣ Receive a 14 yuan quota, experience high cost-effective AI editing quickly! 49 | 50 | 💡 Low cost, high creativity: 51 | Silicon Flow API Key can be integrated with one click, doubling intelligent editing efficiency! 52 | (Note: The invitation code is the only proof for benefit collection, automatically credited after registration) 53 | 54 | Immediately take action to unlock your AI productivity with "pyOKqFCV"! 
55 | 56 | 😊 Update Steps: 57 | Integration Package: Click update.bat one-click update script 58 | Code Build: Use git pull to fetch the latest code 59 | 60 | ## Announcement 📢 61 | _**Note⚠️: Recently, someone has been impersonating the author on x (Twitter) to issue tokens on the pump.fun platform! This is a scam!!! Do not be deceived! Currently, NarratoAI has not made any official promotions on x (Twitter), please be cautious**_ 62 | 63 | Below is a screenshot of this person's x (Twitter) homepage 64 | 65 | Screenshot_20250109_114131_Samsung Internet 66 | 67 | ## Future Plans 🥳 68 | - [x] Windows Integration Pack Release 69 | - [x] Optimized the story generation process and improved the generation effect 70 | - [x] Released version 0.3.5 integration package 71 | - [x] Support Alibaba Qwen2-VL large model for video understanding 72 | - [x] Support short drama commentary 73 | - [x] One-click merge materials 74 | - [x] One-click transcription 75 | - [x] One-click clear cache 76 | - [ ] Support exporting to Jianying drafts 77 | - [X] Support short drama commentary 78 | - [ ] Character face matching 79 | - [ ] Support automatic matching based on voiceover, script, and video materials 80 | - [ ] Support more TTS engines 81 | - [ ] ... 82 | 83 | ## System Requirements 📦 84 | 85 | - Recommended minimum: CPU with 4 cores or more, 8GB RAM or more, GPU is not required 86 | - Windows 10/11 or MacOS 11.0 or above 87 | - [Python 3.12+](https://www.python.org/downloads/) 88 | 89 | ## Feedback & Suggestions 📢 90 | 91 | 👏 1. You can submit [issue](https://github.com/linyqh/NarratoAI/issues) or [pull request](https://github.com/linyqh/NarratoAI/pulls) 92 | 93 | 💬 2. [Join the open source community exchange group](https://github.com/linyqh/NarratoAI/wiki) 94 | 95 | 📷 3. 
Follow the official account [NarratoAI助手] to grasp the latest news 96 | 97 | ## Reference Projects 📚 98 | - https://github.com/FujiwaraChoki/MoneyPrinter 99 | - https://github.com/harry0703/MoneyPrinterTurbo 100 | 101 | This project was refactored based on the above projects with the addition of video narration features. Thanks to the original authors for their open-source spirit 🥳🥳🥳 102 | 103 | ## Buy the Author a Cup of Coffee ☕️ 104 |
105 | Image 1 106 | Image 2 107 |
108 | 109 | ## License 📝 110 | 111 | Click to view [`LICENSE`](LICENSE) file 112 | 113 | ## Star History 114 | 115 | [![Star History Chart](https://api.star-history.com/svg?repos=linyqh/NarratoAI&type=Date)](https://star-history.com/#linyqh/NarratoAI&Date) 116 | -------------------------------------------------------------------------------- /README-ja.md: -------------------------------------------------------------------------------- 1 |
2 |

NarratoAI 😎📽️

3 |

一体型AI映画解説および自動ビデオ編集ツール🎬🎞️

4 | 5 |

📖 简体中文 | English | 日本語

6 |
7 | 8 | [//]: # ( harry0703%2FNarratoAI | Trendshift) 9 |
10 |
11 | NarratoAIは、LLMを活用してスクリプト作成、自動ビデオ編集、ナレーション、字幕生成の一体型ソリューションを提供する自動化ビデオナレーションツールです。 12 |
13 | 14 | [![madewithlove](https://img.shields.io/badge/made_with-%E2%9D%A4-red?style=for-the-badge&labelColor=orange)](https://github.com/linyqh/NarratoAI) 15 | [![GitHub license](https://img.shields.io/github/license/linyqh/NarratoAI?style=for-the-badge)](https://github.com/linyqh/NarratoAI/blob/main/LICENSE) 16 | [![GitHub issues](https://img.shields.io/github/issues/linyqh/NarratoAI?style=for-the-badge)](https://github.com/linyqh/NarratoAI/issues) 17 | [![GitHub stars](https://img.shields.io/github/stars/linyqh/NarratoAI?style=for-the-badge)](https://github.com/linyqh/NarratoAI/stargazers) 18 | 19 | 💬 Discordオープンソースコミュニティに参加して、プロジェクトの最新情報を入手しましょう。 20 | 21 |

🎉🎉🎉 公式ドキュメント 🎉🎉🎉

22 |

ホーム

23 | 24 | ![](docs/index-zh.png) 25 | 26 |

ビデオレビューインターフェース

27 | 28 | ![](docs/check-zh.png) 29 | 30 |
31 | 32 | ## 最新情報 33 | - 2024.11.24 Discordコミュニティ開設:https://discord.gg/uVAJftcm 34 | - 2024.11.11 オープンソースコミュニティに移行、参加を歓迎します! [公式コミュニティに参加](https://github.com/linyqh/NarratoAI/wiki) 35 | - 2024.11.10 公式ドキュメント公開、詳細は [公式ドキュメント](https://p9mf6rjv3c.feishu.cn/wiki/SP8swLLZki5WRWkhuFvc2CyInDg) を参照 36 | - 2024.11.10 新バージョンv0.3.5リリース;ビデオ編集プロセスの最適化 37 | 38 | ## 今後の計画 🥳 39 | - [x] Windows統合パックリリース 40 | - [x] ストーリー生成プロセスの最適化、生成効果の向上 41 | - [x] バージョン0.3.5統合パックリリース 42 | - [x] アリババQwen2-VL大規模モデルのビデオ理解サポート 43 | - [x] 短編ドラマの解説サポート 44 | - [x] 一クリックで素材を統合 45 | - [x] 一クリックで文字起こし 46 | - [x] 一クリックでキャッシュをクリア 47 | - [ ] ジャン映草稿のエクスポートをサポート 48 | - [ ] 主役の顔のマッチング 49 | - [ ] 音声、スクリプト、ビデオ素材に基づいて自動マッチングをサポート 50 | - [ ] より多くのTTSエンジンをサポート 51 | - [ ] ... 52 | 53 | ## システム要件 📦 54 | 55 | - 推奨最低:CPU 4コア以上、メモリ8GB以上、GPUは必須ではありません 56 | - Windows 10またはMacOS 11.0以上 57 | 58 | ## フィードバックと提案 📢 59 | 60 | 👏 1. [issue](https://github.com/linyqh/NarratoAI/issues)または[pull request](https://github.com/linyqh/NarratoAI/pulls)を提出できます 61 | 62 | 💬 2. [オープンソースコミュニティ交流グループに参加](https://github.com/linyqh/NarratoAI/wiki) 63 | 64 | 📷 3. 公式アカウント【NarratoAI助手】をフォローして最新情報を入手 65 | 66 | ## 参考プロジェクト 📚 67 | - https://github.com/FujiwaraChoki/MoneyPrinter 68 | - https://github.com/harry0703/MoneyPrinterTurbo 69 | 70 | このプロジェクトは上記のプロジェクトを基にリファクタリングされ、映画解説機能が追加されました。オリジナルの作者に感謝します 🥳🥳🥳 71 | 72 | ## 作者にコーヒーを一杯おごる ☕️ 73 |
74 | Image 1 75 | Image 2 76 |
77 | 78 | ## ライセンス 📝 79 | 80 | [`LICENSE`](LICENSE) ファイルをクリックして表示 81 | 82 | ## Star History 83 | 84 | [![Star History Chart](https://api.star-history.com/svg?repos=linyqh/NarratoAI&type=Date)](https://star-history.com/#linyqh/NarratoAI&Date) 85 | -------------------------------------------------------------------------------- /README.md: -------------------------------------------------------------------------------- 1 | 2 |
3 |

NarratoAI 😎📽️

4 |

一站式 AI 影视解说+自动化剪辑工具🎬🎞️

5 | 6 | 7 |

📖 English | 简体中文 | 日本語

8 |
9 | 10 | [//]: # ( harry0703%2FNarratoAI | Trendshift) 11 |
12 |
13 | NarratoAI 是一个自动化影视解说工具,基于LLM实现文案撰写、自动化视频剪辑、配音和字幕生成的一站式流程,助力高效内容创作。 14 |
15 | 16 | [![madewithlove](https://img.shields.io/badge/made_with-%E2%9D%A4-red?style=for-the-badge&labelColor=orange)](https://github.com/linyqh/NarratoAI) 17 | [![GitHub license](https://img.shields.io/github/license/linyqh/NarratoAI?style=for-the-badge)](https://github.com/linyqh/NarratoAI/blob/main/LICENSE) 18 | [![GitHub issues](https://img.shields.io/github/issues/linyqh/NarratoAI?style=for-the-badge)](https://github.com/linyqh/NarratoAI/issues) 19 | [![GitHub stars](https://img.shields.io/github/stars/linyqh/NarratoAI?style=for-the-badge)](https://github.com/linyqh/NarratoAI/stargazers) 20 | 21 | 💬 加入 discord 开源社区,获取项目动态和最新资讯。 22 | 23 |

🎉🎉🎉 官方文档 🎉🎉🎉

24 |

首页

25 | 26 | ![](docs/index-zh.png) 27 | 28 |

视频审查界面

29 | 30 | ![](docs/check-zh.png) 31 | 32 |
33 | 34 | ## 最新资讯 35 | - 2025.05.11 发布新版本 0.6.0,支持 **短剧解说** 和 优化剪辑流程 36 | - 2025.03.06 发布新版本 0.5.2,支持 DeepSeek R1 和 DeepSeek V3 模型进行短剧混剪 37 | - 2024.12.16 发布新版本 0.3.9,支持阿里 Qwen2-VL 模型理解视频;支持短剧混剪 38 | - 2024.11.24 开通 discord 社群:https://discord.com/invite/V2pbAqqQNb 39 | - 2024.11.11 迁移开源社群,欢迎加入! [加入官方社群](https://github.com/linyqh/NarratoAI/wiki) 40 | - 2024.11.10 发布官方文档,详情参见 [官方文档](https://p9mf6rjv3c.feishu.cn/wiki/SP8swLLZki5WRWkhuFvc2CyInDg) 41 | - 2024.11.10 发布新版本 v0.3.5;优化视频剪辑流程, 42 | 43 | ## 重磅福利 🎉 44 | 即日起全面支持DeepSeek模型!注册即享2000万免费Token(价值14元平台配额),剪辑10分钟视频仅需0.1元! 45 | 46 | 🔥 快速领福利: 47 | 1️⃣ 点击链接注册:https://cloud.siliconflow.cn/i/pyOKqFCV 48 | 2️⃣ 使用手机号登录,**务必填写邀请码:pyOKqFCV** 49 | 3️⃣ 领取14元配额,极速体验高性价比AI剪辑 50 | 51 | 💡 小成本大创作: 52 | 硅基流动API Key一键接入,智能剪辑效率翻倍! 53 | (注:邀请码为福利领取唯一凭证,注册后自动到账) 54 | 55 | 立即行动,用「pyOKqFCV」解锁你的AI生产力! 56 | 57 | 😊 更新步骤: 58 | 整合包:点击 update.bat 一键更新脚本 59 | 代码构建:使用 git pull 拉去最新代码 60 | 61 | ## 公告 📢 62 | _**注意⚠️:近期在 x (推特) 上发现有人冒充作者在 pump.fun 平台上发行代币! 这是骗子!!! 不要被割了韭菜 63 | !!!目前 NarratoAI 没有在 x(推特) 上做任何官方宣传,注意甄别**_ 64 | 65 | 下面是此人 x(推特) 首页截图 66 | 67 | Screenshot_20250109_114131_Samsung Internet 68 | 69 | ## 未来计划 🥳 70 | - [x] windows 整合包发布 71 | - [x] 优化剧情生成流程,提升生成效果 72 | - [x] 发布 0.3.5 整合包 73 | - [x] 支持阿里 Qwen2-VL 大模型理解视频 74 | - [x] 支持短剧混剪 75 | - [x] 一键合并素材 76 | - [x] 一键转录 77 | - [x] 一键清理缓存 78 | - [ ] 支持导出剪映草稿 79 | - [X] 支持短剧解说 80 | - [ ] 主角人脸匹配 81 | - [ ] 支持根据口播,文案,视频素材自动匹配 82 | - [ ] 支持更多 TTS 引擎 83 | - [ ] ... 84 | 85 | ## 配置要求 📦 86 | 87 | - 建议最低 CPU 4核或以上,内存 8G 或以上,显卡非必须 88 | - Windows 10/11 或 MacOS 11.0 以上系统 89 | - [Python 3.12+](https://www.python.org/downloads/) 90 | 91 | ## 反馈建议 📢 92 | 93 | 👏 1. 可以提交 [issue](https://github.com/linyqh/NarratoAI/issues)或者 [pull request](https://github.com/linyqh/NarratoAI/pulls) 94 | 95 | 💬 2. [加入开源社区交流群](https://github.com/linyqh/NarratoAI/wiki) 96 | 97 | 📷 3. 
关注公众号【NarratoAI助手】,掌握最新资讯 98 | 99 | ## 参考项目 📚 100 | - https://github.com/FujiwaraChoki/MoneyPrinter 101 | - https://github.com/harry0703/MoneyPrinterTurbo 102 | 103 | 该项目基于以上项目重构而来,增加了影视解说功能,感谢大佬的开源精神 🥳🥳🥳 104 | 105 | ## 请作者喝一杯咖啡 ☕️ 106 |
107 | Image 1 108 | Image 2 109 |
110 | 111 | ## 许可证 📝 112 | 113 | 点击查看 [`LICENSE`](LICENSE) 文件 114 | 115 | ## Star History 116 | 117 | [![Star History Chart](https://api.star-history.com/svg?repos=linyqh/NarratoAI&type=Date)](https://star-history.com/#linyqh/NarratoAI&Date) 118 | 119 | -------------------------------------------------------------------------------- /app/__init__.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/linyqh/NarratoAI/7a8de5e79175ff7a7a633f43418f427badc4af53/app/__init__.py -------------------------------------------------------------------------------- /app/asgi.py: -------------------------------------------------------------------------------- 1 | """Application implementation - ASGI.""" 2 | 3 | import os 4 | 5 | from fastapi import FastAPI, Request 6 | from fastapi.exceptions import RequestValidationError 7 | from fastapi.responses import JSONResponse 8 | from loguru import logger 9 | from fastapi.staticfiles import StaticFiles 10 | from fastapi.middleware.cors import CORSMiddleware 11 | 12 | from app.config import config 13 | from app.models.exception import HttpException 14 | from app.router import root_api_router 15 | from app.utils import utils 16 | from app.utils import ffmpeg_utils 17 | 18 | 19 | def exception_handler(request: Request, e: HttpException): 20 | return JSONResponse( 21 | status_code=e.status_code, 22 | content=utils.get_response(e.status_code, e.data, e.message), 23 | ) 24 | 25 | 26 | def validation_exception_handler(request: Request, e: RequestValidationError): 27 | return JSONResponse( 28 | status_code=400, 29 | content=utils.get_response( 30 | status=400, data=e.errors(), message="field required" 31 | ), 32 | ) 33 | 34 | 35 | def get_application() -> FastAPI: 36 | """Initialize FastAPI application. 37 | 38 | Returns: 39 | FastAPI: Application object instance. 
40 | 41 | """ 42 | instance = FastAPI( 43 | title=config.project_name, 44 | description=config.project_description, 45 | version=config.project_version, 46 | debug=False, 47 | ) 48 | instance.include_router(root_api_router) 49 | instance.add_exception_handler(HttpException, exception_handler) 50 | instance.add_exception_handler(RequestValidationError, validation_exception_handler) 51 | return instance 52 | 53 | 54 | app = get_application() 55 | 56 | # Configures the CORS middleware for the FastAPI app 57 | cors_allowed_origins_str = os.getenv("CORS_ALLOWED_ORIGINS", "") 58 | origins = cors_allowed_origins_str.split(",") if cors_allowed_origins_str else ["*"] 59 | app.add_middleware( 60 | CORSMiddleware, 61 | allow_origins=origins, 62 | allow_credentials=True, 63 | allow_methods=["*"], 64 | allow_headers=["*"], 65 | ) 66 | 67 | task_dir = utils.task_dir() 68 | app.mount( 69 | "/tasks", StaticFiles(directory=task_dir, html=True, follow_symlink=True), name="" 70 | ) 71 | 72 | public_dir = utils.public_dir() 73 | app.mount("/", StaticFiles(directory=public_dir, html=True), name="") 74 | 75 | 76 | @app.on_event("shutdown") 77 | def shutdown_event(): 78 | logger.info("shutdown event") 79 | 80 | 81 | @app.on_event("startup") 82 | def startup_event(): 83 | logger.info("startup event") 84 | 85 | # 检测FFmpeg硬件加速 86 | hwaccel_info = ffmpeg_utils.detect_hardware_acceleration() 87 | if hwaccel_info["available"]: 88 | logger.info(f"FFmpeg硬件加速检测结果: 可用 | 类型: {hwaccel_info['type']} | 编码器: {hwaccel_info['encoder']} | 独立显卡: {hwaccel_info['is_dedicated_gpu']} | 参数: {hwaccel_info['hwaccel_args']}") 89 | else: 90 | logger.warning(f"FFmpeg硬件加速不可用: {hwaccel_info['message']}, 将使用CPU软件编码") 91 | -------------------------------------------------------------------------------- /app/config/__init__.py: -------------------------------------------------------------------------------- 1 | import os 2 | import sys 3 | 4 | from loguru import logger 5 | 6 | from app.config import config 7 | from 
def __init_logger():
    """Configure the global loguru logger: remove default sinks and log to
    stdout at the configured level, with project-relative file paths."""
    # _log_file = utils.storage_dir("logs/server.log")
    _lvl = config.log_level
    # Project root = three directory levels above this file (app/config/__init__.py)
    root_dir = os.path.dirname(
        os.path.dirname(os.path.dirname(os.path.realpath(__file__)))
    )

    def format_record(record):
        """Return the log format string, rewriting the record's absolute
        source-file path to a ./relative one for readability."""
        # Full path of the file that emitted this record
        file_path = record["file"].path
        # Convert the absolute path to one relative to the project root
        relative_path = os.path.relpath(file_path, root_dir)
        # Update the path stored on the record in place
        record["file"].path = f"./{relative_path}"
        # Return the format string (adjust here to change the log layout)
        _format = (
            "{time:%Y-%m-%d %H:%M:%S} | "
            + "{level} | "
            + '"{file.path}:{line}": {function} '
            + "- {message}"
            + "\n"
        )
        return _format

    logger.remove()

    logger.add(
        sys.stdout,
        level=_lvl,
        format=format_record,
        colorize=True,
    )

    # logger.add(
    #     _log_file,
    #     level=_lvl,
    #     format=format_record,
    #     rotation="00:00",
    #     retention="3 days",
    #     backtrace=True,
    #     diagnose=True,
    #     enqueue=True,
    # )
def load_config():
    """Load config.toml, bootstrapping it from config.example.toml if missing.

    Returns:
        dict: The parsed configuration.  Falls back to utf-8-sig decoding
        when a BOM makes the default toml loader fail.
    """
    # fix: IsADirectoryError: [Errno 21] Is a directory: '/NarratoAI/config.toml'
    # (presumably a bad volume mount created config.toml as a directory — remove it)
    if os.path.isdir(config_file):
        shutil.rmtree(config_file)

    # First run: seed config.toml from the bundled example file
    if not os.path.isfile(config_file):
        example_file = f"{root_dir}/config.example.toml"
        if os.path.isfile(example_file):
            shutil.copyfile(example_file, config_file)
            logger.info(f"copy config.example.toml to config.toml")

    logger.info(f"load config from file: {config_file}")

    try:
        _config_ = toml.load(config_file)
    except Exception as e:
        # Files saved with a UTF-8 BOM trip toml.load; re-read with utf-8-sig
        logger.warning(f"load config failed: {str(e)}, try to load as utf-8-sig")
        with open(config_file, mode="r", encoding="utf-8-sig") as fp:
            _cfg_content = fp.read()
        _config_ = toml.loads(_cfg_content)
    return _config_
class TaskManager:
    """Base class for a bounded-concurrency task runner.

    At most ``max_concurrent_tasks`` tasks execute at once; surplus tasks
    are queued and drained as running tasks finish.  Queue storage is
    delegated to subclasses via create_queue/enqueue/dequeue/is_queue_empty.
    """

    def __init__(self, max_concurrent_tasks: int):
        self.max_concurrent_tasks = max_concurrent_tasks
        self.current_tasks = 0
        # RLock, not Lock: task_done() holds the lock and calls check_queue(),
        # which acquires it again.  A non-reentrant lock deadlocks there.
        self.lock = threading.RLock()
        self.queue = self.create_queue()

    def create_queue(self):
        """Return the queue backend; implemented by subclasses."""
        raise NotImplementedError()

    def add_task(self, func: Callable, *args: Any, **kwargs: Any):
        """Run *func* immediately if a slot is free, otherwise enqueue it."""
        with self.lock:
            if self.current_tasks < self.max_concurrent_tasks:
                print(f"add task: {func.__name__}, current_tasks: {self.current_tasks}")
                self.execute_task(func, *args, **kwargs)
            else:
                print(
                    f"enqueue task: {func.__name__}, current_tasks: {self.current_tasks}"
                )
                self.enqueue({"func": func, "args": args, "kwargs": kwargs})

    def execute_task(self, func: Callable, *args: Any, **kwargs: Any):
        """Reserve a concurrency slot and run *func* on a worker thread."""
        with self.lock:
            # Reserve the slot at schedule time.  The original code incremented
            # the counter inside the worker thread, so a burst of add_task()
            # calls could all pass the limit check before any worker bumped it,
            # exceeding max_concurrent_tasks.
            self.current_tasks += 1
        thread = threading.Thread(
            target=self.run_task, args=(func, *args), kwargs=kwargs
        )
        thread.start()

    def run_task(self, func: Callable, *args: Any, **kwargs: Any):
        """Worker-thread entry point: run the task, then release its slot."""
        try:
            func(*args, **kwargs)  # invoke the task with its captured args/kwargs
        finally:
            self.task_done()

    def check_queue(self):
        """Start the next queued task if a concurrency slot is available."""
        with self.lock:
            if (
                self.current_tasks < self.max_concurrent_tasks
                and not self.is_queue_empty()
            ):
                task_info = self.dequeue()
                func = task_info["func"]
                args = task_info.get("args", ())
                kwargs = task_info.get("kwargs", {})
                self.execute_task(func, *args, **kwargs)

    def task_done(self):
        """Release a slot and drain the queue; called from worker threads."""
        with self.lock:
            self.current_tasks -= 1
            self.check_queue()

    def enqueue(self, task: Dict):
        raise NotImplementedError()

    def dequeue(self):
        raise NotImplementedError()

    def is_queue_empty(self):
        raise NotImplementedError()
class RedisTaskManager(TaskManager):
    """TaskManager backed by a Redis list, so queued tasks survive process
    restarts and can be shared between workers pointed at the same Redis."""

    def __init__(self, max_concurrent_tasks: int, redis_url: str):
        self.redis_client = redis.Redis.from_url(redis_url)
        super().__init__(max_concurrent_tasks)

    def create_queue(self):
        # The "queue" here is just the Redis list key name
        return "task_queue"

    def enqueue(self, task: Dict):
        # NOTE(review): .copy() is shallow — the "params" replacement below
        # writes into the same nested kwargs dict the caller holds; consider
        # copy.deepcopy if callers reuse the task dict.
        task_with_serializable_params = task.copy()

        # VideoParams is not JSON-serializable; store its dict form instead
        if "params" in task["kwargs"] and isinstance(
            task["kwargs"]["params"], VideoParams
        ):
            task_with_serializable_params["kwargs"]["params"] = task["kwargs"][
                "params"
            ].dict()

        # Replace the function object with its name; FUNC_MAP restores it on dequeue
        task_with_serializable_params["func"] = task["func"].__name__
        self.redis_client.rpush(self.queue, json.dumps(task_with_serializable_params))

    def dequeue(self):
        task_json = self.redis_client.lpop(self.queue)
        if task_json:
            task_info = json.loads(task_json)
            # Map the stored function name back to the actual callable
            task_info["func"] = FUNC_MAP[task_info["func"]]

            # Rehydrate the serialized params dict into a VideoParams instance
            if "params" in task_info["kwargs"] and isinstance(
                task_info["kwargs"]["params"], dict
            ):
                task_info["kwargs"]["params"] = VideoParams(
                    **task_info["kwargs"]["params"]
                )

            return task_info
        return None

    def is_queue_empty(self):
        return self.redis_client.llen(self.queue) == 0
def new_router(dependencies=None):
    """Create the /api/v1 APIRouter.

    Optional *dependencies* (e.g. auth checks) are applied to every route
    later registered on the returned router.
    """
    r = APIRouter()
    r.tags = ["V1"]
    r.prefix = "/api/v1"
    # Apply the auth dependencies to all routes when provided
    if dependencies:
        r.dependencies = dependencies
    return r
@router.post(
    "/transcription",
    response_model=VideoTranscriptionResponse,
    summary="Transcribe video content using Gemini"
)
async def transcribe_video(
    request: Request,
    video_name: str,
    language: str = "zh-CN",
    video_file: UploadFile = File(...)
):
    """
    Transcribe video content with Gemini: timestamps, scene descriptions
    and spoken dialogue.

    Args:
        request: FastAPI request object.
        video_name: Display name of the video.
        language: Language code, defaults to zh-CN.
        video_file: The uploaded video file.

    Returns:
        Standard response envelope wrapping {"transcription": ...}.
    """
    # Temporary directory for storing the uploaded video
    os.makedirs(UPLOAD_DIR, exist_ok=True)

    # Security fix: basename() strips any directory components a malicious
    # client could embed in the filename (e.g. "../../etc/passwd"), keeping
    # the write confined to UPLOAD_DIR.
    safe_filename = os.path.basename(video_file.filename or "uploaded_video")
    video_path = os.path.join(UPLOAD_DIR, safe_filename)
    with open(video_path, "wb") as buffer:
        content = await video_file.read()
        buffer.write(content)

    try:
        transcription = llm.gemini_video_transcription(
            video_name=video_name,
            video_path=video_path,
            language=language,
            llm_provider_video=config.app.get("video_llm_provider", "gemini")
        )
        response = {"transcription": transcription}
        return utils.get_response(200, response)
    finally:
        # Remove the temporary file once processing is done
        if os.path.exists(video_path):
            os.remove(video_path)
def v2_router(dependencies=None):
    """Create the /api/v2 APIRouter.

    Optional *dependencies* (e.g. auth checks) are applied to every route
    later registered on the returned router.
    """
    r = APIRouter()
    r.tags = ["V2"]
    r.prefix = "/api/v2"
    # Apply the auth dependencies to all routes when provided
    if dependencies:
        r.dependencies = dependencies
    return r
@router.post(
    "/scripts/crop",
    response_model=CropVideoResponse,
    summary="同步请求;裁剪视频 (V2)"
)
async def crop_video(
    request: CropVideoRequest,
    background_tasks: BackgroundTasks
):
    """
    V2 API: crop the source video into sub-clips according to the script.

    Returns a task_id and a mapping of script segments to sub-clip paths.
    Exceptions are logged and re-raised so FastAPI returns a 500.
    """
    try:
        # Delegate the actual cropping to the video service
        video_service = VideoService()
        task_id, subclip_videos = await video_service.crop_video(
            video_path=request.video_origin_path,
            video_script=request.video_script
        )
        logger.debug(f"裁剪视频成功,视频片段路径: {subclip_videos}")
        # NOTE(review): leftover debug logging of the type object; consider removing
        logger.debug(type(subclip_videos))
        return {
            "task_id": task_id,
            "subclip_videos": subclip_videos
        }

    except Exception as e:
        logger.exception(f"Crop video failed: {str(e)}")
        raise
class HttpException(Exception):
    """API error carrying an HTTP status code, the task id and an optional payload.

    The error is logged at construction time: warnings for 400s (expected
    client errors), errors otherwise, with the active traceback appended
    when one exists.
    """

    def __init__(
        self, task_id: str, status_code: int, message: str = "", data: Any = None
    ):
        self.message = message
        self.status_code = status_code
        self.data = data
        # Capture the currently-active exception traceback, if any
        tb_str = traceback.format_exc().strip()
        # format_exc() yields "NoneType: None" when no exception is being handled
        if not tb_str or tb_str == "NoneType: None":
            msg = f"HttpException: {status_code}, {task_id}, {message}"
        else:
            msg = f"HttpException: {status_code}, {task_id}, {message}\n{tb_str}"

        # 400s are expected client errors — log them less severely
        if status_code == 400:
            logger.warning(msg)
        else:
            logger.error(msg)


class FileNotFoundException(Exception):
    """Raised when a required file is missing."""
    pass
Optional[int] = 0 52 | voice_pitch: Optional[int] = 0 53 | subtitle_enabled: Optional[bool] = True 54 | video_aspect: Optional[str] = "16:9" 55 | n_threads: Optional[int] = 4 56 | subclip_videos: list # 从裁剪视频接口获取的视频片段字典 57 | 58 | 59 | class StartSubclipResponse(BaseModel): 60 | task_id: str 61 | state: str 62 | videos: Optional[List[str]] = None 63 | combined_videos: Optional[List[str]] = None 64 | -------------------------------------------------------------------------------- /app/router.py: -------------------------------------------------------------------------------- 1 | """Application configuration - root APIRouter. 2 | 3 | Defines all FastAPI application endpoints. 4 | 5 | Resources: 6 | 1. https://fastapi.tiangolo.com/tutorial/bigger-applications 7 | 8 | """ 9 | 10 | from fastapi import APIRouter 11 | 12 | from app.controllers.v1 import llm, video 13 | from app.controllers.v2 import script 14 | 15 | root_api_router = APIRouter() 16 | # v1 17 | root_api_router.include_router(video.router) 18 | root_api_router.include_router(llm.router) 19 | 20 | # v2 21 | root_api_router.include_router(script.router) 22 | -------------------------------------------------------------------------------- /app/services/SDE/prompt.py: -------------------------------------------------------------------------------- 1 | #!/usr/bin/env python 2 | # -*- coding: UTF-8 -*- 3 | 4 | ''' 5 | @Project: NarratoAI 6 | @File : prompt 7 | @Author : 小林同学 8 | @Date : 2025/5/9 上午12:57 9 | ''' 10 | # 字幕剧情分析提示词 11 | subtitle_plot_analysis_v1 = """ 12 | # 角色 13 | 你是一位专业的剧本分析师和剧情概括助手。 14 | 15 | # 任务 16 | 我将为你提供一部短剧的完整字幕文本。请你基于这些字幕,完成以下任务: 17 | 1. **整体剧情分析**:简要概括整个短剧的核心剧情脉络、主要冲突和结局(如果有的话)。 18 | 2. 
**分段剧情解析与时间戳定位**: 19 | * 将整个短剧划分为若干个关键的剧情段落(例如:开端、发展、转折、高潮、结局,或根据具体情节自然划分)。 20 | * 段落数应该与字幕长度成正比。 21 | * 对于每一个剧情段落: 22 | * **概括该段落的主要内容**:用简洁的语言描述这段剧情发生了什么。 23 | * **标注对应的时间戳范围**:明确指出该剧情段落对应的开始字幕时间戳和结束字幕时间戳。请直接从字幕中提取时间信息。 24 | 25 | # 输入格式 26 | 字幕内容通常包含时间戳和对话,例如: 27 | ``` 28 | 00:00:05,000 --> 00:00:10,000 29 | [角色A]: 你好吗? 30 | 00:00:10,500 --> 00:00:15,000 31 | [角色B]: 我很好,谢谢。发生了一些有趣的事情。 32 | ... (更多字幕内容) ... 33 | ``` 34 | 我将把实际字幕粘贴在下方。 35 | 36 | # 输出格式要求 37 | 请按照以下格式清晰地呈现分析结果: 38 | 39 | **一、整体剧情概括:** 40 | [此处填写对整个短剧剧情的概括] 41 | 42 | **二、分段剧情解析:** 43 | 44 | **剧情段落 1:[段落主题/概括,例如:主角登场与背景介绍]** 45 | * **时间戳:** [开始时间戳] --> [结束时间戳] 46 | * **内容概要:** [对这段剧情的详细描述] 47 | 48 | **剧情段落 2:[段落主题/概括,例如:第一个冲突出现]** 49 | * **时间戳:** [开始时间戳] --> [结束时间戳] 50 | * **内容概要:** [对这段剧情的详细描述] 51 | 52 | ... (根据实际剧情段落数量继续) ... 53 | 54 | **剧情段落 N:[段落主题/概括,例如:结局与反思]** 55 | * **时间戳:** [开始时间戳] --> [结束时间戳] 56 | * **内容概要:** [对这段剧情的详细描述] 57 | 58 | # 注意事项 59 | * 请确保时间戳的准确性,直接引用字幕中的时间。 60 | * 剧情段落的划分应合乎逻辑,能够反映剧情的起承转合。 61 | * 语言表达应简洁、准确、客观。 62 | 63 | # 限制 64 | 1. 严禁输出与分析结果无关的内容 65 | 2. 66 | 67 | # 请处理以下字幕: 68 | """ 69 | 70 | plot_writing = """ 71 | 我是一个影视解说up主,需要为我的粉丝讲解短剧《%s》的剧情,目前正在解说剧情,希望能让粉丝通过我的解说了解剧情,并且产生 继续观看的兴趣,请生成一篇解说脚本,包含解说文案,以及穿插原声的片段,下面中的内容是短剧的剧情概述: 72 | 73 | 74 | %s 75 | 76 | 77 | 请使用 json 格式进行输出;使用 中的输出格式: 78 | 79 | { 80 | "items": [ 81 | { 82 | "_id": 1, # 唯一递增id 83 | "timestamp": "00:00:05,390-00:00:10,430", 84 | "picture": "剧情描述或者备注", 85 | "narration": "解说文案,如果片段为穿插的原片片段,可以直接使用 ‘播放原片+_id‘ 进行占位", 86 | "OST": "值为 0 表示当前片段为解说片段,值为 1 表示当前片段为穿插的原片" 87 | } 88 | } 89 | 90 | 91 | 92 | 1. 只输出 json 内容,不要输出其他任何说明性的文字 93 | 2. 解说文案的语言使用 简体中文 94 | 3. 严禁虚构剧情,所有画面只能从 中摘取 95 | 4. 
def generate_script(srt_path: str, api_key: str, model_name: str, output_path: str, base_url: str = None, custom_clips: int = 5):
    """Generate a video mash-up script from an SRT subtitle file.

    Args:
        srt_path: Path to the subtitle file.
        api_key: API key for the LLM provider.
        model_name: Name of the LLM model to use.
        output_path: Path where the final JSON script is written (required).
        base_url: Optional base URL of an OpenAI-compatible API.
        custom_clips: Number of key clips to extract. Defaults to 5.

    Returns:
        list: The generated script items (also saved to output_path).
    """
    # Validate the input file before calling the LLM
    if not os.path.exists(srt_path):
        raise FileNotFoundError(f"字幕文件不存在: {srt_path}")

    # Analyze the subtitles with the LLM
    print("开始分析...")
    openai_analysis = analyze_subtitle(
        srt_path=srt_path,
        api_key=api_key,
        model_name=model_name,
        base_url=base_url,
        custom_clips=custom_clips
    )

    # Merge the located plot points into the final script file
    adjusted_results = openai_analysis['plot_points']
    final_script = merge_script(adjusted_results, output_path)

    return final_script
def analyze_subtitle(
    srt_path: str,
    model_name: str,
    api_key: str = None,
    base_url: str = None,
    custom_clips: int = 5
) -> dict:
    """Analyze subtitle content with an LLM and return the full result.

    Two LLM calls are made: one to summarize the plot and pick key clips,
    a second to locate each clip's exact time range in the subtitles.

    Args:
        srt_path (str): Path to the SRT subtitle file.
        model_name (str): LLM model name; "deepseek" models are routed to a
            third-party endpoint.
        api_key (str, optional): LLM API key; falls back to env vars.
        base_url (str, optional): Base URL of an OpenAI-compatible API.
        custom_clips (int, optional): Number of key clips to extract. Defaults to 5.

    Returns:
        dict: {"plot_summary": <summary dict>, "plot_points": <list of segments>}.

    Raises:
        Exception: Wraps any parsing/API failure with a formatted traceback.
    """
    try:
        # Load the subtitle file and flatten it to "timestamp\ntext" pairs
        subtitles = load_srt(srt_path)
        subtitle_content = "\n".join([f"{sub['timestamp']}\n{sub['text']}" for sub in subtitles])

        # Initialize the OpenAI-compatible client.
        # NOTE(review): `global client` publishes the client as module state;
        # looks unnecessary — confirm nothing else reads it before making it local.
        global client
        if "deepseek" in model_name.lower():
            client = OpenAI(
                api_key=api_key or os.getenv('DeepSeek_API_KEY'),
                base_url="https://api.siliconflow.cn/v1"  # third-party SiliconFlow API endpoint
            )
        else:
            client = OpenAI(
                api_key=api_key or os.getenv('OPENAI_API_KEY'),
                base_url=base_url
            )

        messages = [
            {
                "role": "system",
                "content": """你是一名经验丰富的短剧编剧,擅长根据字幕内容按照先后顺序分析关键剧情,并找出 %s 个关键片段。
请返回一个JSON对象,包含以下字段:
{
    "summary": "整体剧情梗概",
    "plot_titles": [
        "关键剧情1",
        "关键剧情2",
        "关键剧情3",
        "关键剧情4",
        "关键剧情5",
        "..."
    ]
}
请确保返回的是合法的JSON格式, 请确保返回的是 %s 个片段。
""" % (custom_clips, custom_clips)
            },
            {
                "role": "user",
                "content": f"srt字幕如下:{subtitle_content}"
            }
        ]
        # DeepSeek R1 and V3 do not support response_format=json_object,
        # so fall back to a plain completion and strip the code fences.
        try:
            completion = client.chat.completions.create(
                model=model_name,
                messages=messages,
                response_format={"type": "json_object"}
            )
            summary_data = json.loads(completion.choices[0].message.content)
        except BadRequestError as e:
            completion = client.chat.completions.create(
                model=model_name,
                messages=messages
            )
            # Strip the leading ```json and trailing ``` fences before parsing
            completion = completion.choices[0].message.content.replace("```json", "").replace("```", "")
            summary_data = json.loads(completion)
        except Exception as e:
            raise Exception(f"大模型解析发生错误:{str(e)}\n{traceback.format_exc()}")

        print(json.dumps(summary_data, indent=4, ensure_ascii=False))

        # Build the follow-up prompt asking the model to locate each highlight
        prompt = f"""剧情梗概:
{summary_data['summary']}

需要定位的爆点内容:
"""
        print(f"找到 {len(summary_data['plot_titles'])} 个片段")
        for i, point in enumerate(summary_data['plot_titles'], 1):
            prompt += f"{i}. {point}\n"

        messages = [
            {
                "role": "system",
                "content": """你是一名短剧编剧,非常擅长根据字幕中分析视频中关键剧情出现的具体时间段。
请仔细阅读剧情梗概和爆点内容,然后在字幕中找出每个爆点发生的具体时间段和爆点前后的详细剧情。

请返回一个JSON对象,包含一个名为"plot_points"的数组,数组中包含多个对象,每个对象都要包含以下字段:
{
    "plot_points": [
        {
            "timestamp": "时间段,格式为xx:xx:xx,xxx-xx:xx:xx,xxx",
            "title": "关键剧情的主题",
            "picture": "关键剧情前后的详细剧情描述"
        }
    ]
}
请确保返回的是合法的JSON格式。"""
            },
            {
                "role": "user",
                "content": f"""字幕内容:
{subtitle_content}

{prompt}"""
            }
        ]
        # Same json_object fallback as above for DeepSeek R1 / V3
        try:
            completion = client.chat.completions.create(
                model=model_name,
                messages=messages,
                response_format={"type": "json_object"}
            )
            plot_points_data = json.loads(completion.choices[0].message.content)
        except BadRequestError as e:
            completion = client.chat.completions.create(
                model=model_name,
                messages=messages
            )
            # Strip the leading ```json and trailing ``` fences before parsing
            completion = completion.choices[0].message.content.replace("```json", "").replace("```", "")
            plot_points_data = json.loads(completion)
        except Exception as e:
            raise Exception(f"大模型解析错误:{str(e)}\n{traceback.format_exc()}")

        print(json.dumps(plot_points_data, indent=4, ensure_ascii=False))

        # Merge both analyses into a single result
        return {
            "plot_summary": summary_data,
            "plot_points": plot_points_data["plot_points"]
        }

    except Exception as e:
        raise Exception(f"分析字幕时发生错误:{str(e)}\n{traceback.format_exc()}")
def merge_script(
    plot_points: List[Dict],
    output_path: str
):
    """Merge proofread plot points into the final narration script.

    Args:
        plot_points: proofread plot-point dicts, each carrying "timestamp" and "picture"
        output_path: path of the JSON file the script is written to

    Returns:
        list: the merged script items (also persisted to output_path)
    """

    def parse_timestamp(ts: str) -> Tuple[float, float]:
        """Split a 'start-end' timestamp and return both ends in seconds."""
        start_raw, end_raw = ts.split('-')

        def to_seconds(raw: str) -> float:
            raw = raw.strip()
            ms = 0.0
            if ',' in raw:
                raw, ms_text = raw.split(',')
                ms = float(ms_text) / 1000
            h, m, s = map(int, raw.split(':'))
            return h * 3600 + m * 60 + s + ms

        return to_seconds(start_raw), to_seconds(end_raw)

    def format_timestamp(seconds: float) -> str:
        """Render a second count as HH:MM:SS."""
        h, rem = divmod(int(seconds), 3600)
        m, s = divmod(rem, 60)
        return f"{h:02d}:{m:02d}:{s:02d}"

    final_script = []
    for number, plot_point in enumerate(plot_points, start=1):
        # Parse validates the timestamp format (raises on malformed input).
        parse_timestamp(plot_point["timestamp"])
        final_script.append({
            "_id": number,
            "timestamp": plot_point["timestamp"],
            "picture": plot_point["picture"],
            "narration": f"播放原生_{os.urandom(4).hex()}",
            "OST": 1,  # OST=0 keeps narration only; OST=2 keeps narration and original audio
        })

    # Persist the merged script as pretty-printed JSON.
    with open(output_path, 'w', encoding='utf-8') as f:
        json.dump(final_script, f, ensure_ascii=False, indent=4)

    print(f"脚本生成完成:{output_path}")
    return final_script
def load_srt(file_path: str) -> List[Dict]:
    """Load and parse an SRT subtitle file.

    Args:
        file_path: path to the SRT file

    Returns:
        A list of subtitle dicts with number, timestamp, text and
        start/end time strings; malformed blocks are skipped with a warning.
    """
    with open(file_path, 'r', encoding='utf-8-sig') as srt_file:
        raw = srt_file.read().strip()

    entries = []
    # Subtitle blocks are separated by blank lines.
    for chunk in raw.split('\n\n'):
        rows = chunk.split('\n')
        if len(rows) < 3:  # need at least index, timing and one text row
            continue
        try:
            idx = int(rows[0].strip())
            timing = rows[1]
            begin, finish = timing.split(' --> ')
        except ValueError as e:
            print(f"Warning: 跳过无效的字幕块: {e}")
            continue
        entries.append({
            'number': idx,
            'timestamp': timing,
            'text': ' '.join(rows[2:]),
            'start_time': begin,
            'end_time': finish,
        })

    return entries
def merge_audio_files(task_id: str, total_duration: float, list_script: list):
    """Overlay per-segment TTS clips onto a silent track of total_duration seconds.

    Args:
        task_id: task identifier (selects the output directory)
        total_duration: overall track length in seconds
        list_script: script entries carrying 'duration' (seconds) and 'audio' (path)

    Returns:
        str | None: path of the merged mp3, or None when FFmpeg is missing
    """
    # pydub needs FFmpeg to decode/encode; bail out early if it is absent.
    if not check_ffmpeg():
        logger.error("FFmpeg未安装,无法合并音频文件")
        return None

    # Silent base track; pydub durations/positions are in milliseconds.
    merged = AudioSegment.silent(duration=total_duration * 1000)
    cursor = 0  # running offset (seconds) where the next segment starts

    for segment in list_script:
        try:
            seg_len = segment['duration']
            audio_path = segment['audio']
            if audio_path and os.path.exists(audio_path):
                clip = AudioSegment.from_file(audio_path)
                merged = merged.overlay(clip, position=cursor * 1000)
            else:
                # No audio for this segment: keep the gap so timing stays intact.
                logger.info(f"片段 {segment.get('timestamp', '')} 没有音频文件,保留 {seg_len} 秒的间隔")
            cursor += seg_len
        except Exception as e:
            logger.error(f"处理音频片段时出错: {str(e)}")
            # Advance anyway so later segments stay correctly positioned.
            if 'duration' in segment:
                cursor += segment['duration']
            continue

    # Export the merged track into the task directory.
    output_audio_path = os.path.join(utils.task_dir(task_id), "merger_audio.mp3")
    merged.export(output_audio_path, format="mp3")
    logger.info(f"合并后的音频文件已保存: {output_audio_path}")

    return output_audio_path
def time_to_seconds(time_str):
    """Convert a time string to seconds.

    Accepted formats:
        1. 'HH:MM:SS,mmm' (hours:minutes:seconds,milliseconds)
        2. 'MM:SS,mmm'    (minutes:seconds,milliseconds)
        3. 'SS,mmm'       (seconds,milliseconds)
    The ',mmm' part is optional in every form.

    Returns:
        Total seconds; 0.0 when the string cannot be parsed (logged).
    """
    try:
        # Peel off the optional millisecond suffix first.
        if ',' in time_str:
            clock, ms_text = time_str.split(',')
            fraction = float(ms_text) / 1000
        else:
            clock = time_str
            fraction = 0

        parts = clock.split(':')

        if len(parts) == 3:    # HH:MM:SS
            h, m, s = (int(p) for p in parts)
            base = h * 3600 + m * 60 + s
        elif len(parts) == 2:  # MM:SS
            m, s = (int(p) for p in parts)
            base = m * 60 + s
        else:                  # SS
            base = int(parts[0])

        return base + fraction
    except (ValueError, IndexError) as e:
        logger.error(f"Error parsing time {time_str}: {str(e)}")
        return 0.0
class BaseState(ABC):
    """Abstract interface shared by the task-state backends."""

    @abstractmethod
    def update_task(self, task_id: str, state: int, progress: int = 0, **kwargs):
        """Create or update the stored state/progress of a task."""

    @abstractmethod
    def get_task(self, task_id: str):
        """Return the stored record for task_id, or None when unknown."""
class MemoryState(BaseState):
    """In-process task-state store backed by a plain dict."""

    def __init__(self):
        self._tasks = {}

    def update_task(
        self,
        task_id: str,
        state: int = const.TASK_STATE_PROCESSING,
        progress: int = 0,
        **kwargs,
    ):
        """Store state and progress (clamped to 100) plus any extra fields."""
        clamped = min(int(progress), 100)
        self._tasks[task_id] = {
            "state": state,
            "progress": clamped,
            **kwargs,
        }

    def get_task(self, task_id: str):
        """Return the task record, or None when the id is unknown."""
        return self._tasks.get(task_id)

    def delete_task(self, task_id: str):
        """Drop the record for task_id if present; no-op otherwise."""
        self._tasks.pop(task_id, None)
94 | """ 95 | value_str = value.decode("utf-8") 96 | 97 | try: 98 | # try to convert byte string array to list 99 | return ast.literal_eval(value_str) 100 | except (ValueError, SyntaxError): 101 | pass 102 | 103 | if value_str.isdigit(): 104 | return int(value_str) 105 | # Add more conversions here if needed 106 | return value_str 107 | 108 | 109 | # Global state 110 | _enable_redis = config.app.get("enable_redis", False) 111 | _redis_host = config.app.get("redis_host", "localhost") 112 | _redis_port = config.app.get("redis_port", 6379) 113 | _redis_db = config.app.get("redis_db", 0) 114 | _redis_password = config.app.get("redis_password", None) 115 | 116 | state = ( 117 | RedisState( 118 | host=_redis_host, port=_redis_port, db=_redis_db, password=_redis_password 119 | ) 120 | if _enable_redis 121 | else MemoryState() 122 | ) 123 | -------------------------------------------------------------------------------- /app/services/subtitle_merger.py: -------------------------------------------------------------------------------- 1 | #!/usr/bin/env python 2 | # -*- coding: UTF-8 -*- 3 | 4 | ''' 5 | @Project: NarratoAI 6 | @File : subtitle_merger 7 | @Author : viccy 8 | @Date : 2025/5/6 下午4:00 9 | ''' 10 | 11 | import re 12 | import os 13 | from datetime import datetime, timedelta 14 | 15 | 16 | def parse_time(time_str): 17 | """解析时间字符串为timedelta对象""" 18 | hours, minutes, seconds_ms = time_str.split(':') 19 | seconds, milliseconds = seconds_ms.split(',') 20 | 21 | td = timedelta( 22 | hours=int(hours), 23 | minutes=int(minutes), 24 | seconds=int(seconds), 25 | milliseconds=int(milliseconds) 26 | ) 27 | return td 28 | 29 | 30 | def format_time(td): 31 | """将timedelta对象格式化为SRT时间字符串""" 32 | total_seconds = int(td.total_seconds()) 33 | hours = total_seconds // 3600 34 | minutes = (total_seconds % 3600) // 60 35 | seconds = total_seconds % 60 36 | milliseconds = td.microseconds // 1000 37 | 38 | return f"{hours:02d}:{minutes:02d}:{seconds:02d},{milliseconds:03d}" 39 | 40 | 41 
def parse_edited_time_range(time_range_str):
    """Extract the (start, end) time range from an editedTimeRange string.

    Args:
        time_range_str: range in the form 'HH:MM:SS-HH:MM:SS'

    Returns:
        tuple: (start, end) as timedelta objects, or (None, None) when the
        string is empty or malformed — callers treat (None, None) as "skip".
    """
    if not time_range_str:
        return None, None

    parts = time_range_str.split('-')
    if len(parts) != 2:
        return None, None

    start_time_str, end_time_str = parts

    try:
        # Convert each HH:MM:SS endpoint into hours/minutes/seconds.
        start_h, start_m, start_s = map(int, start_time_str.split(':'))
        end_h, end_m, end_s = map(int, end_time_str.split(':'))
    except ValueError:
        # Previously a malformed endpoint raised ValueError and aborted the
        # whole merge; report it like the other malformed cases instead so
        # the caller's (None, None) skip-with-warning path handles it.
        return None, None

    start_time = timedelta(hours=start_h, minutes=start_m, seconds=start_s)
    end_time = timedelta(hours=end_h, minutes=end_m, seconds=end_s)

    return start_time, end_time
parse_time(time_parts[1]) 110 | 111 | # 应用时间偏移 112 | adjusted_start_time = start_time + offset_time 113 | adjusted_end_time = end_time + offset_time 114 | 115 | # 重建字幕块 116 | adjusted_time_line = f"{format_time(adjusted_start_time)} --> {format_time(adjusted_end_time)}" 117 | text_lines = lines[2:] 118 | 119 | new_block = [ 120 | str(subtitle_index), 121 | adjusted_time_line, 122 | *text_lines 123 | ] 124 | 125 | merged_subtitles.append('\n'.join(new_block)) 126 | subtitle_index += 1 127 | 128 | # 确定输出文件路径 129 | if output_file is None: 130 | dir_path = os.path.dirname(sorted_items[0]['subtitle']) 131 | first_start = parse_edited_time_range(sorted_items[0]['editedTimeRange'])[0] 132 | last_end = parse_edited_time_range(sorted_items[-1]['editedTimeRange'])[1] 133 | 134 | first_start_h, first_start_m, first_start_s = int(first_start.seconds // 3600), int((first_start.seconds % 3600) // 60), int(first_start.seconds % 60) 135 | last_end_h, last_end_m, last_end_s = int(last_end.seconds // 3600), int((last_end.seconds % 3600) // 60), int(last_end.seconds % 60) 136 | 137 | first_start_str = f"{first_start_h:02d}_{first_start_m:02d}_{first_start_s:02d}" 138 | last_end_str = f"{last_end_h:02d}_{last_end_m:02d}_{last_end_s:02d}" 139 | 140 | output_file = os.path.join(dir_path, f"merged_subtitle_{first_start_str}-{last_end_str}.srt") 141 | 142 | # 合并所有字幕块 143 | merged_content = '\n\n'.join(merged_subtitles) 144 | 145 | # 写入合并后的内容 146 | with open(output_file, 'w', encoding='utf-8') as file: 147 | file.write(merged_content) 148 | 149 | return output_file 150 | 151 | 152 | if __name__ == '__main__': 153 | # 测试数据 154 | test_data = [ 155 | {'picture': '【解说】好的,各位,欢迎回到我的频道!《庆余年 2》刚开播就给了我们一个王炸!范闲在北齐"死"了?这怎么可能!', 156 | 'timestamp': '00:00:00-00:01:15', 157 | 'narration': '好的各位,欢迎回到我的频道!《庆余年 2》刚开播就给了我们一个王炸!范闲在北齐"死"了?这怎么可能!上集片尾那个巨大的悬念,这一集就立刻揭晓了!范闲假死归来,他面临的第一个,也是最大的难关,就是如何面对他最敬爱的,同时也是最可怕的那个人——庆帝!', 158 | 'OST': 0, 159 | '_id': 1, 160 | 'audio': 
'/Users/apple/Desktop/home/NarratoAI/storage/tasks/qyn2-2-demo/audio_00_00_00-00_01_15.mp3', 161 | 'subtitle': '/Users/apple/Desktop/home/NarratoAI/storage/tasks/qyn2-2-demo/subtitle_00_00_00-00_01_15.srt', 162 | 'sourceTimeRange': '00:00:00-00:00:26', 163 | 'duration': 26, 164 | 'editedTimeRange': '00:00:00-00:00:26' 165 | }, 166 | {'picture': '【解说】上一集我们看到,范闲在北齐遭遇了惊天变故,生死不明!', 167 | 'timestamp': '00:01:15-00:04:40', 168 | 'narration': '但我们都知道,他绝不可能就这么轻易退场!第二集一开场,范闲就已经秘密回到了京都。他的生死传闻,可不像我们想象中那样只是小范围流传,而是…', 169 | 'OST': 0, 170 | '_id': 2, 171 | 'audio': '/Users/apple/Desktop/home/NarratoAI/storage/tasks/qyn2-2-demo/audio_00_01_15-00_04_40.mp3', 172 | 'subtitle': '/Users/apple/Desktop/home/NarratoAI/storage/tasks/qyn2-2-demo/subtitle_00_01_15-00_04_40.srt', 173 | 'sourceTimeRange': '00:01:15-00:01:29', 174 | 'duration': 14, 175 | 'editedTimeRange': '00:00:26-00:00:40' 176 | }, 177 | {'picture': '【解说】"欺君之罪"!在封建王朝,这可是抄家灭族的大罪!搁一般人,肯定脚底抹油溜之大吉了。', 178 | 'timestamp': '00:04:58-00:05:45', 179 | 'narration': '"欺君之罪"!在封建王朝,这可是抄家灭族的大罪!搁一般人,肯定脚底抹油溜之大吉了。但范闲是谁啊?他偏要反其道而行之!他竟然决定,直接去见庆帝!冒着天大的风险,用"假死"这个事实去赌庆帝的态度!', 180 | 'OST': 0, 181 | '_id': 4, 182 | 'audio': '/Users/apple/Desktop/home/NarratoAI/storage/tasks/qyn2-2-demo/audio_00_04_58-00_05_45.mp3', 183 | 'subtitle': '/Users/apple/Desktop/home/NarratoAI/storage/tasks/qyn2-2-demo/subtitle_00_04_58-00_05_45.srt', 184 | 'sourceTimeRange': '00:04:58-00:05:20', 185 | 'duration': 22, 186 | 'editedTimeRange': '00:00:57-00:01:19' 187 | }, 188 | {'picture': '【解说】但想见庆帝,哪有那么容易?范闲艺高人胆大,竟然选择了最激进的方式——闯宫!', 189 | 'timestamp': '00:05:45-00:06:00', 190 | 'narration': '但想见庆帝,哪有那么容易?范闲艺高人胆大,竟然选择了最激进的方式——闯宫!', 191 | 'OST': 0, 192 | '_id': 5, 193 | 'audio': '/Users/apple/Desktop/home/NarratoAI/storage/tasks/qyn2-2-demo/audio_00_05_45-00_06_00.mp3', 194 | 'subtitle': '/Users/apple/Desktop/home/NarratoAI/storage/tasks/qyn2-2-demo/subtitle_00_05_45-00_06_00.srt', 195 | 'sourceTimeRange': '00:05:45-00:05:53', 196 | 'duration': 8, 197 | 'editedTimeRange': 
class VideoService:
    """Video clipping service built on top of the material helpers."""

    @staticmethod
    async def crop_video(
        video_path: str,
        video_script: List[dict]
    ) -> Tuple[str, Dict[str, str]]:
        """Cut video_path into one subclip per script timestamp.

        Args:
            video_path: source video file path
            video_script: scene dicts, each carrying a 'timestamp'

        Returns:
            Tuple[str, Dict[str, str]]: (task_id, {timestamp: clip_path})

        Raises:
            ValueError: when the clipping backend returns nothing.
        """
        try:
            task_id = str(uuid4())

            # Collect every timestamp the clipper must cut at.
            time_list = [scene['timestamp'] for scene in video_script]

            subclip_videos = material.clip_videos(
                task_id=task_id,
                timestamp_terms=time_list,
                origin_video=video_path
            )
            if subclip_videos is None:
                raise ValueError("裁剪视频失败")

            # Write each produced clip path back onto its scene.
            for scene in video_script:
                try:
                    scene['path'] = subclip_videos[scene['timestamp']]
                except KeyError as err:
                    logger.error(f"更新视频路径失败: {err}")

            logger.debug(f"裁剪视频成功,共生成 {len(time_list)} 个视频片段")
            logger.debug(f"视频片段路径: {subclip_videos}")

            return task_id, subclip_videos

        except Exception:
            logger.exception("裁剪视频失败")
            raise
class YoutubeService:
    """Download YouTube videos at a chosen resolution via yt-dlp."""

    def __init__(self):
        # Container formats the service is willing to emit.
        self.supported_formats = ['mp4', 'mkv', 'webm', 'flv', 'avi']

    def _get_video_formats(self, url: str) -> List[Dict]:
        """Return the list of formats available for the video (metadata only, no download)."""
        ydl_opts = {
            'quiet': True,
            'no_warnings': True
        }

        try:
            with yt_dlp.YoutubeDL(ydl_opts) as ydl:
                info = ydl.extract_info(url, download=False)
                formats = info.get('formats', [])

                # Reduce each yt-dlp format entry to the fields we compare on.
                # NOTE(review): 'resolution' is taken from yt-dlp's free-form
                # 'format_note' (e.g. '1080p60'), not a computed WxH value.
                format_list = []
                for f in formats:
                    format_info = {
                        'format_id': f.get('format_id', 'N/A'),
                        'ext': f.get('ext', 'N/A'),
                        'resolution': f.get('format_note', 'N/A'),
                        'filesize': f.get('filesize', 'N/A'),
                        'vcodec': f.get('vcodec', 'N/A'),
                        'acodec': f.get('acodec', 'N/A')
                    }
                    format_list.append(format_info)

                return format_list
        except Exception as e:
            logger.error(f"获取视频格式失败: {str(e)}")
            raise

    def _validate_format(self, output_format: str) -> None:
        """Raise ValueError when output_format is not a supported container."""
        if output_format.lower() not in self.supported_formats:
            raise ValueError(
                f"不支持的视频格式: {output_format}。"
                f"支持的格式: {', '.join(self.supported_formats)}"
            )

    async def download_video(
        self,
        url: str,
        resolution: str,
        output_format: str = 'mp4',
        rename: Optional[str] = None
    ) -> Tuple[str, str, str]:
        """Download the video at the requested resolution.

        Args:
            url: YouTube video URL
            resolution: target resolution ('2160p', '1440p', '1080p', '720p', ...);
                inputs like '1080p60' are normalized to '1080p'
            output_format: output container format (must be in supported_formats)
            rename: optional output file name (without extension)

        Returns:
            Tuple[str, str, str]: (task_id, output_path, filename)

        Raises:
            ValueError: unsupported output format, or no matching resolution.
        """
        try:
            task_id = str(uuid4())
            self._validate_format(output_format)

            # Normalize e.g. '1080p60' down to '1080p' for comparison.
            base_resolution = resolution.split('p')[0] + 'p'

            # All formats yt-dlp reports for this URL.
            formats = self._get_video_formats(url)

            # Pick the first video-bearing format whose normalized resolution matches.
            target_format = None
            for fmt in formats:
                fmt_resolution = fmt['resolution']
                # Normalize the candidate's resolution the same way before comparing.
                if fmt_resolution != 'N/A':
                    fmt_base_resolution = fmt_resolution.split('p')[0] + 'p'
                    if fmt_base_resolution == base_resolution and fmt['vcodec'] != 'none':
                        target_format = fmt
                        break

            if target_format is None:
                # Collect (normalized) resolutions that are actually available
                # so the error message is actionable.
                available_resolutions = set(
                    fmt['resolution'].split('p')[0] + 'p'
                    for fmt in formats
                    if fmt['resolution'] != 'N/A' and fmt['vcodec'] != 'none'
                )
                raise ValueError(
                    f"未找到 {base_resolution} 分辨率的视频。"
                    f"可用分辨率: {', '.join(sorted(available_resolutions))}"
                )

            # Ensure the output directory exists.
            output_dir = utils.video_dir()
            os.makedirs(output_dir, exist_ok=True)

            # Build the yt-dlp output template.
            if rename:
                # Caller-supplied name: the output path is fully determined here.
                filename = f"{rename}.{output_format}"
                output_template = os.path.join(output_dir, filename)
            else:
                # Otherwise keep the original title, prefixed with the task id.
                output_template = os.path.join(output_dir, f'{task_id}_%(title)s.%(ext)s')

            ydl_opts = {
                # Selected video stream + best m4a audio, falling back to 'best'.
                'format': f"{target_format['format_id']}+bestaudio[ext=m4a]/best",
                'outtmpl': output_template,
                'merge_output_format': output_format.lower(),
                'postprocessors': [{
                    'key': 'FFmpegVideoConvertor',
                    'preferedformat': output_format.lower(),
                }]
            }

            # Perform the actual download (+ merge/convert postprocessing).
            with yt_dlp.YoutubeDL(ydl_opts) as ydl:
                info = ydl.extract_info(url, download=True)
                if rename:
                    # Renamed download: template already is the final path.
                    output_path = output_template
                    filename = os.path.basename(output_path)
                else:
                    # Reconstruct the path yt-dlp produced from the title template.
                    video_title = info.get('title', task_id)
                    filename = f"{task_id}_{video_title}.{output_format}"
                    output_path = os.path.join(output_dir, filename)

            logger.info(f"视频下载成功: {output_path}")
            return task_id, output_path, filename

        except Exception as e:
            logger.exception("下载视频失败")
            raise
def format_duration(seconds: float) -> str:
    """
    Render a duration given in seconds as a readable time string.

    Args:
        seconds: duration in seconds (may carry a fractional part)

    Returns:
        The formatted string 'HH:MM:SS,mmm' (milliseconds truncated).
    """
    hrs, rem = divmod(seconds, 3600)
    mins, sec_part = divmod(rem, 60)
    sec_whole = int(sec_part)
    millis = int((sec_part - sec_whole) * 1000)

    return f"{int(hrs):02d}:{int(mins):02d}:{sec_whole:02d},{millis:03d}"
class VideoMerger:
    """Merge video clips with optional background music and subtitles via moviepy."""

    def __init__(self, output_path: str = "../../resource/videos/merged_video.mp4"):
        """
        Initialize the merger.

        Args:
            output_path: path of the exported video file
        """
        self.output_path = output_path
        self.video_clips = []          # ordered VideoFileClip segments to concatenate
        self.background_music = None   # optional AudioFileClip, set by add_audio
        self.subtitles = []            # TextClip overlays with start/duration applied

    def add_video(self, video_path: str, start_time: str = None, end_time: str = None) -> None:
        """
        Append a video segment.

        Args:
            video_path: video file path
            start_time: optional clip start ("MM:SS"); both bounds must be given to subclip
            end_time: optional clip end ("MM:SS")
        """
        video = VideoFileClip(video_path)
        if start_time and end_time:
            video = video.subclip(self._time_to_seconds(start_time),
                                  self._time_to_seconds(end_time))
        self.video_clips.append(video)

    def add_audio(self, audio_path: str, volume: float = 1.0) -> None:
        """
        Set the background music track (replaces any previously set one).

        Args:
            audio_path: audio file path
            volume: volume factor (0.0-1.0)
        """
        self.background_music = AudioFileClip(audio_path).volumex(volume)

    def add_subtitle(self, text: str, start_time: str, end_time: str,
                     position: tuple = ('center', 'bottom'), fontsize: int = 24) -> None:
        """
        Add a subtitle overlay.

        Args:
            text: subtitle text
            start_time: start time ("MM:SS")
            end_time: end time ("MM:SS")
            position: subtitle placement on the frame
            fontsize: font size
        """
        subtitle = TextClip(
            text,
            font=FONT_PATH,
            fontsize=fontsize,
            color='white',
            stroke_color='black',
            stroke_width=2
        )

        # Anchor the clip at start_time and keep it visible for the requested span.
        subtitle = subtitle.set_position(position).set_duration(
            self._time_to_seconds(end_time) - self._time_to_seconds(start_time)
        ).set_start(self._time_to_seconds(start_time))

        self.subtitles.append(subtitle)

    def merge(self) -> None:
        """Concatenate the clips, attach music and subtitles, and export the video.

        Raises:
            ValueError: when no video segment has been added.
        """
        if not self.video_clips:
            raise ValueError("至少需要添加一个视频片段")

        # Concatenate the segments in insertion order.
        final_video = concatenate_videoclips(self.video_clips)

        # Stretch/trim the music to the full video length before attaching it.
        if self.background_music:
            self.background_music = self.background_music.set_duration(final_video.duration)
            final_video = final_video.set_audio(self.background_music)

        # Composite subtitle overlays on top of the concatenated video.
        if self.subtitles:
            final_video = CompositeVideoClip([final_video] + self.subtitles)

        # Export the final render.
        final_video.write_videofile(
            self.output_path,
            fps=24,
            codec='libx264',
            audio_codec='aac'
        )

        # Release clip resources.
        final_video.close()
        for clip in self.video_clips:
            clip.close()
        if self.background_music:
            self.background_music.close()

    @staticmethod
    def _time_to_seconds(time_str: str) -> float:
        """Convert a "MM:SS" string to seconds."""
        minutes, seconds = map(int, time_str.split(':'))
        return minutes * 60 + seconds
def test_merge_video(): 124 | """测试视频合并功能""" 125 | merger = VideoMerger() 126 | 127 | # 添加两个视频片段 128 | merger.add_video("../../resource/videos/cut_video.mp4", "00:00", "01:00") 129 | merger.add_video("../../resource/videos/demo.mp4", "00:00", "00:30") 130 | 131 | # 添加背景音乐 132 | merger.add_audio("../../resource/songs/output000.mp3", volume=0.3) 133 | 134 | # 添加字幕 135 | merger.add_subtitle("第一个精彩片段", "00:00", "00:05") 136 | merger.add_subtitle("第二个精彩片段", "01:00", "01:05") 137 | 138 | # 合并并导出 139 | merger.merge() 140 | 141 | 142 | if __name__ == "__main__": 143 | test_merge_video() 144 | -------------------------------------------------------------------------------- /app/test/test_moviepy_speed.py: -------------------------------------------------------------------------------- 1 | """ 2 | 使用 moviepy 优化视频处理速度的示例 3 | 包含:视频加速、多核处理、预设参数优化等 4 | """ 5 | 6 | from moviepy.editor import VideoFileClip 7 | from moviepy.video.fx.speedx import speedx 8 | import multiprocessing as mp 9 | import time 10 | 11 | 12 | class VideoSpeedProcessor: 13 | """视频速度处理器""" 14 | 15 | def __init__(self, input_path: str, output_path: str): 16 | self.input_path = input_path 17 | self.output_path = output_path 18 | # 获取CPU核心数 19 | self.cpu_cores = mp.cpu_count() 20 | 21 | def process_with_optimization(self, speed_factor: float = 1.0) -> None: 22 | """ 23 | 使用优化参数处理视频 24 | 参数: 25 | speed_factor: 速度倍数 (1.0 为原速, 2.0 为双倍速) 26 | """ 27 | start_time = time.time() 28 | 29 | # 加载视频时使用优化参数 30 | video = VideoFileClip( 31 | self.input_path, 32 | audio=True, # 如果不需要音频可以设为False 33 | target_resolution=(720, None), # 可以降低分辨率加快处理 34 | resize_algorithm='fast_bilinear' # 使用快速的重置算法 35 | ) 36 | 37 | # 应用速度变化 38 | if speed_factor != 1.0: 39 | video = speedx(video, factor=speed_factor) 40 | 41 | # 使用优化参数导出视频 42 | video.write_videofile( 43 | self.output_path, 44 | codec='libx264', # 使用h264编码 45 | audio_codec='aac', # 音频编码 46 | temp_audiofile='temp-audio.m4a', # 临时音频文件 47 | remove_temp=True, # 处理完成后删除临时文件 48 | 
write_logfile=False, # 关闭日志文件 49 | threads=self.cpu_cores, # 使用多核处理 50 | preset='ultrafast', # 使用最快的编码预设 51 | ffmpeg_params=[ 52 | '-brand', 'mp42', 53 | '-crf', '23', # 压缩率,范围0-51,数值越大压缩率越高 54 | ] 55 | ) 56 | 57 | # 释放资源 58 | video.close() 59 | 60 | end_time = time.time() 61 | print(f"处理完成!用时: {end_time - start_time:.2f} 秒") 62 | 63 | def batch_process_segments(self, segment_times: list, speed_factor: float = 1.0) -> None: 64 | """ 65 | 批量处理视频片段(并行处理) 66 | 参数: 67 | segment_times: 列表,包含多个(start, end)时间元组 68 | speed_factor: 速度倍数 69 | """ 70 | start_time = time.time() 71 | 72 | # 创建进程池 73 | with mp.Pool(processes=self.cpu_cores) as pool: 74 | # 准备参数 75 | args = [(self.input_path, start, end, speed_factor, i) 76 | for i, (start, end) in enumerate(segment_times)] 77 | 78 | # 并行处理片段 79 | pool.starmap(self._process_segment, args) 80 | 81 | end_time = time.time() 82 | print(f"批量处理完成!总用时: {end_time - start_time:.2f} 秒") 83 | 84 | @staticmethod 85 | def _process_segment(video_path: str, start: str, end: str, 86 | speed_factor: float, index: int) -> None: 87 | """处理单个视频片段""" 88 | # 转换时间格式 89 | start_sec = VideoSpeedProcessor._time_to_seconds(start) 90 | end_sec = VideoSpeedProcessor._time_to_seconds(end) 91 | 92 | # 加载并处理视频片段 93 | video = VideoFileClip( 94 | video_path, 95 | audio=True, 96 | target_resolution=(720, None) 97 | ).subclip(start_sec, end_sec) 98 | 99 | # 应用速度变化 100 | if speed_factor != 1.0: 101 | video = speedx(video, factor=speed_factor) 102 | 103 | # 保存处理后的片段 104 | output_path = f"../../resource/videos/segment_{index}.mp4" 105 | video.write_videofile( 106 | output_path, 107 | codec='libx264', 108 | audio_codec='aac', 109 | preset='ultrafast', 110 | threads=2 # 每个进程使用的线程数 111 | ) 112 | 113 | video.close() 114 | 115 | @staticmethod 116 | def _time_to_seconds(time_str: str) -> float: 117 | """将时间字符串(MM:SS)转换为秒数""" 118 | minutes, seconds = map(int, time_str.split(':')) 119 | return minutes * 60 + seconds 120 | 121 | 122 | def test_video_speed(): 123 | 
"""测试视频加速处理""" 124 | processor = VideoSpeedProcessor( 125 | "../../resource/videos/best.mp4", 126 | "../../resource/videos/speed_up.mp4" 127 | ) 128 | 129 | # 测试1:简单加速 130 | processor.process_with_optimization(speed_factor=1.5) # 1.5倍速 131 | 132 | # 测试2:并行处理多个片段 133 | segments = [ 134 | ("00:00", "01:00"), 135 | ("01:00", "02:00"), 136 | ("02:00", "03:00") 137 | ] 138 | processor.batch_process_segments(segments, speed_factor=2.0) # 2倍速 139 | 140 | 141 | if __name__ == "__main__": 142 | test_video_speed() 143 | -------------------------------------------------------------------------------- /app/test/test_qwen.py: -------------------------------------------------------------------------------- 1 | import os 2 | import traceback 3 | import json 4 | from openai import OpenAI 5 | from pydantic import BaseModel 6 | from typing import List 7 | from app.utils import utils 8 | from app.services.subtitle import extract_audio_and_create_subtitle 9 | 10 | 11 | class Step(BaseModel): 12 | timestamp: str 13 | picture: str 14 | narration: str 15 | OST: int 16 | new_timestamp: str 17 | 18 | class MathReasoning(BaseModel): 19 | result: List[Step] 20 | 21 | 22 | def chat_with_qwen(prompt: str, system_message: str, subtitle_path: str) -> str: 23 | """ 24 | 与通义千问AI模型进行对话 25 | 26 | Args: 27 | prompt (str): 用户输入的问题或提示 28 | system_message (str): 系统提示信息,用于设定AI助手的行为。默认为"You are a helpful assistant." 
29 | subtitle_path (str): 字幕文件路径 30 | Returns: 31 | str: AI助手的回复内容 32 | 33 | Raises: 34 | Exception: 当API调用失败时抛出异常 35 | """ 36 | try: 37 | client = OpenAI( 38 | api_key="sk-a1acd853d88d41d3ae92777d7bfa2612", 39 | base_url="https://dashscope.aliyuncs.com/compatible-mode/v1", 40 | ) 41 | 42 | # 读取字幕文件 43 | with open(subtitle_path, "r", encoding="utf-8") as file: 44 | subtitle_content = file.read() 45 | 46 | completion = client.chat.completions.create( 47 | model="qwen-turbo-2024-11-01", 48 | messages=[ 49 | {'role': 'system', 'content': system_message}, 50 | {'role': 'user', 'content': prompt + subtitle_content} 51 | ] 52 | ) 53 | return completion.choices[0].message.content 54 | 55 | except Exception as e: 56 | error_message = f"调用千问API时发生错误:{str(e)}" 57 | print(error_message) 58 | print("请参考文档:https://help.aliyun.com/zh/model-studio/developer-reference/error-code") 59 | raise Exception(error_message) 60 | 61 | 62 | # 使用示例 63 | if __name__ == "__main__": 64 | try: 65 | video_path = utils.video_dir("duanju_yuansp.mp4") 66 | # # 判断视频是否存在 67 | # if not os.path.exists(video_path): 68 | # print(f"视频文件不存在:{video_path}") 69 | # exit(1) 70 | # 提取字幕 71 | subtitle_path = os.path.join(utils.video_dir(""), f"duanju_yuan.srt") 72 | extract_audio_and_create_subtitle(video_file=video_path, subtitle_file=subtitle_path) 73 | # 分析字幕 74 | system_message = """ 75 | 你是一个视频srt字幕分析剪辑器, 输入视频的srt字幕, 分析其中的精彩且尽可能连续的片段并裁剪出来, 注意确保文字与时间戳的正确匹配。 76 | 输出需严格按照如下 json 格式: 77 | [ 78 | { 79 | "timestamp": "00:00:50,020-00,01:44,000", 80 | "picture": "画面1", 81 | "narration": "播放原声", 82 | "OST": 0, 83 | "new_timestamp": "00:00:00,000-00:00:54,020" 84 | }, 85 | { 86 | "timestamp": "01:49-02:30", 87 | "picture": "画面2", 88 | "narration": "播放原声", 89 | "OST": 2, 90 | "new_timestamp": "00:54-01:35" 91 | }, 92 | ] 93 | """ 94 | prompt = "字幕如下:\n" 95 | response = chat_with_qwen(prompt, system_message, subtitle_path) 96 | print(response) 97 | # 保存json,注意json中是时间戳需要转换为 分:秒(现在的时间是 "timestamp": 
"00:00:00,020-00:00:01,660", 需要转换为 "timestamp": "00:00-01:66") 98 | # response = json.loads(response) 99 | # for item in response: 100 | # item["timestamp"] = item["timestamp"].replace(":", "-") 101 | # with open(os.path.join(utils.video_dir(""), "duanju_yuan.json"), "w", encoding="utf-8") as file: 102 | # json.dump(response, file, ensure_ascii=False) 103 | 104 | except Exception as e: 105 | print(traceback.format_exc()) 106 | -------------------------------------------------------------------------------- /app/utils/check_script.py: -------------------------------------------------------------------------------- 1 | import json 2 | from typing import Dict, Any 3 | 4 | def check_format(script_content: str) -> Dict[str, Any]: 5 | """检查脚本格式 6 | Args: 7 | script_content: 脚本内容 8 | Returns: 9 | Dict: {'success': bool, 'message': str} 10 | """ 11 | try: 12 | # 检查是否为有效的JSON 13 | data = json.loads(script_content) 14 | 15 | # 检查是否为列表 16 | if not isinstance(data, list): 17 | return { 18 | 'success': False, 19 | 'message': '脚本必须是JSON数组格式' 20 | } 21 | 22 | # 检查每个片段 23 | for i, clip in enumerate(data): 24 | # 检查必需字段 25 | required_fields = ['narration', 'picture', 'timestamp'] 26 | for field in required_fields: 27 | if field not in clip: 28 | return { 29 | 'success': False, 30 | 'message': f'第{i+1}个片段缺少必需字段: {field}' 31 | } 32 | 33 | # 检查字段类型 34 | if not isinstance(clip['narration'], str): 35 | return { 36 | 'success': False, 37 | 'message': f'第{i+1}个片段的narration必须是字符串' 38 | } 39 | if not isinstance(clip['picture'], str): 40 | return { 41 | 'success': False, 42 | 'message': f'第{i+1}个片段的picture必须是字符串' 43 | } 44 | if not isinstance(clip['timestamp'], str): 45 | return { 46 | 'success': False, 47 | 'message': f'第{i+1}个片段的timestamp必须是字符串' 48 | } 49 | 50 | # 检查字段内容不能为空 51 | if not clip['narration'].strip(): 52 | return { 53 | 'success': False, 54 | 'message': f'第{i+1}个片段的narration不能为空' 55 | } 56 | if not clip['picture'].strip(): 57 | return { 58 | 'success': False, 59 | 'message': 
f'第{i+1}个片段的picture不能为空' 60 | } 61 | if not clip['timestamp'].strip(): 62 | return { 63 | 'success': False, 64 | 'message': f'第{i+1}个片段的timestamp不能为空' 65 | } 66 | 67 | return { 68 | 'success': True, 69 | 'message': '脚本格式检查通过' 70 | } 71 | 72 | except json.JSONDecodeError as e: 73 | return { 74 | 'success': False, 75 | 'message': f'JSON格式错误: {str(e)}' 76 | } 77 | except Exception as e: 78 | return { 79 | 'success': False, 80 | 'message': f'检查过程中发生错误: {str(e)}' 81 | } 82 | -------------------------------------------------------------------------------- /app/utils/video_processor.py: -------------------------------------------------------------------------------- 1 | """ 2 | 视频帧提取工具 3 | 4 | 这个模块提供了简单高效的视频帧提取功能。主要特点: 5 | 1. 使用ffmpeg进行视频处理,支持硬件加速 6 | 2. 按指定时间间隔提取视频关键帧 7 | 3. 支持多种视频格式 8 | 4. 支持高清视频帧输出 9 | 5. 直接从原视频提取高质量关键帧 10 | 11 | 不依赖OpenCV和sklearn等库,只使用ffmpeg作为外部依赖,降低了安装和使用的复杂度。 12 | """ 13 | 14 | import os 15 | import re 16 | import time 17 | import subprocess 18 | from typing import List, Dict 19 | from loguru import logger 20 | from tqdm import tqdm 21 | 22 | from app.utils import ffmpeg_utils 23 | 24 | 25 | class VideoProcessor: 26 | def __init__(self, video_path: str): 27 | """ 28 | 初始化视频处理器 29 | 30 | Args: 31 | video_path: 视频文件路径 32 | """ 33 | if not os.path.exists(video_path): 34 | raise FileNotFoundError(f"视频文件不存在: {video_path}") 35 | 36 | self.video_path = video_path 37 | self.video_info = self._get_video_info() 38 | self.fps = float(self.video_info.get('fps', 25)) 39 | self.duration = float(self.video_info.get('duration', 0)) 40 | self.width = int(self.video_info.get('width', 0)) 41 | self.height = int(self.video_info.get('height', 0)) 42 | self.total_frames = int(self.fps * self.duration) 43 | 44 | def _get_video_info(self) -> Dict[str, str]: 45 | """ 46 | 使用ffprobe获取视频信息 47 | 48 | Returns: 49 | Dict[str, str]: 包含视频基本信息的字典 50 | """ 51 | cmd = [ 52 | "ffprobe", 53 | "-v", "error", 54 | "-select_streams", "v:0", 55 | "-show_entries", 
"stream=width,height,r_frame_rate,duration", 56 | "-of", "default=noprint_wrappers=1:nokey=0", 57 | self.video_path 58 | ] 59 | 60 | try: 61 | result = subprocess.run(cmd, capture_output=True, text=True, check=True) 62 | lines = result.stdout.strip().split('\n') 63 | info = {} 64 | for line in lines: 65 | if '=' in line: 66 | key, value = line.split('=', 1) 67 | info[key] = value 68 | 69 | # 处理帧率(可能是分数形式) 70 | if 'r_frame_rate' in info: 71 | try: 72 | num, den = map(int, info['r_frame_rate'].split('/')) 73 | info['fps'] = str(num / den) 74 | except ValueError: 75 | info['fps'] = info.get('r_frame_rate', '25') 76 | 77 | return info 78 | 79 | except subprocess.CalledProcessError as e: 80 | logger.error(f"获取视频信息失败: {e.stderr}") 81 | return { 82 | 'width': '1280', 83 | 'height': '720', 84 | 'fps': '25', 85 | 'duration': '0' 86 | } 87 | 88 | def extract_frames_by_interval(self, output_dir: str, interval_seconds: float = 5.0, 89 | use_hw_accel: bool = True) -> List[int]: 90 | """ 91 | 按指定时间间隔提取视频帧 92 | 93 | Args: 94 | output_dir: 输出目录 95 | interval_seconds: 帧提取间隔(秒) 96 | use_hw_accel: 是否使用硬件加速 97 | 98 | Returns: 99 | List[int]: 提取的帧号列表 100 | """ 101 | if not os.path.exists(output_dir): 102 | os.makedirs(output_dir) 103 | 104 | # 计算起始时间和帧提取点 105 | start_time = 0 106 | end_time = self.duration 107 | extraction_times = [] 108 | 109 | current_time = start_time 110 | while current_time < end_time: 111 | extraction_times.append(current_time) 112 | current_time += interval_seconds 113 | 114 | if not extraction_times: 115 | logger.warning("未找到需要提取的帧") 116 | return [] 117 | 118 | # 确定硬件加速器选项 119 | hw_accel = [] 120 | if use_hw_accel and ffmpeg_utils.is_ffmpeg_hwaccel_available(): 121 | hw_accel = ffmpeg_utils.get_ffmpeg_hwaccel_args() 122 | 123 | # 提取帧 124 | frame_numbers = [] 125 | for i, timestamp in enumerate(tqdm(extraction_times, desc="提取视频帧")): 126 | frame_number = int(timestamp * self.fps) 127 | frame_numbers.append(frame_number) 128 | 129 | # 格式化时间戳字符串 (HHMMSSmmm) 130 | 
hours = int(timestamp // 3600) 131 | minutes = int((timestamp % 3600) // 60) 132 | seconds = int(timestamp % 60) 133 | milliseconds = int((timestamp % 1) * 1000) 134 | time_str = f"{hours:02d}{minutes:02d}{seconds:02d}{milliseconds:03d}" 135 | 136 | output_path = os.path.join(output_dir, f"keyframe_{frame_number:06d}_{time_str}.jpg") 137 | 138 | # 使用ffmpeg提取单帧 139 | cmd = [ 140 | "ffmpeg", 141 | "-hide_banner", 142 | "-loglevel", "error", 143 | ] 144 | 145 | # 添加硬件加速参数 146 | cmd.extend(hw_accel) 147 | 148 | cmd.extend([ 149 | "-ss", str(timestamp), 150 | "-i", self.video_path, 151 | "-vframes", "1", 152 | "-q:v", "1", # 最高质量 153 | "-y", 154 | output_path 155 | ]) 156 | 157 | try: 158 | subprocess.run(cmd, check=True, capture_output=True) 159 | except subprocess.CalledProcessError as e: 160 | logger.warning(f"提取帧 {frame_number} 失败: {e.stderr}") 161 | 162 | logger.info(f"成功提取了 {len(frame_numbers)} 个视频帧") 163 | return frame_numbers 164 | 165 | def _detect_hw_accelerator(self) -> List[str]: 166 | """ 167 | 检测系统可用的硬件加速器 168 | 169 | Returns: 170 | List[str]: 硬件加速器ffmpeg命令参数 171 | """ 172 | # 使用集中式硬件加速检测 173 | if ffmpeg_utils.is_ffmpeg_hwaccel_available(): 174 | return ffmpeg_utils.get_ffmpeg_hwaccel_args() 175 | return [] 176 | 177 | def process_video_pipeline(self, 178 | output_dir: str, 179 | interval_seconds: float = 5.0, # 帧提取间隔(秒) 180 | use_hw_accel: bool = True) -> None: 181 | """ 182 | 执行简化的视频处理流程,直接从原视频按固定时间间隔提取帧 183 | 184 | Args: 185 | output_dir: 输出目录 186 | interval_seconds: 帧提取间隔(秒) 187 | use_hw_accel: 是否使用硬件加速 188 | """ 189 | # 创建输出目录 190 | os.makedirs(output_dir, exist_ok=True) 191 | 192 | try: 193 | # 直接从原视频提取关键帧 194 | logger.info(f"从视频间隔 {interval_seconds} 秒提取关键帧...") 195 | self.extract_frames_by_interval( 196 | output_dir, 197 | interval_seconds=interval_seconds, 198 | use_hw_accel=use_hw_accel 199 | ) 200 | 201 | logger.info(f"处理完成!视频帧已保存在: {output_dir}") 202 | 203 | except Exception as e: 204 | import traceback 205 | logger.error(f"视频处理失败: 
\n{traceback.format_exc()}") 206 | raise 207 | 208 | 209 | if __name__ == "__main__": 210 | import time 211 | 212 | start_time = time.time() 213 | 214 | # 使用示例 215 | processor = VideoProcessor("./resource/videos/test.mp4") 216 | 217 | # 设置间隔为3秒提取帧 218 | processor.process_video_pipeline( 219 | output_dir="output", 220 | interval_seconds=3.0, 221 | use_hw_accel=True 222 | ) 223 | 224 | end_time = time.time() 225 | print(f"处理完成!总耗时: {end_time - start_time:.2f} 秒") 226 | -------------------------------------------------------------------------------- /changelog.py: -------------------------------------------------------------------------------- 1 | from git_changelog.cli import build_and_render 2 | 3 | # 运行这段脚本自动生成CHANGELOG.md文件 4 | 5 | build_and_render( 6 | repository=".", 7 | output="CHANGELOG.md", 8 | convention="angular", 9 | provider="github", 10 | template="keepachangelog", 11 | parse_trailers=True, 12 | parse_refs=False, 13 | sections=["build", "deps", "feat", "fix", "refactor"], 14 | versioning="pep440", 15 | bump="1.1.2", # 指定bump版本 16 | in_place=True, 17 | ) 18 | -------------------------------------------------------------------------------- /config.example.toml: -------------------------------------------------------------------------------- 1 | [app] 2 | project_version="0.6.2" 3 | # 支持视频理解的大模型提供商 4 | # gemini (谷歌, 需要 VPN) 5 | # siliconflow (硅基流动) 6 | # qwenvl (通义千问) 7 | vision_llm_provider="Siliconflow" 8 | 9 | ########## Gemini 视觉模型 10 | vision_gemini_api_key = "" 11 | vision_gemini_model_name = "gemini-2.0-flash-lite" 12 | 13 | ########## QwenVL 视觉模型 14 | vision_qwenvl_api_key = "" 15 | vision_qwenvl_model_name = "qwen2.5-vl-32b-instruct" 16 | vision_qwenvl_base_url = "https://dashscope.aliyuncs.com/compatible-mode/v1" 17 | 18 | ########## siliconflow 视觉模型 19 | vision_siliconflow_api_key = "" 20 | vision_siliconflow_model_name = "Qwen/Qwen2.5-VL-32B-Instruct" 21 | vision_siliconflow_base_url = "https://api.siliconflow.cn/v1" 22 | 23 | ########## OpenAI 
视觉模型 24 | vision_openai_api_key = "" 25 | vision_openai_model_name = "gpt-4.1-nano-2025-04-14" 26 | vision_openai_base_url = "https://api.openai.com/v1" 27 | 28 | ########### NarratoAPI 微调模型 (未发布) 29 | narrato_api_key = "" 30 | narrato_api_url = "" 31 | narrato_model = "narra-1.0-2025-05-09" 32 | 33 | # 用于生成文案的大模型支持的提供商 (Supported providers): 34 | # openai (默认, 需要 VPN) 35 | # siliconflow (硅基流动) 36 | # deepseek (深度求索) 37 | # gemini (谷歌, 需要 VPN) 38 | # qwen (通义千问) 39 | # moonshot (月之暗面) 40 | text_llm_provider="openai" 41 | 42 | ########## OpenAI API Key 43 | # Get your API key at https://platform.openai.com/api-keys 44 | text_openai_api_key = "" 45 | text_openai_base_url = "https://api.openai.com/v1" 46 | text_openai_model_name = "gpt-4.1-mini-2025-04-14" 47 | 48 | # 使用 硅基流动 第三方 API Key,使用手机号注册:https://cloud.siliconflow.cn/i/pyOKqFCV 49 | # 访问 https://cloud.siliconflow.cn/account/ak 获取你的 API 密钥 50 | text_siliconflow_api_key = "" 51 | text_siliconflow_base_url = "https://api.siliconflow.cn/v1" 52 | text_siliconflow_model_name = "deepseek-ai/DeepSeek-R1" 53 | 54 | ########## DeepSeek API Key 55 | # 访问 https://platform.deepseek.com/api_keys 获取你的 API 密钥 56 | text_deepseek_api_key = "" 57 | text_deepseek_base_url = "https://api.deepseek.com" 58 | text_deepseek_model_name = "deepseek-chat" 59 | 60 | ########## Gemini API Key 61 | text_gemini_api_key="" 62 | text_gemini_model_name = "gemini-2.0-flash" 63 | text_gemini_base_url = "https://generativelanguage.googleapis.com/v1beta/openai" 64 | 65 | ########## Qwen API Key 66 | # 访问 https://bailian.console.aliyun.com/?tab=model#/api-key 获取你的 API 密钥 67 | text_qwen_api_key = "" 68 | text_qwen_model_name = "qwen-plus-1127" 69 | text_qwen_base_url = "https://dashscope.aliyuncs.com/compatible-mode/v1" 70 | 71 | ########## Moonshot API Key 72 | # 访问 https://platform.moonshot.cn/console/api-keys 获取你的 API 密钥 73 | text_moonshot_api_key="" 74 | text_moonshot_base_url = "https://api.moonshot.cn/v1" 75 | text_moonshot_model_name = 
"moonshot-v1-8k" 76 | 77 | # webui界面是否显示配置项 78 | hide_config = true 79 | 80 | [proxy] 81 | http = "http://127.0.0.1:7890" 82 | https = "http://127.0.0.1:7890" 83 | enabled = false 84 | 85 | [frames] 86 | # 提取关键帧的间隔时间 87 | frame_interval_input = 3 88 | # 大模型单次处理的关键帧数量 89 | vision_batch_size = 10 90 | -------------------------------------------------------------------------------- /docker-compose.yml: -------------------------------------------------------------------------------- 1 | x-common: &common 2 | build: 3 | context: . 4 | dockerfile: Dockerfile 5 | image: linyq1/narratoai:latest 6 | volumes: 7 | - ./:/NarratoAI 8 | environment: 9 | - VPN_PROXY_URL=http://host.docker.internal:7890 10 | - PYTHONUNBUFFERED=1 11 | - PYTHONMALLOC=malloc 12 | - OPENCV_OPENCL_RUNTIME=disabled 13 | - OPENCV_CPU_DISABLE=0 14 | restart: always 15 | mem_limit: 4g 16 | mem_reservation: 2g 17 | memswap_limit: 6g 18 | cpus: 2.0 19 | cpu_shares: 1024 20 | 21 | services: 22 | webui: 23 | <<: *common 24 | container_name: webui 25 | ports: 26 | - "8501:8501" 27 | command: ["webui"] 28 | logging: 29 | driver: "json-file" 30 | options: 31 | max-size: "200m" 32 | max-file: "3" 33 | tmpfs: 34 | - /tmp:size=1G 35 | ulimits: 36 | nofile: 37 | soft: 65536 38 | hard: 65536 39 | -------------------------------------------------------------------------------- /docker-entrypoint.sh: -------------------------------------------------------------------------------- 1 | #!/bin/bash 2 | set -e 3 | 4 | if [ "$1" = "webui" ]; then 5 | exec streamlit run webui.py --browser.serverAddress=127.0.0.1 --server.enableCORS=True --browser.gatherUsageStats=False 6 | else 7 | exec "$@" 8 | fi -------------------------------------------------------------------------------- /docker/Dockerfile_MiniCPM: -------------------------------------------------------------------------------- 1 | ARG BASE=nvidia/cuda:12.1.0-devel-ubuntu22.04 2 | FROM ${BASE} 3 | 4 | # 设置环境变量 5 | ENV http_proxy=http://host.docker.internal:7890 6 | ENV 
https_proxy=http://host.docker.internal:7890 7 | ENV DEBIAN_FRONTEND=noninteractive 8 | 9 | # 安装系统依赖 10 | RUN apt-get update && apt-get install -y --no-install-recommends \ 11 | gcc g++ make git python3 python3-dev python3-pip python3-venv python3-wheel \ 12 | espeak-ng libsndfile1-dev nano vim unzip wget xz-utils && \ 13 | apt-get clean && \ 14 | rm -rf /var/lib/apt/lists/* 15 | 16 | # 设置工作目录 17 | WORKDIR /root/MiniCPM-V/ 18 | 19 | # 安装 Python 依赖 20 | RUN git clone https://github.com/OpenBMB/MiniCPM-V.git && \ 21 | cd MiniCPM-V && \ 22 | pip3 install decord && \ 23 | pip3 install --no-cache-dir -r requirements.txt && \ 24 | pip3 install flash_attn 25 | 26 | # 清理代理环境变量 27 | ENV http_proxy="" 28 | ENV https_proxy="" 29 | 30 | # 设置 PYTHONPATH 31 | ENV PYTHONPATH="/root/MiniCPM-V/" 32 | -------------------------------------------------------------------------------- /docs/check-en.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/linyqh/NarratoAI/7a8de5e79175ff7a7a633f43418f427badc4af53/docs/check-en.png -------------------------------------------------------------------------------- /docs/check-zh.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/linyqh/NarratoAI/7a8de5e79175ff7a7a633f43418f427badc4af53/docs/check-zh.png -------------------------------------------------------------------------------- /docs/img001-en.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/linyqh/NarratoAI/7a8de5e79175ff7a7a633f43418f427badc4af53/docs/img001-en.png -------------------------------------------------------------------------------- /docs/img001-zh.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/linyqh/NarratoAI/7a8de5e79175ff7a7a633f43418f427badc4af53/docs/img001-zh.png 
-------------------------------------------------------------------------------- /docs/img002-en.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/linyqh/NarratoAI/7a8de5e79175ff7a7a633f43418f427badc4af53/docs/img002-en.png -------------------------------------------------------------------------------- /docs/img002-zh.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/linyqh/NarratoAI/7a8de5e79175ff7a7a633f43418f427badc4af53/docs/img002-zh.png -------------------------------------------------------------------------------- /docs/img003-en.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/linyqh/NarratoAI/7a8de5e79175ff7a7a633f43418f427badc4af53/docs/img003-en.png -------------------------------------------------------------------------------- /docs/img003-zh.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/linyqh/NarratoAI/7a8de5e79175ff7a7a633f43418f427badc4af53/docs/img003-zh.png -------------------------------------------------------------------------------- /docs/img004-en.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/linyqh/NarratoAI/7a8de5e79175ff7a7a633f43418f427badc4af53/docs/img004-en.png -------------------------------------------------------------------------------- /docs/img004-zh.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/linyqh/NarratoAI/7a8de5e79175ff7a7a633f43418f427badc4af53/docs/img004-zh.png -------------------------------------------------------------------------------- /docs/img005-en.png: -------------------------------------------------------------------------------- 
https://raw.githubusercontent.com/linyqh/NarratoAI/7a8de5e79175ff7a7a633f43418f427badc4af53/docs/img005-en.png -------------------------------------------------------------------------------- /docs/img005-zh.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/linyqh/NarratoAI/7a8de5e79175ff7a7a633f43418f427badc4af53/docs/img005-zh.png -------------------------------------------------------------------------------- /docs/img006-en.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/linyqh/NarratoAI/7a8de5e79175ff7a7a633f43418f427badc4af53/docs/img006-en.png -------------------------------------------------------------------------------- /docs/img006-zh.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/linyqh/NarratoAI/7a8de5e79175ff7a7a633f43418f427badc4af53/docs/img006-zh.png -------------------------------------------------------------------------------- /docs/img007-en.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/linyqh/NarratoAI/7a8de5e79175ff7a7a633f43418f427badc4af53/docs/img007-en.png -------------------------------------------------------------------------------- /docs/img007-zh.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/linyqh/NarratoAI/7a8de5e79175ff7a7a633f43418f427badc4af53/docs/img007-zh.png -------------------------------------------------------------------------------- /docs/index-en.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/linyqh/NarratoAI/7a8de5e79175ff7a7a633f43418f427badc4af53/docs/index-en.png -------------------------------------------------------------------------------- /docs/index-zh.png: 
-------------------------------------------------------------------------------- https://raw.githubusercontent.com/linyqh/NarratoAI/7a8de5e79175ff7a7a633f43418f427badc4af53/docs/index-zh.png -------------------------------------------------------------------------------- /main.py: -------------------------------------------------------------------------------- 1 | import os 2 | import uvicorn 3 | from loguru import logger 4 | 5 | from app.config import config 6 | 7 | if __name__ == "__main__": 8 | logger.info( 9 | "start server, docs: http://127.0.0.1:" + str(config.listen_port) + "/docs" 10 | ) 11 | os.environ["HTTP_PROXY"] = config.proxy.get("http") 12 | os.environ["HTTPS_PROXY"] = config.proxy.get("https") 13 | uvicorn.run( 14 | app="app.asgi:app", 15 | host=config.listen_host, 16 | port=config.listen_port, 17 | reload=config.reload_debug, 18 | log_level="warning", 19 | ) 20 | -------------------------------------------------------------------------------- /project_version: -------------------------------------------------------------------------------- 1 | 0.6.3 -------------------------------------------------------------------------------- /release-notes.md: -------------------------------------------------------------------------------- 1 | # Release Notes 2 | 3 | ## Latest Changes 4 | 5 | * docs(README): 更新README. PR [#138](https://github.com/linyqh/NarratoAI/pull/138) by [@linyqh](https://github.com/linyqh). 6 | * Dev 0.6.0. PR [#137](https://github.com/linyqh/NarratoAI/pull/137) by [@linyqh](https://github.com/linyqh). 7 | * Dev 0.6.0 . PR [#134](https://github.com/linyqh/NarratoAI/pull/134) by [@linyqh](https://github.com/linyqh). 8 | * Dev-0.3.9. PR [#73](https://github.com/linyqh/NarratoAI/pull/73) by [@linyqh](https://github.com/linyqh). 9 | * 0.3.9 版本发布. PR [#71](https://github.com/linyqh/NarratoAI/pull/71) by [@linyqh](https://github.com/linyqh). 10 | * docs: add Japanese README. 
PR [#66](https://github.com/linyqh/NarratoAI/pull/66) by [@eltociear](https://github.com/eltociear). 11 | * docs: 测试 release 2. PR [#62](https://github.com/linyqh/NarratoAI/pull/62) by [@linyqh](https://github.com/linyqh). 12 | * docs: 测试 release. PR [#61](https://github.com/linyqh/NarratoAI/pull/61) by [@linyqh](https://github.com/linyqh). 13 | * docs: 测试commit. PR [#60](https://github.com/linyqh/NarratoAI/pull/60) by [@linyqh](https://github.com/linyqh). 14 | * Dev. PR [#59](https://github.com/linyqh/NarratoAI/pull/59) by [@linyqh](https://github.com/linyqh). 15 | * 0.2.0新版预发布. PR [#37](https://github.com/linyqh/NarratoAI/pull/37) by [@linyqh](https://github.com/linyqh). 16 | * v0.3.6. PR [#58](https://github.com/linyqh/NarratoAI/pull/58) by [@linyqh](https://github.com/linyqh). 17 | * 0.3.4 修改各种bug. PR [#49](https://github.com/linyqh/NarratoAI/pull/49) by [@linyqh](https://github.com/linyqh). 18 | -------------------------------------------------------------------------------- /requirements.txt: -------------------------------------------------------------------------------- 1 | # 必须项 2 | requests~=2.32.0 3 | moviepy==2.1.1 4 | edge-tts==6.1.19 5 | streamlit~=1.45.0 6 | watchdog==6.0.0 7 | loguru~=0.7.3 8 | tomli~=2.2.1 9 | pydub==0.25.1 10 | pysrt==1.1.2 11 | 12 | openai~=1.77.0 13 | google-generativeai>=0.8.5 14 | 15 | # 待优化项 16 | # opencv-python==4.11.0.86 17 | # scikit-learn==1.6.1 18 | 19 | # fastapi~=0.115.4 20 | # uvicorn~=0.27.1 21 | # pydantic~=2.11.4 22 | 23 | # faster-whisper~=1.0.1 24 | # tomli~=2.0.1 25 | # aiohttp~=3.10.10 26 | # httpx==0.27.2 27 | # urllib3~=2.2.1 28 | 29 | # python-multipart~=0.0.9 30 | # redis==5.0.3 31 | # opencv-python~=4.10.0.84 32 | # azure-cognitiveservices-speech~=1.37.0 33 | # git-changelog~=2.5.2 34 | # watchdog==5.0.2 35 | # pydub==0.25.1 36 | # psutil>=5.9.0 37 | # scikit-learn~=1.5.2 38 | # pillow==10.3.0 39 | # python-dotenv~=1.0.1 40 | 41 | # tqdm>=4.66.6 42 | # tenacity>=9.0.0 43 | # tiktoken==0.8.0 44 | # 
pysrt==1.1.2 45 | # transformers==4.50.0 46 | 47 | # yt-dlp==2025.4.30 -------------------------------------------------------------------------------- /resource/fonts/fonts_in_here.txt: -------------------------------------------------------------------------------- 1 | 此处放字体文件 -------------------------------------------------------------------------------- /resource/public/index.html: -------------------------------------------------------------------------------- 1 | 2 | 3 | 4 | 5 | NarratoAI 6 | 7 | 8 |

NarratoAI

9 | 项目地址:https://github.com/linyqh/NarratoAI 10 |
11 | 12 | webui 地址:http://127.0.0.1:8501 13 |
14 | api swagger 地址:http://127.0.0.1:8080/docs 15 |
16 | 17 |

18 | NarratoAI 是一个自动化影视解说工具,基于LLM实现文案撰写、自动化视频剪辑、配音和字幕生成的一站式流程,助力高效内容创作。 19 |

20 | 21 |

22 | NarratoAI is an automated film and television commentary tool that implements a one-stop process of copywriting, automated video editing, dubbing and subtitle generation based on LLM, facilitating efficient content creation. 23 |

24 | 25 | -------------------------------------------------------------------------------- /resource/scripts/script_in_here.txt: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/linyqh/NarratoAI/7a8de5e79175ff7a7a633f43418f427badc4af53/resource/scripts/script_in_here.txt -------------------------------------------------------------------------------- /resource/songs/song_in_here.txt: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/linyqh/NarratoAI/7a8de5e79175ff7a7a633f43418f427badc4af53/resource/songs/song_in_here.txt -------------------------------------------------------------------------------- /resource/srt/srt_in_here.txt: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/linyqh/NarratoAI/7a8de5e79175ff7a7a633f43418f427badc4af53/resource/srt/srt_in_here.txt -------------------------------------------------------------------------------- /resource/videos/video_in_here.txt: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/linyqh/NarratoAI/7a8de5e79175ff7a7a633f43418f427badc4af53/resource/videos/video_in_here.txt -------------------------------------------------------------------------------- /video_pipeline.py: -------------------------------------------------------------------------------- 1 | import requests 2 | import json 3 | import os 4 | import time 5 | from typing import Dict, Any 6 | 7 | class VideoPipeline: 8 | def __init__(self, base_url: str = "http://127.0.0.1:8080"): 9 | self.base_url = base_url 10 | 11 | def download_video(self, url: str, resolution: str = "1080p", 12 | output_format: str = "mp4", rename: str = None) -> Dict[str, Any]: 13 | """下载视频的第一步""" 14 | endpoint = f"{self.base_url}/api/v2/youtube/download" 15 | payload = { 16 | "url": url, 17 | "resolution": resolution, 18 | 
"output_format": output_format, 19 | "rename": rename or time.strftime("%Y-%m-%d") 20 | } 21 | 22 | response = requests.post(endpoint, json=payload) 23 | response.raise_for_status() 24 | return response.json() 25 | 26 | def generate_script(self, video_path: str, skip_seconds: int = 0, 27 | threshold: int = 30, vision_batch_size: int = 10, 28 | vision_llm_provider: str = "gemini") -> Dict[str, Any]: 29 | """生成脚本的第二步""" 30 | endpoint = f"{self.base_url}/api/v2/scripts/generate" 31 | payload = { 32 | "video_path": video_path, 33 | "skip_seconds": skip_seconds, 34 | "threshold": threshold, 35 | "vision_batch_size": vision_batch_size, 36 | "vision_llm_provider": vision_llm_provider 37 | } 38 | 39 | response = requests.post(endpoint, json=payload) 40 | response.raise_for_status() 41 | return response.json() 42 | 43 | def crop_video(self, video_path: str, script: list) -> Dict[str, Any]: 44 | """剪辑视频的第三步""" 45 | endpoint = f"{self.base_url}/api/v2/scripts/crop" 46 | payload = { 47 | "video_origin_path": video_path, 48 | "video_script": script 49 | } 50 | 51 | response = requests.post(endpoint, json=payload) 52 | response.raise_for_status() 53 | return response.json() 54 | 55 | def generate_final_video(self, task_id: str, video_path: str, 56 | script_path: str, script: list, subclip_videos: Dict[str, str], voice_name: str) -> Dict[str, Any]: 57 | """生成最终视频的第四步""" 58 | endpoint = f"{self.base_url}/api/v2/scripts/start-subclip" 59 | 60 | request_data = { 61 | "video_clip_json": script, 62 | "video_clip_json_path": script_path, 63 | "video_origin_path": video_path, 64 | "video_aspect": "16:9", 65 | "video_language": "zh-CN", 66 | "voice_name": voice_name, 67 | "voice_volume": 1, 68 | "voice_rate": 1.2, 69 | "voice_pitch": 1, 70 | "bgm_name": "random", 71 | "bgm_type": "random", 72 | "bgm_file": "", 73 | "bgm_volume": 0.3, 74 | "subtitle_enabled": True, 75 | "subtitle_position": "bottom", 76 | "font_name": "STHeitiMedium.ttc", 77 | "text_fore_color": "#FFFFFF", 78 | 
"text_background_color": "transparent", 79 | "font_size": 75, 80 | "stroke_color": "#000000", 81 | "stroke_width": 1.5, 82 | "custom_position": 70, 83 | "n_threads": 8 84 | } 85 | 86 | payload = { 87 | "request": request_data, 88 | "subclip_videos": subclip_videos 89 | } 90 | 91 | params = {"task_id": task_id} 92 | response = requests.post(endpoint, params=params, json=payload) 93 | response.raise_for_status() 94 | return response.json() 95 | 96 | def save_script_to_json(self, script: list, script_path: str) -> str: 97 | """保存脚本到json文件""" 98 | try: 99 | with open(script_path, 'w', encoding='utf-8') as f: 100 | json.dump(script, f, ensure_ascii=False, indent=2) 101 | print(f"脚本已保存到: {script_path}") 102 | return script_path 103 | except Exception as e: 104 | print(f"保存脚本失败: {str(e)}") 105 | raise 106 | 107 | def run_pipeline(self, task_id: str, script_name: str, youtube_url: str, video_name: str="null", skip_seconds: int = 0, threshold: int = 30, vision_batch_size: int = 10, vision_llm_provider: str = "gemini", voice_name: str = "zh-CN-YunjianNeural") -> Dict[str, Any]: 108 | """运行完整的pipeline""" 109 | try: 110 | current_path = os.path.dirname(os.path.abspath(__file__)) 111 | video_path = os.path.join(current_path, "resource", "videos", f"{video_name}.mp4") 112 | # 判断视频是否存在 113 | if not os.path.exists(video_path): 114 | # 1. 下载视频 115 | print(f"视频不存在, 开始下载视频: {video_path}") 116 | download_result = self.download_video(url=youtube_url, resolution="1080p", output_format="mp4", rename=video_name) 117 | video_path = download_result["output_path"] 118 | else: 119 | print(f"视频已存在: {video_path}") 120 | 121 | # 2. 
判断script_name是否存在 122 | # 2.1.1 拼接脚本路径 NarratoAI/resource/scripts 123 | script_path = os.path.join(current_path, "resource", "scripts", script_name) 124 | if os.path.exists(script_path): 125 | script = json.load(open(script_path, "r", encoding="utf-8")) 126 | else: 127 | # 2.1.2 生成脚本 128 | print("开始生成脚本...") 129 | script_result = self.generate_script(video_path=video_path, skip_seconds=skip_seconds, threshold=threshold, vision_batch_size=vision_batch_size, vision_llm_provider=vision_llm_provider) 130 | script = script_result["script"] 131 | 132 | # 2.2 保存脚本到json文件 133 | print("保存脚本到json文件...") 134 | self.save_script_to_json(script=script, script_path=script_path) 135 | 136 | # 3. 剪辑视频 137 | print("开始剪辑视频...") 138 | crop_result = self.crop_video(video_path=video_path, script=script) 139 | subclip_videos = crop_result["subclip_videos"] 140 | 141 | # 4. 生成最终视频 142 | print("开始生成最终视频...") 143 | self.generate_final_video( 144 | task_id=task_id, 145 | video_path=video_path, 146 | script_path=script_path, 147 | script=script, 148 | subclip_videos=subclip_videos, 149 | voice_name=voice_name 150 | ) 151 | 152 | return { 153 | "status": "等待异步生成视频", 154 | "path": os.path.join(current_path, "storage", "tasks", task_id) 155 | } 156 | 157 | except Exception as e: 158 | return { 159 | "status": "error", 160 | "error": str(e) 161 | } 162 | 163 | 164 | # 使用示例 165 | if __name__ == "__main__": 166 | pipeline = VideoPipeline() 167 | result = pipeline.run_pipeline( 168 | task_id="test_111901", 169 | script_name="test.json", 170 | youtube_url="https://www.youtube.com/watch?v=vLJ7Yed6FQ4", 171 | video_name="2024-11-19-01", 172 | skip_seconds=50, 173 | threshold=35, 174 | vision_batch_size=10, 175 | vision_llm_provider="gemini", 176 | voice_name="zh-CN-YunjianNeural", 177 | ) 178 | print(result) 179 | -------------------------------------------------------------------------------- /webui.py: -------------------------------------------------------------------------------- 1 | import 
def init_log():
    """Configure loguru logging for the WebUI.

    Installs a plain stdout sink immediately so logs are available during
    startup, then schedules a timer that swaps it for a message-filtered sink
    once the app has (most likely) finished starting.
    """
    from loguru import logger
    logger.remove()
    _lvl = "DEBUG"

    def format_record(record):
        # Show paths relative to the project root to keep log lines short;
        # torch-related noise is filtered later, not here.
        file_path = record["file"].path
        relative_path = os.path.relpath(file_path, config.root_dir)
        record["file"].path = f"./{relative_path}"
        record['message'] = record['message'].replace(config.root_dir, ".")

        _format = '{time:%Y-%m-%d %H:%M:%S} | ' + \
                  '{level} | ' + \
                  '"{file.path}:{line}": {function} ' + \
                  '- {message}' + "\n"
        return _format

    # Install a plain handler first; complex message filters are added after
    # startup to avoid touching record["message"] too early.
    logger.add(
        sys.stdout,
        level=_lvl,
        format=format_record,
        colorize=True
    )

    def setup_advanced_filters():
        """Replace the plain handler with one that drops known torch/CUDA noise."""
        try:
            # BUGFIX: iterate over a snapshot of the handler ids. Removing
            # handlers while iterating the live dict raised "RuntimeError:
            # dictionary changed size during iteration", which fell into the
            # except-branch and installed a duplicate, unfiltered handler.
            # NOTE(review): logger._core is private loguru API — confirm it is
            # still present when upgrading loguru.
            for handler_id in list(logger._core.handlers):
                logger.remove(handler_id)

            def advanced_filter(record):
                """Return False for messages that should be suppressed."""
                ignore_messages = [
                    "Examining the path of torch.classes raised",
                    "torch.cuda.is_available()",
                    "CUDA initialization"
                ]
                return not any(msg in record["message"] for msg in ignore_messages)

            logger.add(
                sys.stdout,
                level=_lvl,
                format=format_record,
                colorize=True,
                filter=advanced_filter
            )
        except Exception as e:
            # Keep logging usable even if filter installation fails.
            logger.add(
                sys.stdout,
                level=_lvl,
                format=format_record,
                colorize=True
            )
            logger.error(f"设置高级日志过滤器失败: {e}")

    # Defer the handler swap until after the main startup logic has run.
    import threading
    threading.Timer(5.0, setup_advanced_filters).start()


def init_global_state():
    """Seed st.session_state with the keys the UI expects to exist."""
    if 'video_clip_json' not in st.session_state:
        st.session_state['video_clip_json'] = []
    if 'video_plot' not in st.session_state:
        st.session_state['video_plot'] = ''
    if 'ui_language' not in st.session_state:
        st.session_state['ui_language'] = config.ui.get("language", utils.get_system_locale())
    if 'subclip_videos' not in st.session_state:
        st.session_state['subclip_videos'] = {}


def tr(key):
    """Translate *key* for the current UI language; fall back to the key itself."""
    i18n_dir = os.path.join(os.path.dirname(__file__), "webui", "i18n")
    locales = utils.load_locales(i18n_dir)
    loc = locales.get(st.session_state['ui_language'], {})
    return loc.get("Translation", {}).get(key, key)
def render_generate_button():
    """Render the "Generate Video" button and drive the generation task."""
    if st.button(tr("Generate Video"), use_container_width=True, type="primary"):
        from app.services import task as tm

        # Reset the inline log container shown under the button.
        log_container = st.empty()
        log_records = []

        def log_received(msg):
            with log_container:
                log_records.append(msg)
                st.code("\n".join(log_records))

        from loguru import logger
        # NOTE(review): a new sink is added on every click and never removed —
        # consider removing the previous handler id to avoid duplicate lines.
        logger.add(log_received)

        config.save_config()
        task_id = st.session_state.get('task_id')

        # Validate the prerequisites produced by the script/crop steps.
        if not task_id:
            st.error(tr("请先裁剪视频"))
            return
        if not st.session_state.get('video_clip_json_path'):
            st.error(tr("脚本文件不能为空"))
            return
        if not st.session_state.get('video_origin_path'):
            st.error(tr("视频文件不能为空"))
            return

        st.toast(tr("生成视频"))
        logger.info(tr("开始生成视频"))

        # Gather parameters from every settings panel.
        script_params = script_settings.get_script_params()
        video_params = video_settings.get_video_params()
        audio_params = audio_settings.get_audio_params()
        subtitle_params = subtitle_settings.get_subtitle_params()

        all_params = {
            **script_params,
            **video_params,
            **audio_params,
            **subtitle_params
        }

        params = VideoClipParams(**all_params)

        result = tm.start_subclip(
            task_id=task_id,
            params=params,
            subclip_path_videos=st.session_state['subclip_videos']
        )

        video_files = result.get("videos", [])
        # BUGFIX: the success message used the typo "视生成完成"; check the
        # i18n files in case they keyed the translation on the typo.
        st.success(tr("视频生成完成"))

        try:
            if video_files:
                player_cols = st.columns(len(video_files) * 2 + 1)
                for i, url in enumerate(video_files):
                    player_cols[i * 2 + 1].video(url)
        except Exception as e:
            logger.error(f"播放视频失败: {e}")

        # file_utils.open_task_folder(config.root_dir, task_id)
        logger.info(tr("视频生成完成"))


# Module-level flag: the hardware-acceleration probe result is logged once per
# process, even though Streamlit re-executes this script on every interaction.
_HAS_LOGGED_HWACCEL_INFO = False


def main():
    """Application entry point: configure logging/state and render the UI."""
    global _HAS_LOGGED_HWACCEL_INFO
    init_log()
    init_global_state()

    # Detect FFmpeg hardware acceleration, but only log the result once.
    hwaccel_info = ffmpeg_utils.detect_hardware_acceleration()
    if not _HAS_LOGGED_HWACCEL_INFO:
        if hwaccel_info["available"]:
            logger.info(f"FFmpeg硬件加速检测结果: 可用 | 类型: {hwaccel_info['type']} | 编码器: {hwaccel_info['encoder']} | 独立显卡: {hwaccel_info['is_dedicated_gpu']} | 参数: {hwaccel_info['hwaccel_args']}")
        else:
            logger.warning(f"FFmpeg硬件加速不可用: {hwaccel_info['message']}, 将使用CPU软件编码")
        _HAS_LOGGED_HWACCEL_INFO = True

    # Initialise only basic resources; PyTorch-dependent resources are loaded
    # lazily by the panels that need them.
    try:
        utils.init_resources()
    except Exception as e:
        logger.warning(f"资源初始化时出现警告: {e}")

    st.title(f"Narrato:blue[AI]:sunglasses: 📽️")
    st.write(tr("Get Help"))

    # Render the PyTorch-free parts of the UI first.
    basic_settings.render_basic_settings(tr)
    merge_settings.render_merge_settings(tr)

    # Main three-column panel.
    panel = st.columns(3)
    with panel[0]:
        script_settings.render_script_panel(tr)
    with panel[1]:
        video_settings.render_video_panel(tr)
        audio_settings.render_audio_panel(tr)
    with panel[2]:
        subtitle_settings.render_subtitle_panel(tr)

    # Clip review grid.
    review_settings.render_review_panel(tr)

    # Parts that may touch PyTorch are rendered last.
    with panel[2]:
        system_settings.render_system_panel(tr)

    render_generate_button()


if __name__ == "__main__":
    main()
def render_audio_panel(tr):
    """Render the audio settings panel: TTS controls, then background music."""
    container = st.container(border=True)
    with container:
        st.write(tr("Audio Settings"))
        # Text-to-speech first, background-music second — order defines layout.
        render_tts_settings(tr)
        render_bgm_settings(tr)
def render_azure_v2_settings(tr):
    """Render the extra credential inputs required by Azure V2 voices."""
    region_default = config.azure.get("speech_region", "")
    key_default = config.azure.get("speech_key", "")

    region = st.text_input(tr("Speech Region"), value=region_default)
    key = st.text_input(tr("Speech Key"), value=key_default, type="password")

    # Persist whatever the user typed back into the shared config object.
    config.azure["speech_region"] = region
    config.azure["speech_key"] = key
def render_voice_preview(tr, voice_name):
    """Render the "Play Voice" button and synthesize a short preview clip."""
    if not st.button(tr("Play Voice")):
        return

    play_content = "感谢关注 NarratoAI,有任何问题或建议,可以关注微信公众号,求助或讨论"
    if not play_content:
        play_content = st.session_state.get('video_script', '')
    if not play_content:
        play_content = tr("Voice Example")

    with st.spinner(tr("Synthesizing Voice")):
        temp_dir = utils.storage_dir("temp", create=True)
        audio_file = os.path.join(temp_dir, f"tmp-voice-{str(uuid4())}.mp3")

        rate = st.session_state.get('voice_rate', 1.0)
        pitch = st.session_state.get('voice_pitch', 1.0)

        sub_maker = voice.tts(
            text=play_content,
            voice_name=voice_name,
            voice_rate=rate,
            voice_pitch=pitch,
            voice_file=audio_file,
        )

        # Retry once with a fixed English sentence if the first attempt failed.
        if not sub_maker:
            play_content = "This is a example voice. if you hear this, the voice synthesis failed with the original content."
            sub_maker = voice.tts(
                text=play_content,
                voice_name=voice_name,
                voice_rate=rate,
                voice_pitch=pitch,
                voice_file=audio_file,
            )

        # Play the result, then delete the temporary mp3.
        if sub_maker and os.path.exists(audio_file):
            st.audio(audio_file, format="audio/mp3")
            if os.path.exists(audio_file):
                os.remove(audio_file)
== "custom": 186 | custom_bgm_file = st.text_input(tr("Custom Background Music File")) 187 | if custom_bgm_file and os.path.exists(custom_bgm_file): 188 | st.session_state['bgm_file'] = custom_bgm_file 189 | 190 | # 背景音乐音量 191 | bgm_volume = st.slider( 192 | tr("Background Music Volume"), 193 | min_value=0.0, 194 | max_value=1.0, 195 | value=0.3, 196 | step=0.01, 197 | help=tr("Adjust the volume of the original audio") 198 | ) 199 | st.session_state['bgm_volume'] = bgm_volume 200 | 201 | 202 | def get_audio_params(): 203 | """获取音频参数""" 204 | return { 205 | 'voice_name': config.ui.get("voice_name", ""), 206 | 'voice_volume': st.session_state.get('voice_volume', 1.0), 207 | 'voice_rate': st.session_state.get('voice_rate', 1.0), 208 | 'voice_pitch': st.session_state.get('voice_pitch', 1.0), 209 | 'bgm_type': st.session_state.get('bgm_type', 'random'), 210 | 'bgm_file': st.session_state.get('bgm_file', ''), 211 | 'bgm_volume': st.session_state.get('bgm_volume', 0.3), 212 | } 213 | -------------------------------------------------------------------------------- /webui/components/review_settings.py: -------------------------------------------------------------------------------- 1 | import streamlit as st 2 | import os 3 | from loguru import logger 4 | 5 | 6 | def render_review_panel(tr): 7 | """渲染视频审查面板""" 8 | with st.expander(tr("Video Check"), expanded=False): 9 | try: 10 | video_list = st.session_state.get('video_clip_json', []) 11 | subclip_videos = st.session_state.get('subclip_videos', {}) 12 | except KeyError: 13 | video_list = [] 14 | subclip_videos = {} 15 | 16 | # 计算列数和行数 17 | num_videos = len(video_list) 18 | cols_per_row = 3 19 | rows = (num_videos + cols_per_row - 1) // cols_per_row # 向上取整计算行数 20 | 21 | # 使用容器展示视频 22 | for row in range(rows): 23 | cols = st.columns(cols_per_row) 24 | for col in range(cols_per_row): 25 | index = row * cols_per_row + col 26 | if index < num_videos: 27 | with cols[col]: 28 | render_video_item(tr, video_list, subclip_videos, 
def render_video_item(tr, video_list, subclip_videos, index):
    """Render one clip card in the review grid.

    Args:
        tr: translation function.
        video_list: full list of script entries; mutated in place when the
            user edits the narration or clip mode.
        subclip_videos: mapping from a clip's '_id' timestamp to its sub-clip
            video file path.
        index: position of this entry within video_list (also used to build
            unique Streamlit widget keys).
    """
    video_script = video_list[index]

    # The '_id' timestamp is both the display value and the lookup key into
    # subclip_videos.
    timestamp = video_script.get('_id', '')
    st.text_area(
        tr("Timestamp"),
        value=timestamp,
        height=70,
        disabled=True,
        key=f"timestamp_{index}"
    )

    # Inline player for the corresponding sub-clip, when the file exists.
    video_path = subclip_videos.get(timestamp)
    if video_path and os.path.exists(video_path):
        try:
            st.video(video_path)
        except Exception as e:
            logger.error(f"加载视频失败 {video_path}: {e}")
            st.error(f"无法加载视频: {os.path.basename(video_path)}")
    else:
        st.warning(tr("视频文件未找到"))

    # Read-only description of the picture content.
    st.text_area(
        tr("Picture Description"),
        value=video_script.get('picture', ''),
        height=150,
        disabled=True,
        key=f"picture_{index}"
    )

    # Editable narration text.
    narration = st.text_area(
        tr("Narration"),
        value=video_script.get('narration', ''),
        height=150,
        key=f"narration_{index}"
    )
    # Write narration edits back into the shared session-state list.
    if narration != video_script.get('narration', ''):
        video_script['narration'] = narration
        st.session_state['video_clip_json'] = video_list

    # Clip mode ('OST'): 0/1/2 as described in the help text below.
    ost = st.selectbox(
        tr("Clip Mode"),
        options=range(0, 3),
        index=video_script.get('OST', 0),
        key=f"ost_{index}",
        help=tr("0: Keep the audio only, 1: Keep the original sound only, 2: Keep the original sound and audio")
    )
    # Persist a changed clip mode.
    if ost != video_script.get('OST', 0):
        video_script['OST'] = ost
        st.session_state['video_clip_json'] = video_list
"""渲染字幕设置面板""" 9 | with st.container(border=True): 10 | st.write(tr("Subtitle Settings")) 11 | 12 | # 启用字幕选项 13 | enable_subtitles = st.checkbox(tr("Enable Subtitles"), value=True) 14 | st.session_state['subtitle_enabled'] = enable_subtitles 15 | 16 | if enable_subtitles: 17 | render_font_settings(tr) 18 | render_position_settings(tr) 19 | render_style_settings(tr) 20 | 21 | 22 | def render_font_settings(tr): 23 | """渲染字体设置""" 24 | # 获取字体列表 25 | font_dir = os.path.join(os.path.dirname(os.path.dirname(os.path.dirname(__file__))), "resource", "fonts") 26 | font_names = get_fonts_cache(font_dir) 27 | 28 | # 获取保存的字体设置 29 | saved_font_name = config.ui.get("font_name", "") 30 | saved_font_name_index = 0 31 | if saved_font_name in font_names: 32 | saved_font_name_index = font_names.index(saved_font_name) 33 | 34 | # 字体选择 35 | font_name = st.selectbox( 36 | tr("Font"), 37 | options=font_names, 38 | index=saved_font_name_index 39 | ) 40 | config.ui["font_name"] = font_name 41 | st.session_state['font_name'] = font_name 42 | 43 | # 字体大小 和 字幕大小 44 | font_cols = st.columns([0.3, 0.7]) 45 | with font_cols[0]: 46 | saved_text_fore_color = config.ui.get("text_fore_color", "#FFFFFF") 47 | text_fore_color = st.color_picker( 48 | tr("Font Color"), 49 | saved_text_fore_color 50 | ) 51 | config.ui["text_fore_color"] = text_fore_color 52 | st.session_state['text_fore_color'] = text_fore_color 53 | 54 | with font_cols[1]: 55 | saved_font_size = config.ui.get("font_size", 60) 56 | font_size = st.slider( 57 | tr("Font Size"), 58 | min_value=20, 59 | max_value=100, 60 | value=saved_font_size 61 | ) 62 | config.ui["font_size"] = font_size 63 | st.session_state['font_size'] = font_size 64 | 65 | 66 | def render_position_settings(tr): 67 | """渲染位置设置""" 68 | subtitle_positions = [ 69 | (tr("Top"), "top"), 70 | (tr("Center"), "center"), 71 | (tr("Bottom"), "bottom"), 72 | (tr("Custom"), "custom"), 73 | ] 74 | 75 | selected_index = st.selectbox( 76 | tr("Position"), 77 | index=2, 78 | 
def get_subtitle_params():
    """Collect the current subtitle parameters from session state."""
    ss = st.session_state
    # NOTE(review): the stroke_width fallback here (1.5) differs from the
    # slider default (1.0) — confirm which value is intended.
    return {
        'subtitle_enabled': ss.get('subtitle_enabled', True),
        'font_name': ss.get('font_name', ''),
        'font_size': ss.get('font_size', 60),
        'text_fore_color': ss.get('text_fore_color', '#FFFFFF'),
        'subtitle_position': ss.get('subtitle_position', 'bottom'),
        'custom_position': ss.get('custom_position', 70.0),
        'stroke_color': ss.get('stroke_color', '#000000'),
        'stroke_width': ss.get('stroke_width', 1.5),
    }
def clear_directory(dir_path, tr):
    """Best-effort removal of everything inside *dir_path* (the dir itself stays).

    Per-item failures are logged and skipped; a summary toast is shown either way.
    """
    if not os.path.exists(dir_path):
        st.warning(tr("Directory does not exist"))
        return

    try:
        for entry in os.listdir(dir_path):
            full_path = os.path.join(dir_path, entry)
            try:
                if os.path.isfile(full_path):
                    os.unlink(full_path)
                elif os.path.isdir(full_path):
                    shutil.rmtree(full_path)
            except Exception as e:
                # Keep going: one stubborn file should not abort the sweep.
                logger.error(f"Failed to delete {full_path}: {e}")
        st.success(tr("Directory cleared"))
        logger.info(f"Cleared directory: {dir_path}")
    except Exception as e:
        st.error(f"{tr('Failed to clear directory')}: {str(e)}")
        logger.error(f"Failed to clear directory {dir_path}: {e}")
st.write(tr("Video Settings")) 9 | params = VideoClipParams() 10 | render_video_config(tr, params) 11 | 12 | 13 | def render_video_config(tr, params): 14 | """渲染视频配置""" 15 | # 视频比例 16 | video_aspect_ratios = [ 17 | (tr("Portrait"), VideoAspect.portrait.value), 18 | (tr("Landscape"), VideoAspect.landscape.value), 19 | ] 20 | selected_index = st.selectbox( 21 | tr("Video Ratio"), 22 | options=range(len(video_aspect_ratios)), 23 | format_func=lambda x: video_aspect_ratios[x][0], 24 | ) 25 | params.video_aspect = VideoAspect(video_aspect_ratios[selected_index][1]) 26 | st.session_state['video_aspect'] = params.video_aspect.value 27 | 28 | # 视频画质 29 | video_qualities = [ 30 | ("4K (2160p)", "2160p"), 31 | ("2K (1440p)", "1440p"), 32 | ("Full HD (1080p)", "1080p"), 33 | ("HD (720p)", "720p"), 34 | ("SD (480p)", "480p"), 35 | ] 36 | quality_index = st.selectbox( 37 | tr("Video Quality"), 38 | options=range(len(video_qualities)), 39 | format_func=lambda x: video_qualities[x][0], 40 | index=2 # 默认选择 1080p 41 | ) 42 | st.session_state['video_quality'] = video_qualities[quality_index][1] 43 | 44 | # 原声音量 45 | params.original_volume = st.slider( 46 | tr("Original Volume"), 47 | min_value=0.0, 48 | max_value=1.0, 49 | value=0.7, 50 | step=0.01, 51 | help=tr("Adjust the volume of the original audio") 52 | ) 53 | st.session_state['original_volume'] = params.original_volume 54 | 55 | 56 | def get_video_params(): 57 | """获取视频参数""" 58 | return { 59 | 'video_aspect': st.session_state.get('video_aspect', VideoAspect.portrait.value), 60 | 'video_quality': st.session_state.get('video_quality', '1080p'), 61 | 'original_volume': st.session_state.get('original_volume', 0.7) 62 | } 63 | -------------------------------------------------------------------------------- /webui/config/settings.py: -------------------------------------------------------------------------------- 1 | import os 2 | import tomli 3 | from loguru import logger 4 | from typing import Dict, Any, Optional 5 | from 
def get_version_from_file():
    """Read the project version from the repo-root ``project_version`` file.

    Returns:
        str: the stripped file content, or "0.1.0" when the file is
        missing or unreadable.
    """
    try:
        version_file = os.path.join(
            os.path.dirname(os.path.dirname(os.path.dirname(__file__))),
            "project_version"
        )
        if os.path.isfile(version_file):
            with open(version_file, "r", encoding="utf-8") as f:
                return f.read().strip()
        return "0.1.0"  # default version
    except Exception as e:
        logger.error(f"读取版本号文件失败: {str(e)}")
        return "0.1.0"  # default version


@dataclass
class WebUIConfig:
    """WebUI configuration container.

    Dict-valued fields default to ``None`` and are replaced with fresh dicts
    in ``__post_init__`` so instances never share mutable state.
    FIX: ``project_version`` is now resolved lazily per instance instead of
    once at class-definition (import) time, so new instances pick up an
    updated ``project_version`` file.
    """
    # UI settings
    ui: Dict[str, Any] = None
    # proxy settings
    proxy: Dict[str, str] = None
    # application settings
    app: Dict[str, Any] = None
    # Azure speech settings
    azure: Dict[str, str] = None
    # project version; filled from the project_version file when left as None
    project_version: Optional[str] = None
    # project root directory
    root_dir: str = None
    # Gemini API key
    gemini_api_key: str = ""
    # number of images per vision batch
    vision_batch_size: int = 5
    # vision prompt
    vision_prompt: str = """..."""
    # Narrato API settings
    narrato_api_url: str = "http://127.0.0.1:8000/api/v1/video/analyze"
    narrato_api_key: str = ""
    narrato_batch_size: int = 10
    narrato_vision_model: str = "gemini-1.5-flash"
    narrato_llm_model: str = "qwen-plus"

    def __post_init__(self):
        """Fill in defaults for fields left unset (or explicitly passed as None)."""
        self.ui = self.ui or {}
        self.proxy = self.proxy or {}
        self.app = self.app or {}
        self.azure = self.azure or {}
        self.project_version = self.project_version or get_version_from_file()
        self.root_dir = self.root_dir or os.path.dirname(os.path.dirname(os.path.dirname(__file__)))


def load_config(config_path: Optional[str] = None) -> WebUIConfig:
    """Load the WebUI configuration from a TOML file.

    Args:
        config_path: path to the config file; when None the default
            ``webui/.streamlit/webui.toml`` is used, falling back to the
            repo-root ``config.example.toml``.

    Returns:
        WebUIConfig: the parsed configuration, or a default instance on
        any failure (logged).
    """
    try:
        if config_path is None:
            config_path = os.path.join(
                os.path.dirname(os.path.dirname(__file__)),
                ".streamlit",
                "webui.toml"
            )

        # Fall back to the bundled example config when the file is missing.
        if not os.path.exists(config_path):
            example_config = os.path.join(
                os.path.dirname(os.path.dirname(os.path.dirname(__file__))),
                "config.example.toml"
            )
            if os.path.exists(example_config):
                config_path = example_config
            else:
                logger.warning(f"配置文件不存在: {config_path}")
                return WebUIConfig()

        # tomli requires the file to be opened in binary mode.
        with open(config_path, "rb") as f:
            config_dict = tomli.load(f)

        # project_version is intentionally NOT read from the config file;
        # it always comes from the project_version file.
        config = WebUIConfig(
            ui=config_dict.get("ui", {}),
            proxy=config_dict.get("proxy", {}),
            app=config_dict.get("app", {}),
            azure=config_dict.get("azure", {}),
        )

        return config

    except Exception as e:
        logger.error(f"加载配置文件失败: {e}")
        return WebUIConfig()
def save_config(config: WebUIConfig, config_path: Optional[str] = None) -> bool:
    """Persist the configuration to a TOML file.

    Args:
        config: configuration object to save
        config_path: target path; defaults to ``webui/.streamlit/webui.toml``

    Returns:
        bool: True on success, False on any failure (logged).
    """
    try:
        if config_path is None:
            config_path = os.path.join(
                os.path.dirname(os.path.dirname(__file__)),
                ".streamlit",
                "webui.toml"
            )

        # Make sure the target directory exists.
        os.makedirs(os.path.dirname(config_path), exist_ok=True)

        # project_version is deliberately not persisted to the config file.
        config_dict = {
            "ui": config.ui,
            "proxy": config.proxy,
            "app": config.app,
            "azure": config.azure
        }

        # BUG FIX: tomli_w.dump() requires a binary file object; the file was
        # previously opened in text mode ("w"), which raises TypeError and
        # made every save silently fail.
        with open(config_path, "wb") as f:
            import tomli_w
            tomli_w.dump(config_dict, f)

        return True

    except Exception as e:
        logger.error(f"保存配置文件失败: {e}")
        return False

def get_config() -> WebUIConfig:
    """Return the process-wide configuration object, loading it on first use.

    The instance is cached on the function object itself.
    """
    if not hasattr(get_config, "_config"):
        get_config._config = load_config()
    return get_config._config

def update_config(config_dict: Dict[str, Any]) -> bool:
    """Merge ``config_dict`` into the global configuration and persist it.

    Only the "ui", "proxy", "app" and "azure" sections are merged;
    project_version is never taken from the caller.

    Args:
        config_dict: partial configuration to merge in

    Returns:
        bool: True when the merged configuration was saved successfully.
    """
    try:
        config = get_config()

        for section in ("ui", "proxy", "app", "azure"):
            if section in config_dict:
                getattr(config, section).update(config_dict[section])

        return save_config(config)

    except Exception as e:
        logger.error(f"更新配置失败: {e}")
        return False
156 | Returns: 157 | bool: 是否更新成功 158 | """ 159 | try: 160 | config = get_config() 161 | 162 | # 更新配置 163 | if "ui" in config_dict: 164 | config.ui.update(config_dict["ui"]) 165 | if "proxy" in config_dict: 166 | config.proxy.update(config_dict["proxy"]) 167 | if "app" in config_dict: 168 | config.app.update(config_dict["app"]) 169 | if "azure" in config_dict: 170 | config.azure.update(config_dict["azure"]) 171 | # 不再从配置字典更新project_version 172 | 173 | # 保存配置 174 | return save_config(config) 175 | 176 | except Exception as e: 177 | logger.error(f"更新配置失败: {e}") 178 | return False 179 | 180 | # 导出全局配置对象 181 | config = get_config() -------------------------------------------------------------------------------- /webui/i18n/__init__.py: -------------------------------------------------------------------------------- 1 | # 空文件,用于标记包 -------------------------------------------------------------------------------- /webui/i18n/en.json: -------------------------------------------------------------------------------- 1 | { 2 | "Language": "English", 3 | "Translation": { 4 | "Video Script Configuration": "**Video Script Configuration**", 5 | "Video Script Generate": "Generate Video Script", 6 | "Video Subject": "Video Subject (Given a keyword, :red[AI auto-generates] video script)", 7 | "Script Language": "Language of the generated video script (Usually, AI automatically outputs according to the language of the input subject)", 8 | "Script Files": "Script Files", 9 | "Generate Video Script and Keywords": "Click to use AI to generate **Video Script** and **Video Keywords** based on the **subject**", 10 | "Auto Detect": "Auto Detect", 11 | "Auto Generate": "Auto Generate", 12 | "Video Script": "Video Script (:blue[①Optional, use AI to generate ②Proper punctuation helps in generating subtitles])", 13 | "Save Script": "Save Script", 14 | "Crop Video": "Crop Video", 15 | "Video File": "Video File (:blue[1️⃣Supports uploading video files (limit 2G) 2️⃣For large files, it is 
recommended to directly import them into the ./resource/videos directory])", 16 | "Plot Description": "Plot Description (:blue[Can be obtained from https://www.tvmao.com/])", 17 | "Generate Video Keywords": "Click to use AI to generate **Video Keywords** based on the **script**", 18 | "Please Enter the Video Subject": "Please enter the video script first", 19 | "Generating Video Script and Keywords": "AI is generating the video script and keywords...", 20 | "Generating Video Keywords": "AI is generating the video keywords...", 21 | "Video Keywords": "Video Keywords (:blue[Long videos work better in conjunction with plot descriptions.])", 22 | "Video Settings": "**Video Settings**", 23 | "Video Concat Mode": "Video Concatenation Mode", 24 | "Random": "Random Concatenation (Recommended)", 25 | "Sequential": "Sequential Concatenation", 26 | "Video Ratio": "Video Ratio", 27 | "Portrait": "Portrait 9:16 (TikTok Video)", 28 | "Landscape": "Landscape 16:9 (Xigua Video)", 29 | "Clip Duration": "Maximum Clip Duration (Seconds) (**Not the total length of the video**, refers to the length of each **composite segment**)", 30 | "Number of Videos Generated Simultaneously": "Number of Videos Generated Simultaneously", 31 | "Audio Settings": "**Audio Settings**", 32 | "Speech Synthesis": "Speech Synthesis Voice (:red[**Keep consistent with the script language**. 
Note: V2 version performs better, but requires an API KEY])", 33 | "Speech Region": "Service Region (:red[Required, [Click to Get](https://portal.azure.com/#view/Microsoft_Azure_ProjectOxford/CognitiveServicesHub/~/SpeechServices)])", 34 | "Speech Key": "API Key (:red[Required, either Key 1 or Key 2 is acceptable [Click to Get](https://portal.azure.com/#view/Microsoft_Azure_ProjectOxford/CognitiveServicesHub/~/SpeechServices)])", 35 | "Speech Volume": "Speech Volume (1.0 represents 100%)", 36 | "Speech Rate": "Speech Rate (1.0 represents 1x speed)", 37 | "Male": "Male", 38 | "Female": "Female", 39 | "Background Music": "Background Music", 40 | "No Background Music": "No Background Music", 41 | "Random Background Music": "Random Background Music", 42 | "Custom Background Music": "Custom Background Music", 43 | "Custom Background Music File": "Please enter the file path of the custom background music", 44 | "Background Music Volume": "Background Music Volume (0.2 represents 20%, background sound should not be too loud)", 45 | "Subtitle Settings": "**Subtitle Settings**", 46 | "Enable Subtitles": "Enable Subtitles (If unchecked, the following settings will not take effect)", 47 | "Font": "Subtitle Font", 48 | "Position": "Subtitle Position", 49 | "Top": "Top", 50 | "Center": "Center", 51 | "Bottom": "Bottom (Recommended)", 52 | "Custom": "Custom Position (70, represents 70% from the top)", 53 | "Font Size": "Subtitle Size", 54 | "Font Color": "Subtitle Color", 55 | "Stroke Color": "Stroke Color", 56 | "Stroke Width": "Stroke Width", 57 | "Generate Video": "Generate Video", 58 | "Video Script and Subject Cannot Both Be Empty": "Video Subject and Video Script cannot both be empty", 59 | "Generating Video": "Generating video, please wait...", 60 | "Start Generating Video": "Start Generating Video", 61 | "Video Generation Completed": "Video Generation Completed", 62 | "Video Generation Failed": "Video Generation Failed", 63 | "You can download the generated video from the 
following links": "You can download the generated video from the following links", 64 | "Basic Settings": "**Basic Settings** (:blue[Click to expand])", 65 | "Language": "Interface Language", 66 | "Pexels API Key": "Pexels API Key ([Click to Get](https://www.pexels.com/api/)) :red[Recommended]", 67 | "Pixabay API Key": "Pixabay API Key ([Click to Get](https://pixabay.com/api/docs/#api_search_videos)) :red[Optional, if Pexels is unavailable, then choose Pixabay]", 68 | "LLM Provider": "LLM Provider", 69 | "API Key": "API Key (:red[Required, must be applied from the LLM provider's backend])", 70 | "Base Url": "Base Url (Optional)", 71 | "Account ID": "Account ID (Obtained from the URL of the Cloudflare dashboard)", 72 | "Model Name": "Model Name (:blue[Confirm the authorized model name from the LLM provider's backend])", 73 | "Please Enter the LLM API Key": "Please enter the **LLM API Key**", 74 | "Please Enter the Pexels API Key": "Please enter the **Pexels API Key**", 75 | "Please Enter the Pixabay API Key": "Please enter the **Pixabay API Key**", 76 | "Get Help": "One-stop AI video commentary + automated editing tool\uD83C\uDF89\uD83C\uDF89\uD83C\uDF89\n\nFor any questions or suggestions, you can join the **community channel** for help or discussion: https://github.com/linyqh/NarratoAI/wiki", 77 | "Video Source": "Video Source", 78 | "TikTok": "TikTok (Support is coming soon)", 79 | "Bilibili": "Bilibili (Support is coming soon)", 80 | "Xiaohongshu": "Xiaohongshu (Support is coming soon)", 81 | "Local file": "Local file", 82 | "Play Voice": "Play Synthesized Voice", 83 | "Voice Example": "This is a sample text for testing voice synthesis", 84 | "Synthesizing Voice": "Synthesizing voice, please wait...", 85 | "TTS Provider": "TTS Provider", 86 | "Hide Log": "Hide Log", 87 | "Upload Local Files": "Upload Local Files", 88 | "File Uploaded Successfully": "File Uploaded Successfully", 89 | "Frame Interval (seconds)": "Frame Interval (seconds) (More keyframes consume 
def create_vision_analyzer(provider, api_key, model, base_url):
    """
    Create a vision analyzer instance.

    Args:
        provider: provider name ('gemini' or 'qwenvl')
        api_key: API key
        model: model name
        base_url: API base URL (used only by the qwenvl analyzer)

    Returns:
        VisionAnalyzer or QwenAnalyzer instance
    """
    if provider == 'gemini':
        return gemini_analyzer.VisionAnalyzer(model_name=model, api_key=api_key)
    # QwenAnalyzer additionally needs the endpoint base URL.
    return qwenvl_analyzer.QwenAnalyzer(
        model_name=model,
        api_key=api_key,
        base_url=base_url
    )


def get_batch_timestamps(batch_files, prev_batch_files=None):
    """
    Parse the timestamp range covered by one batch of keyframe files,
    with millisecond precision.

    Args:
        batch_files: files of the current batch
        prev_batch_files: files of the previous batch; used as the start
            frame when the current batch contains a single image

    Returns:
        tuple: (first_timestamp, last_timestamp, timestamp_range) where each
        timestamp is formatted ``HH:MM:SS,mmm``.

    Example file name: ``keyframe_001253_000050100.jpg`` where ``000050100``
    encodes 00:00:50,100 (50 s 100 ms).
    """
    if not batch_files:
        logger.warning("Empty batch files")
        return "00:00:00,000", "00:00:00,000", "00:00:00,000-00:00:00,000"

    def get_frame_files():
        """Return (first, last) frame file names for this batch."""
        # FIX: the condition previously tested `prev_batch_files` twice.
        if len(batch_files) == 1 and prev_batch_files:
            # Single-image batch: borrow the previous batch's last frame
            # as the start of the range.
            first = os.path.basename(prev_batch_files[-1])
            last = os.path.basename(batch_files[0])
            logger.debug(f"单张图片批次,使用上一批次最后一帧作为首帧: {first}")
        else:
            first = os.path.basename(batch_files[0])
            last = os.path.basename(batch_files[-1])
        return first, last

    def extract_time(filename):
        """Extract the 9-digit HHMMSSMMM timestamp token from a file name."""
        try:
            # FIX: strip whatever extension the file has (the old code only
            # handled ".jpg", breaking e.g. ".png" keyframes).
            stem = os.path.splitext(filename)[0]
            time_str = stem.split('_')[2]
            if len(time_str) < 9:  # legacy short format: right-pad with zeros
                time_str = time_str.ljust(9, '0')
            return time_str
        except (IndexError, AttributeError) as e:
            # FIX: include the offending file name in the log message.
            logger.warning(f"Invalid filename format: {filename}, error: {e}")
            return "000000000"

    def format_timestamp(time_str):
        """
        Convert a 9-digit ``HHMMSSMMM`` string to ``HH:MM:SS,mmm``.

        e.g. ``000043039`` -> ``00:00:43,039``.
        """
        try:
            if len(time_str) < 9:
                logger.warning(f"Invalid timestamp format: {time_str}")
                return "00:00:00,000"

            hours = int(time_str[0:2])         # first 2 digits: hours
            minutes = int(time_str[2:4])       # digits 3-4: minutes
            seconds = int(time_str[4:6])       # digits 5-6: seconds
            milliseconds = int(time_str[6:])   # last 3 digits: milliseconds

            return f"{hours:02d}:{minutes:02d}:{seconds:02d},{milliseconds:03d}"

        except ValueError as e:
            logger.warning(f"时间戳格式转换失败: {time_str}, error: {e}")
            return "00:00:00,000"

    first_frame, last_frame = get_frame_files()

    first_time = extract_time(first_frame)
    last_time = extract_time(last_frame)

    first_timestamp = format_timestamp(first_time)
    last_timestamp = format_timestamp(last_time)
    timestamp_range = f"{first_timestamp}-{last_timestamp}"

    return first_timestamp, last_timestamp, timestamp_range
def get_batch_files(keyframe_files, result, batch_size=5):
    """
    Return the slice of ``keyframe_files`` belonging to the batch indicated
    by ``result['batch_index']``.

    Args:
        keyframe_files: full ordered list of keyframe file paths
        result: dict carrying the zero-based 'batch_index'
        batch_size: images per batch, default 5

    Returns:
        list: the files of that batch (may be shorter than batch_size, or
        empty when the index is past the end).
    """
    batch_start = result['batch_index'] * batch_size
    batch_end = min(batch_start + batch_size, len(keyframe_files))
    return keyframe_files[batch_start:batch_end]


def chekc_video_config(video_params):
    """
    Report the video analysis configuration to the NarratoAI service.

    NOTE: the name ("chekc") is a typo kept for backward compatibility with
    existing imports (e.g. webui/tools/generate_script_short.py).

    Best-effort: returns True when the POST completes without raising (the
    HTTP status code is deliberately ignored), False on any error.
    """
    headers = {
        'accept': 'application/json',
        'Content-Type': 'application/json'
    }
    session = requests.Session()
    # Retry transient server errors up to 3 times with exponential backoff.
    retry_strategy = Retry(
        total=3,
        backoff_factor=1,
        status_forcelist=[500, 502, 503, 504]
    )
    session.mount("https://", HTTPAdapter(max_retries=retry_strategy))
    try:
        session.post(
            "https://dev.narratoai.cn/api/v1/admin/external-api-config/services",
            headers=headers,
            json=video_params,
            timeout=30,
            verify=True
        )
        return True
    except Exception:
        # Unreachable service is non-fatal; callers treat this as best-effort.
        return False


def generate_script_short(tr, params, custom_clips=5):
    """
    Generate a short-video clip script from the video's subtitle file.

    Args:
        tr: translation function
        params: video parameter object; ``video_origin_path`` is required
        custom_clips: number of clips to produce, default 5
    """
    progress_bar = st.progress(0)
    status_text = st.empty()

    def update_progress(progress: float, message: str = ""):
        """Update the progress bar and the status line."""
        progress_bar.progress(progress)
        if message:
            status_text.text(f"{progress}% - {message}")
        else:
            status_text.text(f"进度: {progress}%")

    try:
        with st.spinner("正在生成脚本..."):
            # Text-LLM settings come from the app config.
            text_provider = config.app.get('text_llm_provider', 'gemini').lower()
            text_api_key = config.app.get(f'text_{text_provider}_api_key')
            text_model = config.app.get(f'text_{text_provider}_model_name')
            text_base_url = config.app.get(f'text_{text_provider}_base_url')
            # Vision-LLM settings come from the Streamlit session.
            # BUG FIX: default to 'gemini' instead of crashing with
            # AttributeError when the session key is missing.
            vision_llm_provider = st.session_state.get('vision_llm_providers', 'gemini').lower()
            vision_api_key = st.session_state.get(f'vision_{vision_llm_provider}_api_key', "")
            vision_model = st.session_state.get(f'vision_{vision_llm_provider}_model_name', "")
            vision_base_url = st.session_state.get(f'vision_{vision_llm_provider}_base_url', "")
            narrato_api_key = config.app.get('narrato_api_key')

            update_progress(20, "开始准备生成脚本")

            # Map the video path to its subtitle path
            # (".../videos/x.mp4" -> ".../srt/x.srt"; the extra replace also
            # maps a ".../video/" directory to ".../subtitle/").
            srt_path = params.video_origin_path.replace(".mp4", ".srt").replace("videos", "srt").replace("video", "subtitle")
            if not os.path.exists(srt_path):
                logger.error(f"{srt_path} 文件不存在请检查或重新转录")
                st.error(f"{srt_path} 文件不存在请检查或重新转录")
                st.stop()

            api_params = {
                "vision_provider": vision_llm_provider,
                "vision_api_key": vision_api_key,
                "vision_model_name": vision_model,
                "vision_base_url": vision_base_url or "",
                "text_provider": text_provider,
                "text_api_key": text_api_key,
                "text_model_name": text_model,
                "text_base_url": text_base_url or ""
            }
            # Best-effort configuration report; failures are ignored.
            chekc_video_config(api_params)
            from app.services.SDP.generate_script_short import generate_script
            script = generate_script(
                srt_path=srt_path,
                output_path="resource/scripts/merged_subtitle.json",
                api_key=text_api_key,
                model_name=text_model,
                base_url=text_base_url,
                custom_clips=custom_clips,
            )

            if script is None:
                st.error("生成脚本失败,请检查日志")
                st.stop()
            logger.info(f"脚本生成完成 {json.dumps(script, ensure_ascii=False, indent=4)}")
            # The service may return either the parsed list or a JSON string.
            if isinstance(script, list):
                st.session_state['video_clip_json'] = script
            elif isinstance(script, str):
                st.session_state['video_clip_json'] = json.loads(script)
            update_progress(80, "脚本生成完成")

            time.sleep(0.1)
            progress_bar.progress(100)
            status_text.text("脚本生成完成!")
            st.success("视频脚本生成成功!")

    except Exception as err:
        progress_bar.progress(100)
        st.error(f"生成过程中发生错误: {str(err)}")
        logger.exception(f"生成脚本时发生错误\n{traceback.format_exc()}")
def generate_script_short_sunmmary(params, subtitle_path, video_theme, temperature):
    """
    Generate a short-drama narration script.

    Requires a high-quality subtitle file for the drama; intended for the
    short-drama scenario. The resulting clip list is stored in
    ``st.session_state['video_clip_json']``.

    NOTE: the function name ("sunmmary") is a typo kept for backward
    compatibility with existing imports.
    """
    progress_bar = st.progress(0)
    status_text = st.empty()

    def update_progress(progress: float, message: str = ""):
        """Update the progress bar and the status line."""
        progress_bar.progress(progress)
        if message:
            status_text.text(f"{progress}% - {message}")
        else:
            status_text.text(f"进度: {progress}%")

    try:
        with st.spinner("正在生成脚本..."):
            if not params.video_origin_path:
                st.error("请先选择视频文件")
                return

            # 1. Validate the subtitle file.
            update_progress(30, "正在解析字幕...")
            if not os.path.exists(subtitle_path):
                st.error("字幕文件不存在")
                return

            # 2. Analyse the subtitles to summarise the plot.
            text_provider = config.app.get('text_llm_provider', 'gemini').lower()
            text_api_key = config.app.get(f'text_{text_provider}_api_key')
            text_model = config.app.get(f'text_{text_provider}_model_name')
            text_base_url = config.app.get(f'text_{text_provider}_base_url')
            analysis_result = analyze_subtitle(
                subtitle_file_path=subtitle_path,
                api_key=text_api_key,
                model=text_model,
                base_url=text_base_url,
                save_result=True,
                temperature=temperature
            )

            # 3. Generate the narration script from the plot analysis.
            if analysis_result["status"] == "success":
                logger.info("字幕分析成功!")
                update_progress(60, "正在生成文案...")

                narration_result = generate_narration_script(
                    short_name=video_theme,
                    plot_analysis=analysis_result["analysis"],
                    api_key=text_api_key,
                    model=text_model,
                    base_url=text_base_url,
                    save_result=True,
                    temperature=temperature
                )

                if narration_result["status"] == "success":
                    logger.info("\n解说文案生成成功!")
                    logger.info(narration_result["narration_script"])
                else:
                    logger.info(f"\n解说文案生成失败: {narration_result['message']}")
                    st.error("生成脚本失败,请检查日志")
                    st.stop()
            else:
                logger.error(f"分析失败: {analysis_result['message']}")
                st.error("生成脚本失败,请检查日志")
                st.stop()

            # 4. Store the clip list for the editor.
            logger.info("开始准备生成解说文案")

            # FIX: the old code serialised narration items back to JSON and
            # immediately re-parsed them, guarded by dead branches
            # (json.dumps never returns None and always returns a str);
            # store the parsed items directly.
            narration_dict = json.loads(narration_result["narration_script"])
            st.session_state['video_clip_json'] = narration_dict['items']
            logger.success("剪辑脚本生成完成")
            update_progress(90, "整理输出...")

            time.sleep(0.1)
            progress_bar.progress(100)
            status_text.text("脚本生成完成!")
            st.success("视频脚本生成成功!")

    except Exception as err:
        st.error(f"生成过程中发生错误: {str(err)}")
        logger.exception(f"生成脚本时发生错误\n{traceback.format_exc()}")
    finally:
        # Give the user a moment to read the final status, then clear the UI.
        time.sleep(2)
        progress_bar.empty()
        status_text.empty()


def get_fonts_cache(font_dir):
    """Return a sorted list of .ttf/.ttc font file names under ``font_dir``.

    The result is cached in Streamlit session state.
    """
    if 'fonts_cache' not in st.session_state:
        fonts = []
        for root, dirs, files in os.walk(font_dir):
            for file in files:
                # endswith with a tuple replaces the old `or` chain
                if file.endswith((".ttf", ".ttc")):
                    fonts.append(file)
        fonts.sort()
        st.session_state['fonts_cache'] = fonts
    return st.session_state['fonts_cache']


def get_video_files_cache():
    """Return the video files in the resource video directory, cached in
    session state; the glob result is reversed before caching.
    """
    if 'video_files_cache' not in st.session_state:
        video_files = []
        for suffix in ["*.mp4", "*.mov", "*.avi", "*.mkv"]:
            video_files.extend(glob.glob(os.path.join(utils.video_dir(), suffix)))
        st.session_state['video_files_cache'] = video_files[::-1]
    return st.session_state['video_files_cache']


def get_songs_cache(song_dir):
    """Return the .mp3 files under ``song_dir``, cached in session state.

    Unlike ``get_fonts_cache`` the list is intentionally left unsorted,
    matching the original behaviour.
    """
    if 'songs_cache' not in st.session_state:
        songs = []
        for root, dirs, files in os.walk(song_dir):
            for file in files:
                if file.endswith(".mp3"):
                    songs.append(file)
        st.session_state['songs_cache'] = songs
    return st.session_state['songs_cache']
def open_task_folder(root_dir, task_id):
    """Open the task's storage folder in the OS file manager.

    Args:
        root_dir: project root directory
        task_id: task ID
    """
    try:
        sys = platform.system()
        path = os.path.join(root_dir, "storage", "tasks", task_id)
        if os.path.exists(path):
            if sys == 'Windows':
                # BUG FIX: os.startfile handles paths containing spaces;
                # the previous unquoted `start {path}` shell command did not.
                os.startfile(path)
            if sys == 'Darwin':
                os.system(f'open "{path}"')  # quoted: path may contain spaces
            if sys == 'Linux':
                os.system(f'xdg-open "{path}"')  # quoted: path may contain spaces
    except Exception as e:
        logger.error(f"打开任务文件夹失败: {e}")

def cleanup_temp_files(temp_dir, max_age=3600):
    """Delete entries in ``temp_dir`` older than ``max_age`` seconds.

    Age is measured by creation time (``os.path.getctime``); per-entry
    failures are logged and skipped.

    Args:
        temp_dir: temporary files directory
        max_age: maximum file age in seconds
    """
    if not os.path.exists(temp_dir):
        return
    cutoff = time.time() - max_age  # hoisted: one clock read for the scan
    for file in os.listdir(temp_dir):
        file_path = os.path.join(temp_dir, file)
        try:
            if os.path.getctime(file_path) < cutoff:
                if os.path.isfile(file_path):
                    os.remove(file_path)
                elif os.path.isdir(file_path):
                    shutil.rmtree(file_path)
                logger.debug(f"已清理临时文件: {file_path}")
        except Exception as e:
            logger.error(f"清理临时文件失败: {file_path}, 错误: {e}")

def get_file_list(directory, file_types=None, sort_by='ctime', reverse=True):
    """List files in ``directory`` with basic metadata.

    Args:
        directory: directory path
        file_types: optional extension filter, e.g. ['.mp4', '.mov']
        sort_by: 'ctime', 'mtime', 'size' or 'name'; anything else keeps
            glob order
        reverse: sort descending when True

    Returns:
        list[dict]: entries with keys name/path/size/ctime/mtime.
    """
    if not os.path.exists(directory):
        return []

    if file_types:
        files = []
        for file_type in file_types:
            files.extend(glob.glob(os.path.join(directory, f"*{file_type}")))
    else:
        files = glob.glob(os.path.join(directory, "*"))

    file_list = []
    for file_path in files:
        try:
            file_stat = os.stat(file_path)
            file_list.append({
                "name": os.path.basename(file_path),
                "path": file_path,
                "size": file_stat.st_size,
                "ctime": file_stat.st_ctime,
                "mtime": file_stat.st_mtime
            })
        except Exception as e:
            logger.error(f"获取文件信息失败: {file_path}, 错误: {e}")

    if sort_by in ('ctime', 'mtime', 'size', 'name'):
        file_list.sort(key=lambda x: x.get(sort_by, ''), reverse=reverse)

    return file_list
def save_uploaded_file(uploaded_file, save_dir, allowed_types=None):
    """Save a Streamlit-uploaded file to ``save_dir``.

    Args:
        uploaded_file: StreamlitUploadedFile object
        save_dir: target directory (created if missing)
        allowed_types: optional list of allowed extensions, e.g. ['.mp4']

    Returns:
        str | None: the saved file path, or None on failure.
    """
    try:
        # exist_ok avoids the race between checking and creating the dir.
        os.makedirs(save_dir, exist_ok=True)

        file_name, file_extension = os.path.splitext(uploaded_file.name)

        if allowed_types and file_extension.lower() not in allowed_types:
            logger.error(f"不支持的文件类型: {file_extension}")
            return None

        # Append a timestamp when a file with the same name already exists.
        save_path = os.path.join(save_dir, uploaded_file.name)
        if os.path.exists(save_path):
            timestamp = time.strftime("%Y%m%d%H%M%S")
            save_path = os.path.join(save_dir, f"{file_name}_{timestamp}{file_extension}")

        with open(save_path, "wb") as f:
            f.write(uploaded_file.read())

        logger.info(f"文件保存成功: {save_path}")
        return save_path

    except Exception as e:
        logger.error(f"保存上传文件失败: {e}")
        return None

def create_temp_file(prefix='tmp', suffix='', directory=None):
    """Create a unique temp-file path (the file itself is not created).

    Args:
        prefix: file name prefix
        suffix: file extension, including the dot
        directory: target directory; defaults to the project storage temp dir

    Returns:
        str | None: the generated path, or None on failure.
    """
    try:
        if directory is None:
            directory = utils.storage_dir("temp", create=True)

        # exist_ok=True replaces the old check-then-create sequence.
        os.makedirs(directory, exist_ok=True)

        return os.path.join(directory, f"{prefix}-{uuid4()}{suffix}")

    except Exception as e:
        logger.error(f"创建临时文件失败: {e}")
        return None

def get_file_size(file_path, format='MB'):
    """Return the size of ``file_path`` in the requested unit.

    Args:
        file_path: file path
        format: 'B', 'KB', 'MB' or 'GB' (case-insensitive)

    Returns:
        int | float: the size ('B' and unknown units return the raw int
        byte count, matching the original behaviour); 0 on failure.
    """
    try:
        size_bytes = os.path.getsize(file_path)
        divisors = {'KB': 1024, 'MB': 1024 ** 2, 'GB': 1024 ** 3}
        divisor = divisors.get(format.upper())
        return size_bytes / divisor if divisor else size_bytes
    except Exception as e:
        logger.error(f"获取文件大小失败: {file_path}, 错误: {e}")
        return 0

def ensure_directory(directory):
    """Create ``directory`` (including parents) if it does not exist.

    Args:
        directory: directory path

    Returns:
        bool: True on success (including when it already exists).
    """
    try:
        # exist_ok=True removes the TOCTOU race of the old exists() check.
        os.makedirs(directory, exist_ok=True)
        return True
    except Exception as e:
        logger.error(f"创建目录失败: {directory}, 错误: {e}")
        return False

def create_zip(files: list, zip_path: str, base_dir: str = None, folder_name: str = "demo") -> bool:
    """
    Create a zip archive containing ``files``.

    Args:
        files: files to add (missing ones are logged and skipped)
        zip_path: output zip path (parent directories are created)
        base_dir: when given, entries are stored relative to it
        folder_name: top-level folder name inside the archive

    Returns:
        bool: True on success.
    """
    try:
        import zipfile

        # Ensure the target directory exists.
        os.makedirs(os.path.dirname(zip_path), exist_ok=True)

        with zipfile.ZipFile(zip_path, 'w', zipfile.ZIP_DEFLATED) as zipf:
            for file in files:
                if not os.path.exists(file):
                    logger.warning(f"文件不存在,跳过: {file}")
                    continue

                # Prefix every entry with folder_name so the archive
                # extracts into a single directory.
                if base_dir:
                    arcname = os.path.join(folder_name, os.path.relpath(file, base_dir))
                else:
                    arcname = os.path.join(folder_name, os.path.basename(file))

                try:
                    zipf.write(file, arcname)
                except Exception as e:
                    logger.error(f"添加文件到zip失败: {file}, 错误: {e}")
                    continue

        return True

    except Exception as e:
        logger.error(f"创建zip文件失败: {e}")
        return False
def get_video_duration(video_path):
    """Return the duration of a video file in seconds."""
    video = VideoFileClip(video_path)
    try:
        return video.duration
    finally:
        # Always release the clip reader, even if reading the duration fails.
        video.close()


def adjust_subtitle_timing(subtitle_path, time_offset):
    """Shift every cue in an SRT file forward by ``time_offset`` seconds.

    FIX: uses pysrt's supported ``shift`` API, which correctly carries
    milliseconds into seconds/minutes/hours; the old code ``+=``'d each time
    component independently and relied on pysrt renormalising every field.

    Args:
        subtitle_path: path to the .srt file
        time_offset: offset in seconds (may be fractional)

    Returns:
        pysrt.SubRipFile: the shifted subtitles (not saved to disk).
    """
    subs = pysrt.open(subtitle_path)
    # int() truncation matches the old per-component computation.
    subs.shift(milliseconds=int(time_offset * 1000))
    return subs


def merge_videos_and_subtitles(video_paths, subtitle_paths, output_video_path, output_subtitle_path):
    """Concatenate videos and merge their subtitles with adjusted timestamps.

    Args:
        video_paths: video files in playback order
        subtitle_paths: matching subtitle files (same length as video_paths)
        output_video_path: merged video output path (encoding is skipped
            when the file already exists)
        output_subtitle_path: merged subtitle output path (always written)

    Raises:
        ValueError: when the two lists differ in length.
    """
    if len(video_paths) != len(subtitle_paths):
        raise ValueError("视频文件数量与字幕文件数量不匹配")

    video_clips = []
    accumulated_duration = 0
    merged_subs = pysrt.SubRipFile()

    try:
        for i, (video_path, subtitle_path) in enumerate(zip(video_paths, subtitle_paths)):
            print(f"处理视频 {i + 1}/{len(video_paths)}: {video_path}")
            video_clip = VideoFileClip(video_path)
            video_clips.append(video_clip)

            print(f"处理字幕 {i + 1}/{len(subtitle_paths)}: {subtitle_path}")
            if i == 0:
                # The first subtitle file keeps its original timing.
                current_subs = pysrt.open(subtitle_path)
            else:
                # Later files are shifted by the total playtime so far.
                current_subs = adjust_subtitle_timing(subtitle_path, accumulated_duration)

            merged_subs.extend(current_subs)
            accumulated_duration += video_clip.duration

        # Skip re-encoding when the merged video already exists.
        if not os.path.exists(output_video_path):
            print("合并视频中...")
            final_video = concatenate_videoclips(video_clips)

            print("保存合并后的视频...")
            final_video.write_videofile(output_video_path, audio_codec='aac')

        # Subtitles are (re)written regardless.
        print("保存合并后的字幕...")
        merged_subs.save(output_subtitle_path, encoding='utf-8')

        print("合并完成")

    finally:
        # Always release every clip reader.
        for clip in video_clips:
            clip.close()


def main():
    """Example usage: merge temp/1..5.mp4 with their matching .srt files."""
    video_paths = [f"temp/{i}.mp4" for i in range(1, 6)]
    subtitle_paths = [f"temp/{i}.srt" for i in range(1, 6)]

    output_video_path = "temp/merged_video.mp4"
    output_subtitle_path = "temp/merged_subtitle.srt"

    merge_videos_and_subtitles(video_paths, subtitle_paths, output_video_path, output_subtitle_path)


if __name__ == "__main__":
    main()
# ===========================================================================
# webui/utils/vision_analyzer.py
# ===========================================================================
import logging
from typing import List, Dict, Any, Optional
from app.utils import gemini_analyzer, qwenvl_analyzer

logger = logging.getLogger(__name__)


class VisionAnalyzer:
    """Provider-agnostic facade over the project's vision analyzers.

    Stores the provider configuration and delegates image analysis to the
    underlying gemini / qwenvl analyzer instance.
    """

    def __init__(self):
        # Unconfigured until one of the initialize_* methods is called.
        self.provider = None   # 'gemini' or 'qwenvl'
        self.api_key = None
        self.model = None
        self.base_url = None
        self.analyzer = None   # provider-specific analyzer instance

    def _configure(self, provider: str, api_key: str, model: str,
                   base_url: str, analyzer) -> None:
        # Shared state assignment for both public initializers.
        self.provider = provider
        self.api_key = api_key
        self.model = model
        # NOTE(review): base_url is stored but never forwarded to the
        # underlying analyzer — confirm whether it should be passed through.
        self.base_url = base_url
        self.analyzer = analyzer

    def initialize_gemini(self, api_key: str, model: str, base_url: str) -> None:
        """Initialize the Gemini vision analyzer.

        Args:
            api_key: Gemini API key.
            model: Model name.
            base_url: API base URL.
        """
        self._configure(
            'gemini', api_key, model, base_url,
            gemini_analyzer.VisionAnalyzer(model_name=model, api_key=api_key),
        )

    def initialize_qwenvl(self, api_key: str, model: str, base_url: str) -> None:
        """Initialize the QwenVL vision analyzer.

        Args:
            api_key: Aliyun API key.
            model: Model name.
            base_url: API base URL.
        """
        self._configure(
            'qwenvl', api_key, model, base_url,
            qwenvl_analyzer.QwenAnalyzer(model_name=model, api_key=api_key),
        )

    async def analyze_images(self, images: List[str], prompt: str,
                             batch_size: int = 5) -> Dict[str, Any]:
        """Analyze a batch of images with the configured provider.

        Args:
            images: Image file paths.
            prompt: Analysis prompt.
            batch_size: Images per batch, default 5.

        Returns:
            Dict: Analysis result from the underlying analyzer.

        Raises:
            ValueError: If no provider has been initialized yet.
        """
        if not self.analyzer:
            raise ValueError("未初始化视觉分析器")

        return await self.analyzer.analyze_images(
            images=images,
            prompt=prompt,
            batch_size=batch_size,
        )


def create_vision_analyzer(provider: str, **kwargs) -> VisionAnalyzer:
    """Create and configure a VisionAnalyzer for the given provider.

    Args:
        provider: Provider name ('gemini' or 'qwenvl'), case-insensitive.
        **kwargs: Provider-specific config: api_key, model, base_url.

    Returns:
        VisionAnalyzer: A fully configured analyzer instance.

    Raises:
        ValueError: If the provider is not supported.
    """
    analyzer = VisionAnalyzer()

    # Dispatch table: lowercase the provider once instead of per-branch.
    initializers = {
        'gemini': analyzer.initialize_gemini,
        'qwenvl': analyzer.initialize_qwenvl,
    }
    initialize = initializers.get(provider.lower())
    if initialize is None:
        raise ValueError(f"不支持的视觉分析提供商: {provider}")

    initialize(
        api_key=kwargs.get('api_key'),
        model=kwargs.get('model'),
        base_url=kwargs.get('base_url'),
    )
    return analyzer