├── .github ├── ISSUE_TEMPLATE │ ├── 01_bug.yaml │ ├── 02_request.yaml │ └── 03_question.yaml └── workflows │ ├── docker-image.yml │ └── docker-publish.yml ├── .gitignore ├── Dockerfile ├── LICENSE ├── README.md ├── app ├── __init__.py ├── common │ ├── config.py │ └── signal_bus.py ├── components │ ├── DonateDialog.py │ ├── EditComboBoxSettingCard.py │ ├── FasterWhisperSettingWidget.py │ ├── LanguageSettingDialog.py │ ├── LineEditSettingCard.py │ ├── MySettingCard.py │ ├── MyVideoWidget.py │ ├── SimpleSettingCard.py │ ├── SpinBoxSettingCard.py │ ├── SubtitleSettingDialog.py │ ├── WhisperAPISettingWidget.py │ ├── WhisperCppSettingWidget.py │ └── transcription_setting_card.py ├── config.py ├── core │ ├── __init__.py │ ├── bk_asr │ │ ├── __init__.py │ │ ├── asr_data.py │ │ ├── base.py │ │ ├── bcut.py │ │ ├── faster_whisper.py │ │ ├── jianying.py │ │ ├── kuaishou.py │ │ ├── transcribe.py │ │ ├── whisper_api.py │ │ └── whisper_cpp.py │ ├── entities.py │ ├── storage │ │ ├── __init__.py │ │ ├── cache_manager.py │ │ ├── constants.py │ │ ├── database.py │ │ └── models.py │ ├── subtitle_processor │ │ ├── __init__.py │ │ ├── alignment.py │ │ ├── optimize.py │ │ ├── prompt.py │ │ ├── run.py │ │ ├── split.py │ │ ├── split_by_llm.py │ │ ├── summarization.py │ │ └── translate.py │ ├── task_factory.py │ └── utils │ │ ├── __init__.py │ │ ├── ass_auto_wrap.py │ │ ├── get_subtitle_style.py │ │ ├── json_repair.py │ │ ├── logger.py │ │ ├── optimize_subtitles.py │ │ ├── subtitle_preview.py │ │ ├── test_opanai.py │ │ └── video_utils.py ├── thread │ ├── batch_process_thread.py │ ├── file_download_thread.py │ ├── modelscope_download_thread.py │ ├── subtitle_pipeline_thread.py │ ├── subtitle_thread.py │ ├── transcript_thread.py │ ├── version_manager_thread.py │ ├── video_download_thread.py │ ├── video_info_thread.py │ └── video_synthesis_thread.py └── view │ ├── batch_process_interface.py │ ├── home_interface.py │ ├── log_window.py │ ├── main_window.py │ ├── setting_interface.py │ ├── subtitle_interface.py │ ├── subtitle_style_interface.py │ ├── task_creation_interface.py │ ├── transcription_interface.py │ ├── video_synthesis_interface.py │ └── view.md ├── docs ├── README_EN.md ├── README_JA.md ├── README_TW.md ├── get_cookies.md ├── images │ ├── alipay.jpg │ ├── api-setting-2.png │ ├── api-setting.png │ ├── bath.png │ ├── cookies_error.png │ ├── cookies_export.png │ ├── get_api.png │ ├── logo.png │ ├── main.png │ ├── preview1.png │ ├── preview2.png │ ├── setting.png │ ├── style.png │ ├── subtitle.png │ ├── test_spend.png │ ├── test_ted1.png │ ├── test_ted2.png │ ├── test_ted3.png │ ├── test_zl.png │ ├── wechat.jpg │ ├── whisper.png │ └── zl.png ├── llm_config.md └── test.md ├── main.py ├── requirements.txt ├── resource ├── assets │ ├── audio-thumbnail.png │ ├── default_bg.png │ ├── default_bg_landscape.png │ ├── default_bg_portrait.png │ ├── default_thumbnail.jpg │ ├── donate_blue.jpg │ ├── donate_green.jpg │ ├── logo-big.png │ ├── logo.png │ └── qss │ │ ├── dark │ │ └── demo.qss │ │ └── light │ │ └── demo.qss ├── subtitle_style │ ├── default.txt │ ├── 毕导科普风.txt │ ├── 番剧可爱风.txt │ └── 竖屏.txt └── translations │ ├── VideoCaptioner_en_US.qm │ ├── VideoCaptioner_en_US.ts │ ├── VideoCaptioner_zh_CN.qm │ ├── VideoCaptioner_zh_CN.ts │ ├── VideoCaptioner_zh_HK.qm │ └── VideoCaptioner_zh_HK.ts ├── streamlit_app.py └── streamlit_app └── requirements.txt /.github/ISSUE_TEMPLATE/01_bug.yaml: -------------------------------------------------------------------------------- 1 | name: 错误 | Bug 2 | description: 反馈程序出现的错误 | Report 
bugs 3 | labels: ["bug"] 4 | body: 5 | - type: markdown 6 | attributes: 7 | value: | 8 | 感谢您报告问题!请提供以下信息帮助我更好地解决问题。 9 | 10 | Thank you for reporting the issue! Using English or Chinese. 11 | 12 | - type: textarea 13 | id: description 14 | attributes: 15 | label: 问题描述 | Problem Description 16 | description: | 17 | 描述您遇到的问题,如果能提供一个复现步骤将帮我更好定位修复问题。(例如:错误字幕内容、或者视频链接、或者具体报错) 18 | 19 | Please describe in detail the problem you encountered. 20 | validations: 21 | required: true 22 | 23 | - type: textarea 24 | id: logs 25 | attributes: 26 | label: 日志信息(可选)| Logs (Optional) 27 | description: | 28 | (可选)如果你在生成字幕视频过程遇到了错误,请打开根目录下的 AppData/logs/app.log 文件,根据日志的时间复制最近一次运行错误的日志信息并填写。这样可以更好帮助我排查。 29 | 30 | (Optional) Please open the AppData/logs/app.log file in the root directory and copy the log information from the most recent run error. 31 | render: shell 32 | validations: 33 | required: false 34 | -------------------------------------------------------------------------------- /.github/ISSUE_TEMPLATE/02_request.yaml: -------------------------------------------------------------------------------- 1 | name: 功能请求 | Feature Request 2 | description: 提出增加新功能的请求 | Create the request for a new feature 3 | labels: ["enhancement"] 4 | body: 5 | - type: markdown 6 | attributes: 7 | value: | 8 | ✨ 感谢您提出功能建议!请描述您希望的新功能,对于有用可行的建议我会努力实现的。 9 | 10 | 🌟 Thank you for your feature suggestion! Please describe the new feature you expect. Using English or Chinese. 11 | - type: textarea 12 | id: feature 13 | attributes: 14 | label: 💡 预期的功能 | Expected Feature 15 | description: | 16 | 请详细描述您期望添加的功能,包括使用场景和希望达到的效果。 17 | 18 | Please describe in detail the feature you want to add, including usage scenarios and desired effects. 19 | validations: 20 | required: true 21 | -------------------------------------------------------------------------------- /.github/ISSUE_TEMPLATE/03_question.yaml: -------------------------------------------------------------------------------- 1 | name: 问题咨询 Question 2 | description: 向作者咨询软件使用或配置相关的问题 | Consult about software usage or configuration 3 | 4 | labels: ["question"] 5 | 6 | body: 7 | - type: textarea 8 | id: problem 9 | attributes: 10 | label: 🤔 问题描述 Problem Description 11 | validations: 12 | required: true 13 | -------------------------------------------------------------------------------- /.github/workflows/docker-image.yml: -------------------------------------------------------------------------------- 1 | name: Docker Image CI 2 | 3 | on: 4 | push: 5 | branches: [ "master" ] 6 | pull_request: 7 | branches: [ "master" ] 8 | 9 | jobs: 10 | 11 | build: 12 | 13 | runs-on: ubuntu-latest 14 | 15 | steps: 16 | - uses: actions/checkout@v4 17 | - name: Build the Docker image 18 | run: docker build . --file Dockerfile --tag videocaptioner:$(date +%s) 19 | -------------------------------------------------------------------------------- /.github/workflows/docker-publish.yml: -------------------------------------------------------------------------------- 1 | name: Docker 2 | 3 | # This workflow uses actions that are not certified by GitHub. 4 | # They are provided by a third-party and are governed by 5 | # separate terms of service, privacy policy, and support 6 | # documentation. 7 | 8 | on: 9 | schedule: 10 | - cron: '38 22 * * *' 11 | push: 12 | branches: [ "master" ] 13 | # Publish semver tags as releases. 
14 | tags: [ 'v*.*.*' ] 15 | pull_request: 16 | branches: [ "master" ] 17 | 18 | env: 19 | # Use docker.io for Docker Hub if empty 20 | REGISTRY: ghcr.io 21 | # github.repository as / 22 | IMAGE_NAME: ${{ github.repository }} 23 | 24 | 25 | jobs: 26 | build: 27 | 28 | runs-on: ubuntu-latest 29 | permissions: 30 | contents: read 31 | packages: write 32 | # This is used to complete the identity challenge 33 | # with sigstore/fulcio when running outside of PRs. 34 | id-token: write 35 | 36 | steps: 37 | - name: Checkout repository 38 | uses: actions/checkout@v4 39 | 40 | # Install the cosign tool except on PR 41 | # https://github.com/sigstore/cosign-installer 42 | - name: Install cosign 43 | if: github.event_name != 'pull_request' 44 | uses: sigstore/cosign-installer@59acb6260d9c0ba8f4a2f9d9b48431a222b68e20 #v3.5.0 45 | with: 46 | cosign-release: 'v2.2.4' 47 | 48 | # Set up BuildKit Docker container builder to be able to build 49 | # multi-platform images and export cache 50 | # https://github.com/docker/setup-buildx-action 51 | - name: Set up Docker Buildx 52 | uses: docker/setup-buildx-action@f95db51fddba0c2d1ec667646a06c2ce06100226 # v3.0.0 53 | 54 | # Login against a Docker registry except on PR 55 | # https://github.com/docker/login-action 56 | - name: Log into registry ${{ env.REGISTRY }} 57 | if: github.event_name != 'pull_request' 58 | uses: docker/login-action@343f7c4344506bcbf9b4de18042ae17996df046d # v3.0.0 59 | with: 60 | registry: ${{ env.REGISTRY }} 61 | username: ${{ github.actor }} 62 | password: ${{ secrets.GITHUB_TOKEN }} 63 | 64 | # Extract metadata (tags, labels) for Docker 65 | # https://github.com/docker/metadata-action 66 | - name: Extract Docker metadata 67 | id: meta 68 | uses: docker/metadata-action@96383f45573cb7f253c731d3b3ab81c87ef81934 # v5.0.0 69 | with: 70 | images: ${{ env.REGISTRY }}/${{ env.IMAGE_NAME }} 71 | 72 | # Build and push Docker image with Buildx (don't push on PR) 73 | # https://github.com/docker/build-push-action 74 | - name: Build and push Docker image 75 | id: build-and-push 76 | uses: docker/build-push-action@0565240e2d4ab88bba5387d719585280857ece09 # v5.0.0 77 | with: 78 | context: . 79 | push: ${{ github.event_name != 'pull_request' }} 80 | tags: ${{ steps.meta.outputs.tags }} 81 | labels: ${{ steps.meta.outputs.labels }} 82 | cache-from: type=gha 83 | cache-to: type=gha,mode=max 84 | 85 | # Sign the resulting Docker image digest except on PRs. 86 | # This will only write to the public Rekor transparency log when the Docker 87 | # repository is public to avoid leaking data. If you would like to publish 88 | # transparency data even for private images, pass --force to cosign below. 89 | # https://github.com/sigstore/cosign 90 | - name: Sign the published Docker image 91 | if: ${{ github.event_name != 'pull_request' }} 92 | env: 93 | # https://docs.github.com/en/actions/security-guides/security-hardening-for-github-actions#using-an-intermediate-environment-variable 94 | TAGS: ${{ steps.meta.outputs.tags }} 95 | DIGEST: ${{ steps.build-and-push.outputs.digest }} 96 | # This step uses the identity token to provision an ephemeral certificate 97 | # against the sigstore community Fulcio instance. 
98 | run: echo "${TAGS}" | xargs -I {} cosign sign --yes {}@${DIGEST} 99 | -------------------------------------------------------------------------------- /.gitignore: -------------------------------------------------------------------------------- 1 | # win 二进制文件资源目录 2 | /resource/bin/ 3 | !/resource/bin/bin_environment.txt 4 | 5 | # 开发环境 6 | .idea/ 7 | *.pyc 8 | */__pycache__/ 9 | *.env 10 | 11 | # 测试和脚本 12 | /test/ 13 | /release/ 14 | /my_content/ 15 | 16 | # 媒体文件 17 | *.srt 18 | *.mp4 19 | *.exe 20 | 21 | # 应用数据 22 | /AppData/ 23 | /output/ 24 | /work-dir/ 25 | .vscode/ 26 | -------------------------------------------------------------------------------- /Dockerfile: -------------------------------------------------------------------------------- 1 | FROM python:3.10-slim-bookworm 2 | 3 | WORKDIR /app 4 | 5 | # 配置apt镜像源 6 | RUN rm -rf /etc/apt/sources.list.d/* && \ 7 | rm -f /etc/apt/sources.list && \ 8 | echo "deb https://mirrors.tuna.tsinghua.edu.cn/debian/ bookworm main contrib non-free non-free-firmware" > /etc/apt/sources.list && \ 9 | echo "deb https://mirrors.tuna.tsinghua.edu.cn/debian/ bookworm-updates main contrib non-free non-free-firmware" >> /etc/apt/sources.list && \ 10 | echo "deb https://mirrors.tuna.tsinghua.edu.cn/debian/ bookworm-backports main contrib non-free non-free-firmware" >> /etc/apt/sources.list && \ 11 | echo "deb https://mirrors.tuna.tsinghua.edu.cn/debian-security bookworm-security main contrib non-free non-free-firmware" >> /etc/apt/sources.list 12 | 13 | # 安装系统依赖 14 | RUN apt-get update && \ 15 | apt-get install -y \ 16 | curl \ 17 | ffmpeg \ 18 | && rm -rf /var/lib/apt/lists/* 19 | 20 | # 先复制依赖文件并安装 21 | COPY streamlit_app/requirements.txt . 22 | RUN pip3 install --no-cache-dir -r requirements.txt -i https://pypi.tuna.tsinghua.edu.cn/simple 23 | 24 | # 复制应用代码 25 | COPY . . 
26 | 27 | # 设置环境变量 28 | ARG OPENAI_BASE_URL 29 | ARG OPENAI_API_KEY 30 | ENV OPENAI_BASE_URL=${OPENAI_BASE_URL} 31 | ENV OPENAI_API_KEY=${OPENAI_API_KEY} 32 | 33 | # 创建临时目录并设置权限 34 | RUN mkdir -p temp && chmod 777 temp 35 | 36 | # 暴露端口 37 | EXPOSE 8501 38 | 39 | # 健康检查 40 | HEALTHCHECK CMD curl --fail http://localhost:8501/_stcore/health 41 | 42 | # 启动应用 43 | ENTRYPOINT ["streamlit", "run", "streamlit_app.py", "--server.port=8501", "--server.address=0.0.0.0"] 44 | -------------------------------------------------------------------------------- /app/__init__.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/WEIFENG2333/VideoCaptioner/295a40d0de071fef19439c3df2ca592a6d63de6a/app/__init__.py -------------------------------------------------------------------------------- /app/common/config.py: -------------------------------------------------------------------------------- 1 | # coding:utf-8 2 | from enum import Enum 3 | 4 | from PyQt5.QtCore import QLocale 5 | from PyQt5.QtGui import QColor 6 | import openai 7 | from qfluentwidgets import ( 8 | qconfig, 9 | QConfig, 10 | ConfigItem, 11 | OptionsConfigItem, 12 | BoolValidator, 13 | OptionsValidator, 14 | RangeConfigItem, 15 | RangeValidator, 16 | Theme, 17 | FolderValidator, 18 | ConfigSerializer, 19 | EnumSerializer, 20 | ) 21 | 22 | from app.config import WORK_PATH, SETTINGS_PATH 23 | from ..core.entities import ( 24 | LLMServiceEnum, 25 | SplitTypeEnum, 26 | TargetLanguageEnum, 27 | TranscribeModelEnum, 28 | TranscribeLanguageEnum, 29 | TranslatorServiceEnum, 30 | WhisperModelEnum, 31 | FasterWhisperModelEnum, 32 | VadMethodEnum, 33 | ) 34 | 35 | 36 | class Language(Enum): 37 | """软件语言""" 38 | 39 | CHINESE_SIMPLIFIED = QLocale(QLocale.Chinese, QLocale.China) 40 | CHINESE_TRADITIONAL = QLocale(QLocale.Chinese, QLocale.HongKong) 41 | ENGLISH = QLocale(QLocale.English) 42 | AUTO = QLocale() 43 | 44 | 45 | class SubtitleLayoutEnum(Enum): 46 | """字幕布局""" 47 | 48 | TRANSLATE_ON_TOP = "译文在上" 49 | ORIGINAL_ON_TOP = "原文在上" 50 | ONLY_ORIGINAL = "仅原文" 51 | ONLY_TRANSLATE = "仅译文" 52 | 53 | 54 | class LanguageSerializer(ConfigSerializer): 55 | """Language serializer""" 56 | 57 | def serialize(self, language): 58 | return language.value.name() if language != Language.AUTO else "Auto" 59 | 60 | def deserialize(self, value: str): 61 | return Language(QLocale(value)) if value != "Auto" else Language.AUTO 62 | 63 | 64 | class Config(QConfig): 65 | """应用配置""" 66 | 67 | # LLM配置 68 | llm_service = OptionsConfigItem( 69 | "LLM", 70 | "LLMService", 71 | LLMServiceEnum.PUBLIC, 72 | OptionsValidator(LLMServiceEnum), 73 | EnumSerializer(LLMServiceEnum), 74 | ) 75 | 76 | openai_model = ConfigItem("LLM", "OpenAI_Model", "gpt-4o-mini") 77 | openai_api_key = ConfigItem("LLM", "OpenAI_API_Key", "") 78 | openai_api_base = ConfigItem("LLM", "OpenAI_API_Base", "https://api.openai.com/v1") 79 | 80 | silicon_cloud_model = ConfigItem("LLM", "SiliconCloud_Model", "gpt-4o-mini") 81 | silicon_cloud_api_key = ConfigItem("LLM", "SiliconCloud_API_Key", "") 82 | silicon_cloud_api_base = ConfigItem( 83 | "LLM", "SiliconCloud_API_Base", "https://api.siliconflow.cn/v1" 84 | ) 85 | 86 | deepseek_model = ConfigItem("LLM", "DeepSeek_Model", "deepseek-chat") 87 | deepseek_api_key = ConfigItem("LLM", "DeepSeek_API_Key", "") 88 | deepseek_api_base = ConfigItem( 89 | "LLM", "DeepSeek_API_Base", "https://api.deepseek.com/v1" 90 | ) 91 | 92 | ollama_model = ConfigItem("LLM", "Ollama_Model", "llama2") 93 | ollama_api_key = 
ConfigItem("LLM", "Ollama_API_Key", "ollama") 94 | ollama_api_base = ConfigItem("LLM", "Ollama_API_Base", "http://localhost:11434/v1") 95 | 96 | lm_studio_model = ConfigItem("LLM", "LmStudio_Model", "qwen2.5:7b") 97 | lm_studio_api_key = ConfigItem("LLM", "LmStudio_API_Key", "lmstudio") 98 | lm_studio_api_base = ConfigItem( 99 | "LLM", "LmStudio_API_Base", "http://localhost:1234/v1" 100 | ) 101 | 102 | gemini_model = ConfigItem("LLM", "Gemini_Model", "gemini-pro") 103 | gemini_api_key = ConfigItem("LLM", "Gemini_API_Key", "") 104 | gemini_api_base = ConfigItem( 105 | "LLM", 106 | "Gemini_API_Base", 107 | "https://generativelanguage.googleapis.com/v1beta/openai/", 108 | ) 109 | 110 | chatglm_model = ConfigItem("LLM", "ChatGLM_Model", "glm-4") 111 | chatglm_api_key = ConfigItem("LLM", "ChatGLM_API_Key", "") 112 | chatglm_api_base = ConfigItem( 113 | "LLM", "ChatGLM_API_Base", "https://open.bigmodel.cn/api/paas/v4" 114 | ) 115 | 116 | # 公益模型 117 | public_model = ConfigItem("LLM", "Public_Model", "gpt-4o-mini") 118 | public_api_key = ConfigItem( 119 | "LLM", "Public_API_Key", "please-do-not-use-for-personal-purposes" 120 | ) 121 | public_api_base = ConfigItem("LLM", "Public_API_Base", "https://ddg.bkfeng.top/v1") 122 | 123 | # ------------------- 翻译配置 ------------------- 124 | translator_service = OptionsConfigItem( 125 | "Translate", 126 | "TranslatorServiceEnum", 127 | TranslatorServiceEnum.BING, 128 | OptionsValidator(TranslatorServiceEnum), 129 | EnumSerializer(TranslatorServiceEnum), 130 | ) 131 | need_reflect_translate = ConfigItem( 132 | "Translate", "NeedReflectTranslate", False, BoolValidator() 133 | ) 134 | deeplx_endpoint = ConfigItem("Translate", "DeeplxEndpoint", "") 135 | batch_size = RangeConfigItem("Translate", "BatchSize", 10, RangeValidator(5, 30)) 136 | thread_num = RangeConfigItem("Translate", "ThreadNum", 10, RangeValidator(1, 100)) 137 | 138 | # ------------------- 转录配置 ------------------- 139 | transcribe_model = OptionsConfigItem( 140 | "Transcribe", 141 | "TranscribeModel", 142 | TranscribeModelEnum.BIJIAN, 143 | OptionsValidator(TranscribeModelEnum), 144 | EnumSerializer(TranscribeModelEnum), 145 | ) 146 | use_asr_cache = ConfigItem("Transcribe", "UseASRCache", True, BoolValidator()) 147 | transcribe_language = OptionsConfigItem( 148 | "Transcribe", 149 | "TranscribeLanguage", 150 | TranscribeLanguageEnum.ENGLISH, 151 | OptionsValidator(TranscribeLanguageEnum), 152 | EnumSerializer(TranscribeLanguageEnum), 153 | ) 154 | 155 | # ------------------- Whisper Cpp 配置 ------------------- 156 | whisper_model = OptionsConfigItem( 157 | "Whisper", 158 | "WhisperModel", 159 | WhisperModelEnum.TINY, 160 | OptionsValidator(WhisperModelEnum), 161 | EnumSerializer(WhisperModelEnum), 162 | ) 163 | 164 | # ------------------- Faster Whisper 配置 ------------------- 165 | faster_whisper_program = ConfigItem( 166 | "FasterWhisper", 167 | "Program", 168 | "faster-whisper-xxl.exe", 169 | ) 170 | faster_whisper_model = OptionsConfigItem( 171 | "FasterWhisper", 172 | "Model", 173 | FasterWhisperModelEnum.TINY, 174 | OptionsValidator(FasterWhisperModelEnum), 175 | EnumSerializer(FasterWhisperModelEnum), 176 | ) 177 | faster_whisper_model_dir = ConfigItem("FasterWhisper", "ModelDir", "") 178 | faster_whisper_device = OptionsConfigItem( 179 | "FasterWhisper", "Device", "cuda", OptionsValidator(["cuda", "cpu"]) 180 | ) 181 | # VAD 参数 182 | faster_whisper_vad_filter = ConfigItem( 183 | "FasterWhisper", "VadFilter", True, BoolValidator() 184 | ) 185 | faster_whisper_vad_threshold = 
RangeConfigItem( 186 | "FasterWhisper", "VadThreshold", 0.4, RangeValidator(0, 1) 187 | ) 188 | faster_whisper_vad_method = OptionsConfigItem( 189 | "FasterWhisper", 190 | "VadMethod", 191 | VadMethodEnum.SILERO_V4, 192 | OptionsValidator(VadMethodEnum), 193 | EnumSerializer(VadMethodEnum), 194 | ) 195 | # 人声提取 196 | faster_whisper_ff_mdx_kim2 = ConfigItem( 197 | "FasterWhisper", "FfMdxKim2", False, BoolValidator() 198 | ) 199 | # 文本处理参数 200 | faster_whisper_one_word = ConfigItem( 201 | "FasterWhisper", "OneWord", True, BoolValidator() 202 | ) 203 | # 提示词 204 | faster_whisper_prompt = ConfigItem("FasterWhisper", "Prompt", "") 205 | 206 | # ------------------- Whisper API 配置 ------------------- 207 | whisper_api_base = ConfigItem("WhisperAPI", "WhisperApiBase", "") 208 | whisper_api_key = ConfigItem("WhisperAPI", "WhisperApiKey", "") 209 | whisper_api_model = OptionsConfigItem("WhisperAPI", "WhisperApiModel", "") 210 | whisper_api_prompt = ConfigItem("WhisperAPI", "WhisperApiPrompt", "") 211 | 212 | # ------------------- 字幕配置 ------------------- 213 | need_optimize = ConfigItem("Subtitle", "NeedOptimize", False, BoolValidator()) 214 | need_translate = ConfigItem("Subtitle", "NeedTranslate", False, BoolValidator()) 215 | need_split = ConfigItem("Subtitle", "NeedSplit", False, BoolValidator()) 216 | split_type = OptionsConfigItem( 217 | "Subtitle", 218 | "SplitType", 219 | SplitTypeEnum.SENTENCE, 220 | OptionsValidator(SplitTypeEnum), 221 | EnumSerializer(SplitTypeEnum), 222 | ) 223 | target_language = OptionsConfigItem( 224 | "Subtitle", 225 | "TargetLanguage", 226 | TargetLanguageEnum.CHINESE_SIMPLIFIED, 227 | OptionsValidator(TargetLanguageEnum), 228 | EnumSerializer(TargetLanguageEnum), 229 | ) 230 | max_word_count_cjk = ConfigItem( 231 | "Subtitle", "MaxWordCountCJK", 25, RangeValidator(8, 100) 232 | ) 233 | max_word_count_english = ConfigItem( 234 | "Subtitle", "MaxWordCountEnglish", 20, RangeValidator(8, 100) 235 | ) 236 | needs_remove_punctuation = ConfigItem( 237 | "Subtitle", "NeedsRemovePunctuation", True, BoolValidator() 238 | ) 239 | custom_prompt_text = ConfigItem("Subtitle", "CustomPromptText", "") 240 | 241 | # ------------------- 字幕合成配置 ------------------- 242 | soft_subtitle = ConfigItem("Video", "SoftSubtitle", False, BoolValidator()) 243 | need_video = ConfigItem("Video", "NeedVideo", True, BoolValidator()) 244 | 245 | # ------------------- 字幕样式配置 ------------------- 246 | subtitle_style_name = ConfigItem("SubtitleStyle", "StyleName", "default") 247 | subtitle_layout = ConfigItem("SubtitleStyle", "Layout", "译文在上") 248 | subtitle_preview_image = ConfigItem("SubtitleStyle", "PreviewImage", "") 249 | 250 | # ------------------- 保存配置 ------------------- 251 | work_dir = ConfigItem("Save", "Work_Dir", WORK_PATH, FolderValidator()) 252 | 253 | # ------------------- 软件页面配置 ------------------- 254 | micaEnabled = ConfigItem("MainWindow", "MicaEnabled", False, BoolValidator()) 255 | dpiScale = OptionsConfigItem( 256 | "MainWindow", 257 | "DpiScale", 258 | "Auto", 259 | OptionsValidator([1, 1.25, 1.5, 1.75, 2, "Auto"]), 260 | restart=True, 261 | ) 262 | language = OptionsConfigItem( 263 | "MainWindow", 264 | "Language", 265 | Language.AUTO, 266 | OptionsValidator(Language), 267 | LanguageSerializer(), 268 | restart=True, 269 | ) 270 | 271 | # ------------------- 更新配置 ------------------- 272 | checkUpdateAtStartUp = ConfigItem( 273 | "Update", "CheckUpdateAtStartUp", True, BoolValidator() 274 | ) 275 | 276 | 277 | cfg = Config() 278 | cfg.themeMode.value = Theme.DARK 279 | 
cfg.themeColor.value = QColor("#ff28f08b") 280 | qconfig.load(SETTINGS_PATH, cfg) 281 | -------------------------------------------------------------------------------- /app/common/signal_bus.py: -------------------------------------------------------------------------------- 1 | from PyQt5.QtCore import QObject, pyqtSignal, QUrl 2 | 3 | 4 | class SignalBus(QObject): 5 | # 字幕排布信号 6 | subtitle_layout_changed = pyqtSignal(str) 7 | # 字幕优化信号 8 | subtitle_optimization_changed = pyqtSignal(bool) 9 | # 字幕翻译信号 10 | subtitle_translation_changed = pyqtSignal(bool) 11 | # 翻译语言 12 | target_language_changed = pyqtSignal(str) 13 | # 转录模型 14 | transcription_model_changed = pyqtSignal(str) 15 | # 软字幕信号 16 | soft_subtitle_changed = pyqtSignal(bool) 17 | # 视频合成信号 18 | need_video_changed = pyqtSignal(bool) 19 | 20 | # 新增视频控制相关信号 21 | video_play = pyqtSignal() # 播放信号 22 | video_pause = pyqtSignal() # 暂停信号 23 | video_stop = pyqtSignal() # 停止信号 24 | video_source_changed = pyqtSignal(QUrl) # 视频源改变信号 25 | video_segment_play = pyqtSignal(int, int) # 播放片段信号,参数为开始和结束时间(ms) 26 | video_subtitle_added = pyqtSignal(str) # 添加字幕文件信号 27 | 28 | # 新增视频控制相关方法 29 | def play_video(self): 30 | """触发视频播放""" 31 | self.video_play.emit() 32 | 33 | def pause_video(self): 34 | """触发视频暂停""" 35 | self.video_pause.emit() 36 | 37 | def stop_video(self): 38 | """触发视频停止""" 39 | self.video_stop.emit() 40 | 41 | def set_video_source(self, url: QUrl): 42 | """设置视频源 43 | 44 | Args: 45 | url: 视频文件的URL 46 | """ 47 | self.video_source_changed.emit(url) 48 | 49 | def play_video_segment(self, start_time: int, end_time: int): 50 | """播放指定时间段的视频 51 | 52 | Args: 53 | start_time: 开始时间(毫秒) 54 | end_time: 结束时间(毫秒) 55 | """ 56 | self.video_segment_play.emit(start_time, end_time) 57 | 58 | def add_subtitle(self, subtitle_file: str): 59 | """添加字幕文件 60 | 61 | Args: 62 | subtitle_file: 字幕文件路径 63 | """ 64 | self.video_subtitle_added.emit(subtitle_file) 65 | 66 | 67 | signalBus = SignalBus() 68 | -------------------------------------------------------------------------------- /app/components/DonateDialog.py: -------------------------------------------------------------------------------- 1 | import os 2 | 3 | from PyQt5.QtCore import Qt 4 | from PyQt5.QtGui import QPixmap 5 | from PyQt5.QtWidgets import QHBoxLayout, QLabel, QMessageBox, QVBoxLayout 6 | from qfluentwidgets import BodyLabel, MessageBoxBase 7 | 8 | from app.config import ASSETS_PATH 9 | 10 | 11 | class DonateDialog(MessageBoxBase): 12 | def __init__(self, parent=None): 13 | super().__init__(parent) 14 | # 定义二维码路径 15 | self.WECHAT_QR_PATH = os.path.join(ASSETS_PATH, "donate_green.jpg") 16 | self.ALIPAY_QR_PATH = os.path.join(ASSETS_PATH, "donate_blue.jpg") 17 | 18 | self.setup_ui() 19 | self.setWindowTitle(self.tr("支持作者")) 20 | 21 | def setup_ui(self): 22 | # 创建标题标签 23 | self.titleLabel = BodyLabel(self.tr("感谢支持"), self) 24 | 25 | # 创建说明文本 26 | self.descLabel = BodyLabel( 27 | self.tr( 28 | "目前本人精力有限,您的支持让我有动力继续折腾这个项目!\n感谢您对开源事业的热爱与支持!" 
29 | ), 30 | self, 31 | ) 32 | self.descLabel.setAlignment(Qt.AlignCenter) 33 | 34 | # 创建水平布局放置两个二维码 35 | self.qrLayout = QHBoxLayout() 36 | 37 | # 创建支付宝二维码标签 38 | self.alipayContainer = QVBoxLayout() 39 | self.alipayQR = QLabel() 40 | self.alipayQR.setPixmap( 41 | QPixmap(self.ALIPAY_QR_PATH).scaled( 42 | 300, 300, Qt.KeepAspectRatio, Qt.SmoothTransformation 43 | ) 44 | ) 45 | self.alipayLabel = BodyLabel(self.tr("支付宝")) 46 | self.alipayLabel.setAlignment(Qt.AlignCenter) 47 | self.alipayContainer.addWidget(self.alipayQR, alignment=Qt.AlignCenter) 48 | self.alipayContainer.addWidget(self.alipayLabel) 49 | 50 | # 创建微信二维码标签 51 | self.wechatContainer = QVBoxLayout() 52 | self.wechatQR = QLabel() 53 | self.wechatQR.setPixmap( 54 | QPixmap(self.WECHAT_QR_PATH).scaled( 55 | 300, 300, Qt.KeepAspectRatio, Qt.SmoothTransformation 56 | ) 57 | ) 58 | self.wechatLabel = BodyLabel(self.tr("微信")) 59 | self.wechatLabel.setAlignment(Qt.AlignCenter) 60 | self.wechatContainer.addWidget(self.wechatQR, alignment=Qt.AlignCenter) 61 | self.wechatContainer.addWidget(self.wechatLabel) 62 | 63 | # 将二维码添加到水平布局 64 | self.qrLayout.addLayout(self.alipayContainer) 65 | self.qrLayout.addLayout(self.wechatContainer) 66 | 67 | self.viewLayout.setSpacing(30) 68 | # 添加到主布局 69 | self.viewLayout.addWidget(self.titleLabel) 70 | self.viewLayout.addWidget(self.descLabel) 71 | # 添加垂直间距 72 | self.viewLayout.addLayout(self.qrLayout) 73 | 74 | # 设置对话框最小宽度 75 | self.widget.setMinimumWidth(800) 76 | # 设置对话框最小高度 77 | self.widget.setMinimumHeight(500) 78 | 79 | # 隐藏是按钮,只显示取消按钮 80 | self.yesButton.hide() 81 | self.cancelButton.setText(self.tr("关闭")) 82 | -------------------------------------------------------------------------------- /app/components/EditComboBoxSettingCard.py: -------------------------------------------------------------------------------- 1 | from typing import List, Union 2 | 3 | from PyQt5.QtCore import Qt, pyqtSignal 4 | from PyQt5.QtGui import QIcon 5 | from qfluentwidgets import EditableComboBox, SettingCard 6 | from qfluentwidgets.common.config import ConfigItem, qconfig 7 | 8 | 9 | class EditComboBoxSettingCard(SettingCard): 10 | """可编辑的下拉框设置卡片""" 11 | 12 | currentTextChanged = pyqtSignal(str) 13 | 14 | def __init__( 15 | self, 16 | configItem: ConfigItem, 17 | icon: Union[str, QIcon], 18 | title: str, 19 | content: str = None, 20 | items: List[str] = None, 21 | parent=None, 22 | ): 23 | super().__init__(icon, title, content, parent) 24 | 25 | self.configItem = configItem 26 | self.items = items or [] 27 | 28 | # 创建可编辑的组合框 29 | self.comboBox = EditableComboBox(self) 30 | for item in self.items: 31 | self.comboBox.addItem(item) 32 | 33 | # 设置布局 34 | self.hBoxLayout.addWidget(self.comboBox, 1, Qt.AlignRight) 35 | self.hBoxLayout.addSpacing(16) 36 | 37 | # 设置最小宽度 38 | self.comboBox.setMinimumWidth(280) 39 | 40 | # 设置初始值 41 | self.setValue(qconfig.get(configItem)) 42 | 43 | # 连接信号 44 | self.comboBox.currentTextChanged.connect(self.__onTextChanged) 45 | configItem.valueChanged.connect(self.setValue) 46 | 47 | def __onTextChanged(self, text: str): 48 | """当文本改变时触发""" 49 | self.setValue(text) 50 | self.currentTextChanged.emit(text) 51 | 52 | def setValue(self, value: str): 53 | """设置值""" 54 | qconfig.set(self.configItem, value) 55 | self.comboBox.setText(value) 56 | 57 | def addItems(self, items: List[str]): 58 | """添加选项""" 59 | for item in items: 60 | self.comboBox.addItem(item) 61 | 62 | def setItems(self, items: List[str]): 63 | """重新设置选项列表""" 64 | self.comboBox.clear() 65 | self.items = items 66 | for item in items: 
67 | self.comboBox.addItem(item) 68 | -------------------------------------------------------------------------------- /app/components/LanguageSettingDialog.py: -------------------------------------------------------------------------------- 1 | from PyQt5.QtCore import Qt 2 | from PyQt5.QtWidgets import QVBoxLayout, QWidget 3 | from qfluentwidgets import ComboBoxSettingCard 4 | from qfluentwidgets import FluentIcon as FIF 5 | from qfluentwidgets import InfoBar, InfoBarPosition, MessageBoxBase, SettingCardGroup 6 | 7 | from app.common.config import cfg 8 | from app.core.entities import TranscribeLanguageEnum 9 | 10 | 11 | class LanguageSettingDialog(MessageBoxBase): 12 | """语言设置对话框""" 13 | 14 | def __init__(self, parent=None): 15 | super().__init__(parent) 16 | self.widget.setMinimumWidth(500) 17 | self._setup_ui() 18 | self._connect_signals() 19 | 20 | def _setup_ui(self): 21 | """设置UI""" 22 | self.yesButton.setText(self.tr("确定")) 23 | self.cancelButton.setText(self.tr("取消")) 24 | 25 | # 主布局 26 | layout = QVBoxLayout() 27 | 28 | self.setting_group = SettingCardGroup(self.tr("语言设置"), self) 29 | 30 | # 语言选择卡片 31 | self.language_card = ComboBoxSettingCard( 32 | cfg.transcribe_language, 33 | FIF.LANGUAGE, 34 | self.tr("源语言"), 35 | self.tr("音频的源语言"), 36 | [lang.value for lang in TranscribeLanguageEnum], 37 | ) 38 | self.language_card.comboBox.setMaxVisibleItems(6) 39 | 40 | self.setting_group.addSettingCard(self.language_card) 41 | layout.addWidget(self.setting_group) 42 | layout.addStretch(1) 43 | 44 | self.viewLayout.addLayout(layout) 45 | 46 | def _connect_signals(self): 47 | """连接信号""" 48 | self.yesButton.clicked.connect(self.__onYesButtonClicked) 49 | 50 | def __onYesButtonClicked(self): 51 | self.accept() 52 | InfoBar.success( 53 | self.tr("设置已保存"), 54 | self.tr("语言设置已更新"), 55 | duration=3000, 56 | parent=self.window(), 57 | position=InfoBarPosition.BOTTOM, 58 | ) 59 | if cfg.transcribe_language.value == TranscribeLanguageEnum.JAPANESE: 60 | InfoBar.warning( 61 | self.tr("请注意身体!!"), 62 | self.tr("小心肝儿,注意身体哦~"), 63 | duration=2000, 64 | parent=self.window(), 65 | position=InfoBarPosition.BOTTOM, 66 | ) 67 | -------------------------------------------------------------------------------- /app/components/LineEditSettingCard.py: -------------------------------------------------------------------------------- 1 | from PyQt5.QtCore import Qt, pyqtSignal 2 | from qfluentwidgets import SettingCard, LineEdit 3 | from qfluentwidgets.common.config import ConfigItem, qconfig 4 | 5 | 6 | class LineEditSettingCard(SettingCard): 7 | """行输入卡片""" 8 | 9 | textChanged = pyqtSignal(str) 10 | 11 | def __init__( 12 | self, 13 | configItem: ConfigItem, 14 | icon, 15 | title: str, 16 | content: str = None, 17 | placeholder: str = "", 18 | parent=None, 19 | ): 20 | super().__init__(icon, title, content, parent) 21 | 22 | self.configItem = configItem 23 | 24 | self.lineEdit = LineEdit(self) 25 | self.lineEdit.setPlaceholderText(placeholder) 26 | self.hBoxLayout.addWidget(self.lineEdit, 1, Qt.AlignRight) 27 | self.hBoxLayout.addSpacing(16) 28 | 29 | self.lineEdit.setMinimumWidth(280) 30 | 31 | self.setValue(qconfig.get(configItem)) 32 | 33 | self.lineEdit.textChanged.connect(self.__onTextChanged) 34 | configItem.valueChanged.connect(self.setValue) 35 | 36 | def __onTextChanged(self, text: str): 37 | self.setValue(text) 38 | self.textChanged.emit(text) 39 | 40 | def setValue(self, value: str): 41 | qconfig.set(self.configItem, value) 42 | self.lineEdit.setText(value) 43 | 
-------------------------------------------------------------------------------- /app/components/SimpleSettingCard.py: -------------------------------------------------------------------------------- 1 | from PyQt5.QtCore import * 2 | from PyQt5.QtWidgets import QHBoxLayout 3 | from qfluentwidgets import ( 4 | CaptionLabel, 5 | CardWidget, 6 | ComboBox, 7 | SwitchButton, 8 | ToolTipFilter, 9 | ToolTipPosition, 10 | ) 11 | 12 | 13 | class SimpleSettingCard(CardWidget): 14 | """基础设置卡片类""" 15 | 16 | def __init__(self, title, content, parent=None): 17 | super().__init__(parent) 18 | self.title = title 19 | self.content = content 20 | self.setup_ui() 21 | 22 | def setup_ui(self): 23 | self.layout = QHBoxLayout(self) 24 | self.layout.setContentsMargins(16, 10, 8, 10) 25 | self.layout.setSpacing(8) 26 | 27 | self.label = CaptionLabel(self) 28 | self.label.setText(self.title) 29 | self.layout.addWidget(self.label) 30 | 31 | self.layout.addStretch(1) 32 | 33 | self.setToolTip(self.content) 34 | self.installEventFilter(ToolTipFilter(self, 100, ToolTipPosition.BOTTOM)) 35 | 36 | 37 | class ComboBoxSimpleSettingCard(SimpleSettingCard): 38 | """下拉框设置卡片""" 39 | 40 | valueChanged = pyqtSignal(str) 41 | 42 | def __init__(self, title, content, items=None, parent=None): 43 | super().__init__(title, content, parent) 44 | self.items = items or [] 45 | self.setup_combobox() 46 | 47 | def setup_combobox(self): 48 | self.comboBox = ComboBox(self) 49 | self.comboBox.addItems(self.items) 50 | self.comboBox.setMaxVisibleItems(6) 51 | self.comboBox.currentTextChanged.connect(self.valueChanged) 52 | self.layout.addWidget(self.comboBox) 53 | 54 | def setValue(self, value): 55 | self.comboBox.setCurrentIndex(self.items.index(value)) 56 | 57 | def value(self): 58 | return self.comboBox.currentText() 59 | 60 | 61 | class SwitchButtonSimpleSettingCard(SimpleSettingCard): 62 | """开关设置卡片""" 63 | 64 | checkedChanged = pyqtSignal(bool) 65 | 66 | def __init__(self, title, content, parent=None): 67 | super().__init__(title, content, parent) 68 | self.setup_switch() 69 | 70 | def setup_switch(self): 71 | self.switchButton = SwitchButton(self) 72 | self.switchButton.setOnText("开") 73 | self.switchButton.setOffText("关") 74 | self.switchButton.checkedChanged.connect(self.checkedChanged) 75 | self.layout.addWidget(self.switchButton) 76 | 77 | self.clicked.connect( 78 | lambda: self.switchButton.setChecked(not self.switchButton.isChecked()) 79 | ) 80 | 81 | def setChecked(self, checked): 82 | self.switchButton.setChecked(checked) 83 | 84 | def isChecked(self): 85 | return self.switchButton.isChecked() 86 | -------------------------------------------------------------------------------- /app/components/SpinBoxSettingCard.py: -------------------------------------------------------------------------------- 1 | from typing import Union 2 | 3 | from PyQt5.QtCore import Qt, pyqtSignal 4 | from PyQt5.QtGui import QIcon 5 | from qfluentwidgets import CompactDoubleSpinBox, CompactSpinBox, SettingCard 6 | from qfluentwidgets.common.config import ConfigItem, qconfig 7 | 8 | 9 | class DoubleSpinBoxSettingCard(SettingCard): 10 | """小数输入设置卡片""" 11 | 12 | valueChanged = pyqtSignal(float) 13 | 14 | def __init__( 15 | self, 16 | configItem: ConfigItem, 17 | icon: Union[str, QIcon], 18 | title: str, 19 | content: str = None, 20 | minimum: float = 0.0, 21 | maximum: float = 100.0, 22 | decimals: int = 1, 23 | step: float = 0.1, 24 | parent=None, 25 | ): 26 | super().__init__(icon, title, content, parent) 27 | 28 | self.configItem = configItem 29 | 30 | 
# 创建CompactDoubleSpinBox 31 | self.spinBox = CompactDoubleSpinBox(self) 32 | self.spinBox.setRange(minimum, maximum) 33 | self.spinBox.setDecimals(decimals) 34 | self.spinBox.setMinimumWidth(60) 35 | self.spinBox.setSingleStep(step) # 设置步长为0.2 36 | 37 | # 添加到布局 38 | self.hBoxLayout.addWidget(self.spinBox, 0, Qt.AlignRight) 39 | self.hBoxLayout.addSpacing(8) 40 | 41 | # 设置初始值和连接信号 42 | self.setValue(qconfig.get(configItem)) 43 | self.spinBox.valueChanged.connect(self.__onValueChanged) 44 | configItem.valueChanged.connect(self.setValue) 45 | 46 | def __onValueChanged(self, value: float): 47 | """数值改变时的槽函数""" 48 | self.setValue(value) 49 | self.valueChanged.emit(value) 50 | 51 | def setValue(self, value: float): 52 | """设置数值""" 53 | qconfig.set(self.configItem, value) 54 | self.spinBox.setValue(value) 55 | 56 | 57 | class SpinBoxSettingCard(SettingCard): 58 | """数值输入设置卡片""" 59 | 60 | valueChanged = pyqtSignal(int) 61 | 62 | def __init__( 63 | self, 64 | configItem: ConfigItem, 65 | icon: Union[str, QIcon], 66 | title: str, 67 | content: str = None, 68 | minimum: int = 0, 69 | maximum: int = 100, 70 | parent=None, 71 | ): 72 | super().__init__(icon, title, content, parent) 73 | 74 | self.configItem = configItem 75 | 76 | # 创建SpinBox 77 | self.spinBox = CompactSpinBox(self) 78 | self.spinBox.setRange(minimum, maximum) 79 | self.spinBox.setMinimumWidth(60) 80 | 81 | # 添加到布局 82 | self.hBoxLayout.addWidget(self.spinBox, 0, Qt.AlignRight) 83 | self.hBoxLayout.addSpacing(8) 84 | 85 | # 设置初始值和连接信号 86 | self.setValue(qconfig.get(configItem)) 87 | self.spinBox.valueChanged.connect(self.__onValueChanged) 88 | configItem.valueChanged.connect(self.setValue) 89 | 90 | def __onValueChanged(self, value: int): 91 | """数值改变时的槽函数""" 92 | self.setValue(value) 93 | self.valueChanged.emit(value) 94 | 95 | def setValue(self, value: int): 96 | """设置数值""" 97 | qconfig.set(self.configItem, value) 98 | self.spinBox.setValue(value) 99 | -------------------------------------------------------------------------------- /app/components/SubtitleSettingDialog.py: -------------------------------------------------------------------------------- 1 | from PyQt5.QtCore import Qt 2 | from PyQt5.QtWidgets import QWidget 3 | from qfluentwidgets import BodyLabel 4 | from qfluentwidgets import FluentIcon as FIF 5 | from qfluentwidgets import MessageBoxBase, SwitchSettingCard, ComboBoxSettingCard 6 | 7 | from app.common.config import cfg 8 | from app.components.SpinBoxSettingCard import SpinBoxSettingCard 9 | from app.core.entities import SplitTypeEnum 10 | 11 | 12 | class SubtitleSettingDialog(MessageBoxBase): 13 | """字幕设置对话框""" 14 | 15 | def __init__(self, parent=None): 16 | super().__init__(parent) 17 | self.titleLabel = BodyLabel(self.tr("字幕设置"), self) 18 | 19 | # 创建设置卡片 20 | self.split_card = SwitchSettingCard( 21 | FIF.ALIGNMENT, 22 | self.tr("字幕分割"), 23 | self.tr("字幕是否使用大语言模型进行智能断句"), 24 | cfg.need_split, 25 | self, 26 | ) 27 | 28 | self.split_type_card = ComboBoxSettingCard( 29 | cfg.split_type, 30 | FIF.TILES, 31 | self.tr("字幕分割类型"), 32 | self.tr("根据句子或者根据语义对字幕进行断句"), 33 | texts=[model.value for model in cfg.split_type.validator.options], 34 | parent=self, 35 | ) 36 | 37 | self.word_count_cjk_card = SpinBoxSettingCard( 38 | cfg.max_word_count_cjk, 39 | FIF.TILES, 40 | self.tr("中文最大字数"), 41 | self.tr("单条字幕的最大字数 (对于中日韩等字符)"), 42 | minimum=8, 43 | maximum=50, 44 | parent=self, 45 | ) 46 | 47 | self.word_count_english_card = SpinBoxSettingCard( 48 | cfg.max_word_count_english, 49 | FIF.TILES, 50 | self.tr("英文最大单词数"), 51 | 
self.tr("单条字幕的最大单词数 (英文)"), 52 | minimum=8, 53 | maximum=50, 54 | parent=self, 55 | ) 56 | 57 | self.remove_punctuation_card = SwitchSettingCard( 58 | FIF.ALIGNMENT, 59 | self.tr("去除末尾标点符号"), 60 | self.tr("是否去除中文字幕中的末尾标点符号"), 61 | cfg.needs_remove_punctuation, 62 | self, 63 | ) 64 | 65 | # 添加到布局 66 | self.viewLayout.addWidget(self.titleLabel) 67 | self.viewLayout.addWidget(self.split_card) 68 | self.viewLayout.addWidget(self.split_type_card) 69 | self.viewLayout.addWidget(self.word_count_cjk_card) 70 | self.viewLayout.addWidget(self.word_count_english_card) 71 | self.viewLayout.addWidget(self.remove_punctuation_card) 72 | # 设置间距 73 | 74 | self.viewLayout.setSpacing(10) 75 | 76 | # 设置窗口标题 77 | self.setWindowTitle(self.tr("字幕设置")) 78 | 79 | # 只显示取消按钮 80 | self.yesButton.hide() 81 | self.cancelButton.setText(self.tr("关闭")) 82 | -------------------------------------------------------------------------------- /app/components/WhisperAPISettingWidget.py: -------------------------------------------------------------------------------- 1 | from PyQt5.QtCore import Qt 2 | from PyQt5.QtWidgets import ( 3 | QHBoxLayout, 4 | QScrollArea, 5 | QStackedWidget, 6 | QVBoxLayout, 7 | QWidget, 8 | ) 9 | from qfluentwidgets import BodyLabel, CardWidget, ComboBox, ComboBoxSettingCard 10 | from qfluentwidgets import FluentIcon as FIF 11 | from qfluentwidgets import ( 12 | HyperlinkCard, 13 | RangeSettingCard, 14 | SettingCardGroup, 15 | SingleDirectionScrollArea, 16 | SwitchSettingCard, 17 | ) 18 | 19 | from ..common.config import cfg 20 | from ..core.entities import TranscribeLanguageEnum 21 | from .EditComboBoxSettingCard import EditComboBoxSettingCard 22 | from .LineEditSettingCard import LineEditSettingCard 23 | 24 | 25 | class WhisperAPISettingWidget(QWidget): 26 | def __init__(self, parent=None): 27 | super().__init__(parent) 28 | self.setup_ui() 29 | 30 | def setup_ui(self): 31 | self.main_layout = QVBoxLayout(self) 32 | 33 | # 创建单向滚动区域和容器 34 | self.scrollArea = SingleDirectionScrollArea(orient=Qt.Vertical, parent=self) 35 | self.scrollArea.setStyleSheet( 36 | "QScrollArea{background: transparent; border: none}" 37 | ) 38 | 39 | self.container = QWidget(self) 40 | self.container.setStyleSheet("QWidget{background: transparent}") 41 | self.containerLayout = QVBoxLayout(self.container) 42 | 43 | self.setting_group = SettingCardGroup(self.tr("Whisper API 设置"), self) 44 | 45 | # API Base URL 46 | self.base_url_card = LineEditSettingCard( 47 | cfg.whisper_api_base, 48 | FIF.LINK, 49 | self.tr("API Base URL"), 50 | self.tr("输入 Whisper API Base URL"), 51 | "https://api.openai.com/v1", 52 | self.setting_group, 53 | ) 54 | 55 | # API Key 56 | self.api_key_card = LineEditSettingCard( 57 | cfg.whisper_api_key, 58 | FIF.FINGERPRINT, 59 | self.tr("API Key"), 60 | self.tr("输入 Whisper API Key"), 61 | "sk-", 62 | self.setting_group, 63 | ) 64 | 65 | # Model 66 | self.model_card = EditComboBoxSettingCard( 67 | cfg.whisper_api_model, 68 | FIF.ROBOT, 69 | self.tr("Whisper 模型"), 70 | self.tr("选择 Whisper 模型"), 71 | ["whisper-large-v3", "whisper-large-v3-turbo", "whisper-1"], 72 | self.setting_group, 73 | ) 74 | 75 | # 添加 Language 选择 76 | self.language_card = ComboBoxSettingCard( 77 | cfg.transcribe_language, 78 | FIF.LANGUAGE, 79 | self.tr("原语言"), 80 | self.tr("音频的原语言"), 81 | [lang.value for lang in TranscribeLanguageEnum], 82 | self.setting_group, 83 | ) 84 | 85 | # 添加 Prompt 86 | self.prompt_card = LineEditSettingCard( 87 | cfg.whisper_api_prompt, 88 | FIF.CHAT, 89 | self.tr("提示词"), 90 | self.tr("可选的提示词,默认空"), 91 | "", 92 | 
self.setting_group, 93 | ) 94 | 95 | # 设置最小宽度 96 | self.base_url_card.lineEdit.setMinimumWidth(200) 97 | self.api_key_card.lineEdit.setMinimumWidth(200) 98 | self.model_card.comboBox.setMinimumWidth(200) 99 | self.language_card.comboBox.setMinimumWidth(200) 100 | self.prompt_card.lineEdit.setMinimumWidth(200) 101 | 102 | # 使用 addSettingCard 添加所有卡片到组 103 | self.setting_group.addSettingCard(self.base_url_card) 104 | self.setting_group.addSettingCard(self.api_key_card) 105 | self.setting_group.addSettingCard(self.model_card) 106 | self.setting_group.addSettingCard(self.language_card) 107 | self.setting_group.addSettingCard(self.prompt_card) 108 | 109 | # 将设置组添加到容器布局 110 | self.containerLayout.addWidget(self.setting_group) 111 | self.containerLayout.addStretch(1) 112 | 113 | # 设置滚动区域 114 | self.scrollArea.setWidget(self.container) 115 | self.scrollArea.setWidgetResizable(True) 116 | 117 | # 将滚动区域添加到主布局 118 | self.main_layout.addWidget(self.scrollArea) 119 | -------------------------------------------------------------------------------- /app/components/transcription_setting_card.py: -------------------------------------------------------------------------------- 1 | from PyQt5.QtCore import Qt 2 | from PyQt5.QtWidgets import ( 3 | QHBoxLayout, 4 | QScrollArea, 5 | QStackedWidget, 6 | QVBoxLayout, 7 | QWidget, 8 | ) 9 | from qfluentwidgets import BodyLabel, CardWidget, ComboBox, ComboBoxSettingCard 10 | from qfluentwidgets import FluentIcon as FIF 11 | from qfluentwidgets import ( 12 | HyperlinkCard, 13 | RangeSettingCard, 14 | SettingCardGroup, 15 | SingleDirectionScrollArea, 16 | SwitchSettingCard, 17 | ) 18 | 19 | from app.components.SpinBoxSettingCard import DoubleSpinBoxSettingCard 20 | 21 | from ..common.config import cfg 22 | from ..core.entities import ( 23 | FasterWhisperModelEnum, 24 | TranscribeLanguageEnum, 25 | TranscribeModelEnum, 26 | VadMethodEnum, 27 | WhisperModelEnum, 28 | ) 29 | from .EditComboBoxSettingCard import EditComboBoxSettingCard 30 | from .FasterWhisperSettingWidget import FasterWhisperSettingWidget 31 | from .LineEditSettingCard import LineEditSettingCard 32 | from .WhisperAPISettingWidget import WhisperAPISettingWidget 33 | from .WhisperCppSettingWidget import WhisperCppSettingWidget 34 | 35 | 36 | class TranscriptionSettingCard(QWidget): 37 | def __init__(self, parent=None): 38 | super().__init__(parent) 39 | self.setup_ui() 40 | 41 | def setup_ui(self): 42 | self.main_layout = QVBoxLayout(self) 43 | self.main_layout.setContentsMargins(0, 0, 0, 0) 44 | 45 | # 设置界面堆叠 46 | self.stacked_widget = QStackedWidget(self) 47 | 48 | # 添加各个设置界面 49 | self.empty_widget = QWidget(self) # 添加空白页面作为默认显示 50 | self.whisper_cpp_widget = WhisperCppSettingWidget(self) 51 | self.whisper_api_widget = WhisperAPISettingWidget(self) 52 | self.faster_whisper_widget = FasterWhisperSettingWidget(self) 53 | 54 | self.stacked_widget.addWidget(self.empty_widget) # 添加空白页面 55 | self.stacked_widget.addWidget(self.whisper_cpp_widget) 56 | self.stacked_widget.addWidget(self.whisper_api_widget) 57 | self.stacked_widget.addWidget(self.faster_whisper_widget) 58 | 59 | self.main_layout.addWidget(self.stacked_widget) 60 | 61 | def on_model_changed(self, value): 62 | # 切换对应的设置界面 63 | if value == TranscribeModelEnum.WHISPER_CPP.value: 64 | self.stacked_widget.setCurrentWidget(self.whisper_cpp_widget) 65 | elif value == TranscribeModelEnum.WHISPER_API.value: 66 | self.stacked_widget.setCurrentWidget(self.whisper_api_widget) 67 | elif value == TranscribeModelEnum.FASTER_WHISPER.value: 68 | 
self.stacked_widget.setCurrentWidget(self.faster_whisper_widget) 69 | else: 70 | self.stacked_widget.setCurrentWidget(self.empty_widget) 71 | -------------------------------------------------------------------------------- /app/config.py: -------------------------------------------------------------------------------- 1 | import logging 2 | import os 3 | from pathlib import Path 4 | 5 | VERSION = "v1.3.3" 6 | YEAR = 2025 7 | APP_NAME = "VideoCaptioner" 8 | AUTHOR = "Weifeng" 9 | 10 | HELP_URL = "https://github.com/WEIFENG2333/VideoCaptioner" 11 | GITHUB_REPO_URL = "https://github.com/WEIFENG2333/VideoCaptioner" 12 | RELEASE_URL = "https://github.com/WEIFENG2333/VideoCaptioner/releases/latest" 13 | FEEDBACK_URL = "https://github.com/WEIFENG2333/VideoCaptioner/issues" 14 | 15 | # 路径 16 | ROOT_PATH = Path(__file__).parent 17 | 18 | RESOURCE_PATH = ROOT_PATH.parent / "resource" 19 | APPDATA_PATH = ROOT_PATH.parent / "AppData" 20 | WORK_PATH = ROOT_PATH.parent / "work-dir" 21 | 22 | 23 | BIN_PATH = RESOURCE_PATH / "bin" 24 | ASSETS_PATH = RESOURCE_PATH / "assets" 25 | SUBTITLE_STYLE_PATH = RESOURCE_PATH / "subtitle_style" 26 | 27 | LOG_PATH = APPDATA_PATH / "logs" 28 | SETTINGS_PATH = APPDATA_PATH / "settings.json" 29 | CACHE_PATH = APPDATA_PATH / "cache" 30 | MODEL_PATH = APPDATA_PATH / "models" 31 | 32 | FASER_WHISPER_PATH = BIN_PATH / "Faster-Whisper-XXL" 33 | 34 | # 日志配置 35 | LOG_LEVEL = logging.INFO 36 | LOG_FORMAT = "%(asctime)s - %(name)s - %(levelname)s - %(message)s" 37 | 38 | # 环境变量添加 bin 路径,添加到PATH开头以优先使用 39 | os.environ["PATH"] = str(BIN_PATH) + os.pathsep + os.environ["PATH"] 40 | os.environ["PATH"] = str(FASER_WHISPER_PATH) + os.pathsep + os.environ["PATH"] 41 | 42 | # 添加 VLC 路径 43 | os.environ["PYTHON_VLC_MODULE_PATH"] = str(BIN_PATH / "vlc") 44 | 45 | # 创建路径 46 | for p in [CACHE_PATH, LOG_PATH, WORK_PATH, MODEL_PATH]: 47 | p.mkdir(parents=True, exist_ok=True) 48 | -------------------------------------------------------------------------------- /app/core/__init__.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/WEIFENG2333/VideoCaptioner/295a40d0de071fef19439c3df2ca592a6d63de6a/app/core/__init__.py -------------------------------------------------------------------------------- /app/core/bk_asr/__init__.py: -------------------------------------------------------------------------------- 1 | from .bcut import BcutASR 2 | from .faster_whisper import FasterWhisperASR 3 | from .jianying import JianYingASR 4 | from .kuaishou import KuaiShouASR 5 | 6 | from .transcribe import transcribe 7 | from .whisper_api import WhisperAPI 8 | from .whisper_cpp import WhisperCppASR 9 | 10 | __all__ = [ 11 | "bcut", 12 | "jianying", 13 | "kuaishou", 14 | "whisper_cpp", 15 | "whisper_api", 16 | "faster_whisper", 17 | "transcribe", 18 | ] 19 | -------------------------------------------------------------------------------- /app/core/bk_asr/base.py: -------------------------------------------------------------------------------- 1 | import json 2 | import os 3 | import tempfile 4 | import threading 5 | import zlib 6 | from typing import Optional, Union 7 | 8 | from app.config import CACHE_PATH 9 | from app.core.storage.cache_manager import CacheManager 10 | 11 | from .asr_data import ASRData, ASRDataSeg 12 | 13 | 14 | class BaseASR: 15 | SUPPORTED_SOUND_FORMAT = ["flac", "m4a", "mp3", "wav"] 16 | _lock = threading.Lock() 17 | 18 | def __init__( 19 | self, 20 | audio_path: Optional[Union[str, bytes]] = None, 21 | use_cache: bool = 
False, 22 | need_word_time_stamp: bool = False, 23 | ): 24 | self.audio_path = audio_path 25 | self.file_binary = None 26 | self.use_cache = use_cache 27 | self._set_data() 28 | self.cache_manager = CacheManager(str(CACHE_PATH)) 29 | 30 | def _set_data(self): 31 | if isinstance(self.audio_path, bytes): 32 | self.file_binary = self.audio_path 33 | else: 34 | ext = self.audio_path.split(".")[-1].lower() 35 | assert ( 36 | ext in self.SUPPORTED_SOUND_FORMAT 37 | ), f"Unsupported sound format: {ext}" 38 | assert os.path.exists(self.audio_path), f"File not found: {self.audio_path}" 39 | with open(self.audio_path, "rb") as f: 40 | self.file_binary = f.read() 41 | crc32_value = zlib.crc32(self.file_binary) & 0xFFFFFFFF 42 | self.crc32_hex = format(crc32_value, "08x") 43 | 44 | def run(self, callback=None, **kwargs) -> ASRData: 45 | if self.use_cache: 46 | cached_result = self.cache_manager.get_asr_result( 47 | self._get_key(), self.__class__.__name__ 48 | ) 49 | if cached_result: 50 | segments = self._make_segments(cached_result) 51 | 52 | return ASRData(segments) 53 | 54 | resp_data = self._run(callback, **kwargs) 55 | 56 | if self.use_cache: 57 | self.cache_manager.set_asr_result( 58 | self._get_key(), self.__class__.__name__, resp_data 59 | ) 60 | 61 | segments = self._make_segments(resp_data) 62 | return ASRData(segments) 63 | 64 | def _get_key(self): 65 | """获取缓存key""" 66 | return self.crc32_hex 67 | 68 | def _make_segments(self, resp_data: dict) -> list[ASRDataSeg]: 69 | """将响应数据转换为ASRDataSeg列表""" 70 | raise NotImplementedError( 71 | "_make_segments method must be implemented in subclass" 72 | ) 73 | 74 | def _run(self, callback=None, **kwargs) -> dict: 75 | """运行ASR服务并返回响应数据""" 76 | raise NotImplementedError("_run method must be implemented in subclass") 77 | -------------------------------------------------------------------------------- /app/core/bk_asr/bcut.py: -------------------------------------------------------------------------------- 1 | import json 2 | import logging 3 | import time 4 | from typing import Optional 5 | 6 | import requests 7 | 8 | from ..utils.logger import setup_logger 9 | from .asr_data import ASRDataSeg 10 | from .base import BaseASR 11 | 12 | logger = setup_logger("bcut_asr") 13 | 14 | __version__ = "0.0.3" 15 | API_BASE_URL = "https://member.bilibili.com/x/bcut/rubick-interface" 16 | # 申请上传 17 | API_REQ_UPLOAD = API_BASE_URL + "/resource/create" 18 | # 提交上传 19 | API_COMMIT_UPLOAD = API_BASE_URL + "/resource/create/complete" 20 | # 创建任务 21 | API_CREATE_TASK = API_BASE_URL + "/task" 22 | # 查询结果 23 | API_QUERY_RESULT = API_BASE_URL + "/task/result" 24 | 25 | 26 | class BcutASR(BaseASR): 27 | """必剪 语音识别接口""" 28 | 29 | headers = { 30 | "User-Agent": "Bilibili/1.0.0 (https://www.bilibili.com)", 31 | "Content-Type": "application/json", 32 | } 33 | 34 | def __init__( 35 | self, 36 | audio_path: str | bytes, 37 | use_cache: bool = True, 38 | need_word_time_stamp: bool = False, 39 | ): 40 | super().__init__(audio_path, use_cache=use_cache) 41 | self.session = requests.Session() 42 | self.task_id: Optional[str] = None 43 | self.__etags: list[str] = [] 44 | 45 | self.__in_boss_key: Optional[str] = None 46 | self.__resource_id: Optional[str] = None 47 | self.__upload_id: Optional[str] = None 48 | self.__upload_urls: list[str] = [] 49 | self.__per_size: Optional[int] = None 50 | self.__clips: Optional[int] = None 51 | 52 | self.__etags: Optional[list[str]] = [] 53 | self.__download_url: Optional[str] = None 54 | self.task_id: Optional[str] = None 55 | 56 | 
self.need_word_time_stamp = need_word_time_stamp 57 | 58 | def upload(self) -> None: 59 | """申请上传""" 60 | if not self.file_binary: 61 | raise ValueError("none set data") 62 | payload = json.dumps( 63 | { 64 | "type": 2, 65 | "name": "audio.mp3", 66 | "size": len(self.file_binary), 67 | "ResourceFileType": "mp3", 68 | "model_id": "8", 69 | } 70 | ) 71 | 72 | resp = requests.post(API_REQ_UPLOAD, data=payload, headers=self.headers) 73 | resp.raise_for_status() 74 | resp = resp.json() 75 | resp_data = resp["data"] 76 | 77 | self.__in_boss_key = resp_data["in_boss_key"] 78 | self.__resource_id = resp_data["resource_id"] 79 | self.__upload_id = resp_data["upload_id"] 80 | self.__upload_urls = resp_data["upload_urls"] 81 | self.__per_size = resp_data["per_size"] 82 | self.__clips = len(resp_data["upload_urls"]) 83 | 84 | logger.info( 85 | f"申请上传成功, 总计大小{resp_data['size'] // 1024}KB, {self.__clips}分片, 分片大小{resp_data['per_size'] // 1024}KB: {self.__in_boss_key}" 86 | ) 87 | self.__upload_part() 88 | self.__commit_upload() 89 | 90 | def __upload_part(self) -> None: 91 | """上传音频数据""" 92 | for clip in range(self.__clips): 93 | start_range = clip * self.__per_size 94 | end_range = (clip + 1) * self.__per_size 95 | logger.info(f"开始上传分片{clip}: {start_range}-{end_range}") 96 | resp = requests.put( 97 | self.__upload_urls[clip], 98 | data=self.file_binary[start_range:end_range], 99 | headers=self.headers, 100 | ) 101 | resp.raise_for_status() 102 | etag = resp.headers.get("Etag") 103 | self.__etags.append(etag) 104 | logger.info(f"分片{clip}上传成功: {etag}") 105 | 106 | def __commit_upload(self) -> None: 107 | """提交上传数据""" 108 | data = json.dumps( 109 | { 110 | "InBossKey": self.__in_boss_key, 111 | "ResourceId": self.__resource_id, 112 | "Etags": ",".join(self.__etags), 113 | "UploadId": self.__upload_id, 114 | "model_id": "8", 115 | } 116 | ) 117 | resp = requests.post(API_COMMIT_UPLOAD, data=data, headers=self.headers) 118 | resp.raise_for_status() 119 | resp = resp.json() 120 | self.__download_url = resp["data"]["download_url"] 121 | logger.info(f"提交成功") 122 | 123 | def create_task(self) -> str: 124 | """开始创建转换任务""" 125 | resp = requests.post( 126 | API_CREATE_TASK, 127 | json={"resource": self.__download_url, "model_id": "8"}, 128 | headers=self.headers, 129 | ) 130 | resp.raise_for_status() 131 | resp = resp.json() 132 | self.task_id = resp["data"]["task_id"] 133 | logger.info(f"任务已创建: {self.task_id}") 134 | return self.task_id 135 | 136 | def result(self, task_id: Optional[str] = None): 137 | """查询转换结果""" 138 | resp = requests.get( 139 | API_QUERY_RESULT, 140 | params={"model_id": 7, "task_id": task_id or self.task_id}, 141 | headers=self.headers, 142 | ) 143 | resp.raise_for_status() 144 | resp = resp.json() 145 | return resp["data"] 146 | 147 | def _run(self, callback=None, **kwargs): 148 | if callback is None: 149 | callback = lambda x, y: None 150 | 151 | callback(0, "上传中") 152 | self.upload() 153 | 154 | callback(40, "创建任务中") 155 | 156 | self.create_task() 157 | 158 | callback(60, "正在转录") 159 | 160 | # 轮询检查任务状态 161 | for _ in range(500): 162 | task_resp = self.result() 163 | if task_resp["state"] == 4: 164 | break 165 | time.sleep(1) 166 | 167 | callback(100, "转录成功") 168 | 169 | logger.info(f"转换成功") 170 | return json.loads(task_resp["result"]) 171 | 172 | def _make_segments(self, resp_data: dict) -> list[ASRDataSeg]: 173 | if self.need_word_time_stamp: 174 | return [ 175 | ASRDataSeg(w["label"].strip(), w["start_time"], w["end_time"]) 176 | for u in resp_data["utterances"] 177 | for w in u["words"] 
178 | ] 179 | else: 180 | return [ 181 | ASRDataSeg(u["transcript"], u["start_time"], u["end_time"]) 182 | for u in resp_data["utterances"] 183 | ] 184 | 185 | 186 | if __name__ == "__main__": 187 | # Example usage 188 | audio_file = r"test.mp3" 189 | asr = BcutASR(audio_file) 190 | asr_data = asr.run() 191 | print(asr_data) 192 | -------------------------------------------------------------------------------- /app/core/bk_asr/faster_whisper.py: -------------------------------------------------------------------------------- 1 | import hashlib 2 | import os 3 | import re 4 | import shutil 5 | import subprocess 6 | import tempfile 7 | from pathlib import Path 8 | from typing import List, Optional, Union 9 | 10 | from ..utils.logger import setup_logger 11 | from .asr_data import ASRDataSeg, ASRData 12 | from .base import BaseASR 13 | 14 | logger = setup_logger("faster_whisper") 15 | 16 | 17 | class FasterWhisperASR(BaseASR): 18 | def __init__( 19 | self, 20 | audio_path: str, 21 | faster_whisper_program: str, 22 | whisper_model: str, 23 | model_dir: str, 24 | language: str = "zh", 25 | device: str = "cpu", 26 | output_dir: str = None, 27 | output_format: str = "srt", 28 | use_cache: bool = False, 29 | need_word_time_stamp: bool = False, 30 | # VAD 相关参数 31 | vad_filter: bool = True, 32 | vad_threshold: float = 0.4, 33 | vad_method: str = "", # https://github.com/Purfview/whisper-standalone-win/discussions/231 34 | # 音频处理 35 | ff_mdx_kim2: bool = False, 36 | # 文本处理参数 37 | one_word: int = 0, 38 | sentence: bool = False, 39 | max_line_width: int = 100, 40 | max_line_count: int = 1, 41 | max_comma: int = 20, 42 | max_comma_cent: int = 50, 43 | prompt: str = None, 44 | ): 45 | super().__init__(audio_path, use_cache) 46 | 47 | # 基本参数 48 | self.model_path = whisper_model 49 | self.model_dir = model_dir 50 | self.faster_whisper_program = faster_whisper_program 51 | self.need_word_time_stamp = need_word_time_stamp 52 | self.language = language 53 | self.device = device 54 | self.output_dir = output_dir 55 | self.output_format = output_format 56 | 57 | # VAD 参数 58 | self.vad_filter = vad_filter 59 | self.vad_threshold = vad_threshold 60 | self.vad_method = vad_method 61 | 62 | # 音频处理参数 63 | self.ff_mdx_kim2 = ff_mdx_kim2 64 | 65 | # 文本处理参数 66 | self.one_word = one_word 67 | self.sentence = sentence 68 | self.max_line_width = max_line_width 69 | self.max_line_count = max_line_count 70 | self.max_comma = max_comma 71 | self.max_comma_cent = max_comma_cent 72 | self.prompt = prompt 73 | 74 | self.process = None 75 | 76 | # 断句宽度 77 | if self.language in ["zh", "ja", "ko"]: 78 | self.max_line_width = 30 79 | else: 80 | self.max_line_width = 90 81 | 82 | # 断句选项 83 | if self.need_word_time_stamp: 84 | self.one_word = 1 85 | else: 86 | self.one_word = 0 87 | self.sentence = True 88 | 89 | # 根据设备选择程序 90 | if self.device == "cpu": 91 | if shutil.which("faster-whisper-xxl"): 92 | self.faster_whisper_program = "faster-whisper-xxl" 93 | else: 94 | if not shutil.which("faster-whisper"): 95 | raise EnvironmentError("faster-whisper程序未找到,请确保已经下载。") 96 | self.faster_whisper_program = "faster-whisper" 97 | self.vad_method = None 98 | elif self.device == "cuda": 99 | if not shutil.which("faster-whisper-xxl"): 100 | raise EnvironmentError( 101 | "faster-whisper-xxl 程序未找到,请确保已经下载。" 102 | ) 103 | self.faster_whisper_program = "faster-whisper-xxl" 104 | 105 | def _build_command(self, audio_path: str) -> List[str]: 106 | """构建命令行参数""" 107 | 108 | cmd = [ 109 | str(self.faster_whisper_program), 110 | "-m", 111 | 
str(self.model_path), 112 | # "--verbose", "true", 113 | "--print_progress", 114 | ] 115 | 116 | # 添加模型目录参数 117 | if self.model_dir: 118 | cmd.extend(["--model_dir", str(self.model_dir)]) 119 | 120 | # 基本参数 121 | cmd.extend( 122 | [ 123 | str(audio_path), 124 | "-l", 125 | self.language, 126 | "-d", 127 | self.device, 128 | "--output_format", 129 | self.output_format, 130 | ] 131 | ) 132 | 133 | # 输出目录 134 | if self.output_dir: 135 | cmd.extend(["-o", str(self.output_dir)]) 136 | else: 137 | cmd.extend(["-o", "source"]) 138 | 139 | # VAD 相关参数 140 | if self.vad_filter: 141 | cmd.extend( 142 | [ 143 | "--vad_filter", 144 | "true", 145 | "--vad_threshold", 146 | f"{self.vad_threshold:.2f}", 147 | ] 148 | ) 149 | if self.vad_method: 150 | cmd.extend(["--vad_method", self.vad_method]) 151 | else: 152 | cmd.extend(["--vad_filter", "false"]) 153 | 154 | # 人声分离 155 | if self.ff_mdx_kim2 and self.faster_whisper_program.startswith( 156 | "faster-whisper-xxl" 157 | ): 158 | cmd.append("--ff_mdx_kim2") 159 | 160 | # 文本处理参数 161 | if self.one_word: 162 | self.one_word = 1 163 | else: 164 | self.one_word = 0 165 | if self.one_word in [0, 1, 2]: 166 | cmd.extend(["--one_word", str(self.one_word)]) 167 | 168 | if self.sentence: 169 | cmd.extend( 170 | [ 171 | "--sentence", 172 | "--max_line_width", 173 | str(self.max_line_width), 174 | "--max_line_count", 175 | str(self.max_line_count), 176 | "--max_comma", 177 | str(self.max_comma), 178 | "--max_comma_cent", 179 | str(self.max_comma_cent), 180 | ] 181 | ) 182 | 183 | # 提示词 184 | if self.prompt: 185 | cmd.extend(["--prompt", self.prompt]) 186 | 187 | # 完成的提示音 188 | cmd.extend(["--beep_off"]) 189 | 190 | return cmd 191 | 192 | def _make_segments(self, resp_data: str) -> list[ASRDataSeg]: 193 | asr_data = ASRData.from_srt(resp_data) 194 | # 过滤掉纯音乐标记 195 | filtered_segments = [] 196 | for seg in asr_data.segments: 197 | text = seg.text.strip() 198 | if not ( 199 | text.startswith("【") 200 | or text.startswith("[") 201 | or text.startswith("(") 202 | or text.startswith("(") 203 | ): 204 | filtered_segments.append(seg) 205 | return filtered_segments 206 | 207 | def _run(self, callback=None) -> str: 208 | if callback is None: 209 | callback = lambda x, y: None 210 | 211 | temp_dir = Path(tempfile.gettempdir()) / "bk_asr" 212 | temp_dir.mkdir(parents=True, exist_ok=True) 213 | 214 | with tempfile.TemporaryDirectory(dir=temp_dir) as temp_path: 215 | temp_dir = Path(temp_path) 216 | wav_path = temp_dir / "audio.wav" 217 | output_path = wav_path.with_suffix(".srt") 218 | 219 | shutil.copy2(self.audio_path, wav_path) 220 | 221 | cmd = self._build_command(wav_path) 222 | 223 | logger.info("Faster Whisper 执行命令: %s", " ".join(cmd)) 224 | callback(5, "Whisper识别") 225 | 226 | self.process = subprocess.Popen( 227 | cmd, 228 | stdout=subprocess.PIPE, 229 | stderr=subprocess.STDOUT, 230 | text=True, 231 | encoding="utf-8", 232 | errors="ignore", 233 | creationflags=subprocess.CREATE_NO_WINDOW if os.name == "nt" else 0, 234 | ) 235 | 236 | is_finish = False 237 | error_msg = "" 238 | 239 | # 实时打印日志和错误输出 240 | while self.process.poll() is None: 241 | output = self.process.stdout.readline() 242 | output = output.strip() 243 | if output: 244 | # 解析进度百分比 245 | if match := re.search(r"(\d+)%", output): 246 | progress = int(match.group(1)) 247 | if progress == 100: 248 | is_finish = True 249 | mapped_progress = int(5 + (progress * 0.9)) 250 | callback(mapped_progress, f"{mapped_progress} %") 251 | if "Subtitles are written to" in output: 252 | is_finish = True 253 | callback(100, 
"识别完成") 254 | if "error" in output: 255 | error_msg += output 256 | logger.error(output) 257 | else: 258 | logger.info(output) 259 | 260 | # 获取所有输出和错误信息 261 | self.process.communicate() 262 | 263 | logger.info("Faster Whisper 返回值: %s", self.process.returncode) 264 | if not is_finish: 265 | logger.error("Faster Whisper 错误: %s", error_msg) 266 | raise RuntimeError(error_msg) 267 | 268 | # 判断是否识别成功 269 | if not output_path.exists(): 270 | raise RuntimeError(f"Faster Whisper 输出文件不存在: {output_path}") 271 | 272 | logger.info("Faster Whisper 识别完成") 273 | 274 | callback(100, "识别完成") 275 | 276 | return output_path.read_text(encoding="utf-8") 277 | 278 | def _get_key(self): 279 | """获取缓存key""" 280 | cmd = self._build_command("") 281 | cmd_hash = hashlib.md5(str(cmd).encode()).hexdigest() 282 | return f"{self.crc32_hex}-{cmd_hash}" 283 | -------------------------------------------------------------------------------- /app/core/bk_asr/kuaishou.py: -------------------------------------------------------------------------------- 1 | import logging 2 | 3 | import requests 4 | 5 | from ..utils.logger import setup_logger 6 | from .asr_data import ASRDataSeg 7 | from .base import BaseASR 8 | 9 | logger = setup_logger("kuaishou_asr") 10 | 11 | 12 | class KuaiShouASR(BaseASR): 13 | def __init__( 14 | self, audio_path, use_cache: bool = False, need_word_time_stamp: bool = False 15 | ): 16 | super().__init__(audio_path, use_cache) 17 | self.need_word_time_stamp = need_word_time_stamp 18 | logger.info("KuaiShouASR initialized with audio_path: %s", audio_path) 19 | 20 | def _run(self, callback=None) -> dict: 21 | logger.info("Running ASR process") 22 | return self._submit() 23 | 24 | def _make_segments(self, resp_data: dict) -> list[ASRDataSeg]: 25 | logger.debug("Making segments from response data") 26 | return [ 27 | ASRDataSeg( 28 | u["text"], float(u["start_time"]) * 1000, float(u["end_time"]) * 1000 29 | ) 30 | for u in resp_data["data"]["text"] 31 | ] 32 | 33 | def _submit(self) -> dict: 34 | logger.info("Submitting audio file for ASR") 35 | payload = {"typeId": "1"} 36 | files = [("file", ("test.mp3", self.file_binary, "audio/mpeg"))] 37 | try: 38 | result = requests.post( 39 | "https://ai.kuaishou.com/api/effects/subtitle_generate", 40 | data=payload, 41 | files=files, 42 | ) 43 | result.raise_for_status() 44 | logger.info("Submission successful") 45 | except requests.exceptions.RequestException as e: 46 | logger.error("Submission failed: %s", e) 47 | raise 48 | return result.json() 49 | 50 | 51 | if __name__ == "__main__": 52 | # Example usage 53 | audio_file = r"test.mp3" 54 | asr = KuaiShouASR(audio_file) 55 | asr_data = asr.run() 56 | print(asr_data) 57 | -------------------------------------------------------------------------------- /app/core/bk_asr/transcribe.py: -------------------------------------------------------------------------------- 1 | from typing import Optional 2 | 3 | from app.core.bk_asr.bcut import BcutASR 4 | from app.core.bk_asr.faster_whisper import FasterWhisperASR 5 | from app.core.bk_asr.jianying import JianYingASR 6 | from app.core.bk_asr.kuaishou import KuaiShouASR 7 | from app.core.bk_asr.whisper_api import WhisperAPI 8 | from app.core.bk_asr.whisper_cpp import WhisperCppASR 9 | from app.core.bk_asr.asr_data import ASRData 10 | from app.core.entities import TranscribeConfig, TranscribeModelEnum 11 | 12 | 13 | def transcribe(audio_path: str, config: TranscribeConfig, callback=None) -> ASRData: 14 | """ 15 | 使用指定的转录配置对音频文件进行转录 16 | 17 | Args: 18 | audio_path: 音频文件路径 19 | 
config: 转录配置 20 | callback: 进度回调函数,接收两个参数(progress: int, message: str) 21 | 22 | Returns: 23 | ASRData: 转录结果数据 24 | """ 25 | if callback is None: 26 | callback = lambda x, y: None 27 | 28 | # 获取ASR模型类 29 | ASR_MODELS = { 30 | TranscribeModelEnum.JIANYING: JianYingASR, 31 | # TranscribeModelEnum.KUAISHOU: KuaiShouASR, 32 | TranscribeModelEnum.BIJIAN: BcutASR, 33 | TranscribeModelEnum.WHISPER_CPP: WhisperCppASR, 34 | TranscribeModelEnum.WHISPER_API: WhisperAPI, 35 | TranscribeModelEnum.FASTER_WHISPER: FasterWhisperASR, 36 | } 37 | 38 | asr_class = ASR_MODELS.get(config.transcribe_model) 39 | if not asr_class: 40 | raise ValueError(f"无效的转录模型: {config.transcribe_model}") 41 | 42 | # 构建ASR参数 43 | asr_args = { 44 | "use_cache": config.use_asr_cache, 45 | "need_word_time_stamp": config.need_word_time_stamp, 46 | } 47 | 48 | # 根据不同模型添加特定参数 49 | if config.transcribe_model == TranscribeModelEnum.WHISPER_CPP: 50 | asr_args.update( 51 | { 52 | "language": config.transcribe_language, 53 | "whisper_model": config.whisper_model, 54 | } 55 | ) 56 | elif config.transcribe_model == TranscribeModelEnum.WHISPER_API: 57 | asr_args.update( 58 | { 59 | "language": config.transcribe_language, 60 | "whisper_model": config.whisper_api_model, 61 | "api_key": config.whisper_api_key, 62 | "base_url": config.whisper_api_base, 63 | "prompt": config.whisper_api_prompt, 64 | } 65 | ) 66 | elif config.transcribe_model == TranscribeModelEnum.FASTER_WHISPER: 67 | asr_args.update( 68 | { 69 | "faster_whisper_program": config.faster_whisper_program, 70 | "language": config.transcribe_language, 71 | "whisper_model": config.faster_whisper_model, 72 | "model_dir": config.faster_whisper_model_dir, 73 | "device": config.faster_whisper_device, 74 | "vad_filter": config.faster_whisper_vad_filter, 75 | "vad_threshold": config.faster_whisper_vad_threshold, 76 | "vad_method": config.faster_whisper_vad_method, 77 | "ff_mdx_kim2": config.faster_whisper_ff_mdx_kim2, 78 | "one_word": config.faster_whisper_one_word, 79 | "prompt": config.faster_whisper_prompt, 80 | } 81 | ) 82 | 83 | # 创建ASR实例并运行 84 | asr = asr_class(audio_path, **asr_args) 85 | 86 | asr_data = asr.run(callback=callback) 87 | 88 | # 优化字幕显示时间 #161 89 | if not config.need_word_time_stamp: 90 | asr_data.optimize_timing() 91 | 92 | return asr_data 93 | 94 | 95 | if __name__ == "__main__": 96 | # 示例用法 97 | from app.core.entities import WhisperModelEnum 98 | 99 | # 创建配置 100 | config = TranscribeConfig( 101 | transcribe_model=TranscribeModelEnum.WHISPER_CPP, 102 | transcribe_language="zh", 103 | whisper_model=WhisperModelEnum.MEDIUM, 104 | use_asr_cache=True, 105 | ) 106 | 107 | # 转录音频 108 | audio_file = "test.wav" 109 | 110 | def progress_callback(progress: int, message: str): 111 | print(f"Progress: {progress}%, Message: {message}") 112 | 113 | result = transcribe(audio_file, config, callback=progress_callback) 114 | print(result) 115 | -------------------------------------------------------------------------------- /app/core/bk_asr/whisper_api.py: -------------------------------------------------------------------------------- 1 | import os 2 | from typing import Optional 3 | 4 | from openai import OpenAI 5 | 6 | from ..utils.logger import setup_logger 7 | from .asr_data import ASRData, ASRDataSeg 8 | from .base import BaseASR 9 | 10 | logger = setup_logger("whisper_api") 11 | 12 | 13 | class WhisperAPI(BaseASR): 14 | def __init__( 15 | self, 16 | audio_path: str, 17 | whisper_model: str, 18 | need_word_time_stamp: bool = False, 19 | language: str = "zh", 20 | prompt: str = "", 
21 | base_url: Optional[str] = None, 22 | api_key: Optional[str] = None, 23 | use_cache: bool = False, 24 | ): 25 | """ 26 | 初始化 Whisper API 27 | 28 | Args: 29 | audio_path: 音频文件路径 30 | model: 模型名称 31 | language: 语言代码,默认中文 32 | prompt: 提示词 33 | base_url: API基础URL,可选 34 | api_key: API密钥,可选 35 | use_cache: 是否使用缓存 36 | """ 37 | super().__init__(audio_path, use_cache) 38 | 39 | # 优先使用传入的参数,否则使用环境变量 40 | self.base_url = base_url 41 | self.api_key = api_key 42 | 43 | if not self.base_url or not self.api_key: 44 | raise ValueError("必须设置 OPENAI_BASE_URL 和 OPENAI_API_KEY") 45 | 46 | self.model = whisper_model 47 | self.language = language 48 | self.prompt = prompt 49 | self.need_word_time_stamp = need_word_time_stamp 50 | 51 | logger.info( 52 | f"初始化 WhisperCppASR: model={whisper_model}, language={language}, prompt={prompt}" 53 | ) 54 | self.client = OpenAI(base_url=self.base_url, api_key=self.api_key) 55 | 56 | def _run(self, callback=None) -> dict: 57 | """执行语音识别""" 58 | return self._submit() 59 | 60 | def _make_segments(self, resp_data: dict) -> list[ASRDataSeg]: 61 | """从响应数据构建语音片段""" 62 | segments = [] 63 | for seg in resp_data["segments"]: 64 | segments.append( 65 | ASRDataSeg( 66 | text=seg["text"].strip(), 67 | start_time=int(float(seg["start"]) * 1000), 68 | end_time=int(float(seg["end"]) * 1000), 69 | ) 70 | ) 71 | return segments 72 | 73 | def _get_key(self) -> str: 74 | """获取缓存键值""" 75 | return f"{self.crc32_hex}-{self.model}-{self.language}-{self.prompt}" 76 | 77 | def _submit(self) -> dict: 78 | """提交音频进行识别""" 79 | try: 80 | if self.language == "zh" and not self.prompt: 81 | self.prompt = "你好,我们需要使用简体中文,以下是普通话的句子。" 82 | args = {} 83 | if self.need_word_time_stamp and "groq" not in self.base_url: 84 | args["timestamp_granularities"] = ["word", "segment"] 85 | logger.info("开始识别音频...") 86 | completion = self.client.audio.transcriptions.create( 87 | model=self.model, 88 | temperature=0, 89 | response_format="verbose_json", 90 | file=("audio.mp3", self.file_binary, "audio/mp3"), 91 | prompt=self.prompt, 92 | language=None, 93 | **args, 94 | ) 95 | logger.info("音频识别完成") 96 | return completion.to_dict() 97 | except Exception as e: 98 | logger.exception(f"音频识别失败: {str(e)}") 99 | raise e 100 | -------------------------------------------------------------------------------- /app/core/bk_asr/whisper_cpp.py: -------------------------------------------------------------------------------- 1 | import os 2 | import re 3 | import shutil 4 | import subprocess 5 | import tempfile 6 | import time 7 | from pathlib import Path 8 | 9 | from ...config import MODEL_PATH 10 | from ..utils.logger import setup_logger 11 | from .asr_data import ASRDataSeg, ASRData 12 | from .base import BaseASR 13 | 14 | logger = setup_logger("whisper_asr") 15 | 16 | 17 | class WhisperCppASR(BaseASR): 18 | def __init__( 19 | self, 20 | audio_path, 21 | language="en", 22 | whisper_cpp_path="whisper-cpp", 23 | whisper_model=None, 24 | use_cache: bool = False, 25 | need_word_time_stamp: bool = False, 26 | ): 27 | super().__init__(audio_path, False) 28 | assert os.path.exists(audio_path), f"音频文件 {audio_path} 不存在" 29 | assert audio_path.endswith(".wav"), f"音频文件 {audio_path} 必须是WAV格式" 30 | 31 | # 如果指定了 whisper_model,则在 models 目录下查找对应模型 32 | if whisper_model: 33 | models_dir = Path(MODEL_PATH) 34 | model_files = list(models_dir.glob(f"*ggml*{whisper_model}*.bin")) 35 | if not model_files: 36 | raise ValueError( 37 | f"在 {models_dir} 目录下未找到包含 '{whisper_model}' 的模型文件" 38 | ) 39 | model_path = str(model_files[0]) 40 | 
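# 补充说明(示例为假设,非原始代码):glob 模式为 *ggml*{whisper_model}*.bin,
# 例如 whisper_model="medium" 时会匹配模型目录下形如 ggml-medium.bin 的文件,并取第一个匹配项作为 model_path。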
logger.info(f"找到模型文件: {model_path}") 41 | else: 42 | raise ValueError("whisper_model 不能为空") 43 | 44 | self.model_path = model_path 45 | self.whisper_cpp_path = Path(whisper_cpp_path) 46 | self.need_word_time_stamp = need_word_time_stamp 47 | self.language = language 48 | 49 | self.process = None 50 | 51 | def _make_segments(self, resp_data: str) -> list[ASRDataSeg]: 52 | asr_data = ASRData.from_srt(resp_data) 53 | # 过滤掉纯音乐标记 54 | filtered_segments = [] 55 | for seg in asr_data.segments: 56 | text = seg.text.strip() 57 | # 保留不以【、[、(、(开头的文本 58 | if not ( 59 | text.startswith("【") 60 | or text.startswith("[") 61 | or text.startswith("(") 62 | or text.startswith("(") 63 | ): 64 | filtered_segments.append(seg) 65 | return filtered_segments 66 | 67 | def _build_command( 68 | self, wav_path, output_path, is_const_me_version: bool 69 | ) -> list[str]: 70 | """构建 whisper-cpp 命令行参数 71 | 72 | Args: 73 | wav_path: 输入的WAV文件路径 74 | output_path: 输出文件路径 75 | is_const_me_version: 是否为 const_me 版本 76 | 77 | Returns: 78 | list[str]: 命令行参数列表 79 | """ 80 | # 构建基础命令参数列表 81 | whisper_params = [ 82 | str(self.whisper_cpp_path), 83 | "-m", 84 | str(self.model_path), 85 | "-f", 86 | str(wav_path), 87 | "-l", 88 | self.language, 89 | "--output-srt", 90 | ] 91 | 92 | # 根据版本添加额外参数 93 | if not is_const_me_version: 94 | whisper_params.extend( 95 | ["--no-gpu", "--output-file", str(output_path.with_suffix(""))] 96 | ) 97 | 98 | # 中文模式下添加提示语 99 | if self.language == "zh": 100 | whisper_params.extend( 101 | ["--prompt", "你好,我们需要使用简体中文,以下是普通话的句子。"] 102 | ) 103 | 104 | return whisper_params 105 | 106 | def _run(self, callback=None) -> str: 107 | if callback is None: 108 | callback = lambda x, y: None 109 | 110 | temp_dir = Path(tempfile.gettempdir()) / "bk_asr" 111 | temp_dir.mkdir(parents=True, exist_ok=True) 112 | 113 | is_const_me_version = True if os.name == "nt" else False 114 | 115 | # 使用 with 语句管理临时文件的生命周期 116 | with tempfile.TemporaryDirectory(dir=temp_dir) as temp_path: 117 | temp_dir = Path(temp_path) 118 | wav_path = temp_dir / "audio.wav" 119 | output_path = wav_path.with_suffix(".srt") 120 | 121 | try: 122 | # 把self.audio_path 复制到 wav_path 123 | shutil.copy2(self.audio_path, wav_path) 124 | 125 | # 使用新的 _build_command 方法构建命令 126 | whisper_params = self._build_command( 127 | wav_path, output_path, is_const_me_version 128 | ) 129 | logger.info("完整命令行参数: %s", " ".join(whisper_params)) 130 | 131 | # 启动进程 132 | self.process = subprocess.Popen( 133 | whisper_params, 134 | stdout=subprocess.PIPE, 135 | stderr=subprocess.PIPE, 136 | text=True, 137 | encoding="utf-8", 138 | ) 139 | # 获取音频时长 140 | total_duration = self.get_audio_duration(self.audio_path) or 600 141 | logger.info("音频总时长: %d 秒", total_duration) 142 | 143 | # 处理输出和进度 144 | full_output = [] 145 | while True: 146 | try: 147 | line = self.process.stdout.readline() 148 | except Exception as e: 149 | break 150 | if not line: 151 | continue 152 | 153 | full_output.append(line) 154 | 155 | # 简化的进度处理 156 | if " --> " in line and "[" in line: 157 | try: 158 | time_str = line.split("[")[1].split(" -->")[0].strip() 159 | current_time = sum( 160 | float(x) * y 161 | for x, y in zip( 162 | reversed(time_str.split(":")), [1, 60, 3600] 163 | ) 164 | ) 165 | progress = int(min(current_time / total_duration * 100, 98)) 166 | callback(progress, f"{progress}% 正在转换") 167 | except (ValueError, IndexError): 168 | continue 169 | # 等待进程完成 170 | stdout, stderr = self.process.communicate() 171 | if self.process.returncode != 0: 172 | raise RuntimeError(f"WhisperCPP 执行失败: {stderr}") 
173 | 174 | callback(100, "转换完成") 175 | 176 | # 读取结果文件 177 | srt_path = output_path 178 | if not srt_path.exists(): 179 | raise RuntimeError(f"输出文件未生成: {srt_path}") 180 | 181 | return srt_path.read_text(encoding="utf-8") 182 | 183 | except Exception as e: 184 | logger.exception("处理失败") 185 | raise RuntimeError(f"生成 SRT 文件失败: {str(e)}") 186 | 187 | def _get_key(self): 188 | return f"{self.crc32_hex}-{self.need_word_time_stamp}-{self.model_path}-{self.language}" 189 | 190 | def get_audio_duration(self, filepath: str) -> int: 191 | try: 192 | cmd = ["ffmpeg", "-i", filepath] 193 | result = subprocess.run( 194 | cmd, 195 | capture_output=True, 196 | text=True, 197 | encoding="utf-8", 198 | errors="replace", 199 | creationflags=subprocess.CREATE_NO_WINDOW if os.name == "nt" else 0, 200 | ) 201 | info = result.stderr 202 | # 提取时长 203 | if duration_match := re.search(r"Duration: (\d+):(\d+):(\d+\.\d+)", info): 204 | hours, minutes, seconds = map(float, duration_match.groups()) 205 | duration_seconds = hours * 3600 + minutes * 60 + seconds 206 | return int(duration_seconds) 207 | return 600 208 | except Exception as e: 209 | logger.exception("获取音频时长时出错: %s", str(e)) 210 | return 600 211 | 212 | 213 | if __name__ == "__main__": 214 | # 简短示例 215 | asr = WhisperCppASR( 216 | audio_path="audio.mp3", 217 | model_path="models/ggml-tiny.bin", 218 | whisper_cpp_path="bin/whisper-cpp.exe", 219 | language="en", 220 | need_word_time_stamp=True, 221 | ) 222 | asr_data = asr._run(callback=print) 223 | -------------------------------------------------------------------------------- /app/core/storage/__init__.py: -------------------------------------------------------------------------------- 1 | # app/core/storage/__init__.py 2 | from .cache_manager import CacheManager 3 | from .models import TranslationCache, LLMCache, UsageStatistics, ASRCache 4 | 5 | __all__ = [ 6 | "CacheManager", 7 | "TranslationCache", 8 | "LLMCache", 9 | "UsageStatistics", 10 | "ASRCache", 11 | ] 12 | -------------------------------------------------------------------------------- /app/core/storage/constants.py: -------------------------------------------------------------------------------- 1 | # app/core/storage/constants.py 2 | from enum import Enum 3 | from datetime import timedelta 4 | 5 | 6 | class TranslatorType(Enum): 7 | GOOGLE = "google" 8 | BING = "bing" 9 | LLM = "llm" 10 | DEEPLX = "deeplx" 11 | 12 | 13 | class OperationType(Enum): 14 | TRANSLATION = "translation" 15 | LLM_CALL = "llm_call" 16 | 17 | 18 | # 缓存配置 19 | CACHE_CONFIG = { 20 | "max_age": timedelta(days=30), # 缓存最大保存时间 21 | "db_filename": "cache.db", 22 | "cleanup_threshold": 10000, # 触发清理的记录数阈值 23 | } 24 | -------------------------------------------------------------------------------- /app/core/storage/database.py: -------------------------------------------------------------------------------- 1 | # app/core/storage/database.py 2 | import os 3 | import logging 4 | from contextlib import contextmanager 5 | from sqlalchemy import create_engine 6 | from sqlalchemy.orm import sessionmaker 7 | from .models import Base 8 | from .constants import CACHE_CONFIG 9 | 10 | logger = logging.getLogger(__name__) 11 | 12 | 13 | class DatabaseManager: 14 | """数据库管理类,负责数据库连接和会话管理""" 15 | 16 | def __init__(self, app_data_path: str): 17 | self.db_path = os.path.join(app_data_path, CACHE_CONFIG["db_filename"]) 18 | self.db_url = f"sqlite:///{self.db_path}" 19 | self._engine = None 20 | self._session_maker = None 21 | self.init_db() 22 | 23 | def init_db(self): 24 | 
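# 补充注释(非原始代码):init_db 会按需创建数据目录,用 check_same_thread=False 和连接池参数初始化 SQLite 引擎,
# 并构建 sessionmaker 供下方 get_session 上下文管理器使用。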
"""初始化数据库连接和表结构""" 25 | try: 26 | os.makedirs(os.path.dirname(self.db_path), exist_ok=True) 27 | self._engine = create_engine( 28 | self.db_url, 29 | connect_args={"check_same_thread": False}, 30 | pool_pre_ping=True, 31 | pool_size=5, 32 | max_overflow=10, 33 | pool_recycle=3600, 34 | ) 35 | Base.metadata.create_all(self._engine) 36 | self._session_maker = sessionmaker(bind=self._engine) 37 | # logger.info(f"Database initialized at {self.db_path}") 38 | except Exception as e: 39 | logger.error(f"Failed to initialize database: {str(e)}") 40 | raise 41 | 42 | def close(self): 43 | """关闭数据库连接""" 44 | if self._engine: 45 | self._engine.dispose() 46 | self._engine = None 47 | self._session_maker = None 48 | 49 | @contextmanager 50 | def get_session(self): 51 | """获取数据库会话的上下文管理器""" 52 | if not self._engine or not self._session_maker: 53 | self.init_db() 54 | 55 | session = self._session_maker() 56 | try: 57 | yield session 58 | session.commit() 59 | except Exception as e: 60 | session.rollback() 61 | logger.error(f"Database session error: {str(e)}") 62 | raise 63 | finally: 64 | session.close() 65 | -------------------------------------------------------------------------------- /app/core/storage/models.py: -------------------------------------------------------------------------------- 1 | # app/core/storage/models.py 2 | from sqlalchemy import Column, Integer, String, Text, DateTime, JSON, Index, Date 3 | from sqlalchemy.ext.declarative import declarative_base 4 | from datetime import datetime, date 5 | from .constants import TranslatorType, OperationType 6 | 7 | Base = declarative_base() 8 | 9 | 10 | class ASRCache(Base): 11 | """语音识别缓存表""" 12 | 13 | __tablename__ = "asr_cache" 14 | 15 | id = Column(Integer, primary_key=True) 16 | crc32_hex = Column(String(8), nullable=False, index=True) 17 | asr_type = Column(String(50), nullable=False) # ASR服务类型 18 | result_data = Column(JSON, nullable=False) # ASR结果数据 19 | created_at = Column(DateTime, default=datetime.utcnow) 20 | updated_at = Column(DateTime, default=datetime.utcnow, onupdate=datetime.utcnow) 21 | 22 | __table_args__ = ( 23 | Index("idx_asr_cache_unique", "crc32_hex", "asr_type", unique=True), 24 | ) 25 | 26 | 27 | class TranslationCache(Base): 28 | """翻译结果缓存表""" 29 | 30 | __tablename__ = "translation_cache" 31 | 32 | id = Column(Integer, primary_key=True) 33 | source_text = Column(Text, nullable=False) 34 | translated_text = Column(Text, nullable=False) 35 | translator_type = Column(String(50), nullable=False) 36 | params = Column(JSON) 37 | content_hash = Column(String(32), nullable=False) 38 | created_at = Column(DateTime, default=datetime.utcnow) 39 | 40 | __table_args__ = (Index("idx_translation_lookup", content_hash, translator_type),) 41 | 42 | def __repr__(self): 43 | return f"" 44 | 45 | 46 | class LLMCache(Base): 47 | """LLM调用结果缓存表""" 48 | 49 | __tablename__ = "llm_cache" 50 | 51 | id = Column(Integer, primary_key=True) 52 | prompt = Column(Text, nullable=False) 53 | result = Column(Text, nullable=False) 54 | model_name = Column(String(100), nullable=False) 55 | params = Column(JSON) 56 | content_hash = Column(String(32), nullable=False) 57 | created_at = Column(DateTime, default=datetime.utcnow) 58 | 59 | __table_args__ = (Index("idx_llm_lookup", content_hash, model_name),) 60 | 61 | def __repr__(self): 62 | return f"" 63 | 64 | 65 | class UsageStatistics(Base): 66 | """使用统计表""" 67 | 68 | __tablename__ = "usage_statistics" 69 | 70 | id = Column(Integer, primary_key=True) 71 | operation_type = Column(String(50), 
nullable=False) 72 | service_name = Column(String(50), nullable=False) 73 | call_count = Column(Integer, default=0) 74 | token_count = Column(Integer, default=0) 75 | last_updated = Column(DateTime, default=datetime.utcnow) 76 | 77 | __table_args__ = ( 78 | Index("idx_usage_lookup", operation_type, service_name, unique=True), 79 | ) 80 | 81 | def __repr__(self): 82 | return f"" 83 | 84 | 85 | class DailyServiceUsage(Base): 86 | """每日服务使用次数表""" 87 | 88 | __tablename__ = "daily_service_usage" 89 | 90 | id = Column(Integer, primary_key=True) 91 | service_name = Column(String(50), nullable=False) # 服务名称 92 | usage_date = Column(Date, nullable=False) # 使用日期,改用 Date 类型 93 | usage_count = Column(Integer, default=0) # 使用次数 94 | daily_limit = Column(Integer, nullable=False) # 每日限制次数 95 | created_at = Column(DateTime, default=datetime.utcnow) 96 | updated_at = Column(DateTime, default=datetime.utcnow, onupdate=datetime.utcnow) 97 | 98 | __table_args__ = ( 99 | Index("idx_daily_usage_lookup", service_name, usage_date, unique=True), 100 | ) 101 | 102 | def __repr__(self): 103 | return f"" 104 | 105 | def __init__(self, **kwargs): 106 | """初始化时去除时分秒,只保留日期""" 107 | if "usage_date" in kwargs: 108 | if isinstance(kwargs["usage_date"], datetime): 109 | kwargs["usage_date"] = kwargs["usage_date"].date() 110 | elif isinstance(kwargs["usage_date"], date): 111 | pass 112 | super().__init__(**kwargs) 113 | -------------------------------------------------------------------------------- /app/core/subtitle_processor/__init__.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/WEIFENG2333/VideoCaptioner/295a40d0de071fef19439c3df2ca592a6d63de6a/app/core/subtitle_processor/__init__.py -------------------------------------------------------------------------------- /app/core/subtitle_processor/alignment.py: -------------------------------------------------------------------------------- 1 | import difflib 2 | 3 | 4 | class SubtitleAligner: 5 | """ 6 | 字幕文本对齐器,用于对齐两个文本序列,支持基于相似度的匹配。当目标文本缺少某项时,会使用其上一项进行填充。 7 | 8 | 使用示例: 9 | # 输入文本 10 | text1 = ['ab', 'b', 'c', 'd', 'e', 'f', 'g', 'h', 'i'] # 源文本 11 | text2 = ['a', 'b', 'c', 'd', 'f', 'g', 'h', 'i'] # 目标文本 12 | 13 | # 创建对齐器并执行对齐 14 | text_aligner = SubtitleAligner() 15 | aligned_source, aligned_target = text_aligner.align_texts(text1, text2) 16 | 17 | # 对齐结果 18 | aligned_source: ['ab', 'b', 'c', 'd', 'e', 'f', 'g', 'h', 'i'] # 源文本保持不变 19 | aligned_target: ['a', 'b', 'c', 'd', 'd', 'f', 'g', 'h', 'i'] # 缺失的'e'由'd'填充 20 | """ 21 | 22 | def __init__(self): 23 | self.line_numbers = [0, 0] 24 | 25 | def align_texts(self, source_text, target_text): 26 | """ 27 | Align two texts and return the paired lines. 28 | 29 | Args: 30 | source_text (list): List of lines from the source text. 31 | target_text (list): List of lines from the target text. 32 | 33 | Returns: 34 | tuple: Two lists containing aligned lines from source and target texts. 35 | """ 36 | diff_iterator = difflib.ndiff(source_text, target_text) 37 | return self._pair_lines(diff_iterator) 38 | 39 | def _pair_lines(self, diff_iterator): 40 | """ 41 | Pair lines from the diff iterator. 42 | 43 | Args: 44 | diff_iterator: Iterator from difflib.ndiff() 45 | 46 | Returns: 47 | tuple: Two lists containing aligned lines from source and target texts. 
48 | """ 49 | source_lines = [] 50 | target_lines = [] 51 | flag = 0 52 | 53 | for source_line, target_line, _ in self._line_iterator(diff_iterator): 54 | if source_line is not None: 55 | if source_line[1] == "\n": 56 | flag += 1 57 | continue 58 | source_lines.append(source_line[1]) 59 | if target_line is not None: 60 | if flag > 0: 61 | flag -= 1 62 | continue 63 | target_lines.append(target_line[1]) 64 | 65 | for i in range(1, len(target_lines)): 66 | if target_lines[i] == "\n": 67 | target_lines[i] = target_lines[i - 1] 68 | # target_lines[i] = source_lines[i] 69 | # target_lines[i + 1] = source_lines[i + 1] 70 | # target_lines[i - 1] = source_lines[i - 1] 71 | 72 | return source_lines, target_lines 73 | 74 | def _line_iterator(self, diff_iterator): 75 | """ 76 | Iterate through diff lines and yield paired lines. 77 | 78 | Args: 79 | diff_iterator: Iterator from difflib.ndiff() 80 | 81 | Yields: 82 | tuple: (source_line, target_line, has_diff) 83 | """ 84 | lines = [] 85 | blank_lines_pending = 0 86 | blank_lines_to_yield = 0 87 | 88 | while True: 89 | while len(lines) < 4: 90 | lines.append(next(diff_iterator, "X")) 91 | 92 | diff_type = "".join([line[0] for line in lines]) 93 | 94 | if diff_type.startswith("X"): 95 | blank_lines_to_yield = blank_lines_pending 96 | elif diff_type.startswith("-?+?"): 97 | yield self._format_line(lines, "?", 0), self._format_line( 98 | lines, "?", 1 99 | ), True 100 | continue 101 | elif diff_type.startswith("--++"): 102 | blank_lines_pending -= 1 103 | yield self._format_line(lines, "-", 0), None, True 104 | continue 105 | elif diff_type.startswith(("--?+", "--+", "- ")): 106 | source_line, target_line = self._format_line(lines, "-", 0), None 107 | blank_lines_to_yield, blank_lines_pending = blank_lines_pending - 1, 0 108 | elif diff_type.startswith("-+?"): 109 | yield self._format_line(lines, None, 0), self._format_line( 110 | lines, "?", 1 111 | ), True 112 | continue 113 | elif diff_type.startswith("-?+"): 114 | yield self._format_line(lines, "?", 0), self._format_line( 115 | lines, None, 1 116 | ), True 117 | continue 118 | elif diff_type.startswith("-"): 119 | blank_lines_pending -= 1 120 | yield self._format_line(lines, "-", 0), None, True 121 | continue 122 | elif diff_type.startswith("+--"): 123 | blank_lines_pending += 1 124 | yield None, self._format_line(lines, "+", 1), True 125 | continue 126 | elif diff_type.startswith(("+ ", "+-")): 127 | source_line, target_line = None, self._format_line(lines, "+", 1) 128 | blank_lines_to_yield, blank_lines_pending = blank_lines_pending + 1, 0 129 | elif diff_type.startswith("+"): 130 | blank_lines_pending += 1 131 | yield None, self._format_line(lines, "+", 1), True 132 | continue 133 | elif diff_type.startswith(" "): 134 | yield self._format_line(lines[:], None, 0), self._format_line( 135 | lines, None, 1 136 | ), False 137 | continue 138 | 139 | while blank_lines_to_yield < 0: 140 | blank_lines_to_yield += 1 141 | yield None, ("", "\n"), True 142 | while blank_lines_to_yield > 0: 143 | blank_lines_to_yield -= 1 144 | yield ("", "\n"), None, True 145 | 146 | if diff_type.startswith("X"): 147 | return 148 | else: 149 | yield source_line, target_line, True 150 | 151 | def _format_line(self, lines, format_key, side): 152 | """ 153 | Format a line with the appropriate markup. 154 | 155 | Args: 156 | lines (list): List of lines to process. 157 | format_key (str): Formatting key ('?', '-', '+', or None). 158 | side (int): 0 for source, 1 for target. 
159 | 160 | Returns: 161 | tuple: (line_number, formatted_text) 162 | """ 163 | self.line_numbers[side] += 1 164 | if format_key is None: 165 | return self.line_numbers[side], lines.pop(0)[2:] 166 | if format_key == "?": 167 | text, markers = lines.pop(0), lines.pop(0) 168 | text = text[2:] 169 | else: 170 | text = lines.pop(0)[2:] 171 | if not text: 172 | text = "" 173 | return self.line_numbers[side], text 174 | 175 | 176 | if __name__ == "__main__": 177 | # 简短示例 178 | text1 = ["ab", "b", "c", "d", "e", "f", "g", "h", "i"] 179 | text2 = ["a", "b", "c", "d", "f", "g", "h", "i"] 180 | 181 | # 使用示例 182 | text_aligner = SubtitleAligner() 183 | 184 | aligned_source, aligned_target = text_aligner.align_texts(text1, text2) 185 | 186 | print("Aligned Source:", len(aligned_source)) 187 | print("Aligned Target:", len(aligned_target)) 188 | print(aligned_source) 189 | print(aligned_target) 190 | 191 | i = 1 192 | for l1, l2 in zip(aligned_source, aligned_target): 193 | print(f"行 {i}:") 194 | print(f"文本1: {l1}") 195 | print(f"文本2: {l2}") 196 | print(difflib.SequenceMatcher(None, l1, l2).ratio()) 197 | print("----") 198 | i += 1 199 | 200 | # d = difflib.HtmlDiff() 201 | # html = d.make_file(text1, text2) 202 | # with open('../output/diff.html', 'w', encoding='utf-8') as f: 203 | # f.write(html) 204 | -------------------------------------------------------------------------------- /app/core/subtitle_processor/optimize.py: -------------------------------------------------------------------------------- 1 | import json 2 | import logging 3 | import os 4 | from concurrent.futures import ThreadPoolExecutor, as_completed 5 | from pathlib import Path 6 | from typing import Callable, Dict, List, Optional, Union 7 | 8 | import retry 9 | from openai import OpenAI 10 | 11 | from app.config import CACHE_PATH 12 | from app.core.bk_asr.asr_data import ASRData, ASRDataSeg 13 | from app.core.storage.cache_manager import CacheManager 14 | from app.core.utils import json_repair 15 | from app.core.subtitle_processor.alignment import SubtitleAligner 16 | from app.core.subtitle_processor.prompt import OPTIMIZER_PROMPT 17 | from app.core.utils.logger import setup_logger 18 | 19 | logger = setup_logger("subtitle_optimizer") 20 | 21 | 22 | class SubtitleOptimizer: 23 | """字幕优化器,支持缓存功能""" 24 | 25 | def __init__( 26 | self, 27 | thread_num: int = 5, 28 | batch_num: int = 10, 29 | model: str = "gpt-4o-mini", 30 | custom_prompt: str = "", 31 | temperature: float = 0.7, 32 | timeout: int = 60, 33 | retry_times: int = 1, 34 | update_callback: Optional[Callable] = None, 35 | ): 36 | self._init_client() 37 | self.thread_num = thread_num 38 | self.batch_num = batch_num 39 | self.model = model 40 | self.custom_prompt = custom_prompt 41 | self.temperature = temperature 42 | self.timeout = timeout 43 | self.retry_times = retry_times 44 | self.is_running = True 45 | self.update_callback = update_callback 46 | self._init_thread_pool() 47 | self.cache_manager = CacheManager(str(CACHE_PATH)) 48 | 49 | def _init_client(self): 50 | """初始化OpenAI客户端""" 51 | base_url = os.getenv("OPENAI_BASE_URL") 52 | api_key = os.getenv("OPENAI_API_KEY") 53 | if not (base_url and api_key): 54 | raise ValueError("环境变量 OPENAI_BASE_URL 和 OPENAI_API_KEY 必须设置") 55 | 56 | self.client = OpenAI(base_url=base_url, api_key=api_key) 57 | 58 | def _init_thread_pool(self): 59 | """初始化线程池""" 60 | self.executor = ThreadPoolExecutor(max_workers=self.thread_num) 61 | import atexit 62 | 63 | atexit.register(self.stop) 64 | 65 | def optimize_subtitle(self, subtitle_data: 
Union[str, ASRData]) -> ASRData: 66 | """优化字幕文件""" 67 | try: 68 | # 读取字幕文件 69 | if isinstance(subtitle_data, str): 70 | asr_data = ASRData.from_subtitle_file(subtitle_data) 71 | else: 72 | asr_data = subtitle_data 73 | 74 | # 将ASRData转换为字典格式 75 | subtitle_dict = { 76 | str(i): seg.text for i, seg in enumerate(asr_data.segments, 1) 77 | } 78 | 79 | # 分批处理字幕 80 | chunks = self._split_chunks(subtitle_dict) 81 | 82 | # 多线程优化 83 | optimized_dict = self._parallel_optimize(chunks) 84 | 85 | # 创建新的ASRDataSeg列表 86 | new_segments = self._create_segments(asr_data.segments, optimized_dict) 87 | 88 | return ASRData(new_segments) 89 | except Exception as e: 90 | logger.error(f"优化失败:{str(e)}") 91 | raise RuntimeError(f"优化失败:{str(e)}") 92 | 93 | def _split_chunks(self, subtitle_dict: Dict[str, str]) -> List[Dict[str, str]]: 94 | """将字幕分割成块""" 95 | items = list(subtitle_dict.items()) 96 | return [ 97 | dict(items[i : i + self.batch_num]) 98 | for i in range(0, len(items), self.batch_num) 99 | ] 100 | 101 | def _parallel_optimize(self, chunks: List[Dict[str, str]]) -> Dict[str, str]: 102 | """并行优化所有块""" 103 | futures = [] 104 | optimized_dict = {} 105 | 106 | for chunk in chunks: 107 | if not self.executor: 108 | raise ValueError("线程池未初始化") 109 | future = self.executor.submit(self._safe_optimize_chunk, chunk) 110 | futures.append(future) 111 | 112 | for future in as_completed(futures): 113 | if not self.is_running: 114 | logger.info("优化器已停止运行,退出优化") 115 | break 116 | try: 117 | result = future.result() 118 | optimized_dict.update(result) 119 | except Exception as e: 120 | logger.error(f"优化块失败:{str(e)}") 121 | # 对于失败的块,保留原文 122 | for k, v in chunk.items(): 123 | optimized_dict[k] = v 124 | 125 | return optimized_dict 126 | 127 | def _safe_optimize_chunk(self, chunk: Dict[str, str]) -> Dict[str, str]: 128 | """安全的优化块,包含重试逻辑""" 129 | for i in range(self.retry_times): 130 | try: 131 | return self._optimize_chunk(chunk) 132 | except Exception as e: 133 | if i == self.retry_times - 1: 134 | raise 135 | logger.warning(f"优化重试 {i+1}/{self.retry_times}: {str(e)}") 136 | return chunk 137 | 138 | def _optimize_chunk(self, subtitle_chunk: Dict[str, str]) -> Dict[str, str]: 139 | """优化字幕块""" 140 | logger.info( 141 | f"[+]正在优化字幕:{next(iter(subtitle_chunk))} - {next(reversed(subtitle_chunk))}" 142 | ) 143 | user_prompt = f"Correct the following subtitles. 
Keep the original language, do not translate:\n{str(subtitle_chunk)}" 144 | if self.custom_prompt: 145 | user_prompt += ( 146 | f"\nReference content:\n{self.custom_prompt}" 147 | ) 148 | 149 | # 检查缓存 150 | cache_params = { 151 | "temperature": self.temperature, 152 | "model": self.model, 153 | } 154 | # 构建缓存key 155 | cache_key = f"{len(OPTIMIZER_PROMPT)}_{user_prompt}" 156 | cache_result = self.cache_manager.get_llm_result( 157 | cache_key, self.model, **cache_params 158 | ) 159 | 160 | if cache_result: 161 | logger.info("使用缓存的优化结果") 162 | return json.loads(cache_result) 163 | 164 | # 构建提示词 165 | messages = [ 166 | {"role": "system", "content": OPTIMIZER_PROMPT}, 167 | { 168 | "role": "user", 169 | "content": user_prompt, 170 | }, 171 | ] 172 | 173 | # 调用API优化 174 | response = self.client.chat.completions.create( 175 | model=self.model, 176 | messages=messages, # type: ignore 177 | temperature=self.temperature, 178 | timeout=self.timeout, 179 | ) 180 | 181 | # 解析结果 182 | result: Dict[str, str] = json_repair.loads(response.choices[0].message.content) # type: ignore 183 | 184 | # 修复字幕对齐 185 | aligned_result = self._repair_subtitle(subtitle_chunk, result) 186 | 187 | # 保存到缓存 188 | self.cache_manager.set_llm_result( 189 | cache_key, 190 | json.dumps(aligned_result, ensure_ascii=False), 191 | self.model, 192 | **cache_params, 193 | ) 194 | 195 | if self.update_callback: 196 | self.update_callback(aligned_result) 197 | 198 | return aligned_result 199 | 200 | @staticmethod 201 | def _repair_subtitle( 202 | original: Dict[str, str], optimized: Dict[str, str] 203 | ) -> Dict[str, str]: 204 | """修复字幕对齐问题""" 205 | aligner = SubtitleAligner() 206 | original_list = list(original.values()) 207 | optimized_list = list(optimized.values()) 208 | 209 | aligned_source, aligned_target = aligner.align_texts( 210 | original_list, optimized_list 211 | ) 212 | 213 | if len(aligned_source) != len(aligned_target): 214 | raise ValueError("对齐后字幕长度不一致") 215 | 216 | # 构建对齐后的字典 217 | start_id = next(iter(original.keys())) 218 | return {str(int(start_id) + i): text for i, text in enumerate(aligned_target)} 219 | 220 | @staticmethod 221 | def _create_segments( 222 | original_segments: List[ASRDataSeg], 223 | optimized_dict: Dict[str, str], 224 | ) -> List[ASRDataSeg]: 225 | """创建新的字幕段""" 226 | return [ 227 | ASRDataSeg( 228 | text=optimized_dict.get(str(i), seg.text), 229 | start_time=seg.start_time, 230 | end_time=seg.end_time, 231 | ) 232 | for i, seg in enumerate(original_segments, 1) 233 | ] 234 | 235 | def stop(self): 236 | """停止优化器""" 237 | if not self.is_running: 238 | return 239 | 240 | logger.info("正在停止优化器...") 241 | self.is_running = False 242 | if hasattr(self, "executor") and self.executor is not None: 243 | try: 244 | self.executor.shutdown(wait=False, cancel_futures=True) 245 | except Exception as e: 246 | logger.error(f"关闭线程池时出错:{str(e)}") 247 | finally: 248 | self.executor = None 249 | -------------------------------------------------------------------------------- /app/core/subtitle_processor/prompt.py: -------------------------------------------------------------------------------- 1 | SPLIT_PROMPT_SEMANTIC = """ 2 | 您是一位字幕分段专家,擅长将未分段的文本拆分为单独的部分,用
<br>分隔。 3 | 4 | 要求: 5 | - 对于中文、日语或其他CJK语言,每个部分不得超过${max_word_count_cjk}个字。 6 | - 对于英语等拉丁语言,每个部分不得超过${max_word_count_english}个单词。 7 | - 分隔的每段之间也不应该太短。 8 | - 需要根据语义使用<br>进行分段。 9 | - 不修改或添加任何内容至原文,仅在每部分之间插入<br>。 10 | - 直接返回分段后的文本,无需额外解释。 11 | 12 | ## Examples 13 | Input: 14 | 大家好今天我们带来的3d创意设计作品是禁制演示器我是来自中山大学附属中学的方若涵我是陈欣然我们这一次作品介绍分为三个部分第一个部分提出问题第二个部分解决方案第三个部分作品介绍当我们学习进制的时候难以掌握老师教学 也比较抽象那有没有一种教具或演示器可以将进制的原理形象生动地展现出来 15 | Output: 16 | 大家好<br>今天我们带来的3d创意设计作品是<br>禁制演示器<br>我是来自中山大学附属中学的方若涵<br>我是陈欣然<br>我们这一次作品介绍分为三个部分<br>第一个部分提出问题<br>第二个部分解决方案<br>第三个部分作品介绍<br>当我们学习进制的时候难以掌握<br>老师教学也比较抽象<br>那有没有一种教具或演示器<br>可以将进制的原理形象生动地展现出来 17 | 18 | 19 | Input: 20 | the upgraded claude sonnet is now available for all users developers can build with the computer use beta on the anthropic api amazon bedrock and google cloud’s vertex ai the new claude haiku will be released later this month 21 | Output: 22 | the upgraded claude sonnet is now available for all users<br>developers can build with the computer use beta<br>on the anthropic api amazon bedrock and google cloud’s vertex ai<br>the new claude haiku will be released later this month 23 | """ 24 | 25 | 26 | SPLIT_PROMPT_SENTENCE = """ 27 | 您是一位字幕分句专家,擅长将未分段的文本拆分为单独的一小句,用
<br>分隔。 28 | 即在本应该出现逗号、句号的地方加入<br>。 29 | 30 | 要求: 31 | - 对于中文、日语或其他CJK语言,每个部分不得超过${max_word_count_cjk}个字。 32 | - 对于英语等拉丁语言,每个部分不得超过${max_word_count_english}个单词。 33 | - 分隔的每段之间也不应该太短。 34 | - 不修改或添加任何内容至原文,仅在每个句子间之间插入<br>。 35 | - 直接返回分段后的文本,不需要任何额外解释。 36 | - 保持<br>之间的内容意思完整。 37 | 38 | ## Examples 39 | Input: 40 | 大家好今天我们带来的3d创意设计作品是禁制演示器我是来自中山大学附属中学的方若涵我是陈欣然我们这一次作品介绍分为三个部分第一个部分提出问题第二个部分解决方案第三个部分作品介绍当我们学习进制的时候难以掌握老师教学 也比较抽象那有没有一种教具或演示器可以将进制的原理形象生动地展现出来 41 | Output: 42 | 大家好<br>今天我们带来的3d创意设计作品是禁制演示器<br>我是来自中山大学附属中学的方若涵<br>我是陈欣然<br>我们这一次作品介绍分为三个部分<br>第一个部分提出问题<br>第二个部分解决方案<br>第三个部分作品介绍<br>当我们学习进制的时候难以掌握<br>老师教学也比较抽象<br>那有没有一种教具或演示器可以将进制的原理形象生动地展现出来 43 | 44 | Input: 45 | the upgraded claude sonnet is now available for all users developers can build with the computer use beta on the anthropic api amazon bedrock and google cloud’s vertex ai the new claude haiku will be released later this month 46 | Output: 47 | the upgraded claude sonnet is now available for all users<br>developers can build with the computer use beta on the anthropic api amazon bedrock and google cloud’s vertex ai<br>
the new claude haiku will be released later this month 48 | """ 49 | 50 | SUMMARIZER_PROMPT = """ 51 | 您是一位**专业视频分析师**,擅长从视频字幕中准确提取信息,包括主要内容和重要术语。 52 | 53 | ## 您的任务 54 | 55 | ### 1. 总结视频内容 56 | - 确定视频类型,根据具体视频内容,解释翻译时需要注意的要点。 57 | - 提供详细总结:对视频内容提供详细说明。 58 | 59 | ### 2. 提取所有重要术语 60 | 61 | - 提取所有重要名词和短语(无需翻译)。你需要判断识别错误的词语,处理并纠正因同音字或相似音调造成的错误名称或者术语 62 | 63 | ## 输出格式 64 | 65 | 以JSON格式返回结果,请使用原字幕语言。例如,如果原字幕是英语,则返回结果也使用英语。 66 | 67 | JSON应包括两个字段:`summary`和`terms` 68 | 69 | - **summary**:视频内容的总结。给出翻译建议。 70 | - **terms**: 71 | - `entities`:人名、组织、物体、地点等名称。 72 | - `keywords`:全部专业或技术术语,以及其他重要关键词或短语。不需要翻译。 73 | """ 74 | 75 | OPTIMIZER_PROMPT = """ 76 | You are a subtitle correction expert. You will receive subtitle text and correct any errors while following specific rules. 77 | 78 | # Input Format 79 | - JSON object with numbered subtitle entries 80 | - Optional reference information/prompt with content context, terminology, and requirements 81 | 82 | # Correction Rules 83 | 1. Preserve original sentence structure and expression - no synonyms or paraphrasing 84 | 2. Remove filler words and non-verbal sounds (um, uh, laughter, coughing) 85 | 3. Standardize: 86 | - Punctuation 87 | - English capitalization 88 | - Mathematical formulas in plain text (using ×, ÷, etc.) 89 | - Code variable names and functions 90 | 4. Maintain one-to-one correspondence of subtitle numbers - no merging or splitting 91 | 5. Prioritize provided reference information when available 92 | 6. Keep original language (English→English, Chinese→Chinese) 93 | 7. No translations or explanations 94 | 95 | # Output Format 96 | Pure JSON object with corrected subtitles: 97 | ``` 98 | { 99 | "0": "[corrected subtitle]", 100 | "1": "[corrected subtitle]", 101 | ... 102 | } 103 | ``` 104 | 105 | # Examples 106 | Input: 107 | ``` 108 | { 109 | "0": "um today we'll learn about bython programming", 110 | "1": "it was created by guidoan rossum in uhh 1991", 111 | "2": "print hello world is an easy function *coughs*" 112 | } 113 | ``` 114 | Reference: 115 | ``` 116 | - Content: Python introduction 117 | - Terms: Python, Guido van Rossum 118 | ``` 119 | Output: 120 | ``` 121 | { 122 | "0": "Today we'll learn about Python programming", 123 | "1": "It was created by Guido van Rossum in 1991", 124 | "2": "print('Hello World') is an easy function" 125 | } 126 | ``` 127 | 128 | # Notes 129 | - Preserve original meaning while fixing technical errors 130 | - No content additions or explanations in output 131 | - Output should be pure JSON without commentary 132 | - Keep the original language, do not translate. 133 | """ 134 | 135 | TRANSLATE_PROMPT = """ 136 | # Role: 资深翻译专家 137 | 你是一位经验丰富的 Netflix 字幕翻译专家,精通${target_language}的翻译,擅长将视频字幕译成流畅易懂的${target_language}。 138 | 139 | # Attention: 140 | - 译文要符合${target_language}的表达习惯,通俗易懂,连贯流畅 141 | - 对于专有的名词或术语,可以适当保留或音译 142 | - 文化相关性:恰当运用成语、网络用语和文化适当的表达方式,使翻译内容更贴近目标受众的语言习惯和文化体验。 143 | - 严格保持字幕编号的一一对应,不要合并或拆分字幕! 144 | 145 | # 术语或要求: 146 | - 翻译过程中要遵循术语词汇(如果有) 147 | ${custom_prompt} 148 | 149 | # Examples 150 | 151 | Input: 152 | ```json 153 | 154 | { 155 | "0": "Original Subtitle 1", 156 | "1": "Original Subtitle 2" 157 | ... 158 | } 159 | ``` 160 | 161 | Output: 162 | ```json 163 | { 164 | "0": "Translated Subtitle 1", 165 | "1": "Translated Subtitle 2" 166 | ... 
167 | } 168 | ``` 169 | """ 170 | 171 | REFLECT_TRANSLATE_PROMPT = """ 172 | # Role: 资深翻译专家 173 | 174 | ## Background: 175 | 你是一位经验丰富的字幕翻译专家,精通${target_language}的翻译,擅长将视频字幕译成流畅易懂的${target_language}。 176 | 177 | ## Attention: 178 | - 翻译过程中要始终坚持"信、达、雅"的原则。 179 | - 译文要符合${target_language}的语言文化表达习惯,通俗易懂,连贯流畅 。 180 | - 对于专有的名词或术语,可以适当保留或音译。 181 | - 文化相关性:恰当运用成语、网络用语和文化适当的表达方式。 182 | - 严格保持字幕编号的一一对应,不要合并或拆分字幕。 183 | 184 | ## Constraints: 185 | - 必须严格遵循四轮翻译流程:直译、意译、改善建议、定稿 186 | 187 | ## 术语词汇翻译对应表以及其他要求: 188 | ${custom_prompt} 189 | 190 | Input format: 191 | A JSON structure where each subtitle is identified by a unique numeric key: 192 | { 193 | "1": "<<< Original Content >>>", 194 | "2": "<<< Original Content >>>", 195 | ... 196 | } 197 | 198 | ## OutputFormat: 199 | Return a pure JSON following this structure and translate into ${target_language}: 200 | { 201 | "1": { 202 | "translation": "<<< 第一轮直译:逐字逐句忠实原文,不遗漏任何信息。直译时力求忠实原文,使用${target_language} >>>", 203 | "free_translation": "<<< 第二轮意译:在保证原文意思不改变的基础上用通俗流畅的${target_language}意译原文,适度采用一些中文成语、熟语、网络流行语等,使译文更加地道易懂 >>>", 204 | "revise_suggestions": "<<< 第三轮改进建议:仔细审视以上译文,检测是否参考术语词汇翻译对应表以及要求(如果有)。结合注意事项,指出格式准确性、语句连贯性,阅读习惯和语言文化,给出具体改进建议。 >>>", 205 | "revised_translation": "<<< 第四轮定稿:择优选取整合,修改润色,最终定稿出一个简洁畅达、符合${target_language}阅读习惯和语言文化的译文 >>>" 206 | }, 207 | ... 208 | } 209 | 注:示例中“<<<”、“>>>”仅为需要的遵循准则,实际输出应为对应的专业翻译结果 210 | 211 | 212 | # EXAMPLE_INPUT 213 | { 214 | "1": "为了实现双碳目标,中国正在努力推动碳达峰和碳中和。", 215 | "2": "这项技术真是YYDS!" 216 | } 217 | 218 | # EXAMPLE_OUTPUT 219 | { 220 | "1": { 221 | "translation": "In order to achieve the dual carbon goals, China is working hard to promote carbon peaking and carbon neutrality.", 222 | "free_translation": "To realize the dual carbon goals, China is striving to advance carbon peaking and carbon neutrality.", 223 | "revise_suggestions": "该句中涉及多个专业术语,如“dual carbon goals”(双碳目标)、“carbon peaking”(碳达峰)和“carbon neutrality”(碳中和),已参照相关术语词汇对应表进行翻译,确保专业性与准确性。在意译阶段,建议使用“To realize”替代冗长的“In order to achieve”,同时将“working hard to promote”调整为更简洁有力的“striving to advance”,以增强表达效果,符合视频字幕的简洁性和流畅性。", 224 | "revised_translation": "To realize the dual carbon goals, China is striving to advance carbon peaking and carbon neutrality." 225 | }, 226 | "2": { 227 | "translation": "This technology is really YYDS!", 228 | "free_translation": "This technology is absolutely the GOAT!", 229 | "revise_suggestions": "‘YYDS’作为中文网络流行语,在英语中缺乏直接对应。参考文化背景和表达习惯,将其意译为‘GOAT’(Greatest Of All Time),既保留了原文的赞美和推崇之情,又符合英语表达习惯。在此基础上,使用‘absolutely’替代‘really’使语气更加强烈和自然,适合视频聊天的语境。", 230 | "revised_translation": "This technology is absolutely the GOAT!" 231 | } 232 | } 233 | """ 234 | 235 | SINGLE_TRANSLATE_PROMPT = """ 236 | You are a professional ${target_language} translator. 237 | Please translate the following text into ${target_language}. 238 | Return the translation result directly without any explanation or other content. 
239 | 240 | """ 241 | -------------------------------------------------------------------------------- /app/core/subtitle_processor/run.py: -------------------------------------------------------------------------------- 1 | from pathlib import Path 2 | from typing import Callable, Dict, Optional 3 | 4 | from app.core.bk_asr.asr_data import ASRData, from_subtitle_file 5 | from app.core.entities import SubtitleConfig 6 | from app.core.subtitle_processor.optimization import SubtitleOptimizer 7 | from app.core.subtitle_processor.splitting import merge_segments 8 | from app.core.utils.logger import setup_logger 9 | 10 | logger = setup_logger("subtitle_processor") 11 | 12 | 13 | def run( 14 | subtitle_path: str, 15 | config: SubtitleConfig, 16 | callback: Optional[Callable[[Dict], None]] = None, 17 | ) -> ASRData: 18 | """ 19 | 运行字幕处理流程 20 | 21 | Args: 22 | subtitle_path: 字幕文件路径 23 | config: 字幕处理配置 24 | callback: 回调函数,用于更新进度 25 | 26 | Returns: 27 | ASRData: 处理后的字幕数据 28 | """ 29 | logger.info(f"\n===========字幕处理任务开始===========") 30 | 31 | # 1. 加载字幕文件 32 | asr_data = from_subtitle_file(subtitle_path) 33 | 34 | # 2. 如果需要分割字幕 35 | # 检查是否需要合并重新断句 36 | if config.need_split: 37 | asr_data.split_to_word_segments() 38 | 39 | if asr_data.is_word_timestamp(): 40 | logger.info("正在进行字幕断句...") 41 | asr_data = merge_segments( 42 | asr_data, 43 | model=config.llm_model, 44 | num_threads=config.thread_num, 45 | max_word_count_cjk=config.max_word_count_cjk, 46 | max_word_count_english=config.max_word_count_english, 47 | ) 48 | 49 | # 3. 如果需要优化或翻译 50 | if config.need_optimize or config.need_translate: 51 | logger.info("正在进行字幕优化/翻译...") 52 | # 设置环境变量 53 | import os 54 | 55 | os.environ["OPENAI_BASE_URL"] = config.base_url 56 | os.environ["OPENAI_API_KEY"] = config.api_key 57 | 58 | # 创建优化器 59 | optimizer = SubtitleOptimizer( 60 | model=config.llm_model, 61 | target_language=config.target_language, 62 | batch_num=config.batch_size, 63 | thread_num=config.thread_num, 64 | ) 65 | 66 | # 制作成请求llm接口的格式 67 | subtitle_json = { 68 | str(k): v["original_subtitle"] for k, v in asr_data.to_json().items() 69 | } 70 | 71 | # 进行优化/翻译 72 | optimizer_result = optimizer.optimizer_multi_thread( 73 | subtitle_json, translate=config.need_translate, callback=callback 74 | ) 75 | 76 | # 更新字幕内容 77 | for i, subtitle_text in optimizer_result.items(): 78 | seg = asr_data.segments[int(i) - 1] 79 | seg.text = subtitle_text 80 | 81 | return asr_data 82 | -------------------------------------------------------------------------------- /app/core/subtitle_processor/split_by_llm.py: -------------------------------------------------------------------------------- 1 | import hashlib 2 | import json 3 | import os 4 | import re 5 | from typing import List, Optional 6 | 7 | import openai 8 | import retry 9 | 10 | from app.config import CACHE_PATH 11 | 12 | from ..utils.logger import setup_logger 13 | from .prompt import SPLIT_SYSTEM_PROMPT 14 | 15 | logger = setup_logger("split_by_llm") 16 | 17 | MAX_WORD_COUNT = 20 # 英文单词或中文字符的最大数量 18 | 19 | 20 | def count_words(text: str) -> int: 21 | """ 22 | 统计混合文本中英文单词数和中文字符数的总和 23 | """ 24 | chinese_chars = len(re.findall(r'[\u4e00-\u9fff]', text)) 25 | english_text = re.sub(r'[\u4e00-\u9fff]', ' ', text) 26 | english_words = len(english_text.strip().split()) 27 | return english_words + chinese_chars 28 | 29 | 30 | def get_cache_key(text: str, model: str) -> str: 31 | """ 32 | 生成缓存键值 33 | """ 34 | return hashlib.md5(f"{text}_{model}".encode()).hexdigest() 35 | 36 | 37 | def get_cache(text: str, model: str) -> 
Optional[List[str]]: 38 | """ 39 | 从缓存中获取断句结果 40 | """ 41 | cache_key = get_cache_key(text, model) 42 | cache_file = CACHE_PATH / f"{cache_key}.json" 43 | cache_file.parent.mkdir(parents=True, exist_ok=True) 44 | if cache_file.exists(): 45 | try: 46 | with open(cache_file, 'r', encoding='utf-8') as f: 47 | return json.load(f) 48 | except (IOError, json.JSONDecodeError): 49 | return None 50 | return None 51 | 52 | 53 | def set_cache(text: str, model: str, result: List[str]) -> None: 54 | """ 55 | 将断句结果设置到缓存中 56 | """ 57 | cache_key = get_cache_key(text, model) 58 | cache_file = CACHE_PATH / f"{cache_key}.json" 59 | cache_file.parent.mkdir(parents=True, exist_ok=True) 60 | try: 61 | with open(cache_file, 'w', encoding='utf-8') as f: 62 | json.dump(result, f, ensure_ascii=False) 63 | except IOError: 64 | pass 65 | 66 | 67 | def split_by_llm(text: str, 68 | model: str = "gpt-4o-mini", 69 | use_cache: bool = False, 70 | max_word_count_cjk: int = 18, 71 | max_word_count_english: int = 12) -> List[str]: 72 | """ 73 | 包装 split_by_llm_retry 函数,确保在重试全部失败后返回空列表 74 | """ 75 | try: 76 | return split_by_llm_retry(text, model, use_cache, max_word_count_cjk, max_word_count_english) 77 | except Exception as e: 78 | logger.error(f"断句失败: {e}") 79 | return [text] 80 | 81 | @retry.retry(tries=2) 82 | def split_by_llm_retry(text: str, 83 | model: str = "gpt-4o-mini", 84 | use_cache: bool = False, 85 | max_word_count_cjk: int = 18, 86 | max_word_count_english: int = 12) -> List[str]: 87 | """ 88 | 使用LLM进行文本断句 89 | """ 90 | system_prompt = SPLIT_SYSTEM_PROMPT.replace("[max_word_count_cjk]", str(max_word_count_cjk)) 91 | system_prompt = system_prompt.replace("[max_word_count_english]", str(max_word_count_english)) 92 | user_prompt = f"Please use multiple
tags to separate the following sentence:\n{text}" 93 | 94 | if use_cache: 95 | cached_result = get_cache(system_prompt+user_prompt, model) 96 | if cached_result: 97 | logger.info(f"从缓存中获取断句结果") 98 | return cached_result 99 | logger.info(f"未命中缓存,开始断句") 100 | # 初始化OpenAI客户端 101 | client = openai.OpenAI() 102 | response = client.chat.completions.create( 103 | model=model, 104 | messages=[ 105 | {"role": "system", "content": system_prompt}, 106 | {"role": "user", "content": user_prompt} 107 | ], 108 | temperature=0.2, 109 | timeout=80 110 | ) 111 | result = response.choices[0].message.content 112 | 113 | # print(f"断句结果: {result}") 114 | # 清理结果中的多余换行符 115 | result = re.sub(r'\n+', '', result) 116 | split_result = [segment.strip() for segment in result.split("
") if segment.strip()] 117 | 118 | br_count = len(split_result) 119 | if br_count < count_words(text) / MAX_WORD_COUNT * 0.9: 120 | raise Exception("断句失败") 121 | set_cache(system_prompt+user_prompt, model, split_result) 122 | return split_result 123 | 124 | 125 | if __name__ == "__main__": 126 | sample_text = ( 127 | "大家好我叫杨玉溪来自有着良好音乐氛围的福建厦门自记事起我眼中的世界就是朦胧的童话书是各色杂乱的线条电视机是颜色各异的雪花小伙伴是只听其声不便骑行的马赛克后来我才知道这是一种眼底黄斑疾病虽不至于失明但终身无法治愈" 128 | ) 129 | sentences = split_by_llm(sample_text, use_cache=True) 130 | print(sentences) 131 | -------------------------------------------------------------------------------- /app/core/subtitle_processor/summarization.py: -------------------------------------------------------------------------------- 1 | import os 2 | 3 | from openai import OpenAI 4 | 5 | from ..utils import json_repair 6 | from ..utils.logger import setup_logger 7 | from .prompt import SUMMARIZER_PROMPT 8 | 9 | logger = setup_logger("subtitle_summarizer") 10 | 11 | 12 | class SubtitleSummarizer: 13 | def __init__(self, model) -> None: 14 | base_url = os.getenv("OPENAI_BASE_URL") 15 | api_key = os.getenv("OPENAI_API_KEY") 16 | 17 | if not base_url or not api_key: 18 | raise ValueError("环境变量 OPENAI_BASE_URL 和 OPENAI_API_KEY 必须设置") 19 | 20 | self.model = model 21 | self.client = OpenAI(base_url=base_url, api_key=api_key) 22 | 23 | def summarize(self, subtitle_content: str) -> str: 24 | logger.info(f"开始摘要化字幕内容") 25 | try: 26 | subtitle_content = subtitle_content[:3000] 27 | response = self.client.chat.completions.create( 28 | model=self.model, 29 | stream=False, 30 | messages=[ 31 | {"role": "system", "content": SUMMARIZER_PROMPT}, 32 | { 33 | "role": "user", 34 | "content": f"summarize the video content:\n{subtitle_content}", 35 | }, 36 | ], 37 | ) 38 | return str(json_repair.loads(response.choices[0].message.content)) 39 | except Exception as e: 40 | logger.exception(f"摘要化字幕内容失败: {e}") 41 | return "" 42 | 43 | 44 | if __name__ == "__main__": 45 | summarizer = SubtitleSummarizer() 46 | example_subtitles = { 47 | 0: "既然是想做并发编程", 48 | 1: "比如说肯定是想干嘛", 49 | 2: "开启多条线程来同时执行任务", 50 | } 51 | example_subtitles = dict(list(example_subtitles.items())[:5]) 52 | 53 | content = "".join(example_subtitles.values()) 54 | result = summarizer.summarize(content) 55 | print(result) 56 | -------------------------------------------------------------------------------- /app/core/utils/__init__.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/WEIFENG2333/VideoCaptioner/295a40d0de071fef19439c3df2ca592a6d63de6a/app/core/utils/__init__.py -------------------------------------------------------------------------------- /app/core/utils/ass_auto_wrap.py: -------------------------------------------------------------------------------- 1 | import re 2 | from pathlib import Path 3 | from typing import Dict, Optional 4 | 5 | 6 | def is_mainly_cjk(text: str) -> bool: 7 | """ 8 | 判断文本是否主要由中日韩文字组成 9 | """ 10 | # 定义CJK字符的Unicode范围 11 | cjk_patterns = [ 12 | r"[\u4e00-\u9fff]", # 中日韩统一表意文字 13 | r"[\u3040-\u309f]", # 平假名 14 | r"[\u30a0-\u30ff]", # 片假名 15 | r"[\uac00-\ud7af]", # 韩文音节 16 | ] 17 | cjk_count = 0 18 | for pattern in cjk_patterns: 19 | cjk_count += len(re.findall(pattern, text)) 20 | total_chars = len("".join(text.split())) 21 | return cjk_count / total_chars > 0.4 if total_chars > 0 else False 22 | 23 | 24 | def parse_ass_info(ass_content: str) -> tuple[int, Dict[str, int]]: 25 | """ 26 | 从ASS文件内容中解析视频宽度和各样式的字体大小 27 | 28 | Returns: 29 | tuple: (视频宽度, {样式名: 
字体大小}) 30 | """ 31 | # 获取视频宽度 32 | play_res_x = 1280 # 默认宽度 33 | font_sizes = {"Default": 40} # 默认字体大小 34 | 35 | # 查找视频宽度 36 | res_x_match = re.search(r"PlayResX:\s*(\d+)", ass_content) 37 | if res_x_match: 38 | play_res_x = int(res_x_match.group(1)) 39 | 40 | # 查找所有样式的字体大小 41 | style_section = re.search(r"\[V4\+ Styles\].*?\[", ass_content, re.DOTALL) 42 | if style_section: 43 | style_content = style_section.group(0) 44 | 45 | # 获取Format行定义的字段顺序 46 | format_match = re.search(r"Format:(.*?)$", style_content, re.MULTILINE) 47 | if format_match: 48 | # 解析字段名称 49 | fields = [f.strip() for f in format_match.group(1).split(",")] 50 | # 找到Fontsize字段的位置 51 | try: 52 | fontsize_index = fields.index("Fontsize") 53 | name_index = fields.index("Name") 54 | 55 | # 使用正确的字段位置来匹配样式行 56 | for style_line in re.finditer( 57 | r"Style:(.*?)$", style_content, re.MULTILINE 58 | ): 59 | style_parts = [p.strip() for p in style_line.group(1).split(",")] 60 | if len(style_parts) >= max(fontsize_index + 1, name_index + 1): 61 | style_name = style_parts[name_index] 62 | font_size = int(style_parts[fontsize_index]) 63 | font_sizes[style_name] = font_size 64 | except ValueError: 65 | pass 66 | 67 | return play_res_x, font_sizes 68 | 69 | 70 | def estimate_text_width(text: str, font_size: int) -> int: 71 | """ 72 | 估算文本宽度(像素) 73 | 74 | Args: 75 | text: 文本内容 76 | font_size: 字体大小 77 | 78 | Returns: 79 | int: 估算的文本宽度(像素) 80 | """ 81 | # CJK字符通常是方形,宽度约等于字体大小 82 | # 英文字符宽度约为字体大小的一半 83 | width = 0 84 | for char in text: 85 | if re.match(r"[\u4e00-\u9fff\u3040-\u309f\u30a0-\u30ff\uac00-\ud7af]", char): 86 | width += font_size 87 | else: 88 | width += font_size * 0.5 89 | return int(width) 90 | 91 | 92 | def auto_wrap_text(text: str, max_width: int, font_size: int) -> str: 93 | """ 94 | 自动为文本添加换行符 95 | 96 | Args: 97 | text: 原始文本 98 | max_width: 最大宽度(像素) 99 | font_size: 字体大小 100 | 101 | Returns: 102 | str: 处理后的文本 103 | """ 104 | if not text or "\\N" in text: # 如果文本为空或已有换行符,则不处理 105 | return text 106 | 107 | # 如果不是主要由CJK字符组成,则不处理 108 | if not is_mainly_cjk(text): 109 | return text 110 | 111 | # 分割文本为字符列表 112 | chars = list(text) 113 | current_line = "" 114 | result = [] 115 | 116 | for char in chars: 117 | temp_line = current_line + char 118 | # 计算当前行宽度 119 | line_width = estimate_text_width(temp_line, font_size) 120 | 121 | if line_width > max_width: 122 | result.append(current_line) 123 | current_line = char 124 | else: 125 | current_line = temp_line 126 | 127 | if current_line: 128 | result.append(current_line) 129 | 130 | return "\\N".join(result) 131 | 132 | 133 | def auto_wrap_ass_file( 134 | input_file: str, 135 | output_file: str = None, 136 | video_width: Optional[int] = None, 137 | video_height: Optional[int] = None, 138 | ): 139 | """ 140 | 处理ASS文件,为文本添加自动换行 141 | 142 | Args: 143 | input_file: 输入ASS文件路径 144 | output_file: 输出ASS文件路径,如果为None则覆盖输入文件 145 | video_width: 视频宽度,如果提供则覆盖ASS文件中的设置 146 | video_height: 视频高度,如果提供则覆盖ASS文件中的设置 147 | """ 148 | if output_file is None: 149 | output_file = input_file 150 | 151 | # 读取ASS文件 152 | with open(input_file, "r", encoding="utf-8") as f: 153 | ass_content = f.read() 154 | 155 | # 解析字体大小(在修改分辨率之前) 156 | play_res_x, font_sizes = parse_ass_info(ass_content) 157 | 158 | # 如果没有提供视频宽度,使用ASS文件中的宽度 159 | if video_width is None: 160 | video_width = play_res_x 161 | 162 | # 计算最大文本宽度(考虑边距) 163 | max_text_width = int(video_width * 0.99) # 留出1%的边距 164 | 165 | # 处理对话行 166 | def process_dialogue_line(match): 167 | full_line = match.group(0) 168 | 169 | # 提取样式名 170 | style_pattern = 
r"Dialogue:[^,]*,[^,]*,[^,]*,([^,]*)," 171 | style_match = re.search(style_pattern, full_line) 172 | style_name = style_match.group(1).strip() if style_match else "Default" 173 | 174 | # 获取对应样式的字体大小 175 | font_size = font_sizes.get(style_name, font_sizes["Default"]) 176 | 177 | # 获取文本内容 178 | text_part = match.group(1) 179 | 180 | # 处理文本部分 181 | wrapped_text = auto_wrap_text(text_part, max_text_width, font_size) 182 | 183 | # 替换原文本 184 | return full_line.replace(text_part, wrapped_text) 185 | 186 | # 使用正则表达式匹配并处理对话行 187 | pattern = r"Dialogue:[^,]*(?:,[^,]*){8},(.*?)$" 188 | processed_content = re.sub( 189 | pattern, process_dialogue_line, ass_content, flags=re.MULTILINE 190 | ) 191 | 192 | # 保存处理后的文件 193 | with open(output_file, "w", encoding="utf-8") as f: 194 | f.write(processed_content) 195 | 196 | return output_file 197 | -------------------------------------------------------------------------------- /app/core/utils/get_subtitle_style.py: -------------------------------------------------------------------------------- 1 | from pathlib import Path 2 | 3 | from app.config import SUBTITLE_STYLE_PATH 4 | 5 | 6 | def get_subtitle_style(style_name: str) -> str: 7 | """获取字幕样式内容 8 | 9 | Args: 10 | style_name: 样式名称 11 | 12 | Returns: 13 | str: 样式内容字符串,如果样式文件不存在则返回None 14 | """ 15 | style_path = SUBTITLE_STYLE_PATH / f"{style_name}.txt" 16 | if style_path.exists(): 17 | return style_path.read_text(encoding="utf-8") 18 | return None 19 | -------------------------------------------------------------------------------- /app/core/utils/logger.py: -------------------------------------------------------------------------------- 1 | import logging 2 | import logging.handlers 3 | from pathlib import Path 4 | 5 | from urllib3.exceptions import InsecureRequestWarning 6 | 7 | from ...config import LOG_LEVEL, LOG_PATH 8 | 9 | 10 | def setup_logger( 11 | name: str, 12 | level: int = LOG_LEVEL, 13 | info_fmt: str = "%(message)s", # INFO级别使用简化格式 14 | default_fmt: str = "%(asctime)s - %(name)s - %(levelname)s - %(message)s", # 其他级别使用详细格式 15 | datefmt: str = "%Y-%m-%d %H:%M:%S", 16 | log_file: str = str(LOG_PATH / "app.log"), 17 | console_output: bool = True, 18 | ) -> logging.Logger: 19 | """ 20 | 创建并配置一个日志记录器,INFO级别使用简化格式。 21 | 22 | 参数: 23 | - name: 日志记录器的名称 24 | - level: 日志级别 25 | - info_fmt: INFO级别的日志格式字符串 26 | - default_fmt: 其他级别的日志格式字符串 27 | - datefmt: 时间格式字符串 28 | - log_file: 日志文件路径 29 | """ 30 | 31 | logger = logging.getLogger(name) 32 | logger.setLevel(level) 33 | 34 | if not logger.handlers: 35 | # 创建级别特定的格式化器 36 | class LevelSpecificFormatter(logging.Formatter): 37 | def format(self, record): 38 | if record.levelno == logging.INFO: 39 | self._style._fmt = info_fmt 40 | else: 41 | self._style._fmt = default_fmt 42 | return super().format(record) 43 | 44 | level_formatter = LevelSpecificFormatter(default_fmt, datefmt=datefmt) 45 | 46 | # 只在console_output为True时添加控制台处理器 47 | if console_output: 48 | console_handler = logging.StreamHandler() 49 | console_handler.setLevel(level) 50 | console_handler.setFormatter(level_formatter) 51 | logger.addHandler(console_handler) 52 | 53 | # 文件处理器 54 | if log_file: 55 | Path(log_file).parent.mkdir(parents=True, exist_ok=True) 56 | file_handler = logging.handlers.RotatingFileHandler( 57 | log_file, maxBytes=10 * 1024 * 1024, backupCount=5, encoding="utf-8" 58 | ) 59 | file_handler.setLevel(level) 60 | file_handler.setFormatter(level_formatter) 61 | logger.addHandler(file_handler) 62 | 63 | # 设置特定库的日志级别为ERROR以减少日志噪音 64 | error_loggers = [ 65 | "urllib3", 66 | 
"requests", 67 | "openai", 68 | "httpx", 69 | "httpcore", 70 | "ssl", 71 | "certifi", 72 | ] 73 | for lib in error_loggers: 74 | logging.getLogger(lib).setLevel(logging.ERROR) 75 | 76 | return logger 77 | -------------------------------------------------------------------------------- /app/core/utils/optimize_subtitles.py: -------------------------------------------------------------------------------- 1 | import re 2 | 3 | 4 | def count_words(text: str) -> int: 5 | """ 6 | 统计文本中的中文和英文词数。 7 | 8 | 对于英文,通过空格和标点符号分割单词。 9 | 对于中文,每个汉字视为一个词。 10 | 11 | 参数: 12 | text (str): 要统计的文本。 13 | 14 | 返回: 15 | int: 文本中的总词数。 16 | """ 17 | # 使用正则表达式统计英文单词和中文字符 18 | english_words = re.findall(r'\b\w+\b', text) 19 | chinese_chars = re.findall(r'[\u4e00-\u9fff]', text) 20 | return len(english_words) + len(chinese_chars) 21 | 22 | 23 | def optimize_subtitles(asr_data): 24 | """ 25 | 优化字幕分割,合并词数少于等于4且时间相邻的段落。 26 | 27 | 参数: 28 | asr_data (ASRData): 包含字幕段落的 ASRData 对象。 29 | """ 30 | segments = asr_data.segments 31 | for i in range(len(segments) - 1, 0, -1): 32 | seg = segments[i] 33 | prev_seg = segments[i - 1] 34 | 35 | # 判断前一个段落的词数是否小于等于4且时间相邻 36 | if count_words(prev_seg.text) <= 4 and abs(seg.start_time - prev_seg.end_time) < 100 and count_words(seg.text) <= 10: 37 | asr_data.merge_with_next_segment(i - 1) 38 | -------------------------------------------------------------------------------- /app/core/utils/subtitle_preview.py: -------------------------------------------------------------------------------- 1 | import os 2 | import subprocess 3 | from pathlib import Path 4 | from typing import Optional, Tuple 5 | 6 | from app.config import CACHE_PATH, RESOURCE_PATH 7 | 8 | from .logger import setup_logger 9 | from .ass_auto_wrap import auto_wrap_ass_file 10 | from .video_utils import get_video_info 11 | 12 | logger = setup_logger("subtitle_preview") 13 | 14 | SCRIPT_INFO_TEMPLATE = """[Script Info] 15 | ; Script generated by VideoSubtitleEditor 16 | ; https://github.com/WEIFENG2333 17 | ScriptType: v4.00+ 18 | PlayResX: {video_width} 19 | PlayResY: {video_height} 20 | 21 | 22 | {style_str} 23 | 24 | [Events] 25 | Format: Layer, Start, End, Style, Name, MarginL, MarginR, MarginV, Effect, Text 26 | {dialogue} 27 | """ 28 | 29 | ASS_TEMP_FILENAME = CACHE_PATH / "preview.ass" # 预览的临时 ASS 文件路径 30 | PREVIEW_IMAGE_FILENAME = CACHE_PATH / "preview.png" # 预览的图片路径 31 | DEFAULT_BG_PATH = RESOURCE_PATH / "assets" / "default_bg.png" 32 | 33 | 34 | def run_subprocess(command: list): 35 | """运行子进程命令,并处理异常""" 36 | try: 37 | subprocess.run( 38 | command, 39 | check=True, 40 | capture_output=True, 41 | creationflags=subprocess.CREATE_NO_WINDOW if os.name == "nt" else 0, 42 | ) 43 | except subprocess.CalledProcessError as e: 44 | logger.error(f"Subprocess error: {e.stderr}") 45 | 46 | 47 | def generate_ass_file( 48 | style_str: str, 49 | preview_text: Tuple[str, Optional[str]], 50 | video_width: int = 1280, 51 | video_height: int = 720, 52 | ) -> str: 53 | """生成临时 ASS 文件""" 54 | original_text, translate_text = preview_text 55 | 56 | dialogue = ( 57 | [ 58 | f"Dialogue: 0,0:00:00.00,0:00:01.00,Secondary,,0,0,0,,{translate_text}", 59 | f"Dialogue: 0,0:00:00.00,0:00:01.00,Default,,0,0,0,,{original_text}", 60 | ] 61 | if translate_text 62 | else [f"Dialogue: 0,0:00:00.00,0:00:01.00,Default,,0,0,0,,{original_text}"] 63 | ) 64 | 65 | ass_content = SCRIPT_INFO_TEMPLATE.format( 66 | style_str=style_str, 67 | dialogue=os.linesep.join(dialogue), 68 | video_width=video_width, 69 | video_height=video_height, 70 | ) 71 | 
ASS_TEMP_FILENAME.parent.mkdir(parents=True, exist_ok=True) 72 | ASS_TEMP_FILENAME.write_text(ass_content, encoding="utf-8") 73 | return str(ASS_TEMP_FILENAME) 74 | 75 | 76 | def ensure_background(bg_path: Path) -> Path: 77 | """确保背景图片存在,若不存在则创建默认黑色背景""" 78 | if not bg_path.is_file() or not bg_path.exists(): 79 | if not Path(DEFAULT_BG_PATH).exists(): 80 | DEFAULT_BG_PATH.parent.mkdir(parents=True, exist_ok=True) 81 | run_subprocess( 82 | [ 83 | "ffmpeg", 84 | "-f", 85 | "lavfi", 86 | "-i", 87 | "color=c=black:s=1920x1080", 88 | "-frames:v", 89 | "1", 90 | str(DEFAULT_BG_PATH), 91 | ] 92 | ) 93 | return Path(DEFAULT_BG_PATH) 94 | return bg_path 95 | 96 | 97 | def generate_preview( 98 | style_str: str, 99 | preview_text: Tuple[str, Optional[str]], 100 | bg_path: str, 101 | width: int, 102 | height: int, 103 | ) -> str: 104 | """生成预览图片""" 105 | 106 | ass_file = generate_ass_file(style_str, preview_text, width, height) 107 | ass_file = auto_wrap_ass_file(ass_file) 108 | bg_path = ensure_background(Path(bg_path)) 109 | 110 | output_path = PREVIEW_IMAGE_FILENAME 111 | output_path.parent.mkdir(parents=True, exist_ok=True) 112 | 113 | ass_file_processed = ass_file.replace("\\", "/").replace(":", r"\\:") 114 | cmd = [ 115 | "ffmpeg", 116 | "-y", 117 | "-i", 118 | str(bg_path), 119 | "-vf", 120 | f"ass={ass_file_processed}", 121 | "-frames:v", 122 | "1", 123 | str(output_path), 124 | ] 125 | run_subprocess(cmd) 126 | return str(output_path) 127 | 128 | 129 | if __name__ == "__main__": 130 | style_str = """[V4+ Styles] 131 | Format: Name,Fontname,Fontsize,PrimaryColour,SecondaryColour,OutlineColour,BackColour,Bold,Italic,Underline,StrikeOut,ScaleX,ScaleY,Spacing,Angle,BorderStyle,Outline,Shadow,Alignment,MarginL,MarginR,MarginV,Encoding 132 | Style: Default,微软雅黑,70,&H00eb7f33,&H000000FF,&H00000000,&H00000000,-1,0,0,0,100,100,2.0,0,1,2.0,0,2,10,10,10,1 133 | Style: Translate,微软雅黑,40,&H00eff0f3,&H000000FF,&H00000000,&H00000000,-1,0,0,0,100,100,0.0,0,1,1.0,0,2,10,10,10,1 134 | """ 135 | bg_path = r"C:\Users\weifeng\Pictures\Animated_character_spraying_liquid.jpg" 136 | preview_text = ("Hello, world!", "你好,世界!") 137 | print(generate_preview(style_str, preview_text, bg_path)) 138 | -------------------------------------------------------------------------------- /app/core/utils/test_opanai.py: -------------------------------------------------------------------------------- 1 | import openai 2 | 3 | 4 | def test_openai(base_url, api_key, model): 5 | """ 6 | 这是一个测试OpenAI API的函数。 7 | 它使用指定的API设置与OpenAI的GPT模型进行对话。 8 | 9 | 参数: 10 | user_message (str): 用户输入的消息 11 | 12 | 返回: 13 | bool: 是否成功 14 | str: 错误信息或者AI助手的回复 15 | """ 16 | try: 17 | # 创建OpenAI客户端并发送请求到OpenAI API 18 | response = openai.OpenAI( 19 | base_url=base_url, api_key=api_key, timeout=10 20 | ).chat.completions.create( 21 | model=model, 22 | messages=[ 23 | {"role": "system", "content": "You are a helpful assistant."}, 24 | {"role": "user", "content": "Hello!"}, 25 | ], 26 | max_tokens=100, 27 | timeout=10, 28 | ) 29 | # 返回AI的回复 30 | return True, str(response.choices[0].message.content) 31 | except Exception as e: 32 | return False, str(e) 33 | 34 | 35 | def get_openai_models(base_url, api_key): 36 | try: 37 | # 创建OpenAI客户端并获取模型列表 38 | models = openai.OpenAI( 39 | base_url=base_url, api_key=api_key, timeout=5 40 | ).models.list() 41 | 42 | # 根据不同模型设置权重进行排序 43 | def get_model_weight(model_name): 44 | model_name = model_name.lower() 45 | if model_name.startswith(("gpt-4o", "claude-3-5")): 46 | return 10 47 | elif model_name.startswith("gpt-4"): 48 | 
return 5 49 | elif model_name.startswith("claude-3"): 50 | return 6 51 | elif model_name.startswith(("deepseek", "glm")): 52 | return 3 53 | return 0 54 | 55 | sorted_models = sorted( 56 | [model.id for model in models], key=lambda x: (-get_model_weight(x), x) 57 | ) 58 | return sorted_models 59 | except Exception: 60 | return [] 61 | -------------------------------------------------------------------------------- /app/thread/file_download_thread.py: -------------------------------------------------------------------------------- 1 | import os 2 | import platform 3 | import shutil 4 | import subprocess 5 | 6 | from PyQt5.QtCore import Qt, QThread, pyqtSignal 7 | 8 | from app.config import CACHE_PATH 9 | from app.core.utils.logger import setup_logger 10 | 11 | logger = setup_logger("download_thread") 12 | 13 | class FileDownloadThread(QThread): 14 | progress = pyqtSignal(float, str) 15 | finished = pyqtSignal() 16 | error = pyqtSignal(str) 17 | 18 | def __init__(self, url, save_path): 19 | super().__init__() 20 | self.url = url 21 | self.save_path = save_path 22 | self.process = None 23 | 24 | def run(self): 25 | try: 26 | # 创建缓存下载目录 27 | temp_dir = CACHE_PATH / "aria2c_download_cache" 28 | temp_dir.mkdir(parents=True, exist_ok=True) 29 | temp_file = temp_dir / os.path.basename(self.save_path) 30 | 31 | # 检查是否存在未完成的下载文件 32 | if temp_file.exists(): 33 | logger.info(f"发现未完成的下载文件: {temp_file}") 34 | self.progress.emit(0, self.tr("正在连接...")) 35 | cmd = [ 36 | 'aria2c', 37 | '--show-console-readout=false', 38 | '--summary-interval=1', 39 | '-x2', 40 | '-s2', 41 | '--connect-timeout=10', # 连接超时时间10秒 42 | '--timeout=10', # 数据传输超时时间10秒 43 | '--max-tries=2', # 最大重试次数2次 44 | '--retry-wait=1', # 重试等待时间1秒 45 | '--continue=true', # 开启断点续传 46 | '--auto-file-renaming=false', 47 | '--allow-overwrite=true', 48 | '--check-certificate=false', f'--dir={temp_dir}', 49 | f'--out={temp_file.name}', 50 | self.url 51 | ] 52 | 53 | # 根据操作系统设置不同的 subprocess 参数 54 | subprocess_args = { 55 | 'stdout': subprocess.PIPE, 56 | 'stderr': subprocess.PIPE, 57 | 'universal_newlines': True, 58 | 'encoding': 'utf-8' 59 | } 60 | 61 | # 仅在 Windows 系统上添加 CREATE_NO_WINDOW 标志 62 | if platform.system() == 'Windows': 63 | subprocess_args['creationflags'] = subprocess.CREATE_NO_WINDOW 64 | 65 | logger.info("运行下载命令: %s", " ".join(cmd)) 66 | 67 | self.process = subprocess.Popen( 68 | cmd, 69 | **subprocess_args 70 | ) 71 | 72 | while True: 73 | if self.process.poll() is not None: 74 | break 75 | 76 | line = self.process.stdout.readline() 77 | 78 | if '[#' in line and ']' in line: 79 | try: 80 | # 解析类似 "[#40ca1b 2.4MiB/74MiB(3%) CN:2 DL:3.9MiB ETA:18s]" 的格式 81 | progress_part = line.split('(')[1].split(')')[0] 82 | percent = float(progress_part.strip('%')) 83 | 84 | # 提取下载速度和剩余时间 85 | speed = "0" 86 | eta = "" 87 | if "DL:" in line: 88 | speed = line.split("DL:")[1].split()[0] 89 | if "ETA:" in line: 90 | eta = line.split("ETA:")[1].split(']')[0] 91 | status_msg = f"{self.tr('速度')}: {speed}/s, {self.tr('剩余时间')}: {eta}" 92 | self.progress.emit(percent, status_msg) 93 | except Exception as e: 94 | pass 95 | 96 | if self.process.returncode == 0: 97 | # 下载完成后移动文件到目标位置 98 | os.makedirs(os.path.dirname(self.save_path), exist_ok=True) 99 | shutil.move(str(temp_file), self.save_path) 100 | self.finished.emit() 101 | else: 102 | error = self.process.stderr.read() 103 | logger.error("下载失败: %s", error) 104 | self.error.emit(f"{self.tr('下载失败')}: {error}") 105 | 106 | except Exception as e: 107 | logger.error("下载异常: %s", str(e)) 108 | 
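            # The partially downloaded file is left in the aria2c_download_cache directory;
            # because aria2c is invoked with --continue=true, a later attempt for the same
            # file can resume from the data already on disk instead of starting over.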
self.error.emit(str(e)) 109 | 110 | def stop(self): 111 | if self.process: 112 | self.process.terminate() 113 | self.process.wait() 114 | -------------------------------------------------------------------------------- /app/thread/modelscope_download_thread.py: -------------------------------------------------------------------------------- 1 | import re 2 | import sys 3 | 4 | from modelscope.hub.snapshot_download import snapshot_download 5 | from PyQt5.QtCore import QThread, pyqtSignal 6 | 7 | 8 | class ModelscopeDownloadThread(QThread): 9 | progress = pyqtSignal(int, str) # 进度值和状态消息 10 | error = pyqtSignal(str) # 错误信息 11 | 12 | def __init__(self, model_id, save_path): 13 | super().__init__() 14 | self.model_id = model_id 15 | self.save_path = save_path 16 | self._original_stdout = None 17 | self._original_stderr = None 18 | 19 | def custom_write(self, text): 20 | # 解析进度信息 21 | if '%|' in text: 22 | try: 23 | # 提取百分比 24 | match = re.search(r'(\d+)%', text) 25 | if match: 26 | percentage = int(match.group(1)) 27 | # 提取文件名 28 | file_match = re.search(r'\[(.*?)\]:', text) 29 | if file_match: 30 | filename = file_match.group(1) 31 | self.progress.emit(percentage, f"正在下载 {filename}: {percentage}%") 32 | except Exception: 33 | pass 34 | # 写入原始stdout 35 | self._original_stdout.write(text) 36 | self._original_stdout.flush() 37 | 38 | def run(self): 39 | try: 40 | # 发送开始下载信号 41 | self.progress.emit(0, "开始下载...") 42 | 43 | # 保存原始stdout 44 | self._original_stdout = sys.stdout 45 | self._original_stderr = sys.stderr 46 | 47 | # 创建自定义输出对象 48 | class CustomOutput: 49 | def __init__(self, callback): 50 | self.callback = callback 51 | def write(self, text): 52 | self.callback(text) 53 | def flush(self): 54 | pass 55 | 56 | # 重定向输出 57 | sys.stdout = CustomOutput(self.custom_write) 58 | sys.stderr = CustomOutput(self.custom_write) 59 | 60 | try: 61 | # 下载模型 62 | snapshot_download( 63 | self.model_id, 64 | local_dir=self.save_path 65 | ) 66 | finally: 67 | # 恢复原始输出 68 | sys.stdout = self._original_stdout 69 | sys.stderr = self._original_stderr 70 | 71 | # 发送完成信号 72 | self.progress.emit(100, "下载完成") 73 | 74 | except Exception as e: 75 | self.error.emit(str(e)) 76 | 77 | 78 | if __name__ == "__main__": 79 | import sys 80 | 81 | from PyQt5.QtCore import QCoreApplication 82 | app = QCoreApplication(sys.argv) 83 | model_id = "pengzhendong/faster-whisper-tiny" 84 | save_path = r"models\faster-whisper-tiny" # 保存到当前目录下的models文件夹 85 | downloader = ModelscopeDownloadThread(model_id, save_path) 86 | def on_progress(percentage, message): 87 | print(f"进度: {message}") 88 | def on_error(error_msg): 89 | print(f"错误: {error_msg}") 90 | app.quit() 91 | def on_finished(): 92 | print("下载完成!") 93 | app.quit() 94 | 95 | downloader.progress.connect(on_progress) 96 | downloader.error.connect(on_error) 97 | downloader.finished.connect(on_finished) 98 | 99 | # 开始下载 100 | print(f"开始下载模型 {model_id}") 101 | downloader.start() 102 | 103 | # 运行事件循环 104 | sys.exit(app.exec_()) 105 | 106 | 107 | -------------------------------------------------------------------------------- /app/thread/subtitle_pipeline_thread.py: -------------------------------------------------------------------------------- 1 | import datetime 2 | 3 | from PyQt5.QtCore import QThread, pyqtSignal 4 | 5 | from app.core.entities import Task 6 | from app.core.utils.logger import setup_logger 7 | 8 | from .subtitle_thread import SubtitleThread 9 | from .transcript_thread import TranscriptThread 10 | from .video_synthesis_thread import VideoSynthesisThread 11 | 12 | logger = 
setup_logger("subtitle_pipeline_thread") 13 | 14 | class SubtitlePipelineThread(QThread): 15 | """字幕处理全流程线程,包含: 16 | 1. 转录生成字幕 17 | 2. 字幕优化/翻译 18 | 3. 视频合成 19 | """ 20 | progress = pyqtSignal(int, str) # 进度值, 进度描述 21 | finished = pyqtSignal(Task) 22 | error = pyqtSignal(str) 23 | 24 | def __init__(self, task: Task): 25 | super().__init__() 26 | self.task = task 27 | self.has_error = False 28 | 29 | def run(self): 30 | try: 31 | def handle_error(error_msg): 32 | logger.error("pipeline 发生错误: %s", error_msg) 33 | self.has_error = True 34 | self.error.emit(error_msg) 35 | 36 | # 1. 转录生成字幕 37 | # self.task.status = Task.Status.TRANSCRIBING 38 | logger.info(f"\n===========任务开始===========") 39 | logger.info(f"时间:{datetime.datetime.now()}") 40 | logger.info("开始转录") 41 | self.progress.emit(0, self.tr("开始转录")) 42 | transcript_thread = TranscriptThread(self.task) 43 | transcript_thread.progress.connect(lambda value, msg: self.progress.emit(int(value * 0.4), msg)) 44 | transcript_thread.error.connect(handle_error) 45 | transcript_thread.run() 46 | 47 | if self.has_error: 48 | logger.info("转录过程中发生错误,终止流程") 49 | return 50 | 51 | # 2. 字幕优化/翻译 52 | # self.task.status = Task.Status.OPTIMIZING 53 | self.progress.emit(40, self.tr("开始优化字幕")) 54 | optimization_thread = SubtitleThread(self.task) 55 | optimization_thread.progress.connect(lambda value, msg: self.progress.emit(int(40 + value * 0.2), msg)) 56 | optimization_thread.error.connect(handle_error) 57 | optimization_thread.run() 58 | 59 | if self.has_error: 60 | logger.info("字幕优化过程中发生错误,终止流程") 61 | return 62 | 63 | # 3. 视频合成 64 | # self.task.status = Task.Status.GENERATING 65 | self.progress.emit(80, self.tr("开始合成视频")) 66 | synthesis_thread = VideoSynthesisThread(self.task) 67 | synthesis_thread.progress.connect(lambda value, msg: self.progress.emit(int(70 + value * 0.3), msg)) 68 | synthesis_thread.error.connect(handle_error) 69 | synthesis_thread.run() 70 | 71 | if self.has_error: 72 | logger.info("视频合成过程中发生错误,终止流程") 73 | return 74 | 75 | self.task.status = Task.Status.COMPLETED 76 | logger.info("处理完成") 77 | self.progress.emit(100, self.tr("处理完成")) 78 | self.finished.emit(self.task) 79 | 80 | except Exception as e: 81 | self.task.status = Task.Status.FAILED 82 | logger.exception("处理失败: %s", str(e)) 83 | self.error.emit(str(e)) 84 | -------------------------------------------------------------------------------- /app/thread/transcript_thread.py: -------------------------------------------------------------------------------- 1 | import datetime 2 | import os 3 | import tempfile 4 | from pathlib import Path 5 | 6 | from PyQt5.QtCore import QThread, pyqtSignal 7 | 8 | from app.core.bk_asr import transcribe 9 | from app.core.entities import TranscribeTask, TranscribeModelEnum 10 | from app.core.utils.logger import setup_logger 11 | from app.core.utils.video_utils import video2audio 12 | from app.core.storage.cache_manager import ServiceUsageManager 13 | from app.core.storage.database import DatabaseManager 14 | from app.config import CACHE_PATH 15 | 16 | logger = setup_logger("transcript_thread") 17 | 18 | 19 | class TranscriptThread(QThread): 20 | finished = pyqtSignal(TranscribeTask) 21 | progress = pyqtSignal(int, str) 22 | error = pyqtSignal(str) 23 | MAX_DAILY_ASR_CALLS = 40 24 | 25 | def __init__(self, task: TranscribeTask): 26 | super().__init__() 27 | self.task = task 28 | # 初始化服务管理器 29 | db_manager = DatabaseManager(CACHE_PATH) 30 | self.service_manager = ServiceUsageManager(db_manager) 31 | 32 | def run(self): 33 | temp_file = None 34 | try: 35 | 
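            # Overall flow of a transcription run:
            #   1. verify the source video exists and, for the free BIJIAN/JIANYING services,
            #      check the daily ASR usage limit (MAX_DAILY_ASR_CALLS);
            #   2. if a previously downloaded subtitle file is found, reuse it and return early;
            #   3. extract a temporary .wav with video2audio() (progress ~5%);
            #   4. run transcribe() with the configured model, mapping its callback onto the
            #      20-100% progress range, then save the result as an SRT file.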
logger.info(f"\n===========转录任务开始===========") 36 | logger.info(f"时间:{datetime.datetime.now()}") 37 | 38 | # 检查是否已经存在字幕文件 39 | # if Path(self.task.output_path).exists(): 40 | # logger.info("字幕文件已存在,跳过转录") 41 | # self.progress.emit(100, self.tr("字幕已存在")) 42 | # self.finished.emit(self.task) 43 | # return 44 | 45 | # 检查视频文件是否存在 46 | video_path = Path(self.task.file_path) 47 | if not video_path.exists(): 48 | logger.error(f"视频文件不存在:{video_path}") 49 | raise ValueError(self.tr("视频文件不存在")) 50 | 51 | # 对于BIJIAN和JIANYING模型,检查服务使用限制 52 | if self.task.transcribe_config.transcribe_model in [ 53 | TranscribeModelEnum.BIJIAN, 54 | TranscribeModelEnum.JIANYING, 55 | ]: 56 | if not self.service_manager.check_service_available( 57 | "asr", self.MAX_DAILY_ASR_CALLS 58 | ): 59 | raise Exception( 60 | self.tr("公益ASR服务已达到每日使用限制,建议使用本地转录") 61 | ) 62 | 63 | # 检查是否存在下载的字幕文件(对于视频url的任务,前面可能已下载字幕文件) 64 | if self.task.need_next_task: 65 | subtitle_dir = Path(self.task.file_path).parent / "subtitle" 66 | downloaded_subtitles = ( 67 | list(subtitle_dir.glob("【下载字幕】*")) 68 | if subtitle_dir.exists() 69 | else [] 70 | ) 71 | if downloaded_subtitles: 72 | subtitle_file = downloaded_subtitles[0] 73 | self.task.output_path = str( 74 | subtitle_file 75 | ) # 设置task输出路径为下载的字幕文件 76 | logger.info( 77 | f"字幕文件已下载,跳过转录。找到下载的字幕文件:{subtitle_file}" 78 | ) 79 | self.progress.emit(100, self.tr("字幕已下载")) 80 | self.finished.emit(self.task) 81 | return 82 | 83 | self.progress.emit(5, self.tr("转换音频中")) 84 | logger.info(f"开始转换音频") 85 | 86 | # 转换音频文件 87 | temp_dir = tempfile.gettempdir() 88 | temp_file = tempfile.NamedTemporaryFile( 89 | suffix=".wav", dir=temp_dir, delete=False 90 | ) 91 | temp_file.close() 92 | is_success = video2audio(str(video_path), output=temp_file.name) 93 | if not is_success: 94 | logger.error("音频转换失败") 95 | raise RuntimeError(self.tr("音频转换失败")) 96 | 97 | self.progress.emit(20, self.tr("语音转录中")) 98 | logger.info("开始语音转录") 99 | 100 | # 进行转录,并回调进度。 (传入 transcribe_config) 101 | asr_data = transcribe( 102 | temp_file.name, 103 | self.task.transcribe_config, 104 | callback=self.progress_callback, 105 | ) 106 | 107 | # 如果是BIJIAN或JIANYING模型,增加使用次数 108 | if self.task.transcribe_config.transcribe_model in [ 109 | TranscribeModelEnum.BIJIAN, 110 | TranscribeModelEnum.JIANYING, 111 | ]: 112 | self.service_manager.increment_usage("asr", self.MAX_DAILY_ASR_CALLS) 113 | 114 | # 保存字幕文件 115 | output_path = Path(self.task.output_path) 116 | output_path.parent.mkdir(parents=True, exist_ok=True) 117 | asr_data.to_srt(save_path=str(output_path)) 118 | logger.info("字幕文件已保存到: %s", str(output_path)) 119 | 120 | self.progress.emit(100, self.tr("转录完成")) 121 | self.finished.emit(self.task) 122 | except Exception as e: 123 | logger.exception("转录过程中发生错误: %s", str(e)) 124 | self.error.emit(str(e)) 125 | self.progress.emit(100, self.tr("转录失败")) 126 | finally: 127 | # 清理临时文件 128 | if temp_file and os.path.exists(temp_file.name): 129 | try: 130 | os.unlink(temp_file.name) 131 | except Exception as e: 132 | logger.warning(f"清理临时文件失败: {e}") 133 | 134 | def progress_callback(self, value, message): 135 | progress = min(20 + (value * 0.8), 100) 136 | self.progress.emit(int(progress), message) 137 | -------------------------------------------------------------------------------- /app/thread/version_manager_thread.py: -------------------------------------------------------------------------------- 1 | # coding: utf-8 2 | import hashlib 3 | import logging 4 | import os 5 | import re 6 | import subprocess 7 | import sys 8 | import time 9 | import uuid 
10 | from datetime import datetime 11 | from pathlib import Path 12 | 13 | import requests 14 | from PyQt5.QtCore import QObject, QSettings, QVersionNumber, pyqtSignal 15 | 16 | from app.config import ROOT_PATH, VERSION 17 | from app.core.utils.logger import setup_logger 18 | 19 | # 配置日志 20 | logger = setup_logger("version_manager_thread") 21 | 22 | 23 | class VersionManager(QObject): 24 | """版本管理器""" 25 | 26 | # 定义信号 27 | newVersionAvailable = pyqtSignal(str, bool, str, str) 28 | announcementAvailable = pyqtSignal(str) 29 | checkCompleted = pyqtSignal() 30 | 31 | def __init__(self): 32 | super().__init__() 33 | self.currentVersion = VERSION 34 | self.latestVersion = VERSION 35 | self.versionPattern = re.compile(r"v(\d+)\.(\d+)\.(\d+)") 36 | self.updateInfo = "" 37 | self.forceUpdate = False 38 | self.downloadURL = "" 39 | self.announcement = {} 40 | self.history = [] 41 | 42 | # 修改 QSettings 的初始化方式,指定完整的组织和应用名称,并设置为 IniFormat 43 | self.settings = QSettings( 44 | QSettings.IniFormat, QSettings.UserScope, "VideoCaptioner", "VideoCaptioner" 45 | ) 46 | 47 | def getLatestVersionInfo(self): 48 | """获取最新版本信息""" 49 | url = "https://vc.bkfeng.top/api/version" 50 | headers = { 51 | "tdid": f"394{uuid.getnode():013d}", 52 | "app_version": VERSION, 53 | } 54 | try: 55 | response = requests.get(url, timeout=30, headers=headers) 56 | response.raise_for_status() 57 | except requests.RequestException as e: 58 | logger.info("Failed to fetch version info: %s") 59 | return {} 60 | 61 | # 解析 JSON 62 | data = response.json() 63 | 64 | # 解析版本 65 | version = data.get("version", self.currentVersion) 66 | match = self.versionPattern.search(version) 67 | if not match: 68 | version = self.currentVersion 69 | logger.warning( 70 | "Version pattern not matched, using current version: %s", 71 | self.currentVersion, 72 | ) 73 | 74 | self.latestVersion = version 75 | self.forceUpdate = data.get("force_update", False) 76 | self.updateInfo = data.get("update_info", "") 77 | self.downloadURL = data.get("download_url", "") 78 | self.announcement = data.get("announcement", {}) 79 | self.history = data.get("history", []) 80 | self.update_code = data.get("update_code", "") 81 | logger.info("Latest version info: %s", self.latestVersion) 82 | return data 83 | 84 | def execute_update_code(self, update_code: str) -> bool: 85 | """执行更新代码""" 86 | try: 87 | # 创建一个新的命名空间 88 | update_namespace = { 89 | "requests": requests, 90 | "subprocess": subprocess, 91 | "os": os, 92 | "time": time, 93 | "Path": Path, 94 | "ROOT_PATH": ROOT_PATH.parent, 95 | "logger": logger, 96 | "sys": sys, # 添加sys模块到命名空间 97 | } 98 | 99 | # 判断是否为base64编码 100 | try: 101 | import base64 102 | 103 | decoded_code = base64.b64decode(update_code).decode("utf-8") 104 | update_code = decoded_code 105 | except: 106 | pass 107 | 108 | # 执行更新下载 109 | exec(update_code, update_namespace) 110 | 111 | except Exception as e: 112 | logger.exception("执行更新代码失败: %s", str(e)) 113 | return False 114 | 115 | def hasNewVersion(self): 116 | """检查是否有新版本""" 117 | try: 118 | version_data = self.getLatestVersionInfo() 119 | if not version_data: 120 | return False 121 | except requests.RequestException: 122 | logger.exception("检查新版本时发生网络错误") 123 | return False 124 | 125 | # 检查历史版本中当前版本是否可用 126 | current_version_available = True 127 | for version_info in self.history: 128 | if version_info["version"] == self.currentVersion.lower(): 129 | if version_info["update_code"]: 130 | # 执行更新代码 131 | self.execute_update_code(version_info["update_code"]) 132 | current_version_available = 
version_info.get("available", True) 133 | break 134 | 135 | # 如果当前版本不可用,强制更新 136 | if not current_version_available: 137 | self.forceUpdate = True 138 | logger.info("当前版本不可用,设置为强制更新") 139 | 140 | latest_ver_num = QVersionNumber.fromString(self.latestVersion.split("v")[1]) 141 | current_ver_num = QVersionNumber.fromString(self.currentVersion.split("v")[1]) 142 | 143 | if latest_ver_num > current_ver_num: 144 | logger.info("New version available: %s", self.latestVersion) 145 | self.newVersionAvailable.emit( 146 | self.latestVersion, self.forceUpdate, self.updateInfo, self.downloadURL 147 | ) 148 | return True 149 | return False 150 | 151 | def checkAnnouncement(self): 152 | """检查公告是否需要显示""" 153 | ann = self.announcement 154 | if ann.get("enabled", False): 155 | content = ann.get("content", "") 156 | # 获取公告ID(使用内容的哈希值作为ID+当前日期) 157 | announcement_id = ( 158 | hashlib.md5(content.encode("utf-8")).hexdigest()[:10] 159 | + "_" 160 | + datetime.today().strftime("%Y-%m-%d") 161 | ) 162 | # 检查是否已经显示过 163 | if self.settings.value( 164 | f"announcement/shown_announcement_{announcement_id}", False, type=bool 165 | ): 166 | return 167 | start_date = datetime.strptime(ann.get("start_date"), "%Y-%m-%d").date() 168 | end_date = datetime.strptime(ann.get("end_date"), "%Y-%m-%d").date() 169 | today = datetime.today().date() 170 | if start_date <= today <= end_date: 171 | content = ann.get("content", "") 172 | # 标记该公告已显示 173 | self.settings.setValue( 174 | f"announcement/shown_announcement_{announcement_id}", True 175 | ) 176 | self.announcementAvailable.emit(content) 177 | logger.info("Announcement shown: %s", announcement_id) 178 | self.settings.sync() 179 | 180 | def checkNewVersionAnnouncement(self): 181 | """检查新版本公告是否需要显示""" 182 | # 获取当前版本的设置键 183 | version_key = f"version/shown_version_{self.latestVersion}" 184 | if not self.latestVersion == self.currentVersion: 185 | return 186 | # 检查是否已经显示过当前版本的公告 187 | if not self.settings.value(version_key, False, type=bool): 188 | # 标记该版本公告已显示 189 | self.settings.setValue(version_key, True) 190 | self.settings.sync() 191 | 192 | # 发送版本更新信息作为公告 193 | update_announcement = f"欢迎使用新版本 VideoCaptioner {self.currentVersion}\n\n更新内容:\n{self.updateInfo}" 194 | self.announcementAvailable.emit(update_announcement) 195 | logger.info( 196 | "New version announcement shown for version: %s", self.currentVersion 197 | ) 198 | 199 | def performCheck(self): 200 | """执行版本和公告检查""" 201 | try: 202 | self.hasNewVersion() 203 | self.checkNewVersionAnnouncement() # 添加新版本公告检查 204 | self.checkAnnouncement() 205 | self.checkCompleted.emit() 206 | except Exception as e: 207 | logger.exception("执行版本和公告检查失败: %s") 208 | -------------------------------------------------------------------------------- /app/thread/video_download_thread.py: -------------------------------------------------------------------------------- 1 | import os 2 | import re 3 | from pathlib import Path 4 | 5 | import requests 6 | import yt_dlp 7 | from PyQt5.QtCore import QThread, pyqtSignal 8 | 9 | from app.config import APPDATA_PATH 10 | from app.core.entities import VideoInfo 11 | from app.core.utils.logger import setup_logger 12 | 13 | logger = setup_logger("video_download_thread") 14 | 15 | 16 | class VideoDownloadThread(QThread): 17 | """视频下载线程类""" 18 | 19 | finished = pyqtSignal( 20 | str 21 | ) # 发送下载完成的信号(视频路径, 字幕路径, 缩略图路径, 视频信息) 22 | progress = pyqtSignal(int, str) # 发送下载进度的信号 23 | error = pyqtSignal(str) # 发送错误信息的信号 24 | 25 | def __init__(self, url: str, work_dir: str): 26 | super().__init__() 27 | self.url = url 
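        # work_dir is the parent download directory; download() later creates a per-video
        # folder under it named after the sanitized video title, with the media file at the
        # top level, auto-captions under subtitle/ (prefixed 【下载字幕】) and the thumbnail
        # saved alongside the video.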
28 | self.work_dir = work_dir 29 | 30 | def run(self): 31 | try: 32 | video_file_path, subtitle_file_path, thumbnail_file_path, info_dict = ( 33 | self.download() 34 | ) 35 | self.finished.emit(video_file_path) 36 | except Exception as e: 37 | logger.exception("下载视频失败: %s", str(e)) 38 | self.error.emit(str(e)) 39 | 40 | def progress_hook(self, d): 41 | """下载进度回调函数""" 42 | if d["status"] == "downloading": 43 | percent = d["_percent_str"] 44 | speed = d["_speed_str"] 45 | 46 | # 提取百分比和速度的纯文本 47 | clean_percent = ( 48 | percent.replace("\x1b[0;94m", "") 49 | .replace("\x1b[0m", "") 50 | .strip() 51 | .replace("%", "") 52 | ) 53 | clean_speed = speed.replace("\x1b[0;32m", "").replace("\x1b[0m", "").strip() 54 | 55 | self.progress.emit( 56 | int(float(clean_percent)), 57 | f"下载进度: {clean_percent}% 速度: {clean_speed}", 58 | ) 59 | 60 | def sanitize_filename(self, name: str, replacement: str = "_") -> str: 61 | """清理文件名中不允许的字符""" 62 | # 定义不允许的字符 63 | forbidden_chars = r'<>:"/\\|?*' 64 | 65 | # 替换不允许的字符 66 | sanitized = re.sub(f"[{re.escape(forbidden_chars)}]", replacement, name) 67 | 68 | # 移除控制字符 69 | sanitized = re.sub(r"[\0-\31]", "", sanitized) 70 | 71 | # 去除文件名末尾的空格和点 72 | sanitized = sanitized.rstrip(" .") 73 | 74 | # 限制文件名长度 75 | max_length = 255 76 | if len(sanitized) > max_length: 77 | base, ext = os.path.splitext(sanitized) 78 | base_max_length = max_length - len(ext) 79 | sanitized = base[:base_max_length] + ext 80 | 81 | # 处理Windows保留名称 82 | windows_reserved_names = { 83 | "CON", 84 | "PRN", 85 | "AUX", 86 | "NUL", 87 | "COM1", 88 | "COM2", 89 | "COM3", 90 | "COM4", 91 | "COM5", 92 | "COM6", 93 | "COM7", 94 | "COM8", 95 | "COM9", 96 | "LPT1", 97 | "LPT2", 98 | "LPT3", 99 | "LPT4", 100 | "LPT5", 101 | "LPT6", 102 | "LPT7", 103 | "LPT8", 104 | "LPT9", 105 | } 106 | name_without_ext = os.path.splitext(sanitized)[0].upper() 107 | if name_without_ext in windows_reserved_names: 108 | sanitized = f"{sanitized}_" 109 | 110 | # 如果文件名为空,返回默认名称 111 | if not sanitized: 112 | sanitized = "default_filename" 113 | 114 | return sanitized 115 | 116 | def download(self, need_subtitle: bool = True, need_thumbnail: bool = False): 117 | """下载视频""" 118 | logger.info("开始下载视频: %s", self.url) 119 | 120 | # 初始化 ydl 选项 121 | initial_ydl_opts = { 122 | "outtmpl": { 123 | "default": "%(title)s.%(ext)s", 124 | "subtitle": "【下载字幕】.%(ext)s", 125 | "thumbnail": "thumbnail", 126 | }, 127 | "format": "bestvideo[ext=mp4]+bestaudio[ext=m4a]/best[ext=mp4]/best", # 优先下载mp4格式 128 | "progress_hooks": [self.progress_hook], # 下载进度钩子 129 | "quiet": True, # 禁用日志输出 130 | "no_warnings": True, # 禁用警告信息 131 | "noprogress": True, 132 | "writeautomaticsub": need_subtitle, # 下载自动生成的字幕 133 | "writethumbnail": need_thumbnail, # 下载缩略图 134 | "thumbnail_format": "jpg", # 指定缩略图的格式 135 | } 136 | 137 | # 检查 cookies 文件 138 | cookiefile_path = APPDATA_PATH / "cookies.txt" 139 | if cookiefile_path.exists(): 140 | logger.info(f"使用cookiefile: {cookiefile_path}") 141 | initial_ydl_opts["cookiefile"] = str(cookiefile_path) 142 | 143 | with yt_dlp.YoutubeDL(initial_ydl_opts) as ydl: 144 | # 提取视频信息(不下载) 145 | info_dict = ydl.extract_info(self.url, download=False) 146 | 147 | # 设置动态下载文件夹为视频标题 148 | video_title = self.sanitize_filename(info_dict.get("title", "MyVideo")) 149 | video_work_dir = Path(self.work_dir) / self.sanitize_filename(video_title) 150 | subtitle_language = info_dict.get("language", None) 151 | if subtitle_language: 152 | subtitle_language = subtitle_language.lower().split("-")[0] 153 | 154 | try: 155 | subtitle_download_link = None 156 
| for l in info_dict["automatic_captions"]: 157 | if l.startswith(subtitle_language): 158 | subtitle_download_link = info_dict["automatic_captions"][l][-1][ 159 | "url" 160 | ] 161 | break 162 | except Exception: 163 | subtitle_download_link = None 164 | 165 | # 设置 yt-dlp 下载选项 166 | ydl_opts = { 167 | "paths": { 168 | "home": str(video_work_dir), 169 | "subtitle": str(video_work_dir / "subtitle"), 170 | "thumbnail": str(video_work_dir), 171 | }, 172 | } 173 | # 更新 yt-dlp 的配置 174 | ydl.params.update(ydl_opts) 175 | 176 | # 使用 process_info 进行下载 177 | ydl.process_info(info_dict) 178 | 179 | # 获取视频文件路径 180 | video_file_path = Path(ydl.prepare_filename(info_dict)) 181 | if video_file_path.exists(): 182 | video_file_path = str(video_file_path) 183 | else: 184 | video_file_path = None 185 | 186 | # 获取字幕文件路径 187 | subtitle_file_path = None 188 | for file in video_work_dir.glob("**/【下载字幕】*"): 189 | file_path = str(file) 190 | if subtitle_language and subtitle_language not in file_path: 191 | logger.info( 192 | "字幕语言错误,重新下载字幕: %s", subtitle_download_link 193 | ) 194 | os.remove(file_path) 195 | if subtitle_download_link: 196 | response = requests.get(subtitle_download_link) 197 | file_path = ( 198 | video_work_dir 199 | / "subtitle" 200 | / f"【下载字幕】{subtitle_language}.vtt" 201 | ) 202 | if res := response.text: 203 | with open(file_path, "w", encoding="utf-8") as f: 204 | f.write(res) 205 | subtitle_file_path = file_path 206 | else: 207 | subtitle_file_path = file_path 208 | break 209 | 210 | # 获取缩略图文件路径 211 | thumbnail_file_path = None 212 | for file in video_work_dir.glob("**/thumbnail*"): 213 | thumbnail_file_path = str(file) 214 | break 215 | 216 | logger.info(f"视频下载完成: {video_file_path}") 217 | logger.info(f"字幕文件路径: {subtitle_file_path}") 218 | return video_file_path, subtitle_file_path, thumbnail_file_path, info_dict 219 | -------------------------------------------------------------------------------- /app/thread/video_info_thread.py: -------------------------------------------------------------------------------- 1 | import os 2 | import re 3 | import subprocess 4 | import tempfile 5 | from pathlib import Path 6 | 7 | from PyQt5.QtCore import QThread, pyqtSignal 8 | 9 | from app.core.entities import VideoInfo 10 | from app.core.utils.logger import setup_logger 11 | 12 | logger = setup_logger("video_info_thread") 13 | 14 | class VideoInfoThread(QThread): 15 | finished = pyqtSignal(VideoInfo) 16 | error = pyqtSignal(str) 17 | 18 | def __init__(self, file_path): 19 | super().__init__() 20 | self.file_path = file_path 21 | 22 | def run(self): 23 | try: 24 | # 生成缩略图到临时文件 25 | temp_dir = tempfile.gettempdir() 26 | file_name = Path(self.file_path).stem 27 | thumbnail_path = os.path.join(temp_dir, f"{file_name}_thumbnail.jpg") 28 | 29 | # 获取视频信息 30 | video_info = self._get_video_info(thumbnail_path) 31 | self.finished.emit(video_info) 32 | 33 | except Exception as e: 34 | self.error.emit(str(e)) 35 | 36 | def _get_video_info(self, thumbnail_path: str) -> VideoInfo: 37 | """获取视频信息""" 38 | try: 39 | cmd = ["ffmpeg", "-i", self.file_path] 40 | # logger.info(f"获取视频信息执行命令: {' '.join(cmd)}") 41 | result = subprocess.run( 42 | cmd, 43 | capture_output=True, 44 | text=True, 45 | encoding='utf-8', 46 | errors='replace', 47 | creationflags=subprocess.CREATE_NO_WINDOW if hasattr(subprocess, 'CREATE_NO_WINDOW') else 0 48 | ) 49 | info = result.stderr 50 | 51 | video_info_dict = { 52 | 'file_name': Path(self.file_path).stem, 53 | 'file_path': self.file_path, 54 | 'duration_seconds': 0, 55 | 'bitrate_kbps': 0, 56 
| 'video_codec': '', 57 | 'width': 0, 58 | 'height': 0, 59 | 'fps': 0, 60 | 'audio_codec': '', 61 | 'audio_sampling_rate': 0, 62 | 'thumbnail_path': '', 63 | } 64 | 65 | # 提取时长 66 | if duration_match := re.search(r'Duration: (\d+):(\d+):(\d+\.\d+)', info): 67 | hours, minutes, seconds = map(float, duration_match.groups()) 68 | video_info_dict['duration_seconds'] = hours * 3600 + minutes * 60 + seconds 69 | logger.info(f"视频时长: {video_info_dict['duration_seconds']}秒") 70 | 71 | # 提取比特率 72 | if bitrate_match := re.search(r'bitrate: (\d+) kb/s', info): 73 | video_info_dict['bitrate_kbps'] = int(bitrate_match.group(1)) 74 | 75 | # 提取视频流信息 76 | if video_stream_match := re.search(r'Stream #\d+:\d+.*Video: (\w+).*?, (\d+)x(\d+).*?, ([\d.]+) (?:fps|tb)', 77 | info, re.DOTALL): 78 | video_info_dict.update({ 79 | 'video_codec': video_stream_match.group(1), 80 | 'width': int(video_stream_match.group(2)), 81 | 'height': int(video_stream_match.group(3)), 82 | 'fps': float(video_stream_match.group(4)) 83 | }) 84 | 85 | if thumbnail_path: 86 | if self._extract_thumbnail(video_info_dict['duration_seconds'] * 0.3, thumbnail_path): 87 | video_info_dict['thumbnail_path'] = thumbnail_path 88 | else: 89 | video_info_dict['thumbnail_path'] = thumbnail_path 90 | logger.warning("未找到视频流信息") 91 | 92 | # 提取音频流信息 93 | if audio_stream_match := re.search(r'Stream #\d+:\d+.*Audio: (\w+).* (\d+) Hz', info): 94 | video_info_dict.update({ 95 | 'audio_codec': audio_stream_match.group(1), 96 | 'audio_sampling_rate': int(audio_stream_match.group(2)) 97 | }) 98 | 99 | return VideoInfo(**video_info_dict) 100 | except Exception as e: 101 | logger.exception(f"获取视频信息时出错: {str(e)}") 102 | raise 103 | 104 | def _extract_thumbnail(self, seek_time: float, thumbnail_path: str) -> bool: 105 | """提取视频缩略图""" 106 | if not Path(self.file_path).is_file(): 107 | logger.error(f"视频文件不存在: {self.file_path}") 108 | return False 109 | 110 | try: 111 | timestamp = f"{int(seek_time // 3600):02}:{int((seek_time % 3600) // 60):02}:{seek_time % 60:06.3f}" 112 | # 确保输出目录存在 113 | Path(thumbnail_path).parent.mkdir(parents=True, exist_ok=True) 114 | 115 | # 转换路径为合适的格式 116 | video_path = Path(self.file_path).as_posix() 117 | thumbnail_path = Path(thumbnail_path).as_posix() 118 | 119 | cmd = [ 120 | "ffmpeg", 121 | "-ss", timestamp, 122 | "-i", video_path, 123 | "-vframes", "1", 124 | "-q:v", "2", 125 | "-y", 126 | thumbnail_path 127 | ] 128 | # logger.info(f"提取缩略图执行命令: {' '.join(cmd)}") 129 | result = subprocess.run( 130 | cmd, 131 | capture_output=True, 132 | text=True, 133 | encoding='utf-8', 134 | errors='replace', 135 | creationflags=subprocess.CREATE_NO_WINDOW if os.name == 'nt' else 0 136 | ) 137 | return result.returncode == 0 138 | 139 | except Exception as e: 140 | logger.exception(f"提取缩略图时出错: {str(e)}") 141 | return False -------------------------------------------------------------------------------- /app/thread/video_synthesis_thread.py: -------------------------------------------------------------------------------- 1 | import datetime 2 | import logging 3 | 4 | from PyQt5.QtCore import QThread, pyqtSignal 5 | 6 | from app.common.config import cfg 7 | from app.core.entities import SynthesisTask 8 | from app.core.utils.logger import setup_logger 9 | from app.core.utils.video_utils import add_subtitles 10 | 11 | logger = setup_logger("video_synthesis_thread") 12 | 13 | 14 | class VideoSynthesisThread(QThread): 15 | finished = pyqtSignal(SynthesisTask) 16 | progress = pyqtSignal(int, str) 17 | error = pyqtSignal(str) 18 | 19 | def __init__(self, 
task: SynthesisTask): 20 | super().__init__() 21 | self.task = task 22 | logger.debug(f"初始化 VideoSynthesisThread,任务: {self.task}") 23 | 24 | def run(self): 25 | try: 26 | logger.info(f"\n===========视频合成任务开始===========") 27 | logger.info(f"时间:{datetime.datetime.now()}") 28 | video_file = self.task.video_path 29 | subtitle_file = self.task.subtitle_path 30 | output_path = self.task.output_path 31 | soft_subtitle = self.task.synthesis_config.soft_subtitle 32 | need_video = self.task.synthesis_config.need_video 33 | 34 | if not need_video: 35 | logger.info(f"不需要合成视频,跳过") 36 | self.progress.emit(100, self.tr("合成完成")) 37 | self.finished.emit(self.task) 38 | return 39 | 40 | logger.info(f"开始合成视频: {video_file}") 41 | self.progress.emit(5, self.tr("正在合成")) 42 | 43 | add_subtitles( 44 | video_file, 45 | subtitle_file, 46 | output_path, 47 | soft_subtitle=soft_subtitle, 48 | progress_callback=self.progress_callback, 49 | ) 50 | 51 | self.progress.emit(100, self.tr("合成完成")) 52 | logger.info(f"视频合成完成,保存路径: {output_path}") 53 | 54 | self.finished.emit(self.task) 55 | except Exception as e: 56 | logger.exception(f"视频合成失败: {e}") 57 | self.error.emit(str(e)) 58 | self.progress.emit(100, self.tr("视频合成失败")) 59 | 60 | def progress_callback(self, value, message): 61 | progress = int(5 + int(value) / 100 * 95) 62 | logger.debug(f"合成进度: {progress}% - {message}") 63 | self.progress.emit(progress, str(progress) + "% " + message) 64 | -------------------------------------------------------------------------------- /app/view/home_interface.py: -------------------------------------------------------------------------------- 1 | from PyQt5.QtCore import pyqtSignal 2 | from PyQt5.QtWidgets import QSizePolicy, QStackedWidget, QVBoxLayout, QWidget 3 | from qfluentwidgets import SegmentedWidget 4 | 5 | from app.core.task_factory import TaskFactory 6 | from app.view.subtitle_interface import SubtitleInterface 7 | from app.view.task_creation_interface import TaskCreationInterface 8 | from app.view.transcription_interface import TranscriptionInterface 9 | from app.view.video_synthesis_interface import VideoSynthesisInterface 10 | 11 | 12 | class HomeInterface(QWidget): 13 | 14 | def __init__(self, parent=None): 15 | super().__init__(parent) 16 | 17 | # 设置对象名称和样式 18 | self.setObjectName("HomeInterface") 19 | self.setStyleSheet( 20 | """ 21 | HomeInterface{background: white} 22 | """ 23 | ) 24 | 25 | # 创建分段控件和堆叠控件 26 | self.pivot = SegmentedWidget(self) 27 | self.pivot.setSizePolicy(QSizePolicy.Minimum, QSizePolicy.Fixed) 28 | 29 | self.stackedWidget = QStackedWidget(self) 30 | self.vBoxLayout = QVBoxLayout(self) 31 | 32 | # 添加子界面 33 | self.task_creation_interface = TaskCreationInterface(self) 34 | self.transcription_interface = TranscriptionInterface(self) 35 | self.subtitle_optimization_interface = SubtitleInterface(self) 36 | self.video_synthesis_interface = VideoSynthesisInterface(self) 37 | 38 | self.addSubInterface( 39 | self.task_creation_interface, "TaskCreationInterface", self.tr("任务创建") 40 | ) 41 | self.addSubInterface( 42 | self.transcription_interface, "TranscriptionInterface", self.tr("语音转录") 43 | ) 44 | self.addSubInterface( 45 | self.subtitle_optimization_interface, 46 | "SubtitleInterface", 47 | self.tr("字幕优化与翻译"), 48 | ) 49 | self.addSubInterface( 50 | self.video_synthesis_interface, 51 | "VideoSynthesisInterface", 52 | self.tr("字幕视频合成"), 53 | ) 54 | 55 | self.vBoxLayout.addWidget(self.pivot) 56 | self.vBoxLayout.addWidget(self.stackedWidget) 57 | self.vBoxLayout.setContentsMargins(30, 10, 30, 30) 58 | 59 | 
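        # The four sub-interfaces form a linear pipeline; each one's finished signal
        # advances to the next stage:
        #   task creation -> transcription -> subtitle optimization/translation -> video synthesis
        # (see the connect() calls below and the corresponding switch_to_* slots).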
self.stackedWidget.currentChanged.connect(self.onCurrentIndexChanged) 60 | self.stackedWidget.setCurrentWidget(self.task_creation_interface) 61 | self.pivot.setCurrentItem("TaskCreationInterface") 62 | 63 | self.task_creation_interface.finished.connect(self.switch_to_transcription) 64 | self.transcription_interface.finished.connect( 65 | self.switch_to_subtitle_optimization 66 | ) 67 | self.subtitle_optimization_interface.finished.connect( 68 | self.switch_to_video_synthesis 69 | ) 70 | 71 | def switch_to_transcription(self, file_path): 72 | # 切换到转录界面 73 | transcribe_task = TaskFactory.create_transcribe_task( 74 | file_path, need_next_task=True 75 | ) 76 | self.transcription_interface.set_task(transcribe_task) 77 | self.transcription_interface.process() 78 | self.stackedWidget.setCurrentWidget(self.transcription_interface) 79 | self.pivot.setCurrentItem("TranscriptionInterface") 80 | 81 | def switch_to_subtitle_optimization(self, file_path, video_path): 82 | # 切换到字幕处理界面 83 | subtitle_task = TaskFactory.create_subtitle_task( 84 | file_path, video_path, need_next_task=True 85 | ) 86 | self.subtitle_optimization_interface.set_task(subtitle_task) 87 | self.subtitle_optimization_interface.process() 88 | self.stackedWidget.setCurrentWidget(self.subtitle_optimization_interface) 89 | self.pivot.setCurrentItem("SubtitleInterface") 90 | 91 | def switch_to_video_synthesis(self, video_path, subtitle_path): 92 | # 切换到视频合成界面 93 | synthesis_task = TaskFactory.create_synthesis_task( 94 | video_path, subtitle_path, need_next_task=True 95 | ) 96 | self.video_synthesis_interface.set_task(synthesis_task) 97 | self.video_synthesis_interface.process() 98 | self.stackedWidget.setCurrentWidget(self.video_synthesis_interface) 99 | self.pivot.setCurrentItem("VideoSynthesisInterface") 100 | 101 | def addSubInterface(self, widget, objectName, text): 102 | # 添加子界面到堆叠控件和分段控件 103 | widget.setObjectName(objectName) 104 | self.stackedWidget.addWidget(widget) 105 | self.pivot.addItem( 106 | routeKey=objectName, 107 | text=text, 108 | onClick=lambda: self.stackedWidget.setCurrentWidget(widget), 109 | ) 110 | 111 | def onCurrentIndexChanged(self, index): 112 | # 当堆叠控件的当前索引改变时,更新分段控件的当前项 113 | widget = self.stackedWidget.widget(index) 114 | if widget: 115 | self.pivot.setCurrentItem(widget.objectName()) 116 | 117 | def closeEvent(self, event): 118 | # 关闭事件,关闭所有子界面 119 | self.task_creation_interface.close() 120 | self.transcription_interface.close() 121 | self.subtitle_optimization_interface.close() 122 | self.video_synthesis_interface.close() 123 | super().closeEvent(event) 124 | -------------------------------------------------------------------------------- /app/view/log_window.py: -------------------------------------------------------------------------------- 1 | import os 2 | import time 3 | from pathlib import Path 4 | 5 | from PyQt5.QtCore import Qt, QTimer 6 | from PyQt5.QtGui import QTextCursor 7 | from PyQt5.QtWidgets import QTextEdit, QVBoxLayout, QWidget, QHBoxLayout 8 | from qfluentwidgets import Dialog, FluentStyleSheet, TextEdit, isDarkTheme, PushButton 9 | 10 | from app.config import LOG_PATH, RESOURCE_PATH 11 | 12 | 13 | class LogWindow(QWidget): 14 | def __init__(self, parent=None): 15 | super().__init__(parent) 16 | self.setWindowTitle("日志查看器") 17 | self.resize(800, 600) 18 | 19 | FluentStyleSheet.FLUENT_WINDOW.apply(self) 20 | 21 | theme = "dark" if isDarkTheme() else "light" 22 | with open( 23 | RESOURCE_PATH / "assets" / "qss" / theme / "demo.qss", encoding="utf-8" 24 | ) as f: 25 | 
self.setStyleSheet(f.read()) 26 | 27 | # 设置为非模态对话框 28 | self.setWindowModality(Qt.NonModal) 29 | # 设置窗口标志 30 | self.setWindowFlags( 31 | Qt.Window # 让窗口成为独立窗口 32 | | Qt.WindowCloseButtonHint # 添加关闭按钮 33 | | Qt.WindowMinMaxButtonsHint # 添加最小化最大化按钮 34 | ) 35 | # 创建主布局 36 | layout = QVBoxLayout(self) 37 | 38 | # 创建顶部按钮布局 39 | top_layout = QHBoxLayout() 40 | self.open_folder_btn = PushButton("打开日志文件夹", self) 41 | self.open_folder_btn.clicked.connect(self.open_log_folder) 42 | top_layout.addWidget(self.open_folder_btn) 43 | top_layout.addStretch() 44 | layout.addLayout(top_layout) 45 | 46 | # 创建文本编辑器用于显示日志 47 | self.log_text = TextEdit(self) 48 | self.log_text.setReadOnly(True) 49 | layout.addWidget(self.log_text) 50 | 51 | # 设置定时器用于更新日志 52 | self.timer = QTimer(self) 53 | self.timer.timeout.connect(self.update_log) 54 | self.timer.start(500) # 每2秒更新一次 55 | 56 | # 获取日志文件路径并打开文件 57 | self.log_path = LOG_PATH / "app.log" 58 | try: 59 | self.log_file = open(self.log_path, "r", encoding="utf-8") 60 | self.load_last_lines(20480) 61 | self.log_text.moveCursor(QTextCursor.End) 62 | self.log_text.insertPlainText(f"\n{'='*25}以上是历史日志{'='*25}\n\n") 63 | except Exception as e: 64 | self.log_file = None 65 | self.log_text.setPlainText(f"打开日志文件失败: {str(e)}") 66 | 67 | # 添加文件大小跟踪 68 | self.last_position = self.log_file.tell() 69 | self.max_lines = 100 # 最多显示100行 70 | 71 | self.auto_scroll = True # 添加自动滚动标志 72 | 73 | # 监听滚动条变化 74 | self.log_text.verticalScrollBar().valueChanged.connect(self.on_scroll_changed) 75 | 76 | # # 初始加载日志 77 | # self.update_log() 78 | 79 | def load_last_lines(self, read_size): 80 | """加载文件最后的内容 81 | Args: 82 | read_size: 要读取的字节数,比如102400表示读取最后100KB 83 | """ 84 | try: 85 | # 移动到文件末尾 86 | self.log_file.seek(0, 2) 87 | file_size = self.log_file.tell() 88 | 89 | # 向前读取指定大小或整个文件 90 | read_size = min(read_size, file_size) 91 | 92 | # 从文件开头读取以确保不会破坏UTF-8编码 93 | self.log_file.seek(0) 94 | content = self.log_file.read() 95 | 96 | # 只保留最后一部分内容 97 | if len(content) > read_size: 98 | content = content[-read_size:] 99 | # 找到第一个完整的行 100 | newline_pos = content.find("\n") 101 | if newline_pos != -1: 102 | content = content[newline_pos + 1 :] 103 | 104 | self.last_position = self.log_file.tell() 105 | self.log_text.moveCursor(QTextCursor.End) 106 | self.log_text.setPlainText(content) 107 | 108 | # 滚动到底部 109 | self.log_text.verticalScrollBar().setValue( 110 | self.log_text.verticalScrollBar().maximum() 111 | ) 112 | 113 | except Exception as e: 114 | self.log_text.setPlainText(f"读取日志文件失败: {str(e)}") 115 | 116 | # def closeEvent(self, event): 117 | # # 关闭窗口时同时关闭文件和定时器 118 | # self.timer.stop() 119 | # if self.log_file: 120 | # self.log_file.close() 121 | # event.accept() 122 | 123 | def on_scroll_changed(self, value): 124 | """监听滚动条变化""" 125 | scrollbar = self.log_text.verticalScrollBar() 126 | max_value = scrollbar.maximum() 127 | self.auto_scroll = value <= max_value and value >= max_value * 0.85 128 | 129 | def update_log(self): 130 | """更新日志内容""" 131 | if not self.log_file: 132 | return 133 | 134 | try: 135 | # 移动到上次读取的位置 136 | self.log_file.seek(self.last_position) 137 | new_content = self.log_file.read() 138 | 139 | if new_content: 140 | # 按行分割内容 141 | lines = new_content.splitlines(True) # keepends=True 保留换行符 142 | for line in lines: 143 | self.log_text.moveCursor(QTextCursor.End) 144 | self.log_text.insertPlainText(line) 145 | # time.sleep(0.02) 146 | self.log_text.repaint() 147 | 148 | self.last_position = self.log_file.tell() 149 | 150 | if self.auto_scroll: 151 | 
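                    # Pin the view to the newest output only while auto-scroll is active; on_scroll_changed() disables it once the user scrolls above roughly 85% of the scrollbar range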
self.log_text.verticalScrollBar().setValue( 152 | self.log_text.verticalScrollBar().maximum() 153 | ) 154 | 155 | except Exception as e: 156 | self.log_text.setPlainText(f"读取日志文件出错: {str(e)}") 157 | 158 | def open_log_folder(self): 159 | """打开日志文件所在文件夹""" 160 | os.startfile(str(LOG_PATH)) 161 | -------------------------------------------------------------------------------- /app/view/main_window.py: -------------------------------------------------------------------------------- 1 | import os 2 | 3 | import psutil 4 | from PyQt5.QtCore import QSize, QThread, QUrl 5 | from PyQt5.QtGui import QDesktopServices, QIcon 6 | from PyQt5.QtWidgets import QApplication 7 | from qfluentwidgets import FluentIcon as FIF 8 | from qfluentwidgets import ( 9 | FluentWindow, 10 | MessageBox, 11 | NavigationAvatarWidget, 12 | NavigationItemPosition, 13 | SplashScreen, 14 | ) 15 | 16 | from app.common.config import cfg 17 | from app.components.DonateDialog import DonateDialog 18 | from app.config import ASSETS_PATH, GITHUB_REPO_URL 19 | from app.thread.version_manager_thread import VersionManager 20 | from app.view.batch_process_interface import BatchProcessInterface 21 | from app.view.home_interface import HomeInterface 22 | from app.view.setting_interface import SettingInterface 23 | from app.view.subtitle_style_interface import SubtitleStyleInterface 24 | 25 | LOGO_PATH = ASSETS_PATH / "logo.png" 26 | 27 | 28 | class MainWindow(FluentWindow): 29 | 30 | def __init__(self): 31 | super().__init__() 32 | self.initWindow() 33 | 34 | # 创建子界面 35 | self.homeInterface = HomeInterface(self) 36 | self.settingInterface = SettingInterface(self) 37 | self.subtitleStyleInterface = SubtitleStyleInterface(self) 38 | self.batchProcessInterface = BatchProcessInterface(self) 39 | 40 | # 初始化版本管理器 41 | self.versionManager = VersionManager() 42 | self.versionManager.newVersionAvailable.connect(self.onNewVersion) 43 | self.versionManager.announcementAvailable.connect(self.onAnnouncement) 44 | 45 | # 创建版本检查线程 46 | self.versionThread = QThread() 47 | self.versionManager.moveToThread(self.versionThread) 48 | self.versionThread.started.connect(self.versionManager.performCheck) 49 | self.versionThread.start() 50 | 51 | # 初始化导航界面 52 | self.initNavigation() 53 | self.splashScreen.finish() 54 | 55 | # 注册退出处理, 清理进程 56 | import atexit 57 | 58 | atexit.register(self.stop) 59 | 60 | def initNavigation(self): 61 | """初始化导航栏""" 62 | # 添加导航项 63 | self.addSubInterface(self.homeInterface, FIF.HOME, self.tr("主页")) 64 | self.addSubInterface(self.batchProcessInterface, FIF.VIDEO, self.tr("批量处理")) 65 | self.addSubInterface(self.subtitleStyleInterface, FIF.FONT, self.tr("字幕样式")) 66 | 67 | self.navigationInterface.addSeparator() 68 | 69 | # 在底部添加自定义小部件 70 | self.navigationInterface.addItem( 71 | routeKey="avatar", 72 | text="GitHub", 73 | icon=FIF.GITHUB, 74 | onClick=self.onGithubDialog, 75 | position=NavigationItemPosition.BOTTOM, 76 | ) 77 | self.addSubInterface( 78 | self.settingInterface, 79 | FIF.SETTING, 80 | self.tr("Settings"), 81 | NavigationItemPosition.BOTTOM, 82 | ) 83 | 84 | # 设置默认界面 85 | self.switchTo(self.homeInterface) 86 | 87 | def switchTo(self, interface): 88 | if interface.windowTitle(): 89 | self.setWindowTitle(interface.windowTitle()) 90 | else: 91 | self.setWindowTitle(self.tr("卡卡字幕助手 -- VideoCaptioner")) 92 | self.stackedWidget.setCurrentWidget(interface, popOut=False) 93 | 94 | def initWindow(self): 95 | """初始化窗口""" 96 | self.resize(1050, 800) 97 | self.setMinimumWidth(700) 98 | self.setWindowIcon(QIcon(str(LOGO_PATH))) 99 | 
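        # Set the default window title; switchTo() falls back to this same title whenever the target interface defines no windowTitle of its own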
self.setWindowTitle(self.tr("卡卡字幕助手 -- VideoCaptioner")) 100 | 101 | self.setMicaEffectEnabled(cfg.get(cfg.micaEnabled)) 102 | 103 | # 创建启动画面 104 | self.splashScreen = SplashScreen(self.windowIcon(), self) 105 | self.splashScreen.setIconSize(QSize(106, 106)) 106 | self.splashScreen.raise_() 107 | 108 | # 设置窗口位置, 居中 109 | desktop = QApplication.desktop().availableGeometry() 110 | w, h = desktop.width(), desktop.height() 111 | self.move(w // 2 - self.width() // 2, h // 2 - self.height() // 2) 112 | 113 | self.show() 114 | QApplication.processEvents() 115 | 116 | def onGithubDialog(self): 117 | """打开GitHub""" 118 | w = MessageBox( 119 | self.tr("GitHub信息"), 120 | self.tr( 121 | "VideoCaptioner 由本人在课余时间独立开发完成,目前托管在GitHub上,欢迎Star和Fork。项目诚然还有很多地方需要完善,遇到软件的问题或者BUG欢迎提交Issue。\n\n https://github.com/WEIFENG2333/VideoCaptioner" 122 | ), 123 | self, 124 | ) 125 | w.yesButton.setText(self.tr("打开 GitHub")) 126 | w.cancelButton.setText(self.tr("支持作者")) 127 | if w.exec(): 128 | QDesktopServices.openUrl(QUrl(GITHUB_REPO_URL)) 129 | else: 130 | # 点击"支持作者"按钮时打开捐赠对话框 131 | donate_dialog = DonateDialog(self) 132 | donate_dialog.exec_() 133 | 134 | def onNewVersion(self, version, force_update, update_info, download_url): 135 | """新版本提示""" 136 | title = "发现新版本" if not force_update else "当前版本已停用" 137 | content = f"发现新版本 {version}\n\n{update_info}" 138 | w = MessageBox(title, content, self) 139 | w.yesButton.setText("立即更新") 140 | w.cancelButton.setText("稍后再说" if not force_update else "退出程序") 141 | if w.exec(): 142 | QDesktopServices.openUrl(QUrl(download_url)) 143 | if force_update: 144 | QApplication.quit() 145 | 146 | def onAnnouncement(self, content): 147 | """显示公告""" 148 | w = MessageBox("公告", content, self) 149 | w.yesButton.setText("我知道了") 150 | w.cancelButton.hide() 151 | w.exec() 152 | 153 | def resizeEvent(self, e): 154 | super().resizeEvent(e) 155 | if hasattr(self, "splashScreen"): 156 | self.splashScreen.resize(self.size()) 157 | 158 | def closeEvent(self, event): 159 | # 关闭所有子界面 160 | # self.homeInterface.close() 161 | # self.batchProcessInterface.close() 162 | # self.subtitleStyleInterface.close() 163 | # self.settingInterface.close() 164 | super().closeEvent(event) 165 | 166 | # 强制退出应用程序 167 | QApplication.quit() 168 | 169 | # 确保所有线程和进程都被终止 要是一些错误退出就不会处理了。 170 | # import os 171 | # os._exit(0) 172 | 173 | def stop(self): 174 | # 找到 FFmpeg 进程并关闭 175 | process = psutil.Process(os.getpid()) 176 | for child in process.children(recursive=True): 177 | child.kill() 178 | -------------------------------------------------------------------------------- /app/view/view.md: -------------------------------------------------------------------------------- 1 | view/ 目录结构:用户界面 (UI) 模块 2 | 3 | 下面是本软件的一个主要页面结构,方便开发者查看和修改。 4 | 5 | 6 | ``` 7 | ├── main_window.py ------------------ 主窗口 (应用程序框架) 8 | │ │ 9 | │ └── 10 | │ ├── home_interface.py -------- 主页窗口 (程序主界面,包含核心功能) 11 | │ │ │ 12 | │ │ └── 包含以下子功能模块: 13 | │ │ ├── task_creation_interface.py - 任务创建窗口 14 | │ │ ├── transcription_interface.py - 语音转录窗口 15 | │ │ ├── subtitle_interface.py -------- 字幕优化窗口 16 | │ │ └── video_synthesis_interface.py - 视频合成窗口 17 | │ │ 18 | │ ├── batch_process_interface.py ------- 批量处理窗口 19 | │ ├── subtitle_style_interface.py ------ 字幕样式窗口 20 | │ └── setting_interface.py -------------- 设置窗口 21 | │ 22 | ├── log_window.py -------------------- 日志窗口 (独立窗口,集成在 home_interface) 23 | 24 | ``` -------------------------------------------------------------------------------- /docs/get_cookies.md: 
-------------------------------------------------------------------------------- 1 | # Cookie 配置说明 2 | 3 | ## 问题说明 4 | 在使用软件下载视频时,可能会遇到以下错误提示: 5 | 6 | ![alt text](images/cookies_error.png) 7 | 8 | 这是因为: 9 | 1. 某些视频平台(如B站)需要用户登录信息才能获取高质量视频 10 | 2. 部分网站(如YouTube)在网络条件较差时需要验证用户身份 11 | 12 | ## 解决方法 13 | 14 | ### 1. 安装浏览器扩展 15 | 根据你使用的浏览器选择安装: 16 | 17 | - Chrome浏览器: [Get CookieTxt Locally](https://chromewebstore.google.com/detail/get-cookiestxt-locally/cclelndahbckbenkjhflpdbgdldlbecc) 18 | - Edge浏览器: [Export Cookies File](https://microsoftedge.microsoft.com/addons/detail/export-cookies-file/hbglikhfdcfhdfikmocdflffaecbnedo) 19 | 20 | ### 2. 导出Cookie文件 21 | 1. 登录需要下载视频的网站(如B站、YouTube等) 22 | 2. 点击浏览器扩展图标 23 | 3. 选择"Export Cookies"选项 24 | 4. 将导出的cookies.txt文件保存到软件的AppData目录下 25 | 26 | ![alt text](images/cookies_export.png) 27 | 28 | ### 3. 确认文件位置 29 | 完成后的目录结构应如下: 30 | 31 | ``` 32 | ├─AppData 33 | │ ├─cache 34 | │ ├─logs 35 | │ ├─models 36 | │ ├─cookies.txt # Cookie文件 37 | │ └─settings.json 38 | 39 | ``` 40 | -------------------------------------------------------------------------------- /docs/images/alipay.jpg: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/WEIFENG2333/VideoCaptioner/295a40d0de071fef19439c3df2ca592a6d63de6a/docs/images/alipay.jpg -------------------------------------------------------------------------------- /docs/images/api-setting-2.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/WEIFENG2333/VideoCaptioner/295a40d0de071fef19439c3df2ca592a6d63de6a/docs/images/api-setting-2.png -------------------------------------------------------------------------------- /docs/images/api-setting.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/WEIFENG2333/VideoCaptioner/295a40d0de071fef19439c3df2ca592a6d63de6a/docs/images/api-setting.png -------------------------------------------------------------------------------- /docs/images/bath.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/WEIFENG2333/VideoCaptioner/295a40d0de071fef19439c3df2ca592a6d63de6a/docs/images/bath.png -------------------------------------------------------------------------------- /docs/images/cookies_error.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/WEIFENG2333/VideoCaptioner/295a40d0de071fef19439c3df2ca592a6d63de6a/docs/images/cookies_error.png -------------------------------------------------------------------------------- /docs/images/cookies_export.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/WEIFENG2333/VideoCaptioner/295a40d0de071fef19439c3df2ca592a6d63de6a/docs/images/cookies_export.png -------------------------------------------------------------------------------- /docs/images/get_api.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/WEIFENG2333/VideoCaptioner/295a40d0de071fef19439c3df2ca592a6d63de6a/docs/images/get_api.png -------------------------------------------------------------------------------- /docs/images/logo.png: -------------------------------------------------------------------------------- 
https://raw.githubusercontent.com/WEIFENG2333/VideoCaptioner/295a40d0de071fef19439c3df2ca592a6d63de6a/docs/images/logo.png -------------------------------------------------------------------------------- /docs/images/main.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/WEIFENG2333/VideoCaptioner/295a40d0de071fef19439c3df2ca592a6d63de6a/docs/images/main.png -------------------------------------------------------------------------------- /docs/images/preview1.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/WEIFENG2333/VideoCaptioner/295a40d0de071fef19439c3df2ca592a6d63de6a/docs/images/preview1.png -------------------------------------------------------------------------------- /docs/images/preview2.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/WEIFENG2333/VideoCaptioner/295a40d0de071fef19439c3df2ca592a6d63de6a/docs/images/preview2.png -------------------------------------------------------------------------------- /docs/images/setting.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/WEIFENG2333/VideoCaptioner/295a40d0de071fef19439c3df2ca592a6d63de6a/docs/images/setting.png -------------------------------------------------------------------------------- /docs/images/style.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/WEIFENG2333/VideoCaptioner/295a40d0de071fef19439c3df2ca592a6d63de6a/docs/images/style.png -------------------------------------------------------------------------------- /docs/images/subtitle.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/WEIFENG2333/VideoCaptioner/295a40d0de071fef19439c3df2ca592a6d63de6a/docs/images/subtitle.png -------------------------------------------------------------------------------- /docs/images/test_spend.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/WEIFENG2333/VideoCaptioner/295a40d0de071fef19439c3df2ca592a6d63de6a/docs/images/test_spend.png -------------------------------------------------------------------------------- /docs/images/test_ted1.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/WEIFENG2333/VideoCaptioner/295a40d0de071fef19439c3df2ca592a6d63de6a/docs/images/test_ted1.png -------------------------------------------------------------------------------- /docs/images/test_ted2.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/WEIFENG2333/VideoCaptioner/295a40d0de071fef19439c3df2ca592a6d63de6a/docs/images/test_ted2.png -------------------------------------------------------------------------------- /docs/images/test_ted3.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/WEIFENG2333/VideoCaptioner/295a40d0de071fef19439c3df2ca592a6d63de6a/docs/images/test_ted3.png -------------------------------------------------------------------------------- /docs/images/test_zl.png: -------------------------------------------------------------------------------- 
https://raw.githubusercontent.com/WEIFENG2333/VideoCaptioner/295a40d0de071fef19439c3df2ca592a6d63de6a/docs/images/test_zl.png -------------------------------------------------------------------------------- /docs/images/wechat.jpg: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/WEIFENG2333/VideoCaptioner/295a40d0de071fef19439c3df2ca592a6d63de6a/docs/images/wechat.jpg -------------------------------------------------------------------------------- /docs/images/whisper.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/WEIFENG2333/VideoCaptioner/295a40d0de071fef19439c3df2ca592a6d63de6a/docs/images/whisper.png -------------------------------------------------------------------------------- /docs/images/zl.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/WEIFENG2333/VideoCaptioner/295a40d0de071fef19439c3df2ca592a6d63de6a/docs/images/zl.png -------------------------------------------------------------------------------- /docs/llm_config.md: -------------------------------------------------------------------------------- 1 | 2 | 目前国内多家大模型厂商都提供了API接口,可以自行申请。也可以使用中转站,使用 OpenAI 或 Claude的API。 3 | 4 | 本教程以两种配置方式为例进行说明: 5 | 6 | [SiliconFlow-API 配置](./llm_config.md#SiliconFlow-API-配置) 7 | 8 | [中转站配置](./llm_config.md#中转站配置) 9 | 10 | 11 | # SiliconFlow-API 配置 12 | 13 | 1. 申请大模型API 14 | 15 | 这里以国内的 [SiliconCloud](https://cloud.siliconflow.cn/i/onCHcaDx) 的 API 为例子,其已经集合国内多家大模型厂商。(注意以上是我的推广链接,通过此可以获得14元额度,介意就百度自行搜索注册,非广告) 16 | 17 | ![api](images/get_api.png) 18 | 19 | 注册后,在[设置](https://cloud.siliconflow.cn/account/ak)中获取API Key。 20 | 21 | ![config](images/api-setting.png) 22 | 23 | API 接口地址: https://api.siliconflow.cn/v1 (需要添加 /v1) 24 | 25 | API Key: 将 SiliconCloud 平台的密钥粘贴到此处。 26 | 27 | 点击检查连接,“模型”设置栏会自动填充所有支持的模型名称。 28 | 29 | 选择需要的模型名称,推荐:deepseek-ai/DeepSeek-V3 30 | 31 | > 2025 年 2 月 6 日起,未实名用户每日最多请求此模型 100 次 32 | 33 | 根据官方要求该模型需要实名才能获取更多的调用次数。不想实名可以考虑使用其他中转站。 34 | 35 | `线程数 (Thread Count)`: SiliconCloud 并发有限,推荐只设置 5 个线程或以下。 36 | 37 | 38 | # 中转站配置 39 | 40 | 1. 先在 [本项目的中转站](https://api.videocaptioner.cn/register?aff=UrLB) 注册账号 41 | ,通过此链接注册默认赠送 $0.4 测试余额。 42 | 43 | 2. 然后获取 API Key: [https://api.videocaptioner.cn/token](https://api.videocaptioner.cn/token) 44 | 45 | 3. 在软件设置中配置 API Key 和 API 接口地址, 如下图: 46 | 47 | ![api_setting](images/api-setting-2.png) 48 | 49 | BaseURL: `https://api.videocaptioner.cn/v1` 50 | 51 | API-key: `上面获取的API Key` 52 | 53 | 💡 模型选择建议 (本人在各质量层级中选出的高性价比模型): 54 | 55 | - 高质量之选: `claude-3-5-sonnet-20241022` (耗费比例:3) 56 | 57 | - 较高质量之选: `gemini-2.0-flash`、`deepseek-chat` (耗费比例:1) 58 | 59 | - 中质量之选: `gpt-4o-mini`、`gemini-1.5-flash` (耗费比例:0.15) 60 | 61 | `线程数 (Thread Count)`: 本站支持超高并发,软件中线程数直接拉满即可~ 处理速度非常快~ 62 | 63 | > PS: 条件差一点的可直接使用 `gpt-4o-mini`, 便宜且速度快。这个模型也花不了几个钱的,建议不要折腾本地部署了。 64 | 65 | 66 | 67 | 68 | -------------------------------------------------------------------------------- /docs/test.md: -------------------------------------------------------------------------------- 1 | ### 使用 Whisper 转录 2 | ![alt text](images/test_zl.png) 3 | 4 | ### 转录成功以后的字幕 5 | ``` 6 | 1 7 | 00:00:02,080 --> 00:00:08,600 8 | So in college, I was a government major, 9 | 10 | 2 11 | 00:00:08,600 --> 00:00:11,080 12 | which means I had to write a lot of papers. 
13 | 14 | 3 15 | 00:00:11,080 --> 00:00:12,600 16 | Now, when a normal student writes a paper, 17 | 18 | 4 19 | 00:00:12,600 --> 00:00:15,460 20 | they might spread the work out a little like this. 21 | 22 | 5 23 | 00:00:15,460 --> 00:00:16,300 24 | So you know. 25 | 26 | 6 27 | 00:00:16,300 --> 00:00:20,040 28 | You get started maybe a little slowly, 29 | 30 | 7 31 | 00:00:20,040 --> 00:00:21,600 32 | but you get enough done in the first week 33 | 34 | 8 35 | 00:00:21,600 --> 00:00:24,000 36 | that with some heavier days later on, 37 | 38 | 9 39 | 00:00:24,000 --> 00:00:26,200 40 | everything gets done and things stay civil. 41 | 42 | 10 43 | 00:00:26,200 --> 00:00:29,840 44 | And I would wanna do that like that. 45 | 46 | 11 47 | 00:00:29,840 --> 00:00:30,840 48 | That would be the plan. 49 | 50 | 12 51 | 00:00:30,840 --> 00:00:33,580 52 | I would have it all ready to go, 53 | 54 | 13 55 | 00:00:33,580 --> 00:00:36,120 56 | but then actually the paper would come along 57 | 58 | 14 59 | 00:00:36,120 --> 00:00:37,720 60 | and then I would kinda do this. 61 | 62 | 15 63 | 00:00:40,480 --> 00:00:43,280 64 | And that would happen to every single paper. 65 | 66 | 16 67 | 00:00:43,280 --> 00:00:47,240 68 | But then came my 90 page senior thesis, 69 | 70 | 17 71 | 00:00:47,240 --> 00:00:49,580 72 | a paper you're supposed to spend a year on. 73 | 74 | 18 75 | 00:00:49,580 --> 00:00:52,320 76 | I knew for a paper like that, my normal workflow 77 | 78 | 19 79 | 00:00:52,320 --> 00:00:54,580 80 | was not an option, it was way too big a project. 81 | 82 | 20 83 | 00:00:54,580 --> 00:00:56,580 84 | So I planned things out and I decided 85 | 86 | 21 87 | 00:00:56,580 --> 00:00:59,520 88 | I kinda had to go something like this. 89 | 90 | ``` 91 | 92 | ### 进行断句与字幕的优化翻译 93 | ``` 94 | 1 95 | 00:00:02,080 --> 00:00:08,597 96 | 所以在大学时,我是政府专业的学生 97 | So in college, I was a government major. 98 | 99 | 2 100 | 00:00:08,600 --> 00:00:11,078 101 | 这意味着我得写很多论文 102 | Which means I had to write a lot of papers. 103 | 104 | 3 105 | 00:00:11,080 --> 00:00:12,596 106 | 现在,普通学生写论文时 107 | Now when a normal student writes a paper, 108 | 109 | 4 110 | 00:00:12,600 --> 00:00:15,460 111 | 他们可能会这样分散工作 112 | They might spread the work out a little like this. 113 | 114 | 5 115 | 00:00:15,460 --> 00:00:20,040 116 | 所以你知道,你可能会稍微慢一些开始 117 | So you know, you get started maybe a little slowly, 118 | 119 | 6 120 | 00:00:20,040 --> 00:00:21,593 121 | 但你在第一周能够完成足够的工作 122 | But you get enough done in the first week. 123 | 124 | 7 125 | 00:00:21,600 --> 00:00:23,996 126 | 这样之后的一些繁忙日子 127 | That with some heavier days later on. 128 | 129 | 8 130 | 00:00:24,000 --> 00:00:26,200 131 | 一切都能完成,事情保持得当 132 | Everything gets done and things stay civil. 133 | 134 | 9 135 | 00:00:26,200 --> 00:00:29,840 136 | 我也希望那样去做 137 | And I would wanna do that like that. 138 | 139 | 10 140 | 00:00:29,840 --> 00:00:31,936 141 | 那将是我的计划 142 | That would be the plan I would have. 143 | 144 | 11 145 | 00:00:31,936 --> 00:00:35,059 146 | 一切都准备好了,但实际上论文却并没有完成 147 | It was all ready to go, but then actually the paper 148 | 149 | ``` 150 | 151 | ### 最终合成视频 152 | ![alt text](images/test_ted1.png) 153 | 154 | ![alt text](images/test_ted2.png) 155 | 156 | ![alt text](images/test_ted3.png) 157 | 158 | ### 查看日志 159 | ``` 160 | 原字幕:So in college, I was a government major. 161 | 翻译后字幕:所以在大学时,我是一个政府专业的学生。 162 | 反思后字幕:所以在大学时,我是政府专业的学生。 163 | =========== 164 | 原字幕:Which means I had to write a lot of papers. 
165 | 翻译后字幕:这意味着我必须写很多论文。 166 | 反思后字幕:这意味着我得写很多论文。 167 | =========== 168 | 原字幕:Now when a normal student writes a paper, 169 | 翻译后字幕:现在,当一个普通学生写论文时, 170 | 反思后字幕:现在,普通学生写论文时, 171 | =========== 172 | 原字幕:They might spread the work out a little like this. 173 | 翻译后字幕:他们可能会像这样分散工作。 174 | 反思后字幕:他们可能会这样分散工作。 175 | =========== 176 | 原字幕:So you know, you get started maybe a little slowly, 177 | 翻译后字幕:所以你知道,你可能会开始得有点慢, 178 | 反思后字幕:所以你知道,你可能会稍微慢一些开始, 179 | =========== 180 | 原字幕:But you get enough done in the first week, 181 | 翻译后字幕:但你在第一周能完成足够的工作, 182 | 反思后字幕:但你在第一周能够完成足够的工作, 183 | =========== 184 | 原字幕:That with some heavier days later on, 185 | 翻译后字幕:这样之后几天会比较忙, 186 | 反思后字幕:这样之后的一些繁忙日子, 187 | =========== 188 | 原字幕:Everything gets done and things stay civil. 189 | 翻译后字幕:所有事情都能完成,事情保持得体。 190 | 反思后字幕:一切都能完成,事情保持得当。 191 | =========== 192 | 原字幕:And I would wanna do that like that. 193 | 翻译后字幕:而我想要那样做。 194 | 反思后字幕:我也希望那样去做。 195 | =========== 196 | 原字幕:That would be the plan I would have. 197 | 翻译后字幕:那是我会有的计划。 198 | 反思后字幕:那将是我的计划。 199 | 200 | ``` 201 | 202 | ### 查看大模型调用情况 203 | 204 | 本次字幕的优化翻译调用了大模型,进入服务商后台查看 205 | 206 | 调用花费的Tokens很少,消耗金额仅仅 ¥0.01 (OpenAI 官方价格计费,使用一些中转站的逆向模型花费更少) 207 | 208 | ![alt text](images/test_spend.png) 209 | -------------------------------------------------------------------------------- /main.py: -------------------------------------------------------------------------------- 1 | """ 2 | Copyright (c) 2024 [VideoCaptioner] 3 | All rights reserved. 4 | 5 | Author: Weifeng 6 | """ 7 | 8 | import os 9 | import sys 10 | import traceback 11 | from datetime import datetime 12 | 13 | # Add project root directory to Python path 14 | project_root = os.path.dirname(os.path.abspath(__file__)) 15 | sys.path.append(project_root) 16 | 17 | # Fix Chinese path problem 18 | plugin_path = os.path.join( 19 | sys.prefix, "Lib", "site-packages", "PyQt5", "Qt5", "plugins" 20 | ) 21 | os.environ["QT_QPA_PLATFORM_PLUGIN_PATH"] = plugin_path 22 | 23 | # Delete pyd files app*.pyd 24 | for file in os.listdir(): 25 | if file.startswith("app") and file.endswith(".pyd"): 26 | os.remove(file) 27 | 28 | from PyQt5.QtCore import Qt, QTranslator 29 | from PyQt5.QtWidgets import QApplication 30 | from qfluentwidgets import FluentTranslator 31 | 32 | from app.common.config import cfg 33 | from app.config import RESOURCE_PATH 34 | from app.core.utils import logger 35 | from app.view.main_window import MainWindow 36 | 37 | logger = logger.setup_logger("VideoCaptioner") 38 | 39 | 40 | def exception_hook(exctype, value, tb): 41 | logger.error("".join(traceback.format_exception(exctype, value, tb))) 42 | sys.__excepthook__(exctype, value, tb) # 调用默认的异常处理 43 | 44 | 45 | sys.excepthook = exception_hook 46 | 47 | 48 | # Enable DPI Scale 49 | if cfg.get(cfg.dpiScale) == "Auto": 50 | QApplication.setHighDpiScaleFactorRoundingPolicy( 51 | Qt.HighDpiScaleFactorRoundingPolicy.PassThrough 52 | ) 53 | QApplication.setAttribute(Qt.AA_EnableHighDpiScaling) 54 | else: 55 | os.environ["QT_ENABLE_HIGHDPI_SCALING"] = "0" 56 | os.environ["QT_SCALE_FACTOR"] = str(cfg.get(cfg.dpiScale)) 57 | QApplication.setAttribute(Qt.AA_UseHighDpiPixmaps) 58 | 59 | app = QApplication(sys.argv) 60 | app.setAttribute(Qt.AA_DontCreateNativeWidgetSiblings) 61 | 62 | # Internationalization (Multi-language) 63 | locale = cfg.get(cfg.language).value 64 | translator = FluentTranslator(locale) 65 | myTranslator = QTranslator() 66 | translations_path = ( 67 | RESOURCE_PATH / "translations" / f"VideoCaptioner_{locale.name()}.qm" 68 | ) 69 | 
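# Load the app's own compiled Qt translation for the configured locale (e.g. resource/translations/VideoCaptioner_en_US.qm), installed alongside the qfluentwidgets FluentTranslator below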
myTranslator.load(str(translations_path)) 70 | app.installTranslator(translator) 71 | app.installTranslator(myTranslator) 72 | 73 | w = MainWindow() 74 | w.show() 75 | sys.exit(app.exec_()) 76 | -------------------------------------------------------------------------------- /requirements.txt: -------------------------------------------------------------------------------- 1 | requests 2 | openai 3 | retry 4 | PyQt5 5 | PyQt-Fluent-Widgets 6 | yt_dlp 7 | modelscope 8 | psutil 9 | sqlalchemy 10 | -------------------------------------------------------------------------------- /resource/assets/audio-thumbnail.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/WEIFENG2333/VideoCaptioner/295a40d0de071fef19439c3df2ca592a6d63de6a/resource/assets/audio-thumbnail.png -------------------------------------------------------------------------------- /resource/assets/default_bg.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/WEIFENG2333/VideoCaptioner/295a40d0de071fef19439c3df2ca592a6d63de6a/resource/assets/default_bg.png -------------------------------------------------------------------------------- /resource/assets/default_bg_landscape.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/WEIFENG2333/VideoCaptioner/295a40d0de071fef19439c3df2ca592a6d63de6a/resource/assets/default_bg_landscape.png -------------------------------------------------------------------------------- /resource/assets/default_bg_portrait.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/WEIFENG2333/VideoCaptioner/295a40d0de071fef19439c3df2ca592a6d63de6a/resource/assets/default_bg_portrait.png -------------------------------------------------------------------------------- /resource/assets/default_thumbnail.jpg: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/WEIFENG2333/VideoCaptioner/295a40d0de071fef19439c3df2ca592a6d63de6a/resource/assets/default_thumbnail.jpg -------------------------------------------------------------------------------- /resource/assets/donate_blue.jpg: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/WEIFENG2333/VideoCaptioner/295a40d0de071fef19439c3df2ca592a6d63de6a/resource/assets/donate_blue.jpg -------------------------------------------------------------------------------- /resource/assets/donate_green.jpg: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/WEIFENG2333/VideoCaptioner/295a40d0de071fef19439c3df2ca592a6d63de6a/resource/assets/donate_green.jpg -------------------------------------------------------------------------------- /resource/assets/logo-big.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/WEIFENG2333/VideoCaptioner/295a40d0de071fef19439c3df2ca592a6d63de6a/resource/assets/logo-big.png -------------------------------------------------------------------------------- /resource/assets/logo.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/WEIFENG2333/VideoCaptioner/295a40d0de071fef19439c3df2ca592a6d63de6a/resource/assets/logo.png 
-------------------------------------------------------------------------------- /resource/assets/qss/dark/demo.qss: -------------------------------------------------------------------------------- 1 | QWidget { 2 | border: 1px solid rgb(29, 29, 29); 3 | border-right: none; 4 | border-bottom: none; 5 | border-top-left-radius: 10px; 6 | background-color: rgb(39, 39, 39); 7 | } 8 | 9 | Window { 10 | background-color: rgb(32, 32, 32); 11 | } -------------------------------------------------------------------------------- /resource/assets/qss/light/demo.qss: -------------------------------------------------------------------------------- 1 | Widget > QLabel { 2 | font: 24px 'Segoe UI', 'Microsoft YaHei'; 3 | } 4 | 5 | Widget { 6 | border: 1px solid rgb(229, 229, 229); 7 | border-right: none; 8 | border-bottom: none; 9 | border-top-left-radius: 10px; 10 | background-color: rgb(249, 249, 249); 11 | } 12 | 13 | Window { 14 | background-color: rgb(243, 243, 243); 15 | } 16 | 17 | -------------------------------------------------------------------------------- /resource/subtitle_style/default.txt: -------------------------------------------------------------------------------- 1 | [V4+ Styles] 2 | Format: Name,Fontname,Fontsize,PrimaryColour,SecondaryColour,OutlineColour,BackColour,Bold,Italic,Underline,StrikeOut,ScaleX,ScaleY,Spacing,Angle,BorderStyle,Outline,Shadow,Alignment,MarginL,MarginR,MarginV,Encoding 3 | Style: Default,微软雅黑,42,&H005aff65,&H000000FF,&H00000000,&H00000000,-1,0,0,0,100,100,3.2,0,1,2.0,0,2,10,10,30,1,\q1 4 | Style: Secondary,微软雅黑,30,&H00ffffff,&H000000FF,&H00000000,&H00000000,-1,0,0,0,100,100,0.8,0,1,2.0,0,2,10,10,30,1,\q1 -------------------------------------------------------------------------------- /resource/subtitle_style/毕导科普风.txt: -------------------------------------------------------------------------------- 1 | [V4+ Styles] 2 | Format: Name,Fontname,Fontsize,PrimaryColour,SecondaryColour,OutlineColour,BackColour,Bold,Italic,Underline,StrikeOut,ScaleX,ScaleY,Spacing,Angle,BorderStyle,Outline,Shadow,Alignment,MarginL,MarginR,MarginV,Encoding 3 | Style: Default,微软雅黑,44,&H00e6e8f1,&H000000FF,&H00060606,&H00000000,-1,0,0,0,100,100,3.0,0,1,2.2,0,2,10,10,32,1,\q1 4 | Style: Secondary,微软雅黑,28,&H00ffffff,&H000000FF,&H00000000,&H00000000,-1,0,0,0,100,100,0.2,0,1,2.0,0,2,10,10,32,1,\q1 -------------------------------------------------------------------------------- /resource/subtitle_style/番剧可爱风.txt: -------------------------------------------------------------------------------- 1 | [V4+ Styles] 2 | Format: Name,Fontname,Fontsize,PrimaryColour,SecondaryColour,OutlineColour,BackColour,Bold,Italic,Underline,StrikeOut,ScaleX,ScaleY,Spacing,Angle,BorderStyle,Outline,Shadow,Alignment,MarginL,MarginR,MarginV,Encoding 3 | Style: Default,微软雅黑,46,&H00e6e8f1,&H000000FF,&H000987f5,&H00000000,-1,0,0,0,100,100,2.6,0,1,2.6,0,2,10,10,20,1,\q1 4 | Style: Secondary,微软雅黑,26,&H00ffffff,&H000000FF,&H000987f5,&H00000000,-1,0,0,0,100,100,0.0,0,1,2.0,0,2,10,10,20,1,\q1 -------------------------------------------------------------------------------- /resource/subtitle_style/竖屏.txt: -------------------------------------------------------------------------------- 1 | [V4+ Styles] 2 | Format: Name,Fontname,Fontsize,PrimaryColour,SecondaryColour,OutlineColour,BackColour,Bold,Italic,Underline,StrikeOut,ScaleX,ScaleY,Spacing,Angle,BorderStyle,Outline,Shadow,Alignment,MarginL,MarginR,MarginV,Encoding 3 | Style: 
Default,微软雅黑,34,&H005aff65,&H000000FF,&H00000000,&H00000000,-1,0,0,0,100,100,4.0,0,1,2.0,0,2,10,10,182,1,\q1 4 | Style: Secondary,微软雅黑,18,&H00ffffff,&H000000FF,&H00000000,&H00000000,-1,0,0,0,100,100,0.8,0,1,2.0,0,2,10,10,182,1,\q1 -------------------------------------------------------------------------------- /resource/translations/VideoCaptioner_en_US.qm: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/WEIFENG2333/VideoCaptioner/295a40d0de071fef19439c3df2ca592a6d63de6a/resource/translations/VideoCaptioner_en_US.qm -------------------------------------------------------------------------------- /resource/translations/VideoCaptioner_zh_CN.qm: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/WEIFENG2333/VideoCaptioner/295a40d0de071fef19439c3df2ca592a6d63de6a/resource/translations/VideoCaptioner_zh_CN.qm -------------------------------------------------------------------------------- /resource/translations/VideoCaptioner_zh_HK.qm: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/WEIFENG2333/VideoCaptioner/295a40d0de071fef19439c3df2ca592a6d63de6a/resource/translations/VideoCaptioner_zh_HK.qm -------------------------------------------------------------------------------- /streamlit_app/requirements.txt: -------------------------------------------------------------------------------- 1 | requests 2 | openai 3 | retry 4 | streamlit 5 | sqlalchemy 6 | --------------------------------------------------------------------------------