├── .gitignore ├── examples └── demo.mp4 ├── whisper-bin-x64 ├── SDL2.dll ├── lsp.exe ├── main.exe ├── talk.exe ├── bench.exe ├── server.exe ├── stream.exe ├── wchess.exe ├── command.exe ├── quantize.exe ├── whisper.dll └── talk-llama.exe ├── .prettierrc.json ├── .env.local ├── service ├── deeplx.js ├── ollama.js ├── baidu.js └── volc.js ├── package.json ├── LICENSE ├── config.js ├── index.js ├── translate.js ├── utils.js └── README.md /.gitignore: -------------------------------------------------------------------------------- 1 | node_modules 2 | .idea 3 | yarn.lock 4 | .env 5 | whisper.cpp 6 | -------------------------------------------------------------------------------- /examples/demo.mp4: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/buxuku/VideoSubtitleGenerator/HEAD/examples/demo.mp4 -------------------------------------------------------------------------------- /whisper-bin-x64/SDL2.dll: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/buxuku/VideoSubtitleGenerator/HEAD/whisper-bin-x64/SDL2.dll -------------------------------------------------------------------------------- /whisper-bin-x64/lsp.exe: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/buxuku/VideoSubtitleGenerator/HEAD/whisper-bin-x64/lsp.exe -------------------------------------------------------------------------------- /whisper-bin-x64/main.exe: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/buxuku/VideoSubtitleGenerator/HEAD/whisper-bin-x64/main.exe -------------------------------------------------------------------------------- /whisper-bin-x64/talk.exe: -------------------------------------------------------------------------------- 
https://raw.githubusercontent.com/buxuku/VideoSubtitleGenerator/HEAD/whisper-bin-x64/talk.exe -------------------------------------------------------------------------------- /whisper-bin-x64/bench.exe: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/buxuku/VideoSubtitleGenerator/HEAD/whisper-bin-x64/bench.exe -------------------------------------------------------------------------------- /whisper-bin-x64/server.exe: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/buxuku/VideoSubtitleGenerator/HEAD/whisper-bin-x64/server.exe -------------------------------------------------------------------------------- /whisper-bin-x64/stream.exe: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/buxuku/VideoSubtitleGenerator/HEAD/whisper-bin-x64/stream.exe -------------------------------------------------------------------------------- /whisper-bin-x64/wchess.exe: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/buxuku/VideoSubtitleGenerator/HEAD/whisper-bin-x64/wchess.exe -------------------------------------------------------------------------------- /whisper-bin-x64/command.exe: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/buxuku/VideoSubtitleGenerator/HEAD/whisper-bin-x64/command.exe -------------------------------------------------------------------------------- /whisper-bin-x64/quantize.exe: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/buxuku/VideoSubtitleGenerator/HEAD/whisper-bin-x64/quantize.exe -------------------------------------------------------------------------------- /whisper-bin-x64/whisper.dll: 
-------------------------------------------------------------------------------- https://raw.githubusercontent.com/buxuku/VideoSubtitleGenerator/HEAD/whisper-bin-x64/whisper.dll -------------------------------------------------------------------------------- /whisper-bin-x64/talk-llama.exe: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/buxuku/VideoSubtitleGenerator/HEAD/whisper-bin-x64/talk-llama.exe -------------------------------------------------------------------------------- /.prettierrc.json: -------------------------------------------------------------------------------- 1 | { 2 | "printWidth": 120, 3 | "singleQuote": true, 4 | "tabWidth": 2, 5 | "trailingComma": "all", 6 | "arrowParens": "always" 7 | } 8 | -------------------------------------------------------------------------------- /.env.local: -------------------------------------------------------------------------------- 1 | BAIDU_KEY= 2 | BAIDU_SECRET= 3 | VOLC_KEY= 4 | VOLC_SECRET= 5 | 6 | OLLAMA_API_URL=http://localhost:11434 7 | OLLAMA_MODEL_NAME=llama3 8 | OLLAMA_PROMPT=Please translate the following content from ${sourceLanguage} to ${targetLanguage}, only return the translation result can be. 
/**
 * Translate a piece of text through a locally running DeepLX server
 * (http://localhost:1188/translate).
 *
 * Source and target languages come from `translateConfig`.
 *
 * @param {string} query - Text to translate.
 * @returns {Promise<string>} The translated text; `''` when the response holds
 *   no translation; the literal string `'error'` when the request fails (kept
 *   so existing callers continue to work).
 */
export default async function deeplx(query) {
  try {
    const res = await axios.post('http://localhost:1188/translate', {
      text: query,
      source_lang: translateConfig.sourceLanguage,
      target_lang: translateConfig.targetLanguage,
    });
    const body = res?.data;
    // Per the DeepLX response format the primary translation is in `data`;
    // `alternatives` only carries optional extra candidates and may be empty,
    // so it is used purely as a fallback.
    return body?.data || body?.alternatives?.[0] || '';
  } catch (error) {
    // Keep the best-effort return value, but do not hide the failure.
    console.error('DeepLX translation request failed:', error?.message ?? error);
    return 'error';
  }
}
/**
 * Translate a piece of text with the Baidu translate HTTP API.
 *
 * Credentials are read from the environment variables BAIDU_KEY (app id) and
 * BAIDU_SECRET (secret key); languages come from `translateConfig`.
 *
 * @param {string} query - Text to translate.
 * @returns {Promise<string>} The first translated segment, or `''` when the
 *   response contains no translation.
 * @throws {Error} When the credentials are not configured, or when the HTTP
 *   request itself fails (axios rejection).
 */
export default async function baidu(query) {
  if (!appid || !key) {
    // The message must name the variables this module actually reads.
    const message = '请先配置环境变量 BAIDU_KEY 和 BAIDU_SECRET';
    console.log(message);
    throw new Error(message);
  }
  const salt = Date.now();
  // Request signature: md5 over appid + query + salt + secret.
  const sign = crypto.createHash('md5').update(`${appid}${query}${salt}${key}`).digest('hex');
  const data = {
    q: query,
    appid,
    salt,
    from: translateConfig.sourceLanguage,
    to: translateConfig.targetLanguage,
    sign,
  };
  const res = await axios.post('https://fanyi-api.baidu.com/api/trans/vip/translate', data, {
    headers: {
      'Content-Type': 'application/x-www-form-urlencoded',
    },
  });
  const payload = res?.data;
  // API-level failures (bad signature, rate limit, ...) are reported in the
  // body rather than via the HTTP status; log them instead of silently
  // producing an empty subtitle line. '52000' is assumed to be the documented
  // success code — confirm against the Baidu error code table.
  if (payload?.error_code && String(payload.error_code) !== '52000') {
    console.error('Baidu translate error:', payload.error_code, payload.error_msg ?? '');
  }
  return payload?.trans_result?.[0]?.dst || '';
}
5 | Permission is hereby granted, free of charge, to any person obtaining a copy 6 | of this software and associated documentation files (the "Software"), to deal 7 | in the Software without restriction, including without limitation the rights 8 | to use, copy, modify, merge, publish, distribute, sublicense, and/or sell 9 | copies of the Software, and to permit persons to whom the Software is 10 | furnished to do so, subject to the following conditions: 11 | 12 | The above copyright notice and this permission notice shall be included in all 13 | copies or substantial portions of the Software. 14 | 15 | THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR 16 | IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, 17 | FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE 18 | AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER 19 | LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, 20 | OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE 21 | SOFTWARE. 
/**
 * Translate a piece of text with the Volcengine `TranslateText` API.
 *
 * Credentials are read from VOLC_KEY / VOLC_SECRET; languages come from
 * `translateConfig`.
 *
 * @param {string} query - Text to translate.
 * @returns {Promise<string>} The translated text; `''` when the response holds
 *   no translation; the literal string `'error'` when the request fails (kept
 *   so existing callers continue to work).
 * @throws {Error} When the credentials are not configured.
 */
export default async function translate(query) {
  if (!accessKeyId || !secretKey) {
    const message = '请先配置环境变量 VOLC_KEY 和 VOLC_SECRET';
    console.log(message);
    throw new Error(message);
  }
  const postBody = {
    SourceLanguage: translateConfig.sourceLanguage,
    TargetLanguage: translateConfig.targetLanguage,
    TextList: [query],
  };
  try {
    const res = await fetchApi(postBody, {});
    const translation = res.TranslationList?.[0]?.Translation;
    if (translation == null) {
      // Returning undefined would be rendered as the text "undefined" in the
      // subtitle file; report the odd response and fall back to an empty string
      // like the other providers do.
      console.error('Volcengine response contained no translation:', res);
      return '';
    }
    return translation;
  } catch (error) {
    // Keep the best-effort return value, but do not hide the failure.
    console.error('Volcengine translation request failed:', error?.message ?? error);
    return 'error';
  }
}
| // 支持的翻译服务商 28 | export const supportedService = { 29 | baidu: Symbol.for('baidu'), 30 | volc: Symbol.for('volc'), 31 | deeplx: Symbol.for('deeplx'), 32 | ollama: Symbol.for('ollama'), 33 | }; 34 | 35 | // 当前使用的翻译服务商,如果不配置,则不执行翻译流程 36 | export const translateServiceProvider = supportedService.ollama; 37 | 38 | // 翻译结果字幕文件内容配置 39 | export const contentTemplateRuleMap = { 40 | onlyTranslate: Symbol.for('onlyTranslate'), // 只输出翻译内容 41 | sourceAndTranslate: Symbol.for('sourceAndTranslate'), // 输出原始字幕和翻译字幕, 原始字幕在上面 42 | translateAndSource: Symbol.for('translateAndSource'), // 输出翻译后的字幕和原始字幕, 翻译字幕在上面 43 | }; 44 | 45 | // 字幕文件内容模板 支持 ${sourceContent}, ${targetContent} 变量 46 | export const contentTemplate = { 47 | [contentTemplateRuleMap.onlyTranslate]: '${targetContent}\n\n', 48 | [contentTemplateRuleMap.sourceAndTranslate]: '${sourceContent}\n${targetContent}\n\n', 49 | [contentTemplateRuleMap.translateAndSource]: '${targetContent}\n${sourceContent}\n\n', 50 | }; 51 | 52 | // 翻译内容输出模板规则,默认只输出翻译内容, 支持 contentTemplateRuleMap 内的规则 53 | export const contentTemplateRule = contentTemplateRuleMap.onlyTranslate; 54 | 55 | // 原始字幕文件保存命名规则 支持 fileName, sourceLanguage, targetLanguage 变量 56 | // 如果为空,将不保存原始字幕文件 57 | // eg: '${fileName}.${sourceLanguage}' -> 对于视频名为 text.mp4 的英文视频原始字幕文件名为 text.en.srt 58 | export const sourceSrtSaveName = '${fileName}.${sourceLanguage}'; 59 | 60 | // 翻译后的字幕文件保存命名规则 支持 fileName, sourceLanguage, targetLanguage 变量 61 | export const targetSrtSaveName = '${fileName}.${targetLanguage}'; 62 | -------------------------------------------------------------------------------- /index.js: -------------------------------------------------------------------------------- 1 | import fs from 'fs'; 2 | import path from 'path'; 3 | import { execSync } from 'child_process'; 4 | import { config } from 'dotenv'; 5 | import app from './translate.js'; 6 | import { sourceSrtSaveName, translateServiceProvider, videoDir, translateConfig ,whisperModel } from './config.js'; 7 | 
// Entry script: for every supported media file in `videoDir`, extract a 16 kHz
// mono WAV, run whisper.cpp to produce an SRT file, optionally translate it,
// then clean up the intermediate files.
config();

const { log, error } = console;

// Loaded locally so the whisper binary can be started with an argument array
// and no shell: file names containing quotes, `$` or backticks can then neither
// break the command line nor be interpreted by the shell.
const { execFileSync } = await import('child_process');

const SUPPORTED_VIDEO_FORMATS = ['mp4', 'avi', 'mov', 'mkv', 'flv', 'wmv', 'webm', 'm4a'];

await installWhisper();
fs.readdir(videoDir, async (err, files) => {
  if (err) {
    error(err);
    return;
  }
  // Files are handled strictly one after another.
  for (const file of files) {
    // path.extname yields '' for names without a real extension, so a file
    // literally called "mp4" is no longer mistaken for a video.
    const extension = path.extname(file);
    if (!SUPPORTED_VIDEO_FORMATS.includes(extension.slice(1).toLowerCase())) {
      continue;
    }
    log('开始处理文件:', file);
    try {
      const fileName = path.basename(file, extension);
      const wavFile = `${videoDir}/${fileName}.wav`;
      // Output path WITHOUT the ".srt" suffix; whisper appends it itself.
      const srtFile = renderFilePath(sourceSrtSaveName, fileName);
      await extractAudio(`${videoDir}/${file}`, wavFile);
      log('完成音频文件提取, 准备生成字幕文件');
      // Windows uses the bundled prebuilt binary, other platforms the locally
      // built whisper.cpp executable.
      const mainPath = path.join('./', isWin32() ? 'whisper-bin-x64/main.exe' : 'whisper.cpp/main');
      execFileSync(mainPath, [
        '-m',
        `./whisper.cpp/models/ggml-${whisperModel}.bin`,
        '-f',
        wavFile,
        '-osrt',
        '-of',
        srtFile,
        '-l',
        translateConfig.sourceLanguage,
      ]);
      log('完成字幕文件生成, 准备开始翻译');
      if (translateServiceProvider) {
        await app(videoDir, fileName, `${srtFile}.srt`);
        // Only report a finished translation when one actually ran.
        log('翻译完成');
      }
      fs.unlink(wavFile, (unlinkErr) => {
        if (unlinkErr) {
          error(unlinkErr);
        } else {
          log('删除wav文件', wavFile);
        }
      });
      if (!sourceSrtSaveName) {
        // No save name configured: the source SRT was only a temp file.
        fs.unlink(`${srtFile}.srt`, (unlinkErr) => {
          if (unlinkErr) {
            error(unlinkErr);
          }
        });
      }
    } catch (processingError) {
      // One failing file must not stop the rest of the batch.
      log('执行出错', processingError);
    }
  }
});
/**
 * Split raw SRT text into cues.
 *
 * Cues are separated by blank lines; within a cue the first line is the index,
 * the second the time range, and every remaining line is subtitle text. Both
 * LF and CRLF line endings are accepted. Cues without any text are dropped.
 *
 * @param {string} content - Full text of an .srt file.
 * @returns {{id: string, startEndTime: string, sourceContent: string}[]}
 */
function parseSrt(content) {
  const cues = [];
  let block = [];
  const flush = () => {
    if (block.length >= 3) {
      const [id, startEndTime, ...textLines] = block;
      cues.push({ id, startEndTime, sourceContent: textLines.join('\n') });
    }
    block = [];
  };
  for (const line of content.split(/\r?\n/)) {
    if (line === '') {
      flush();
    } else {
      block.push(line);
    }
  }
  flush();
  return cues;
}

/**
 * Load the module of the configured translation provider and return a function
 * that translates a single string with it.
 *
 * @returns {Promise<(text: string) => Promise<string>>}
 */
async function resolveTranslator() {
  switch (translateServiceProvider) {
    case supportedService.volc: {
      const { default: volc } = await import('./service/volc.js');
      return (text) => volc(text);
    }
    case supportedService.baidu: {
      const { default: baidu } = await import('./service/baidu.js');
      return (text) => baidu(text);
    }
    case supportedService.deeplx: {
      const { default: deeplx } = await import('./service/deeplx.js');
      return (text) => deeplx(text);
    }
    case supportedService.ollama: {
      const { default: ollama } = await import('./service/ollama.js');
      return (text) => ollama(text, translateConfig.sourceLanguage, translateConfig.targetLanguage);
    }
    default:
      return async () => 'no supported service';
  }
}

/**
 * Translate an SRT file cue by cue and write the translated subtitle file.
 *
 * The output name is rendered from `targetSrtSaveName`, the cue body from the
 * template selected by `contentTemplateRule`. An existing output file is
 * overwritten, so running the tool twice no longer duplicates every cue.
 * Nothing is written when the source contains no cues.
 *
 * @param {string} folder - Directory the translated file is written to.
 * @param {string} fileName - Video file name without extension.
 * @param {string} absolutePath - Path of the source .srt file to read.
 * @returns {Promise<void>}
 * @throws Re-throws (after logging) any read, translation or write error.
 */
export default async function translate(folder, fileName, absolutePath) {
  const renderContentTemplate = contentTemplate[contentTemplateRule];
  try {
    const cues = parseSrt(fs.readFileSync(absolutePath, 'utf8'));
    if (cues.length === 0) {
      return;
    }
    const translateText = await resolveTranslator();
    const rendered = [];
    // Deliberately sequential: one request at a time, in cue order.
    for (const cue of cues) {
      const targetContent = await translateText(cue.sourceContent);
      const item = { ...cue, targetContent };
      rendered.push(`${item.id}\n${item.startEndTime}\n${renderTemplate(renderContentTemplate, item)}`);
    }
    const fileSave = path.join(folder, `${renderTemplate(targetSrtSaveName, { fileName, ...translateConfig })}.srt`);
    // Single write after all cues succeeded: no partial or duplicated output.
    fs.writeFileSync(fileSave, rendered.join(''));
  } catch (error) {
    console.error(error);
    throw error;
  }
}
/**
 * Make sure whisper.cpp is ready to use: clone the repository, download the
 * configured model and, on macOS/Linux, build the `main` executable. Each step
 * is skipped when its result already exists on disk.
 *
 * Windows uses the model download .cmd script and does not compile anything
 * here. macOS and Linux share the bash download script and the `make` build.
 *
 * @returns {Promise<void>}
 * @throws {Error} When the model is missing and the platform has no known
 *   download script, or when any spawned command exits with a non-zero code.
 */
export const installWhisper = async () => {
  const repoUrl = 'https://github.com/ggerganov/whisper.cpp';
  const localPath = path.join('./', 'whisper.cpp');
  const modelPath = path.join('./', `whisper.cpp/models/ggml-${whisperModel}.bin`);
  const mainPath = path.join('./', 'whisper.cpp/main');
  // Platforms that use the bash download script and build from source.
  const isUnixLike = isDarwin() || os.platform() === 'linux';

  if (!fs.existsSync(localPath)) {
    console.log('开始克隆 whisper.cpp 仓库');
    // Explicit target directory so the clone always lands where it is checked for.
    await runCommand('git', ['clone', repoUrl, localPath]);
  }
  if (!fs.existsSync(modelPath)) {
    console.log('正在安装 whisper.cpp 模型');
    if (isUnixLike) {
      const script = path.join('./', './whisper.cpp/models/download-ggml-model.sh');
      await runCommand('bash', [script, whisperModel]);
    } else if (isWin32()) {
      const script = path.join('./', 'whisper.cpp/models/download-ggml-model.cmd');
      await runCommand('cmd.exe', ['/c', script, whisperModel]);
    } else {
      throw new Error('platform does not support! ');
    }
  }
  if (isUnixLike && !fs.existsSync(mainPath)) {
    // Build whisper.cpp
    console.log('正在编译 whisper.cpp');
    await runCommand('make', ['-C', './whisper.cpp']);
  }
};
**百度/火山/deeplx** 的翻译API来实现的,这些 API 的使用需要申请对应的 KEY 和 SECRET, 因此,如果你需要使用到翻译服务,需要先申请一个 API 。 64 | 65 | 具体的申请方法,可以参考 https://bobtranslate.com/service/ , 感谢 [Bob](https://bobtranslate.com/) 这款优秀的软件。 66 | 67 | ## 🔦使用 68 | 69 | 1️⃣ 克隆本项目在本地 70 | 71 | ```shell 72 | git clone https://github.com/buxuku/VideoSubtitleGenerator.git 73 | ``` 74 | 75 | 2️⃣ 在项目中执行 `yarn install` 或者 `npm install` 76 | 77 | ```shell 78 | cd VideoSubtitleGenerator 79 | yarn install 80 | ``` 81 | 82 | 3️⃣ 如果需要翻译,复制 `.env.local` 为 `.env` 在项目根目录,该文件用于配置翻译相关的 KEY 和 SECRET, 例如 83 | 84 | `BAIDU_` 开头的为百度翻译的配置 85 | 86 | `VOLC_` 开头的为火山翻译的配置 87 | 88 | `OLLAMA_` 开头的为 ollama 翻译的配置 89 | 90 | ```shell 91 | BAIDU_KEY=2023120600190xxxx 92 | BAIDU_SECRET=PIbyKjEr1y8u18RZxxxx 93 | VOLC_KEY=AKLTMDUwZjY4MTZkNTFmN4M3ZjlkMzlmYzAzMTdlMDExxxx 94 | VOLC_SECRET=T0dRMllUUmpPREUzWWpjNE5HVm2Zamt4TlRObU9EUm1ORFk0T1dGbExxxx== 95 | 96 | OLLAMA_API_URL=http://localhost:11434 97 | OLLAMA_MODEL_NAME=llama3 98 | OLLAMA_PROMPT=Please translate the following content from ${sourceLanguage} to ${targetLanguage}, only return the translation result can be. 
\n ${content} 99 | ``` 100 | 101 | 4️⃣ 其余的配置在 `config.js` 文件中进行配置,每条配置均有详细的注释 102 | 103 | ```js 104 | // 视频文件所在目录 如 /Users/demo/video 105 | export const videoDir = './examples'; 106 | 107 | /* 108 | whisper.cpp 模型 支持以下 109 | tiny.en 110 | tiny 111 | base.en 112 | base 113 | small.en 114 | small 115 | medium.en 116 | medium 117 | large-v1 118 | large-v2 119 | large-v3 120 | */ 121 | export const whisperModel = 'base.en'; 122 | 123 | // 翻译配置,视频原语言与翻译后的目标语言 124 | export const translateConfig = { 125 | sourceLanguage: 'en', 126 | targetLanguage: 'zh', 127 | }; 128 | 129 | // 支持的翻译服务商 130 | export const supportedService = { 131 | baidu: Symbol.for('baidu'), 132 | volc: Symbol.for('volc'), 133 | deeplx: Symbol.for('deeplx'), 134 | ollama: Symbol.for('ollama'), 135 | }; 136 | 137 | // 当前使用的翻译服务商,如果不配置,则不执行翻译流程 138 | export const translateServiceProvider = supportedService.volc; 139 | 140 | // 翻译结果字幕文件内容配置 141 | export const contentTemplateRuleMap = { 142 | onlyTranslate: Symbol.for('onlyTranslate'), // 只输出翻译内容 143 | sourceAndTranslate: Symbol.for('sourceAndTranslate'), // 输出原始字幕和翻译字幕, 原始字幕在上面 144 | translateAndSource: Symbol.for('translateAndSource'), // 输出翻译后的字幕和原始字幕, 翻译字幕在上面 145 | }; 146 | 147 | // 字幕文件内容模板 支持 ${sourceContent}, ${targetContent} 变量 148 | export const contentTemplate = { 149 | [contentTemplateRuleMap.onlyTranslate]: '${targetContent}\n\n', 150 | [contentTemplateRuleMap.sourceAndTranslate]: '${sourceContent}\n${targetContent}\n\n', 151 | [contentTemplateRuleMap.translateAndSource]: '${targetContent}\n${sourceContent}\n\n', 152 | }; 153 | 154 | // 翻译内容输出模板规则,默认只输出翻译内容, 支持 contentTemplateRuleMap 内的规则 155 | export const contentTemplateRule = contentTemplateRuleMap.onlyTranslate; 156 | 157 | // 原始字幕文件保存命名规则 支持 fileName, sourceLanguage, targetLanguage 变量 158 | // 如果为空,将不保存原始字幕文件 159 | // eg: '${fileName}.${sourceLanguage}' -> 对于视频名为 text.mp4 的英文视频原始字幕文件名为 text.en.srt 160 | export const sourceSrtSaveName = '${fileName}.${sourceLanguage}'; 161 | 162 | // 
翻译后的字幕文件保存命名规则 支持 fileName, sourceLanguage, targetLanguage 变量 163 | export const targetSrtSaveName = '${fileName}.${targetLanguage}'; 164 | ``` 165 | 166 | 这里面的字幕内容和字幕文件名可以自定义配置,同时会将里面的 `${xxx}`变量转为对应的字符 167 | 168 | 核心配置主要为以下几项 169 | 170 | #### 选择翻译服务商 171 | 172 | 173 | ```js 174 | // 当前使用的翻译服务商,如果不配置,则不执行翻译流程 175 | export const translateServiceProvider = supportedService.volc; 176 | ``` 177 | 178 | 支持 179 | - supportedService.volc 火山翻译 180 | - supportedService.baidu 百度翻译 181 | - supportedService.deeplx deeplx 翻译 182 | - supportedService.ollama ollama 翻译 183 | 184 | #### 翻译结果的配置 185 | 186 | ```js 187 | // 翻译内容输出模板规则,默认只输出翻译内容, 支持 contentTemplateRuleMap 内的规则 188 | export const contentTemplateRule = contentTemplateRuleMap.onlyTranslate; 189 | ``` 190 | 191 | 支持以下几种设置模式 192 | 193 | ##### contentTemplateRuleMap.onlyTranslate 194 | 195 | 只保留翻译后的结果内容,如 196 | 197 | ``` 198 | 1 199 | 00:00:00,000 --> 00:00:09,360 200 | 我要和你们谈谈我这本书里的一些东西,我希望能 201 | 202 | 2 203 | 00:00:09,360 --> 00:00:13,680 204 | 和你们已经听到的东西产生共鸣,我会试着建立一些联系。 205 | ``` 206 | 207 | ##### contentTemplateRuleMap.sourceAndTranslate 208 | 209 | 保留原字幕和翻译之后的字幕,且原字幕在上面 210 | 211 | ``` 212 | 1 213 | 00:00:00,000 --> 00:00:09,360 214 | I'm going to talk to you about some stuff that's in this book of mine that I hope will 215 | 我要和你们谈谈我这本书里的一些东西,我希望能 216 | 217 | 2 218 | 00:00:09,360 --> 00:00:13,680 219 | resonate with other things that you've already heard and I'll try to make some connections 220 | 和你们已经听到的东西产生共鸣,我会试着建立一些联系。 221 | ``` 222 | 223 | ##### contentTemplateRuleMap.translateAndSource 224 | 225 | 保留原字幕和翻译之后的字幕,且翻译字幕在上面 226 | 227 | ``` 228 | 1 229 | 00:00:00,000 --> 00:00:09,360 230 | 我要和你们谈谈我这本书里的一些东西,我希望能 231 | I'm going to talk to you about some stuff that's in this book of mine that I hope will 232 | 233 | 2 234 | 00:00:09,360 --> 00:00:13,680 235 | 和你们已经听到的东西产生共鸣,我会试着建立一些联系。 236 | resonate with other things that you've already heard and I'll try to make some connections 237 | ``` 238 | 239 | 5️⃣ 
配置好该文件之后,执行 `yarn start` 或者 `npm start`, 首次执行会下载 `whisper.cpp` 和配置的对应的模型文件,会比较慢一些。下次执行将会跳过该流程 240 | 241 | 如果在使用过程中遇到啥问题,可以提 Issue 或者通过 Discussions 进行讨论 242 | 243 | [![Powered by DartNode](https://dartnode.com/branding/DN-Open-Source-sm.png)](https://dartnode.com "Powered by DartNode - Free VPS for Open Source") 244 | --------------------------------------------------------------------------------