├── .gitignore
├── examples
    └── demo.mp4
├── whisper-bin-x64
    ├── SDL2.dll
    ├── lsp.exe
    ├── main.exe
    ├── talk.exe
    ├── bench.exe
    ├── server.exe
    ├── stream.exe
    ├── wchess.exe
    ├── command.exe
    ├── quantize.exe
    ├── whisper.dll
    └── talk-llama.exe
├── .prettierrc.json
├── .env.local
├── service
    ├── deeplx.js
    ├── ollama.js
    ├── baidu.js
    └── volc.js
├── package.json
├── LICENSE
├── config.js
├── index.js
├── translate.js
├── utils.js
└── README.md


/.gitignore:
--------------------------------------------------------------------------------
1 | node_modules
2 | .idea
3 | yarn.lock
4 | .env
5 | whisper.cpp
6 | 


--------------------------------------------------------------------------------
/examples/demo.mp4:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/buxuku/VideoSubtitleGenerator/HEAD/examples/demo.mp4


--------------------------------------------------------------------------------
/whisper-bin-x64/SDL2.dll:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/buxuku/VideoSubtitleGenerator/HEAD/whisper-bin-x64/SDL2.dll


--------------------------------------------------------------------------------
/whisper-bin-x64/lsp.exe:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/buxuku/VideoSubtitleGenerator/HEAD/whisper-bin-x64/lsp.exe


--------------------------------------------------------------------------------
/whisper-bin-x64/main.exe:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/buxuku/VideoSubtitleGenerator/HEAD/whisper-bin-x64/main.exe


--------------------------------------------------------------------------------
/whisper-bin-x64/talk.exe:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/buxuku/VideoSubtitleGenerator/HEAD/whisper-bin-x64/talk.exe


--------------------------------------------------------------------------------
/whisper-bin-x64/bench.exe:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/buxuku/VideoSubtitleGenerator/HEAD/whisper-bin-x64/bench.exe


--------------------------------------------------------------------------------
/whisper-bin-x64/server.exe:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/buxuku/VideoSubtitleGenerator/HEAD/whisper-bin-x64/server.exe


--------------------------------------------------------------------------------
/whisper-bin-x64/stream.exe:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/buxuku/VideoSubtitleGenerator/HEAD/whisper-bin-x64/stream.exe


--------------------------------------------------------------------------------
/whisper-bin-x64/wchess.exe:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/buxuku/VideoSubtitleGenerator/HEAD/whisper-bin-x64/wchess.exe


--------------------------------------------------------------------------------
/whisper-bin-x64/command.exe:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/buxuku/VideoSubtitleGenerator/HEAD/whisper-bin-x64/command.exe


--------------------------------------------------------------------------------
/whisper-bin-x64/quantize.exe:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/buxuku/VideoSubtitleGenerator/HEAD/whisper-bin-x64/quantize.exe


--------------------------------------------------------------------------------
/whisper-bin-x64/whisper.dll:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/buxuku/VideoSubtitleGenerator/HEAD/whisper-bin-x64/whisper.dll


--------------------------------------------------------------------------------
/whisper-bin-x64/talk-llama.exe:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/buxuku/VideoSubtitleGenerator/HEAD/whisper-bin-x64/talk-llama.exe


--------------------------------------------------------------------------------
/.prettierrc.json:
--------------------------------------------------------------------------------
1 | {
2 |   "printWidth": 120,
3 |   "singleQuote": true,
4 |   "tabWidth": 2,
5 |   "trailingComma": "all",
6 |   "arrowParens": "always"
7 | }
8 | 


--------------------------------------------------------------------------------
/.env.local:
--------------------------------------------------------------------------------
1 | BAIDU_KEY=
2 | BAIDU_SECRET=
3 | VOLC_KEY=
4 | VOLC_SECRET=
5 | 
6 | OLLAMA_API_URL=http://localhost:11434
7 | OLLAMA_MODEL_NAME=llama3
8 | OLLAMA_PROMPT=Please translate the following content from ${sourceLanguage} to ${targetLanguage}, only return the translation result can be. \n ${content}


--------------------------------------------------------------------------------
/service/deeplx.js:
--------------------------------------------------------------------------------
 1 | import axios from 'axios';
 2 | import {translateConfig} from "../config.js";
 3 | export default async function deeplx(query) {
 4 |     try {
 5 |         const res = await axios.post('http://localhost:1188/translate',  {text: query, source_lang: translateConfig.sourceLanguage, target_lang: translateConfig.targetLanguage});
 6 |         return res?.data?.alternatives?.[0] || '';
 7 |     } catch (error) {
 8 |         return 'error';
 9 |     }
10 | }
11 | 


--------------------------------------------------------------------------------
/package.json:
--------------------------------------------------------------------------------
 1 | {
 2 |   "name": "VideoSubtitleGenerator",
 3 |   "version": "1.0.0",
 4 |   "type": "module",
 5 |   "description": "Generate subtitle files for video files in batches and translate them into other languages.",
 6 |   "main": "index.js",
 7 |   "scripts": {
 8 |     "test": "echo \"Error: no test specified\" && exit 1",
 9 |     "start": "node index.js"
10 |   },
11 |   "keywords": [],
12 |   "author": "buxuku",
13 |   "license": "MIT",
14 |   "dependencies": {
15 |     "@ffmpeg-installer/ffmpeg": "^1.1.0",
16 |     "@volcengine/openapi": "^1.14.0",
17 |     "axios": "^1.6.2",
18 |     "crypto": "^1.0.1",
19 |     "dotenv": "^16.4.5",
20 |     "fluent-ffmpeg": "^2.1.2"
21 |   }
22 | }


--------------------------------------------------------------------------------
/service/ollama.js:
--------------------------------------------------------------------------------
 1 | import axios from 'axios';
 2 | import { renderTemplate } from '../utils.js';
 3 | import { config } from 'dotenv';
 4 | config();
 5 | 
 6 | const apiUrl = process.env.OLLAMA_API_URL;
 7 | const modelName = process.env.OLLAMA_MODEL_NAME;
 8 | const prompt = process.env.OLLAMA_PROMPT;
 9 | 
/**
 * Translate text with a model served through Ollama's `/api/generate` endpoint.
 *
 * The prompt is built by rendering the `OLLAMA_PROMPT` template with
 * `sourceLanguage`, `targetLanguage` and `content`.
 *
 * @param {string} text - Text to translate.
 * @param {string} sourceLanguage - Language of `text`.
 * @param {string} targetLanguage - Language to translate into.
 * @returns {Promise<string>} The model's answer with surrounding whitespace trimmed.
 * @throws {Error} When the Ollama settings are missing, when the HTTP request fails,
 *   or when the reply carries no `response` field (the server's `error` text is used
 *   as the message when present).
 */
export default async function translateWithOllama(
  text,
  sourceLanguage,
  targetLanguage
) {
  // Without this guard a missing setting silently becomes the literal text "undefined"
  // in the request URL or in the prompt sent to the model.
  if (!apiUrl || !modelName || !prompt) {
    throw new Error('Missing Ollama configuration: set OLLAMA_API_URL, OLLAMA_MODEL_NAME and OLLAMA_PROMPT');
  }

  const renderedPrompt = renderTemplate(prompt, {
    sourceLanguage,
    targetLanguage,
    content: text
  });

  // Tolerate a base URL configured with a trailing slash.
  const endpoint = `${String(apiUrl).replace(/\/+$/, '')}/api/generate`;
  const response = await axios.post(endpoint, {
    model: modelName,
    prompt: renderedPrompt,
    stream: false
  });

  const answer = response?.data?.response;
  if (!answer) {
    throw new Error(response?.data?.error || 'Unexpected response from Ollama');
  }
  return answer.trim();
}


--------------------------------------------------------------------------------
/service/baidu.js:
--------------------------------------------------------------------------------
 1 | import crypto from 'crypto';
 2 | import axios from 'axios';
 3 | import { config } from 'dotenv';
 4 | config();
 5 | import { translateConfig } from '../config.js';
 6 | 
 7 | const appid = process.env.BAIDU_KEY;
 8 | const key = process.env.BAIDU_SECRET;
 9 | 
/**
 * Translate text with the Baidu general translation API.
 *
 * Credentials are read from the BAIDU_KEY (app id) and BAIDU_SECRET (secret key)
 * environment variables; languages come from `translateConfig`.
 *
 * @param {string} query - Text to translate.
 * @returns {Promise<string>} The translated text, or `''` when the reply contains no
 *   translation (an API error code in the reply is logged).
 * @throws {Error} When the credentials are not configured or the HTTP request fails.
 */
export default async function baidu(query) {
  if (!appid || !key) {
    // The variables actually read by this module are BAIDU_KEY and BAIDU_SECRET.
    const message = '请先配置环境变量 BAIDU_KEY 和 BAIDU_SECRET';
    console.log(message);
    throw new Error(message);
  }

  const salt = Date.now();
  // Request signature: md5 over appid + query + salt + secret.
  const sign = crypto.createHash('md5').update(`${appid}${query}${salt}${key}`).digest('hex');

  // Encode the form explicitly so the body always matches the declared content type.
  const form = new URLSearchParams({
    q: query,
    appid,
    salt: String(salt),
    from: translateConfig.sourceLanguage,
    to: translateConfig.targetLanguage,
    sign,
  });
  const res = await axios.post('https://fanyi-api.baidu.com/api/trans/vip/translate', form, {
    headers: {
      'Content-Type': 'application/x-www-form-urlencoded',
    },
  });

  const payload = res?.data;
  const results = Array.isArray(payload?.trans_result) ? payload.trans_result : [];
  if (results.length === 0 && payload?.error_code) {
    // Surface quota / signature problems instead of silently yielding empty subtitles.
    console.error(`baidu translate failed: ${payload.error_code} ${payload.error_msg ?? ''}`);
  }
  // Presumably one entry is returned per line of the query (confirm against the API
  // docs); joining keeps every line instead of dropping all but the first.
  return results.map((entry) => entry?.dst ?? '').join('\n');
}
33 | 


--------------------------------------------------------------------------------
/LICENSE:
--------------------------------------------------------------------------------
 1 | MIT License
 2 | 
 3 | Copyright (c) 2024  Lin Xiaodong
 4 | 
 5 | Permission is hereby granted, free of charge, to any person obtaining a copy
 6 | of this software and associated documentation files (the "Software"), to deal
 7 | in the Software without restriction, including without limitation the rights
 8 | to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
 9 | copies of the Software, and to permit persons to whom the Software is
10 | furnished to do so, subject to the following conditions:
11 | 
12 | The above copyright notice and this permission notice shall be included in all
13 | copies or substantial portions of the Software.
14 | 
15 | THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
16 | IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
17 | FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
18 | AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
19 | LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
20 | OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
21 | SOFTWARE.
22 | 


--------------------------------------------------------------------------------
/service/volc.js:
--------------------------------------------------------------------------------
 1 | import { Service } from '@volcengine/openapi';
 2 | import { config } from 'dotenv';
 3 | import { translateConfig } from '../config.js';
 4 | 
 5 | config();
 6 | 
 7 | const accessKeyId = process.env.VOLC_KEY;
 8 | const secretKey = process.env.VOLC_SECRET;
 9 | 
10 | const service = new Service({
11 |   host: 'open.volcengineapi.com',
12 |   serviceName: 'translate',
13 |   region: 'cn-north-1',
14 |   accessKeyId,
15 |   secretKey,
16 | });
17 | 
18 | const fetchApi = service.createAPI('TranslateText', {
19 |   Version: '2020-06-01',
20 |   method: 'POST',
21 |   contentType: 'json',
22 | });
23 | 
/**
 * Translate text with the Volcengine `TranslateText` API.
 *
 * Credentials are read from the VOLC_KEY and VOLC_SECRET environment variables;
 * languages come from `translateConfig`.
 *
 * @param {string} query - Text to translate.
 * @returns {Promise<string>} The first translation in the reply, `''` when the reply
 *   holds none, or the placeholder `'error'` when the request fails.
 * @throws {Error} When the credentials are not configured.
 */
export default async function translate(query) {
  if (!accessKeyId || !secretKey) {
    const message = '请先配置环境变量 VOLC_KEY 和 VOLC_SECRET';
    console.log(message);
    throw new Error(message);
  }
  const postBody = {
    SourceLanguage: translateConfig.sourceLanguage,
    TargetLanguage: translateConfig.targetLanguage,
    TextList: [query],
  };
  try {
    const res = await fetchApi(postBody, {});
    // NOTE(review): assumes API-level failures are reported under
    // ResponseMetadata.Error — confirm against the Volcengine response schema.
    const apiError = res?.ResponseMetadata?.Error;
    if (apiError) {
      console.error('volc translate failed:', apiError.Code, apiError.Message);
    }
    // Fall back to '' so a missing translation is not rendered as "undefined".
    return res.TranslationList?.[0]?.Translation ?? '';
  } catch (error) {
    // Keep the best-effort placeholder, but report the cause.
    console.error('volc translate failed:', error?.message ?? error);
    return 'error';
  }
}
41 | 


--------------------------------------------------------------------------------
/config.js:
--------------------------------------------------------------------------------
 1 | // Directory containing the video files, e.g. /Users/demo/video
 2 | export const videoDir = './examples';
 3 | 
 4 | /*
 5 | whisper.cpp model; the following are supported
 6 | tiny.en
 7 | tiny
 8 | base.en
 9 | base
10 | small.en
11 | small
12 | medium.en
13 | medium
14 | large-v1
15 | large-v2
16 | large-v3
17 |  */
18 | export const whisperModel = 'base.en';
19 | 
20 | // Translation settings: the video's original language and the target language to translate into
21 | // For the list of languages see https://fanyi-api.baidu.com/api/trans/product/apidoc
22 | export const translateConfig = {
23 |   sourceLanguage: 'en',
24 |   targetLanguage: 'zh',
25 | };
26 | 
27 | // Supported translation service providers
28 | export const supportedService = {
29 |   baidu: Symbol.for('baidu'),
30 |   volc: Symbol.for('volc'),
31 |   deeplx: Symbol.for('deeplx'),
32 |   ollama: Symbol.for('ollama'),
33 | };
34 | 
35 | // Translation service provider currently in use; if not configured, the translation step is skipped
36 | export const translateServiceProvider = supportedService.ollama;
37 | 
38 | // Content options for the translated subtitle file
39 | export const contentTemplateRuleMap = {
40 |   onlyTranslate: Symbol.for('onlyTranslate'), // output only the translated text
41 |   sourceAndTranslate: Symbol.for('sourceAndTranslate'), // output original and translated subtitles, original on top
42 |   translateAndSource: Symbol.for('translateAndSource'), // output translated and original subtitles, translation on top
43 | };
44 | 
45 | // Subtitle content templates; the ${sourceContent} and ${targetContent} variables are supported
46 | export const contentTemplate = {
47 |   [contentTemplateRuleMap.onlyTranslate]: '${targetContent}\n\n',
48 |   [contentTemplateRuleMap.sourceAndTranslate]: '${sourceContent}\n${targetContent}\n\n',
49 |   [contentTemplateRuleMap.translateAndSource]: '${targetContent}\n${sourceContent}\n\n',
50 | };
51 | 
52 | // Template rule for the translated output; defaults to translated text only. Any rule in contentTemplateRuleMap is supported
53 | export const contentTemplateRule = contentTemplateRuleMap.onlyTranslate;
54 | 
55 | // Naming rule for the saved original-language subtitle file; the fileName, sourceLanguage and targetLanguage variables are supported
56 | // If empty, the original subtitle file is not saved
57 | // eg: '${fileName}.${sourceLanguage}' -> for an English video named text.mp4 the original subtitle file is named text.en.srt
58 | export const sourceSrtSaveName = '${fileName}.${sourceLanguage}';
59 | 
60 | // Naming rule for the saved translated subtitle file; the fileName, sourceLanguage and targetLanguage variables are supported
61 | export const targetSrtSaveName = '${fileName}.${targetLanguage}';
62 | 


--------------------------------------------------------------------------------
/index.js:
--------------------------------------------------------------------------------
 1 | import fs from 'fs';
 2 | import path from 'path';
 3 | import { execSync } from 'child_process';
 4 | import { config } from 'dotenv';
 5 | import app from './translate.js';
 6 | import { sourceSrtSaveName, translateServiceProvider, videoDir, translateConfig ,whisperModel } from './config.js';
 7 | import { extractAudio, renderFilePath, installWhisper, isDarwin, isWin32 } from './utils.js';
 8 | 
 9 | config();
10 | 
11 | const { log, error } = console;
12 | 
13 | const SUPPORTED_VIDEO_FORMATS = ["mp4", "avi", "mov", "mkv", "flv", "wmv", "webm", "m4a"];
14 | 
15 | await installWhisper();
16 | fs.readdir(videoDir, async (err, files) => {
17 |   if (err) {
18 |     error(err);
19 |     return;
20 |   }
21 |   for (let i = 0; i <= files.length - 1; i++) {
22 |     const file = files[i];
23 |     const fileExtension = file.split('.').pop().toLowerCase();
24 |     if (SUPPORTED_VIDEO_FORMATS.includes(fileExtension)) {
25 |       log('开始处理文件：', file);
26 |       try {
27 |         const fileName = file.substring(0, file.lastIndexOf('.'));
28 |         const wavFile = `${videoDir}/${fileName}.wav`;
29 |         const srtFile = `${renderFilePath(sourceSrtSaveName, fileName)}`;
30 |         await extractAudio(`${videoDir}/${file}`, `${wavFile}`);
31 |         //execSync(`ffmpeg -v quiet -stats -i "${videoDir}/${file}" -ar 16000 -ac 1 -c:a pcm_s16le -y "${wavFile}"`);
32 |         log('完成音频文件提取， 准备生成字幕文件');
33 |         let mainPath = path.join('./', 'whisper.cpp/main');
34 |         if(isWin32()){
35 |           mainPath = path.join('./', 'whisper-bin-x64/main.exe');
36 |         }
37 |         execSync(
38 |           `${mainPath} -m ./whisper.cpp/models/ggml-${whisperModel}.bin -f "${wavFile}" -osrt -of "${srtFile}" -l ${translateConfig.sourceLanguage}`
39 |         )
40 |         log('完成字幕文件生成， 准备开始翻译');
41 |         if (translateServiceProvider) {
42 |           await app(videoDir, fileName, `${srtFile}.srt`);
43 |         }
44 |         log('翻译完成');
45 |         fs.unlink(wavFile, (err) => {
46 |           if (err) {
47 |             error(err);
48 |           } else {
49 |             log('删除wav文件', wavFile);
50 |           }
51 |         });
52 |         if (!sourceSrtSaveName) {
53 |           fs.unlink(`${srtFile}.srt`, () => {});
54 |         }
55 |       } catch (err) {
56 |         log('执行出错', err);
57 |       }
58 |     }
59 |   }
60 | });
61 | 


--------------------------------------------------------------------------------
/translate.js:
--------------------------------------------------------------------------------
 1 | import path from 'path';
 2 | import fs from 'fs';
 3 | import {
 4 |   contentTemplate,
 5 |   contentTemplateRule,
 6 |   supportedService,
 7 |   targetSrtSaveName,
 8 |   translateConfig,
 9 |   translateServiceProvider,
10 | } from './config.js';
11 | import { renderTemplate } from './utils.js';
12 | 
/**
 * Resolve the translation function for the provider selected by
 * `translateServiceProvider`. The service module is imported lazily, so only the
 * chosen provider's module is loaded, and it is resolved once per file instead of
 * once per subtitle cue.
 *
 * @returns {Promise<(source: string) => Promise<string>>} Translator for one cue's text.
 */
async function loadTranslator() {
  switch (translateServiceProvider) {
    case supportedService.volc: {
      const { default: volc } = await import('./service/volc.js');
      return (source) => volc(source);
    }
    case supportedService.baidu: {
      const { default: baidu } = await import('./service/baidu.js');
      return (source) => baidu(source);
    }
    case supportedService.deeplx: {
      const { default: deeplx } = await import('./service/deeplx.js');
      return (source) => deeplx(source);
    }
    case supportedService.ollama: {
      const { default: ollama } = await import('./service/ollama.js');
      return (source) => ollama(source, translateConfig.sourceLanguage, translateConfig.targetLanguage);
    }
    default:
      return async () => 'no supported service';
  }
}

/**
 * Split SRT text into cues. Cues are separated by blank lines; the first line of a
 * cue is its index, the second its time range and every remaining line its text.
 * Both LF and CRLF line endings are accepted, and cues without text are skipped.
 *
 * @param {string} srtText - Contents of an .srt file.
 * @returns {{id: string, startEndTime: string, sourceContent: string}[]} Parsed cues.
 */
function parseSrtCues(srtText) {
  const cues = [];
  let pending = [];
  const flush = () => {
    const [id, startEndTime, ...textLines] = pending;
    pending = [];
    const sourceContent = textLines.join('\n');
    if (sourceContent) {
      cues.push({ id, startEndTime, sourceContent });
    }
  };
  for (const line of srtText.split(/\r?\n/)) {
    if (line.trim() === '') {
      if (pending.length > 0) {
        flush();
      }
    } else {
      pending.push(line);
    }
  }
  if (pending.length > 0) {
    flush();
  }
  return cues;
}

/**
 * Translate a source-language .srt file and write the translated subtitle file.
 *
 * The output is saved in `folder` under the name rendered from `targetSrtSaveName`,
 * and each cue body is rendered with the template chosen by `contentTemplateRule`.
 *
 * @param {string} folder - Directory the translated file is written to.
 * @param {string} fileName - Video file name without extension (used in the output name).
 * @param {string} absolutePath - Path of the source .srt file to read.
 * @returns {Promise<void>} Resolves once the translated file has been written.
 * @throws Rethrows (after logging) any error from reading, translating or writing.
 */
export default async function translate(folder, fileName, absolutePath) {
  const renderContentTemplate = contentTemplate[contentTemplateRule];
  try {
    const cues = parseSrtCues(fs.readFileSync(absolutePath, 'utf8'));
    const translator = await loadTranslator();
    const rendered = [];
    // Cues are translated sequentially on purpose: the remote providers rate-limit.
    for (const cue of cues) {
      const targetContent = await translator(cue.sourceContent);
      const body = renderTemplate(renderContentTemplate, { ...cue, targetContent });
      rendered.push(`${cue.id}\n${cue.startEndTime}\n${body}`);
    }
    if (rendered.length > 0) {
      const fileSave = path.join(folder, `${renderTemplate(targetSrtSaveName, { fileName, ...translateConfig })}.srt`);
      // Write the whole file in one go: appending made every re-run duplicate
      // the cues already present in the output of a previous run.
      fs.writeFileSync(fileSave, rendered.join(''));
    }
  } catch (error) {
    console.error(error);
    throw error;
  }
}
69 | 


--------------------------------------------------------------------------------
/utils.js:
--------------------------------------------------------------------------------
  1 | import { spawn } from 'child_process';
  2 | import fs from 'fs';
  3 | import os from 'os';
  4 | import path from 'path';
  5 | import ffmpegInstaller from '@ffmpeg-installer/ffmpeg';
  6 | import ffmpeg from 'fluent-ffmpeg';
  7 | import { translateConfig, videoDir, whisperModel } from './config.js';
  8 | 
  9 | ffmpeg.setFfmpegPath(ffmpegInstaller.path);
 10 | 
 11 | // 将字符串转成模板字符串
 12 | export const renderTemplate = (template, data) => {
 13 |   const names = Object.keys(data);
 14 |   const values = Object.values(data);
 15 |   return new Function(...names, `return \`${template}\`;`)(...values);
 16 | };
 17 | 
 18 | export const renderFilePath = (template, fileName) => {
 19 |   const data = {
 20 |     fileName,
 21 |     sourceLanguage: translateConfig.sourceLanguage,
 22 |     targetLanguage: translateConfig.targetLanguage,
 23 |   };
 24 |   const finalPath = template || 'temp-${fileName}'; // 如果不保存字幕文件，需要先生成临时文件
 25 |   const filePath = renderTemplate(finalPath, data);
 26 |   return `${videoDir}/${filePath}`;
 27 | };
 28 | 
 29 | export const extractAudio = (videoPath, audioPath) => {
 30 |   return new Promise((resolve, reject) => {
 31 |     ffmpeg(videoPath)
 32 |       .audioFrequency(16000)
 33 |       .audioChannels(1)
 34 |       .audioCodec('pcm_s16le')
 35 |       .outputOptions('-y')
 36 |       .on('start', function (str) {
 37 |         console.log('转换任务开始~', str);
 38 |       })
 39 |       .on('progress', function (progress) {
 40 |         console.log(`进行中，完成${(progress.percent || 0)}%`);
 41 |       })
 42 |       .on('end', function (str) {
 43 |         console.log('转换任务完成!');
 44 |         resolve();
 45 |       })
 46 |       .on('error', function (err) {
 47 |         console.log('转换任务出错:', err);
 48 |         reject(err);
 49 |       })
 50 |       .save(audioPath);
 51 |   });
 52 | };
 53 | 
 54 | function runCommand(command, args) {
 55 |   return new Promise((resolve, reject) => {
 56 |     const child = spawn(command, args);
 57 | 
 58 |     child.stdout.on('data', (data) => {
 59 |       console.log(`${data}`);
 60 |     });
 61 | 
 62 |     child.stderr.on('data', (data) => {
 63 |       console.error(`${data}`);
 64 |     });
 65 | 
 66 |     child.on('close', (code) => {
 67 |       if (code !== 0) {
 68 |         reject(new Error(`${command} ${args.join(' ')} 进程退出，退出码 ${code}`));
 69 |       } else {
 70 |         resolve();
 71 |       }
 72 |     });
 73 |   });
 74 | }
 75 | 
 76 | export const isDarwin = () => os.platform() === 'darwin';
 77 | 
 78 | export const isWin32 = () => os.platform() === 'win32';
 79 | 
 80 | 
 81 | // 安装 whisper.cpp 及模型
 82 | export const installWhisper = async () => {
 83 |   const repoUrl = 'https://github.com/ggerganov/whisper.cpp';
 84 |   const localPath = path.join('./', 'whisper.cpp');
 85 |   const modelPath = path.join('./', `whisper.cpp/models/ggml-${whisperModel}.bin`);
 86 |   const mainPath = path.join('./', 'whisper.cpp/main');
 87 | 
 88 |   if (!fs.existsSync(localPath)) {
 89 |     console.log('开始克隆 whisper.cpp 仓库');
 90 |     await runCommand('git', ['clone', repoUrl]);
 91 |   }
 92 |   if (!fs.existsSync(modelPath)) {
 93 |     let script;
 94 |     console.log('正在安装 whisper.cpp 模型');
 95 |     if (isDarwin()) {
 96 |       script = path.join('./', './whisper.cpp/models/download-ggml-model.sh');
 97 |       await runCommand('bash', [script, whisperModel]);
 98 |     } else if (isWin32()) {
 99 |       script = path.join('./', 'whisper.cpp/models/download-ggml-model.cmd');
100 |       await runCommand('cmd.exe', ['/c', script, whisperModel])
101 |     } else {
102 |       throw Error('platform does not support! ')
103 |     }
104 |   }
105 |   if (isDarwin() && !fs.existsSync(mainPath)) {
106 |     // 编译 whisper.cpp
107 |     console.log('正在编译 whisper.cpp');
108 |     await runCommand('make', ['-C', './whisper.cpp']);
109 |   }
110 | };


--------------------------------------------------------------------------------
/README.md:
--------------------------------------------------------------------------------
  1 | # 批量为视频生成字幕文件，并翻译成其它语言
  2 | 
  3 | > [!IMPORTANT]  
  4 | > 🧨 💥 🎉 本项目是一个命令行工具，但非常荣幸得到了很多朋友的支持，也给我了很大的鼓舞，因此，我基于它制作了一款客户端工具，让大家能够更加方便地使用，也方便一部分不熟悉代码配置的朋友也能使用该工具。
  5 | > 
  6 | > 欢迎大家移步 [SmartSub](https://github.com/buxuku/SmartSub) 以获得更加便捷的使用体验
  7 | > ![image](https://github.com/buxuku/video-subtitle-master/raw/main/resources/preview.png)
  8 | 
  9 | 
 10 | 做这个小工具的初衷：
 11 | 
 12 | 自己有一大批外文视频，没有字幕，希望能够添加字幕文件，同时也能够将字幕文件翻译成中文， 同时希望能够通过批量处理的方式来减轻工作量。
 13 | 
 14 | 类似需求，有一批厂商已经提供到了支持，比如 讯飞听见， 网易见外 等，但这些在线服务都涉及到视频的上传动作，效率相对比较低下。
 15 | 
 16 | 希望能够找一个客户端工具，在本地来生成，试用了一些工具，依然不理想
 17 | 
 18 | - Buzz 非 Store 版本没有对 apple silicon 做优化，字幕生成速度比较慢，也不支持翻译
 19 | - MacWhisper 免费版本只支持单个生成，不支持批量，不支持翻译
 20 | - WhisperScript 可以批量生成，但字幕文件需要手动一个个地保存，不支持翻译
 21 | - memo.ac 做了 mac 下的性能优化，可以使用 GPU ，也支持翻译功能，非常棒的一款软件，但目前批量模式 bug 太多，无法正常使用
 22 | 
 23 | 最后想了一下，本地语音转文字，通常的做法就是使用目前最强的 whisper 模型来生成。那我的需求就比较简单了：
 24 | 
 25 | - 通过 ffmpeg 从视频文件中提取出音频文件
 26 | - 通过 whisper 模型将音频生成原语言的字幕文件
 27 | - 调用翻译 API， 将原语言的字幕文件翻译成目标语言的字幕文件
 28 | 
 29 | 基于以上简单的思路和流程，就可以简单写一个小工具来批量处理本地的视频了。
 30 | 
 31 | ## 💥特性
 32 | 
 33 | - 源语言字幕文件和目标语言字幕文件放在视频同目录下，方便播放时任意挂载字幕文件
 34 | - 批量处理目录下面的所有视频文件
 35 | - 可以只生成字幕，不翻译，方便批量为视频生成字幕
 36 | - 支持火山引擎翻译
 37 | - 支持百度翻译
 38 | - 支持 deeplx 翻译 （批量翻译容易存在被限流的情况）
 39 | - 支持 ollama 翻译
 40 | - 自定义字幕文件名，方便兼容不同的播放器挂载字幕识别
 41 | - 自定义翻译后的字幕文件内容，纯翻译结果，原字幕+翻译结果
 42 | - 项目集成 `whisper.cpp`， 它对 apple silicon 进行了优化，有较快的生成速度
 43 | - 项目集成了 `fluent-ffmpeg`, 无须安装 `ffmpeg`
 44 | 
 45 | ### ⬆️ 支持的模型
 46 | 
 47 | ```
 48 | tiny.en
 49 | tiny
 50 | base.en
 51 | base
 52 | small.en
 53 | small
 54 | medium.en
 55 | medium
 56 | large-v1
 57 | large-v2
 58 | large-v3
 59 | ```
 60 | 
 61 | ## 翻译服务
 62 | 
 63 | 本项目的翻译能力是基于 **百度/火山/deeplx** 的翻译API来实现的，这些 API 的使用需要申请对应的 KEY 和 SECRET， 因此，如果你需要使用到翻译服务，需要先申请一个 API 。
 64 | 
 65 | 具体的申请方法，可以参考 https://bobtranslate.com/service/ ， 感谢 [Bob](https://bobtranslate.com/) 这款优秀的软件。
 66 | 
 67 | ## 🔦使用
 68 | 
 69 | 1️⃣ 克隆本项目在本地
 70 | 
 71 | ```shell
 72 | git clone https://github.com/buxuku/VideoSubtitleGenerator.git
 73 | ```
 74 | 
 75 | 2️⃣ 在项目中执行 `yarn install` 或者 `npm install`
 76 | 
 77 | ```shell
 78 | cd VideoSubtitleGenerator
 79 | yarn install 
 80 | ```
 81 | 
 82 | 3️⃣ 如果需要翻译，复制 `.env.local` 为 `.env` 在项目根目录，该文件用于配置翻译相关的 KEY 和 SECRET， 例如
 83 | 
 84 | `BAIDU_` 开头的为百度翻译的配置
 85 | 
 86 | `VOLC_` 开头的为火山翻译的配置
 87 | 
 88 | `OLLAMA_` 开头的为 ollama 翻译的配置
 89 | 
 90 | ```shell
 91 | BAIDU_KEY=2023120600190xxxx
 92 | BAIDU_SECRET=PIbyKjEr1y8u18RZxxxx
 93 | VOLC_KEY=AKLTMDUwZjY4MTZkNTFmN4M3ZjlkMzlmYzAzMTdlMDExxxx
 94 | VOLC_SECRET=T0dRMllUUmpPREUzWWpjNE5HVm2Zamt4TlRObU9EUm1ORFk0T1dGbExxxx==
 95 | 
 96 | OLLAMA_API_URL=http://localhost:11434
 97 | OLLAMA_MODEL_NAME=llama3
 98 | OLLAMA_PROMPT=Please translate the following content from ${sourceLanguage} to ${targetLanguage}, only return the translation result can be. \n ${content}
 99 | ```
100 | 
101 | 4️⃣ 其余的配置在 `config.js` 文件中进行配置，每条配置均有详细的注释
102 | 
103 | ```js
104 | // 视频文件所在目录 如 /Users/demo/video
105 | export const videoDir = './examples';
106 | 
107 | /*
108 | whisper.cpp 模型 支持以下
109 | tiny.en
110 | tiny
111 | base.en
112 | base
113 | small.en
114 | small
115 | medium.en
116 | medium
117 | large-v1
118 | large-v2
119 | large-v3
120 |  */
121 | export const whisperModel = 'base.en';
122 | 
123 | // 翻译配置，视频原语言与翻译后的目标语言
124 | export const translateConfig = {
125 |     sourceLanguage: 'en',
126 |     targetLanguage: 'zh',
127 | };
128 | 
129 | // 支持的翻译服务商
130 | export const supportedService = {
131 |     baidu: Symbol.for('baidu'),
132 |     volc: Symbol.for('volc'),
133 |     deeplx: Symbol.for('deeplx'),
134 |     ollama: Symbol.for('ollama'),
135 | };
136 | 
137 | // 当前使用的翻译服务商，如果不配置，则不执行翻译流程
138 | export const translateServiceProvider = supportedService.volc;
139 | 
140 | // 翻译结果字幕文件内容配置
141 | export const contentTemplateRuleMap = {
142 |     onlyTranslate: Symbol.for('onlyTranslate'), // 只输出翻译内容
143 |     sourceAndTranslate: Symbol.for('sourceAndTranslate'), // 输出原始字幕和翻译字幕， 原始字幕在上面
144 |     translateAndSource: Symbol.for('translateAndSource'), // 输出翻译后的字幕和原始字幕， 翻译字幕在上面
145 | };
146 | 
147 | // 字幕文件内容模板 支持 ${sourceContent}, ${targetContent} 变量
148 | export const contentTemplate = {
149 |     [contentTemplateRuleMap.onlyTranslate]: '${targetContent}\n\n',
150 |     [contentTemplateRuleMap.sourceAndTranslate]: '${sourceContent}\n${targetContent}\n\n',
151 |     [contentTemplateRuleMap.translateAndSource]: '${targetContent}\n${sourceContent}\n\n',
152 | };
153 | 
154 | // 翻译内容输出模板规则，默认只输出翻译内容, 支持 contentTemplateRuleMap 内的规则
155 | export const contentTemplateRule = contentTemplateRuleMap.onlyTranslate;
156 | 
157 | // 原始字幕文件保存命名规则 支持 fileName, sourceLanguage, targetLanguage 变量
158 | // 如果为空，将不保存原始字幕文件
159 | // eg: '${fileName}.${sourceLanguage}' -> 对于视频名为 text.mp4 的英文视频原始字幕文件名为 text.en.srt
160 | export const sourceSrtSaveName = '${fileName}.${sourceLanguage}';
161 | 
162 | // 翻译后的字幕文件保存命名规则 支持 fileName, sourceLanguage, targetLanguage 变量
163 | export const targetSrtSaveName = '${fileName}.${targetLanguage}';
164 | ```
165 | 
166 | 这里面的字幕内容和字幕文件名可以自定义配置，同时会将里面的 `${xxx}`变量转为对应的字符
167 | 
168 | 核心配置主要为以下几项
169 | 
170 | #### 选择翻译服务商
171 | 
172 | 
173 | ```js
174 | // 当前使用的翻译服务商，如果不配置，则不执行翻译流程
175 | export const translateServiceProvider = supportedService.volc;
176 | ```
177 | 
178 | 支持
179 | - supportedService.volc 火山翻译
180 | - supportedService.baidu 百度翻译
181 | - supportedService.deeplx deeplx 翻译
182 | - supportedService.ollama ollama 翻译
183 | 
184 | #### 翻译结果的配置
185 | 
186 | ```js
187 | // 翻译内容输出模板规则，默认只输出翻译内容, 支持 contentTemplateRuleMap 内的规则
188 | export const contentTemplateRule = contentTemplateRuleMap.onlyTranslate;
189 | ```
190 | 
191 | 支持以下几种设置模式
192 | 
193 | ##### contentTemplateRuleMap.onlyTranslate
194 | 
195 | 只保留翻译后的结果内容，如
196 | 
197 | ```
198 | 1
199 | 00:00:00,000 --> 00:00:09,360
200 |  我要和你们谈谈我这本书里的一些东西，我希望能
201 | 
202 | 2
203 | 00:00:09,360 --> 00:00:13,680
204 |  和你们已经听到的东西产生共鸣，我会试着建立一些联系。
205 | ```
206 | 
207 | ##### contentTemplateRuleMap.sourceAndTranslate
208 | 
209 | 保留原字幕和翻译之后的字幕，且原字幕在上面
210 | 
211 | ```
212 | 1
213 | 00:00:00,000 --> 00:00:09,360
214 |  I'm going to talk to you about some stuff that's in this book of mine that I hope will
215 |  我要和你们谈谈我这本书里的一些东西，我希望能
216 | 
217 | 2
218 | 00:00:09,360 --> 00:00:13,680
219 |  resonate with other things that you've already heard and I'll try to make some connections
220 |  和你们已经听到的东西产生共鸣，我会试着建立一些联系。
221 | ```
222 | 
223 | ##### contentTemplateRuleMap.translateAndSource
224 | 
225 | 保留原字幕和翻译之后的字幕，且翻译字幕在上面
226 | 
227 | ```
228 | 1
229 | 00:00:00,000 --> 00:00:09,360
230 |  我要和你们谈谈我这本书里的一些东西，我希望能
231 |  I'm going to talk to you about some stuff that's in this book of mine that I hope will
232 | 
233 | 2
234 | 00:00:09,360 --> 00:00:13,680
235 |  和你们已经听到的东西产生共鸣，我会试着建立一些联系。
236 |  resonate with other things that you've already heard and I'll try to make some connections
237 | ```
238 | 
239 | 5️⃣ 配置好该文件之后，执行 `yarn start` 或者 `npm start`, 首次执行会下载 `whisper.cpp` 和配置的对应的模型文件，会比较慢一些。下次执行将会跳过该流程
240 | 
241 | 如果在使用过程中遇到啥问题，可以提 Issue 或者通过 Discussions 进行讨论
242 | 
243 | [![Powered by DartNode](https://dartnode.com/branding/DN-Open-Source-sm.png)](https://dartnode.com "Powered by DartNode - Free VPS for Open Source")
244 | 


--------------------------------------------------------------------------------