├── .gitignore
├── README.md
├── engine_translation
    ├── baidu.py
    ├── gpt.py
    └── tencent.py
├── file
    ├── 1.mp4
    ├── config.json.temp
    ├── log.png
    ├── test.mp3
    └── test_cn.mp4
├── requirements.txt
├── srt2ass.py
├── transcribe.py
├── translation.py
├── utils.py
├── uvr.py
└── web.py


/.gitignore:
--------------------------------------------------------------------------------
  1 | # Byte-compiled / optimized / DLL files
  2 | __pycache__/
  3 | *.py[cod]
  4 | *$py.class
  5 | 
  6 | # C extensions
  7 | *.so
  8 | 
  9 | # Distribution / packaging
 10 | .Python
 11 | build/
 12 | develop-eggs/
 13 | dist/
 14 | downloads/
 15 | eggs/
 16 | .eggs/
 17 | lib/
 18 | lib64/
 19 | parts/
 20 | sdist/
 21 | var/
 22 | wheels/
 23 | share/python-wheels/
 24 | *.egg-info/
 25 | .installed.cfg
 26 | *.egg
 27 | MANIFEST
 28 | 
 29 | # PyInstaller
 30 | #  Usually these files are written by a python script from a template
 31 | #  before PyInstaller builds the exe, so as to inject date/other infos into it.
 32 | *.manifest
 33 | *.spec
 34 | 
 35 | # Installer logs
 36 | pip-log.txt
 37 | pip-delete-this-directory.txt
 38 | 
 39 | # Unit test / coverage reports
 40 | htmlcov/
 41 | .tox/
 42 | .nox/
 43 | .coverage
 44 | .coverage.*
 45 | .cache
 46 | nosetests.xml
 47 | coverage.xml
 48 | *.cover
 49 | *.py,cover
 50 | .hypothesis/
 51 | .pytest_cache/
 52 | cover/
 53 | 
 54 | # Translations
 55 | *.mo
 56 | *.pot
 57 | 
 58 | # Django stuff:
 59 | *.log
 60 | local_settings.py
 61 | db.sqlite3
 62 | db.sqlite3-journal
 63 | 
 64 | # Flask stuff:
 65 | instance/
 66 | .webassets-cache
 67 | 
 68 | # Scrapy stuff:
 69 | .scrapy
 70 | 
 71 | # Sphinx documentation
 72 | docs/_build/
 73 | 
 74 | # PyBuilder
 75 | .pybuilder/
 76 | target/
 77 | 
 78 | # Jupyter Notebook
 79 | .ipynb_checkpoints
 80 | 
 81 | # IPython
 82 | profile_default/
 83 | ipython_config.py
 84 | 
 85 | # pyenv
 86 | #   For a library or package, you might want to ignore these files since the code is
 87 | #   intended to run in multiple environments; otherwise, check them in:
 88 | # .python-version
 89 | 
 90 | # pipenv
 91 | #   According to pypa/pipenv#598, it is recommended to include Pipfile.lock in version control.
 92 | #   However, in case of collaboration, if having platform-specific dependencies or dependencies
 93 | #   having no cross-platform support, pipenv may install dependencies that don't work, or not
 94 | #   install all needed dependencies.
 95 | #Pipfile.lock
 96 | 
 97 | # poetry
 98 | #   Similar to Pipfile.lock, it is generally recommended to include poetry.lock in version control.
 99 | #   This is especially recommended for binary packages to ensure reproducibility, and is more
100 | #   commonly ignored for libraries.
101 | #   https://python-poetry.org/docs/basic-usage/#commit-your-poetrylock-file-to-version-control
102 | #poetry.lock
103 | 
104 | # pdm
105 | #   Similar to Pipfile.lock, it is generally recommended to include pdm.lock in version control.
106 | #pdm.lock
107 | #   pdm stores project-wide configurations in .pdm.toml, but it is recommended to not include it
108 | #   in version control.
109 | #   https://pdm.fming.dev/#use-with-ide
110 | .pdm.toml
111 | 
112 | # PEP 582; used by e.g. github.com/David-OConnor/pyflow and github.com/pdm-project/pdm
113 | __pypackages__/
114 | 
115 | # Celery stuff
116 | celerybeat-schedule
117 | celerybeat.pid
118 | 
119 | # SageMath parsed files
120 | *.sage.py
121 | 
122 | # Environments
123 | .env
124 | .venv
125 | env/
126 | venv/
127 | ENV/
128 | env.bak/
129 | venv.bak/
130 | 
131 | # Spyder project settings
132 | .spyderproject
133 | .spyproject
134 | 
135 | # Rope project settings
136 | .ropeproject
137 | 
138 | # mkdocs documentation
139 | /site
140 | 
141 | # mypy
142 | .mypy_cache/
143 | .dmypy.json
144 | dmypy.json
145 | 
146 | # Pyre type checker
147 | .pyre/
148 | 
149 | # pytype static type analyzer
150 | .pytype/
151 | 
152 | # Cython debug symbols
153 | cython_debug/
154 | 
155 | # PyCharm
156 | #  JetBrains specific template is maintained in a separate JetBrains.gitignore that can
157 | #  be found at https://github.com/github/gitignore/blob/main/Global/JetBrains.gitignore
158 | #  and can be added to the global gitignore or merged into this file.  For a more nuclear
159 | #  option (not recommended) you can uncomment the following to ignore the entire idea folder.
160 | #.idea/
161 | 
162 | # 自定义
163 | temp/*.*
164 | file/config.json
165 | engine_translator/secret.yaml
166 | models
167 | engine_translation/secret.yaml
168 | file/SubtitleEdit-4.0.3-Setup.zip
169 | file/Aegisub-3.2.2-64.exe
170 | file/2.wav
171 | file/test.mp4


--------------------------------------------------------------------------------
/README.md:
--------------------------------------------------------------------------------
  1 | # 字幕生成器
  2 | 
  3 | 一个能够自动生成媒体字幕的工具
  4 | 
  5 | 目前功能：
  6 | 
  7 |     * 输入视频
  8 |     * 输入音频
  9 |     * 输出srt字幕
 10 |     * 输出ass字幕
 11 |     * GPT字幕翻译
 12 |     * 百度字幕翻译
 13 |     * 腾讯字幕翻译
 14 |     * 音频清洁
 15 | 
 16 | ## 环境
 17 | 
 18 | * conda
 19 |     ```bash
 20 |     conda create -n subtitle python=3.10
 21 |     conda activate subtitle
 22 |     ```
 23 | 
 24 | * torch（CUDA 11.8，其他版本去[官网](https://pytorch.org/get-started/locally/)找）
 25 |     ```bash
 26 |     # GPU
 27 |     pip install torch torchvision torchaudio --index-url https://download.pytorch.org/whl/cu118
 28 | 
 29 |     # CPU
 30 |     pip install torch torchvision torchaudio --index-url https://download.pytorch.org/whl/cpu
 31 |     ```
 32 | 
 33 | * 安装ffmpeg（windows）。
 34 |     去[官网](https://ffmpeg.org/download.html#build-windows)下载，解压后将bin目录添加到环境变量
 35 | 
 36 | * 安装ffmpeg（ubuntu）
 37 |     ```bash
 38 |     apt install ffmpeg
 39 |     ```
 40 | 
 41 | * 其他依赖
 42 |     ```
 43 |     pip install -r requirements.txt
 44 | 
 45 |     # 腾讯翻译
 46 |     pip install -i https://mirrors.tencent.com/pypi/simple/ --upgrade tencentcloud-sdk-python
 47 | 
    # 音频清洁（GPU 版与 CPU 版二选一安装）
    # https://github.com/karaokenerds/python-audio-separator
    pip install audio-separator[gpu]
    pip install audio-separator[cpu]
 52 |     ```
 53 | 
 54 | 
 55 | ## 模型下载
 56 | 
 57 | 将下载的文件夹放入根目录的`models`文件夹中
 58 | 
 59 | * 语音清洁相关模型
    * [百度云](https://pan.baidu.com/s/1wDQ_I1NIL942o1Dm2XU8zg?pwd=kuon)，目前只使用了`UVR_MDXNET_Main.onnx`，可以只下载它，但文件夹目录结构仍需与示例保持一致
 61 |  
 62 | * vad
 63 |     * [百度云](https://pan.baidu.com/s/1gcEfO8pxqoZKIAW2SyzbKA?pwd=kuon)
 64 | 
 65 | * whisper模型
    * 可以在使用时自动下载，模型会被保存到huggingface的缓存目录中
    * [百度云](https://pan.baidu.com/s/1NbutR2cHvHbboUy-QTg5zw?pwd=kuon)，该压缩包包含上面的所有模型
    * [huggingface](https://huggingface.co/collections/guillaumekln/faster-whisper-64f9c349b3115b4f51434976)
 69 | 
 70 | 示例models目录结构
 71 | ```text
 72 | │models
 73 | |
 74 | ├───faster-whisper-large-v3
 75 | │       .gitattributes
 76 | │       config.json
 77 | │       model.bin
 78 | │       preprocessor_config.json
 79 | │       README.md
 80 | │       tokenizer.json
 81 | │       vocabulary.json
 82 | │
 83 | │
 84 | ├───silero-vad-4.0
 85 | │
 86 | └───uvr5_weights
 87 |         UVR_MDXNET_Main.onnx
 88 | 
 89 | ```
 90 | 
 91 | ## 运行
 92 | 
 93 | ```bash
 94 | streamlit run web.py --server.port 1234 --server.maxUploadSize 1000
 95 | ```
 96 | 
 97 | 演示视频：
 98 | <video src="https://github.com/lissettecarlr/auto-subtitle/assets/16299917/bd83db31-a830-441a-82ad-caccaa9c3833" controls="controls" width="100%" height="100%"></video>
 99 | 
100 | 
101 | 
102 | 
103 | ## 效果
104 | 
105 | 
106 | ### 葬送的芙莉蓮 OP 主題曲 -「勇者」/ YOASOBI
107 | 
108 | |识别出的歌词|本软件输出|
109 | |---|---|
110 | |まるでおとぎの話 終わり迎えた証|就像童话故事迎来了结局的证明|
111 | |長すぎる旅路から 切り出した一節|从过长的旅程中切出的一节|
112 | |それはかつてこの地に 影を落とした悪を|那是曾经在这片土地上投下阴影的恶|
113 | |打ち取る自由者との 短い旅の記憶 | 是与击败自由者的短暂旅行的记忆|
114 | |物語は終わり 勇者は眠りにつく | 故事结束了 勇者已经入睡|
115 | |穏やかな日常を この地に残して | 留下了平静的日常在这片土地上|
116 | |時の眺めは無情に 人を忘れさせる | 时间的眺望无情地让人忘记|
117 | |そこに生きた奇跡も 錆びついてく | 在那里生活的奇迹也开始生锈了|
118 | |それでも君は 生きてる | 但是你依然活着|
119 | |君の言葉も 願いも 勇気も | 你的话语 你的愿望 你的勇气|
120 | |今は確かに私の中で 生きてる | 现在它们确实在我心中活着|
121 | |同じ道を選んだ それだけだった | 只是选择了相同的道路|
122 | 
123 | 
124 | ## 参考
125 | 
126 | * [faster-whisper](https://github.com/SYSTRAN/faster-whisper)
127 | * [N46Whisper](https://github.com/Ayanaminn/N46Whisper/blob/main/README_CN.md)


--------------------------------------------------------------------------------
/engine_translation/baidu.py:
--------------------------------------------------------------------------------
 1 | from random import randint
 2 | from hashlib import md5
 3 | from http.client import HTTPConnection
 4 | import json
 5 | from urllib import parse
 6 | 
 7 | class Baidu:
 8 |     def __init__(self,appid,secretKey) -> None:
 9 |         self.url  = '/api/trans/vip/translate'
10 |         self.appid = appid
11 |         self.secretKey = secretKey
12 | 
13 |     def reset(self):
14 |         pass
15 |    
16 |     def run(self,text,from_language='auto',target_language='中文'):
17 | 
18 |         if target_language == "中文":
19 |             target_language = 'zh'
20 |         elif target_language == "日语":
21 |             target_language = 'jp'
22 |         elif target_language == "英语":
23 |             target_language = 'en'
24 |         
25 |         salt = randint(32768, 65536)
26 |         sign = self.appid + text + str(salt) + self.secretKey
27 |         sign = md5(sign.encode()).hexdigest()
28 |         get_url = self.url + '?appid=' + self.appid + '&q=' + parse.quote(text) + '&from=' + from_language + '&to=' + target_language + '&salt=' + str(
29 |             salt) + '&sign=' + sign
30 |         
31 |         try:
32 |             httpClient = HTTPConnection('api.fanyi.baidu.com')
33 |             httpClient.request('GET', get_url)
34 | 
35 |             response = httpClient.getresponse()
36 |             result_all = response.read().decode("utf-8")
37 |             result = json.loads(result_all)
38 | 
39 |             string = ''
40 |             for word in result['trans_result']:
41 |                 if word == result['trans_result'][-1]:
42 |                     string += word['dst']
43 |                 else:
44 |                     string += word['dst'] + '\n'
45 | 
46 |         except Exception:
47 |             if result['error_code'] == '54003':
48 |                 string = "翻译：我抽风啦！"
49 |             elif result['error_code'] == '52001':
50 |                 string = '翻译：请求超时，请重试'
51 |             elif result['error_code'] == '52002':
52 |                 string = '翻译：系统错误，请重试'
53 |             elif result['error_code'] == '52003':
54 |                 string = '翻译：APPID 或 密钥 不正确'
55 |             elif result['error_code'] == '54001':
56 |                 string = '翻译：APPID 或 密钥 不正确'
57 |             elif result['error_code'] == '54004':
58 |                 string = '翻译：账户余额不足'
59 |             elif result['error_code'] == '54005':
60 |                 string = '翻译：请降低长query的发送频率，3s后再试'
61 |             elif result['error_code'] == '58000':
62 |                 string = '翻译：客户端IP非法，注册时错误填入服务器地址，请前往开发者信息-基本信息修改，服务器地址必须为空'
63 |             elif result['error_code'] == '90107':
64 |                 string = '翻译：认证未通过或未生效'
65 |             else:
66 |                 string = '翻译：%s，%s' % (result['error_code'], result['error_msg'])
67 |             raise Exception(string)
68 |         
69 |         finally:
70 |             if httpClient:
71 |                 httpClient.close()
72 | 
73 |         return string
74 | 
75 | 
if __name__ == '__main__':
    # Manual smoke test: fill in a real appid / secretKey before running.
    translator = Baidu(appid="", secretKey="")
    print(translator.run("まるでおとぎの話 終わり迎えた証"))
80 |      


--------------------------------------------------------------------------------
/engine_translation/gpt.py:
--------------------------------------------------------------------------------
 1 | from openai import OpenAI
 2 | 
 3 | class GPT():
 4 |     def __init__(self,key,base_url = "https://api.openai.com/v1",model="gpt-3.5-turbo",temperature=0.6) -> None:
 5 |         self.client = OpenAI(
 6 |             api_key = key,
 7 |             base_url = base_url
 8 |         )
 9 |         
10 |         # if model not in ["gpt-3.5-turbo","gpt-4"]:
11 |         #     raise Exception("model not supported")
12 |         
13 |         self.model = model
14 |         self.temperature = temperature
15 |         self.prompt = "You are a language expert.Your task is to translate the input subtitle text, sentence by sentence, into the user specified target language.However, please utilize the context to improve the accuracy and quality of translation.Please be aware that the input text could contain typos and grammar mistakes, utilize the context to correct the translation.Please return only translated content and do not include the origin text.Please do not use any punctuation around the returned text.Please do not translate people's name and leave it as original language.\""
16 |         self.reset()
17 | 
18 |     def reset(self):
19 |         """
20 |         清空历史记录
21 |         """
22 |         self.messages = [
23 |             {
24 |                 "role": "system",
25 |                 "content": f'{self.prompt}'
26 |             }
27 |         ]
28 | 
29 |     def run(self,text,target_language="zh-hans"):
30 |         """
31 |         target_language : ["zh-hans","english"]
32 |         """
33 |         # if target_language not in ["中文","英语","日语"]:
34 |         #     raise Exception("target language not supported")
35 |         
36 |         # if target_language == "中文":
37 |         #     target_language = 'zh'
38 |         # elif target_language == "日语":
39 |         #     target_language = 'jp'
40 |         # elif target_language == "英语":
41 |         #     target_language = 'en'
42 | 
43 |         new_message = {
44 |                 "role":"user",
45 |                 "content": f"Original text:`{text}`. Target language: {target_language}"
46 |         }
47 |         self.messages.append(new_message)
48 |         try:
49 |             completion = self.client.chat.completions.create(
50 |                 model=self.model,
51 |                 messages= self.messages,
52 |                 temperature=self.temperature,
53 |                 stream = False
54 |             )
55 |             
56 |             content = (
57 |                 completion.choices[0].message.content.encode("utf8").decode()
58 |             )
59 |             # total_tokens = completion.usage.total_tokens     
60 | 
61 |         except Exception as e:
62 |             self.messages.pop()
63 |             raise Exception(e)
64 |         # 将其保存成历史
65 |         self.messages.append({"role": "assistant", "content": content})
66 |         # print("{}".format(self.messages))
67 |         return content
68 | 
69 | 
if __name__ == '__main__':
    # Translation smoke test; credentials are read from ./secret.yaml.
    import yaml

    with open('./secret.yaml', 'r', encoding="utf-8") as secret_file:
        secrets = yaml.safe_load(secret_file)
    chatgpt_conf = secrets["chatgpt"]

    engine = GPT(
        key=chatgpt_conf["key"],
        base_url=chatgpt_conf["base_url"],
        model="gpt-4",
    )
    print(engine.run("まるでおとぎの話 終わり迎えた証"))
80 |     # eng.run("長すぎる旅路から 切り出した一説")
81 |     


--------------------------------------------------------------------------------
/engine_translation/tencent.py:
--------------------------------------------------------------------------------
 1 | import json
 2 | # pip install -i https://mirrors.tencent.com/pypi/simple/ --upgrade tencentcloud-sdk-python
 3 | from tencentcloud.common import credential
 4 | from tencentcloud.common.profile.client_profile import ClientProfile
 5 | from tencentcloud.common.profile.http_profile import HttpProfile
 6 | from tencentcloud.common.exception.tencent_cloud_sdk_exception import TencentCloudSDKException
 7 | from tencentcloud.tmt.v20180321 import tmt_client, models
 8 | 
 9 | class Tencent:
10 |     def __init__(self,appid,secretKey) -> None:
11 |         self.appid = appid
12 |         self.secretKey = secretKey
13 | 
14 |     def reset(self):
15 |         pass
16 |     
17 |     def run(self,text,from_language='auto',target_language='中文'):
18 | 
19 |         if target_language == "中文":
20 |             target_language = 'zh'
21 |         elif target_language == "日语":
22 |             target_language = 'jp'
23 |         elif target_language == "英语":
24 |             target_language = 'en'
25 |         
26 |         try:
27 |             cred = credential.Credential(self.appid, self.secretKey)
28 |             httpProfile = HttpProfile()
29 |             httpProfile.endpoint = "tmt.tencentcloudapi.com"
30 |             clientProfile = ClientProfile()
31 |             clientProfile.httpProfile = httpProfile
32 |             client = tmt_client.TmtClient(cred, "ap-chengdu", clientProfile)
33 |             req = models.TextTranslateRequest()
34 |             params = {
35 |                 "SourceText": text,
36 |                 "Source": from_language,
37 |                 "Target": target_language,
38 |                 "DocumentType": 'txt', # pdf,docx,pptx,xlsx,txt,xml,html,markdown,properties
39 |                 'ProjectId': 0,
40 |                 "UntranslateTencentdText": "RBA"
41 |             }
42 |             req.from_json_string(json.dumps(params))
43 |             resp = client.TextTranslate(req).TargetText
44 |             return resp
45 |         
46 |         except TencentCloudSDKException as err:
47 |             raise err
48 | 
49 | 
if __name__ == '__main__':
    # Manual smoke test; expects ./secret.yaml with tencent.secretId and
    # tencent.secretKey.
    import yaml
    with open('./secret.yaml', 'r',encoding="utf-8") as file:
        config = yaml.safe_load(file)
    secretId = config["tencent"]["secretId"]
    secretKey = config["tencent"]["secretKey"]
    t = Tencent(appid=secretId,secretKey=secretKey)
    # Tencent TMT identifies Japanese as 'ja' ('jp' is Baidu's code).
    print(t.run( "まるでおとぎの話 終わり迎えた証",from_language='ja',target_language='中文'))
58 |     
59 |     
60 | 


--------------------------------------------------------------------------------
/file/1.mp4:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/lissettecarlr/auto-subtitle/386b533f4b7db1f8a57f4879d35506eae9d0b88f/file/1.mp4


--------------------------------------------------------------------------------
/file/config.json.temp:
--------------------------------------------------------------------------------
 1 | {
 2 |     "chat_url" : "https://api/v1",
 3 |     "chat_key": "sk-",
 4 |     "chat_model_list":["gpt-3.5-turbo", "gpt-4"],
 5 |     "chat_model_name":"gpt-4",
 6 |     "baidu_appid": "",
 7 |     "baidu_appkey": "",
 8 |     "tencent_appid": "",
 9 |     "tencent_secretKey":"",
10 |     "model_name":"large-v3",
11 |     "model_list":["tiny","base","small","medium","large-v2","large-v3","tiny.en","base.en","medium.en","small.en"],
12 |     "media_type":"视频",
13 |     "vad_filter":"是"
14 | }
15 | 
16 | 


--------------------------------------------------------------------------------
/file/log.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/lissettecarlr/auto-subtitle/386b533f4b7db1f8a57f4879d35506eae9d0b88f/file/log.png


--------------------------------------------------------------------------------
/file/test.mp3:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/lissettecarlr/auto-subtitle/386b533f4b7db1f8a57f4879d35506eae9d0b88f/file/test.mp3


--------------------------------------------------------------------------------
/file/test_cn.mp4:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/lissettecarlr/auto-subtitle/386b533f4b7db1f8a57f4879d35506eae9d0b88f/file/test_cn.mp4


--------------------------------------------------------------------------------
/requirements.txt:
--------------------------------------------------------------------------------
1 | streamlit
2 | openai
3 | regex
4 | faster-whisper
5 | pysubs2
6 | ffmpeg-python
7 | 


--------------------------------------------------------------------------------
/srt2ass.py:
--------------------------------------------------------------------------------
  1 | # -*- coding: utf-8 -*-
  2 | #
  3 | # python-srt2ass: https://github.com/ewwink/python-srt2ass
  4 | # by: ewwink
  5 | # modified by:  一堂宁宁 Lenshyuu227
  6 | 
  7 | import sys
  8 | import os
  9 | import regex as re
 10 | import codecs
 11 | 
 12 | 
 13 | def fileopen(input_file):
 14 |     # use correct codec to encode the input file
 15 |     encodings = ["utf-32", "utf-16", "utf-8", "cp1252", "gb2312", "gbk", "big5"]
 16 |     srt_src = ''
 17 |     for enc in encodings:
 18 |         try:
 19 |             with codecs.open(input_file, mode="r", encoding=enc) as fd:
 20 |                 # return an instance of StreamReaderWriter
 21 |                 srt_src = fd.read()
 22 |                 break
 23 |         except:
 24 |             # print enc + ' failed'
 25 |             continue
 26 |     return [srt_src, enc]
 27 | 
 28 | 
 29 | def srt2ass(input_file,sub_style, is_split:bool, split_method:str):
 30 |     if '.ass' in input_file:
 31 |         return input_file
 32 | 
 33 |     if not os.path.isfile(input_file):
 34 |         print(input_file + ' not exist')
 35 |         return
 36 | 
 37 |     src = fileopen(input_file)
 38 |     srt_content = src[0]
 39 |     # encoding = src[1] # Will not encode so do not need to pass codec para
 40 |     src = ''
 41 |     utf8bom = ''
 42 | 
 43 |     if u'\ufeff' in srt_content:
 44 |         srt_content = srt_content.replace(u'\ufeff', '')
 45 |         utf8bom = u'\ufeff'
 46 |     
 47 |     srt_content = srt_content.replace("\r", "")
 48 |     lines = [x.strip() for x in srt_content.split("\n") if x.strip()]
 49 |     subLines = ''
 50 |     dlgLines = '' # dialogue line
 51 |     lineCount = 0
 52 |     output_file = '.'.join(input_file.split('.')[:-1])
 53 |     output_file += '.ass'
 54 | 
 55 |     for ln in range(len(lines)):
 56 |         line = lines[ln]
 57 |         if line.isdigit() and re.match('-?\d\d:\d\d:\d\d', lines[(ln+1)]):
 58 |             if dlgLines:
 59 |                 subLines += dlgLines + "\n"
 60 |             dlgLines = ''
 61 |             lineCount = 0
 62 |             continue
 63 |         else:
 64 |             if re.match('-?\d\d:\d\d:\d\d', line):
 65 |                 line = line.replace('-0', '0')
 66 |                 if sub_style =='default':
 67 |                     dlgLines += 'Dialogue: 0,' + line + ',default,,0,0,0,,'
 68 |                 elif sub_style =='ikedaCN':
 69 |                     dlgLines += 'Dialogue: 0,' + line + ',池田字幕1080p,,0,0,0,,'
 70 |                 elif sub_style == 'sugawaraCN':
 71 |                     dlgLines += 'Dialogue: 0,' + line + ',中字 1080P,,0,0,0,,'
 72 |                 elif sub_style == 'kaedeCN':
 73 |                     dlgLines += 'Dialogue: 0,' + line + ',den SR红色,,0,0,0,,'
 74 |                 elif sub_style == 'taniguchiCN':
 75 |                     dlgLines += 'Dialogue: 0,' + line + ',正文_1080P,,0,0,0,,'
 76 |                 elif sub_style == 'asukaCN':
 77 |                     dlgLines += 'Dialogue: 0,' + line + ',DEFAULT1,,0,0,0,,'
 78 |             else:
 79 |                 if lineCount < 2:
 80 |                     dlg_string = line
 81 |                     if is_split == True and split_method == 'Modest':
 82 |                         # do not split if space proceed and followed by non-ASC-II characters
 83 |                         # do not split if space followed by word that less than 5 characters
 84 |                         split_string = re.sub(r'(?<=[^\x00-\x7F])\s+(?=[^\x00-\x7F])(?=\w{5})', r'|', dlg_string)
 85 |                         # print(split_string)
 86 |                         if len(split_string.split('|')) > 1:
 87 |                             dlgLines += (split_string.replace('|', "(adjust_required)\n" + dlgLines)) + "(adjust_required)"
 88 |                         else:
 89 |                             dlgLines += line
 90 |                     elif is_split == True and split_method == 'Aggressive':
 91 |                         # do not split if space proceed and followed by non-ASC-II characters
 92 |                         # split at all the rest spaces
 93 |                         split_string = re.sub(r'(?<=[^\x00-\x7F])\s+(?=[^\x00-\x7F])', r'|', dlg_string)
 94 |                         if len(split_string.split('|')) > 1:
 95 |                             dlgLines += (split_string.replace('|',"(adjust_required)\n" + dlgLines)) + "(adjust_required)"
 96 |                         else:
 97 |                             dlgLines += line
 98 |                     else:
 99 |                         dlgLines += line
100 |                 else:
101 |                     dlgLines += "\n" + line
102 |             lineCount += 1
103 |         ln += 1
104 | 
105 | 
106 |     subLines += dlgLines + "\n"
107 | 
108 |     subLines = re.sub(r'\d(\d:\d{2}:\d{2}),(\d{2})\d', '\\1.\\2', subLines)
109 |     subLines = re.sub(r'\s+-->\s+', ',', subLines)
110 |     # replace style
111 |     # subLines = re.sub(r'<([ubi])>', "{\\\\\g<1>1}", subLines)
112 |     # subLines = re.sub(r'</([ubi])>', "{\\\\\g<1>0}", subLines)
113 |     # subLines = re.sub(r'<font\s+color="?#(\w{2})(\w{2})(\w{2})"?>', "{\\\\c&H\\3\\2\\1&}", subLines)
114 |     # subLines = re.sub(r'</font>', "", subLines)
115 | 
116 |     if sub_style == 'default':
117 |         head_name = 'head_str_default'
118 |     elif sub_style == 'ikedaCN':
119 |         head_name = 'head_str_ikeda'
120 |     elif sub_style == 'sugawaraCN':
121 |         head_name = 'head_str_sugawara'
122 |     elif sub_style == 'kaedeCN':
123 |         head_name = 'head_str_kaede'
124 |     elif sub_style == "taniguchiCN":
125 |         head_name = 'head_str_taniguchi'
126 |     elif sub_style == 'asukaCN':
127 |         head_name = 'head_str_asuka'
128 | 
129 |     head_str = STYLE_DICT.get(head_name)
130 |     output_str = utf8bom + head_str + '\n' + subLines
131 |     # encode again for head string
132 |     output_str = output_str.encode('utf8')
133 | 
134 |     with open(output_file, 'wb') as output:
135 |         output.write(output_str)
136 | 
137 |     output_file = output_file.replace('\\', '\\\\')
138 |     output_file = output_file.replace('/', '//')
139 |     return output_file
140 | 
141 | 
142 | # if len(sys.argv) > 1:
143 | #     for name in sys.argv[1:]:
144 | #         srt2ass(name,sub_style=)
145 | 
146 | 
147 | STYLE_DICT = {
148 |     'head_str_default':'''[Script Info]
149 | ; This is an Advanced Sub Station Alpha v4+ script.
150 | ; The script is generated by N46Whisper
151 | Title:
152 | ScriptType: v4.00+
153 | Collisions: Normal
154 | PlayDepth: 0
155 | 
156 | [V4+ Styles]
157 | Format: Name, Fontname, Fontsize, PrimaryColour, SecondaryColour, OutlineColour, BackColour, Bold, Italic, Underline, StrikeOut, ScaleX, ScaleY, Spacing, Angle, BorderStyle, Outline, Shadow, Alignment, MarginL, MarginR, MarginV, Encoding
158 | Style: default,Meiryo,90,&H00FFFFFF,&H00FFFFFF,&H00000000,&H00050506,-1,0,0,0,100,100,5,0,1,3.5,0,2,135,135,10,1
159 | [Events]
160 | Format: Layer, Start, End, Style, Actor, MarginL, MarginR, MarginV, Effect, Text''',
161 |     'head_str_ikeda': '''[Script Info]
162 | ; This is an Advanced Sub Station Alpha v4+ script.
163 | ; The script is generated by N46Whisper
164 | Title:
165 | ScriptType: v4.00+
166 | Collisions: Normal
167 | PlayDepth: 0
168 | 
169 | [V4+ Styles]
170 | Format: Name, Fontname, Fontsize, PrimaryColour, SecondaryColour, OutlineColour, BackColour, Bold, Italic, Underline, StrikeOut, ScaleX, ScaleY, Spacing, Angle, BorderStyle, Outline, Shadow, Alignment, MarginL, MarginR, MarginV, Encoding
171 | Style: Default,Arial,20,&H00FFFFFF,&H000000FF,&H00000000,&H00000000,0,0,0,0,100,100,0,0,1,2,2,2,10,10,10,1
172 | Style: 池田字幕1080p,思源黑体,71,&H00FFFFFF,&H000000FF,&H00008A11,&H00000000,-1,0,0,0,100,100,1.49999,0,1,1.99999,1,2,8,8,5,1
173 | Style: 池田字幕1080p - 不透明背景,思源黑体,71,&H00FFFFFF,&H000000FF,&H64202021,&H00000000,-1,0,0,0,100,100,1.49999,0,3,1.99999,0,2,8,8,5,1
174 | Style: staff1080p,思源黑体,55,&H00FFFFFF,&H00FFFFFF,&H34000000,&H00000000,-1,0,0,0,100,100,3,0,1,2.5,0,7,16,13,4,1
175 | Style: 注释1080p,思源宋体 CN,55,&H00FFFFFF,&H000000FF,&H00000000,&H00000000,-1,0,0,0,100,100,0,0,1,2,1,8,10,10,10,1
176 | Style: 多美左上遮罩,Arial,48,&H00FFFFFF,&H000000FF,&H00000000,&H00000000,0,0,0,0,100,100,0,0,1,0,0,1,8,8,11,1
177 | Style: 多美紫色遮罩,Arial,48,&H00F05384,&H000000FF,&H00000000,&H00000000,0,0,0,0,100,100,0,0,1,0,0,1,8,8,11,1
178 | Style: 多美紫色屏字,仓耳渔阳体 W03,86,&H00FFFFFF,&H000000FF,&H00000000,&H00000000,0,0,0,0,100,100,0,0,1,0,0,8,8,8,11,1
179 | Style: 多美右上屏字,方正兰亭圆_GBK_细,60,&H00FFFFFF,&H000000FF,&H00000000,&H00000000,0,0,0,0,94,100,6,0,1,0,3,9,8,45,100,1
180 | Style: 屏字-黑,汉仪正圆-55S,71,&H00FFFFFF,&H000000FF,&H00000000,&H00000000,0,0,0,0,100,100,0,0,1,0,0,8,8,8,11,1
181 | Style: 免责,汉仪正圆-85W,56,&H00AE577B,&H000000FF,&H00FFFFFF,&H9D000000,0,0,0,0,100,100,1,0,1,1.5,2,8,10,10,10,1
182 | 
183 | [Events]
184 | Format: Layer, Start, End, Style, Actor, MarginL, MarginR, MarginV, Effect, Text
185 | Dialogue: 0,0:00:00.30,0:00:03.00,staff1080p,,0,0,0,,{'''+r'''\fad(150,300)}特蕾纱熊猫观察会'''+r'''\N片源：'''+r'''\N翻译：'''+r'''\N时间：'''+r'''\N校压：
186 | Dialogue: 0,0:00:00.30,0:00:50.30,免责,,0,0,0,Banner;7;0;50,片源来自互联网，仅作内部学习交流之用，严禁用于商业用途，严禁二次上传、修改，严禁转载。任何自行传播导致的法律问题均与字幕组无关。DO NOT distribute the content on the internet.''',
187 | 'head_str_sugawara':'''[Script Info]
188 | ; This is an Advanced Sub Station Alpha v4+ script.
189 | ; The script is generated by N46Whisper
190 | Title:
191 | ScriptType: v4.00+
192 | Collisions: Normal
193 | PlayDepth: 0
194 | 
195 | [V4+ Styles]
196 | Format: Name, Fontname, Fontsize, PrimaryColour, SecondaryColour, OutlineColour, BackColour, Bold, Italic, Underline, StrikeOut, ScaleX, ScaleY, Spacing, Angle, BorderStyle, Outline, Shadow, Alignment, MarginL, MarginR, MarginV, Encoding
197 | Style: 中字 1080P,思源黑体 CN Medium,90,&H00FFFFFF,&H00FFFFFF,&H008F51CA,&H00A860F2,-1,0,0,0,100,100,5,0,1,3.5,0,2,135,135,10,1
198 | Style: staff 1080P,思源宋体 CN Medium,70,&H00FFFFFF,&H000000FF,&H008F51CA,&H00000000,0,0,0,0,100,100,0,0,1,4,2,7,10,10,10,1
199 | Style: 标注 1080P,思源黑体 CN Medium,70,&H00FFFFFF,&HFFFFFFFF,&H00000000,&H7F000000,-1,0,0,0,100,100,0,0,1,3,1.5,8,0,0,15,1
200 | Style: 中字 720P,思源黑体 CN Medium,60,&H00FFFFFF,&H00FFFFFF,&H008F51CA,&H00A860F2,-1,0,0,0,100,100,5,0,1,3,0,2,135,135,10,1
201 | Style: staff 720P,思源宋体 CN Medium,50,&H00FFFFFF,&H000000FF,&H008F51CA,&H00000000,0,0,0,0,100,100,0,0,1,3,2,7,10,10,10,1
202 | Style: 标注 720P,思源黑体 CN Medium,50,&H00FFFFFF,&HFFFFFFFF,&H00000000,&H7F000000,-1,0,0,0,100,100,0,0,1,3,1.5,8,0,0,15,1
203 | Style: staff msg,思源宋体 CN Medium,25,&H00FFFFFF,&H000000FF,&H008F51CA,&H00000000,0,0,0,0,100,100,0,0,1,4,2,7,10,10,10,1
204 | Style: 中字 msg,思源黑体 CN Medium,25,&H00FFFFFF,&H00FFFFFF,&H008F51CA,&H00A860F2,-1,0,0,0,100,100,5,0,1,4,0,2,135,135,10,1
205 | Style: 标注 msg,思源黑体 CN Medium,25,&H00FFFFFF,&HFFFFFFFF,&H00000000,&H7F000000,-1,0,0,0,100,100,0,0,1,3,1.5,8,0,0,15,1
206 | Style: 歌词日语 1080P,Swei Spring Sugar CJKtc,60,&H00FFFFFF,&H000000FF,&H009B46A5,&H5A9B46A5,0,0,0,0,100,100,0,0,1,2,0,2,10,10,30,1
207 | Style: 歌词中文 1080P,Swei Spring Sugar CJKtc,90,&H00FFFFFF,&H000000FF,&H009B46A5,&H5F9B46A5,-1,0,0,0,100,100,0,0,1,2,0,2,10,10,100,1
208 | Style: 歌词中文 720P,Swei Spring Sugar CJKtc,60,&H00FFFFFF,&H000000FF,&H009B46A5,&H5F9B46A5,-1,0,0,0,100,100,0,0,1,2,0,2,10,10,70,1
209 | Style: 歌词日语 720P,Swei Spring Sugar CJKtc,40,&H00FFFFFF,&H000000FF,&H009B46A5,&H5A9B46A5,0,0,0,0,100,100,0,0,1,1,0,2,10,10,15,1
210 | [Events]
211 | Format: Layer, Start, End, Style, Actor, MarginL, MarginR, MarginV, Effect, Text
212 | Dialogue: 0,0:00:00.00,0:00:05.24,staff 1080P,,0,0,0,,{'''+r'''\fad(1200,50)\pos(15.2,0.4)}菅原咲月字幕组'''+r'''\N片源：'''+r'''\N翻译：'''+r'''\N时间：'''+r'''\N校压：''',
213 |     'head_str_kaede':'''[Script Info]
214 | ; This is an Advanced Sub Station Alpha v4+ script.
215 | ; The script is generated by N46Whisper
216 | Title:
217 | ScriptType: v4.00+
218 | Collisions: Normal
219 | PlayDepth: 0
220 | 
221 | [V4+ Styles]
222 | Format: Name, Fontname, Fontsize, PrimaryColour, SecondaryColour, OutlineColour, BackColour, Bold, Italic, Underline, StrikeOut, ScaleX, ScaleY, Spacing, Angle, BorderStyle, Outline, Shadow, Alignment, MarginL, MarginR, MarginV, Encoding
223 | Style: staff,微软雅黑,60,&H00FFFFFF,&H00923782,&H0076137B,&H00540D67,-1,0,0,0,100,100,0,0,1,3,0,7,15,15,15,1
224 | Style: den SR红色,微软雅黑,70,&H0AFFFFFF,&H004B4B9E,&H322828E0,&H640A0A72,-1,0,0,0,100,100,0,0,1,3,0,2,15,15,70,1
225 | Style: 注释,微软雅黑,68,&H00FFFFFF,&H000000FF,&H3D000000,&H00FFFFFF,-1,0,0,0,100,100,0,0,1,4.5,0,8,23,23,23,1
226 | Style: 红色,微软雅黑,75,&H00FFFFFF,&H000000FF,&H004243CB,&H00000000,-1,0,0,0,100,100,0,0,1,3,0,2,15,15,15,1
227 | Style: den - 中文歌词,微软雅黑,70,&H0AFFFFFF,&H004B4B9E,&H322828E0,&H640A0A72,-1,0,0,0,100,100,0,0,1,3,0,2,15,15,70,1
228 | Style: den - 日文歌词,微软雅黑,50,&H0AFFFFFF,&H00F9F9F9,&H32000001,&H640A0A72,-1,0,0,0,100,100,0,0,1,1,0,2,15,15,9,1
229 | [Events]
230 | Format: Layer, Start, End, Style, Actor, MarginL, MarginR, MarginV, Effect, Text
231 | Dialogue: 0,0:00:00.00,0:00:05.00,staff,,0,0,0,,{'''+r'''\fad(300,300)}「三番目の楓」'''+r'''\N片源：'''+r'''\N翻译：'''+r'''\N时间：'''+r'''\N校压：''',
232 | 'head_str_taniguchi':'''[Script Info]
233 | ; This is an Advanced Sub Station Alpha v4+ script.
234 | ; The script is generated by N46Whisper
235 | Title:
236 | ScriptType: v4.00+
237 | Collisions: Normal
238 | PlayDepth: 0
239 | 
240 | [V4+ Styles]
241 | Format: Name, Fontname, Fontsize, PrimaryColour, SecondaryColour, OutlineColour, BackColour, Bold, Italic, Underline, StrikeOut, ScaleX, ScaleY, Spacing, Angle, BorderStyle, Outline, Shadow, Alignment, MarginL, MarginR, MarginV, Encoding
242 | Style: Default,Arial,20,&H00FFFFFF,&H000000FF,&H00000000,&H00000000,0,0,0,0,100,100,0,0,1,2,2,2,10,10,10,1
243 | Style: 正文_1080P,思源黑体 CN Bold,75,&H00FFFFFF,&H000000FF,&H0077234B,&HA00000FF,-1,0,0,0,100,100,3,0,1,3,2,2,10,10,15,1
244 | Style: staff_1080P,思源宋体 CN Heavy,60,&H00FFFFFF,&H000000FF,&H0077234B,&HA00000FF,-1,0,0,0,100,100,2,0,1,2,1,7,30,10,30,1
245 | 
246 | [Events]
247 | Format: Layer, Start, End, Style, Name, MarginL, MarginR, MarginV, Effect, Text
248 | Dialogue: 0,0:00:01.00,0:00:10.00,staff_1080P,,0,0,0,,{'''+r'''\fad(300,1000)}泪痣愛季応援団 '''+r'''\N源:'''+r'''\N制作:
249 | Dialogue: 0,0:00:08.95,0:03:29.40,staff_1080P,,0,0,0,,{'''+r'''\fad(1000,1000)'''+r'''\pos(30,30)'''+r'''\bord0'''+r'''\shad0'''+r'''\c&HFFFFFF&'''+r'''\1a&H3C&}泪痣愛季応援団
250 | Dialogue: 0,0:00:00.00,0:00:05.00,正文_1080P,,0,0,0,,谷口爱季字幕组''',
251 | 'head_str_asuka':'''[Script Info]
252 | ; The script is generated by N46Whisper
253 | ; http://www.aegisub.org/
254 | Title: Default Aegisub file
255 | ScriptType: v4.00+
256 | WrapStyle: 0
257 | ScaledBorderAndShadow: yes
258 | YCbCr Matrix: None
259 | 
260 | [Aegisub Project Garbage]
261 | 
262 | [V4+ Styles]
263 | Format: Name, Fontname, Fontsize, PrimaryColour, SecondaryColour, OutlineColour, BackColour, Bold, Italic, Underline, StrikeOut, ScaleX, ScaleY, Spacing, Angle, BorderStyle, Outline, Shadow, Alignment, MarginL, MarginR, MarginV, Encoding
264 | Style: Default,Arial,20,&H00FFFFFF,&H000000FF,&H00000000,&H00000000,0,0,0,0,100,100,0,0,1,2,2,2,10,10,10,1
265 | Style: DEFAULT1,微软雅黑,65,&H00FFFFFF,&HF08B581A,&H007E672E,&H0084561E,-1,0,0,0,100,100,0,0,1,2,1,2,20,20,5,1
266 | Style: STAFF,Microsoft YaHei,50,&H00FFFFFF,&HF08B581A,&H007E672E,&HF084561E,-1,0,0,0,100,100,0,0,1,2.5,3,7,30,30,3,134
267 | Style: 名单1,方正粗倩_GBK,45,&H00E7D793,&H00E9C116,&H004C3F00,&H0016161D,-1,0,0,0,100,100,0,0,1,3,2,2,10,10,10,1
268 | Style: 名单2,方正粗黑简体,45,&H00FAF9EC,&H00493F15,&H008A4D1F,&H000A0A0B,-1,0,0,0,100,100,0,0,1,3,1.5,2,10,10,10,1
269 | Style: 中文歌词,方正粗黑简体,50,&H00FFFFFF,&HF0000000,&H00000000,&H96000000,-1,0,0,0,100,100,0,0,1,1.5,2,2,10,10,4,134
270 | Style: 日文歌词,方正粗黑简体,40,&H00FFFFFF,&HF0000000,&H00000000,&H96000000,-1,0,0,0,100,100,0,0,1,1.5,2,2,10,10,10,134
271 | Style: 屏幕字/注释,微软雅黑,50,&H00FFFFFF,&HF0000000,&H00000000,&H96000000,-1,0,0,0,100,100,0,0,1,1.5,2,2,10,10,10,134
272 | Style: purple1,文鼎特圆简,26,&H00670067,&H00FFFFFF,&H00FFFFFF,&H00FFFFFF,0,0,0,0,100,100,0,0,1,4.6,0,2,10,10,10,1
273 | Style: 鸟,微软雅黑,35,&H00FFFFFF,&HF08B581A,&H00F3B70F,&H0084561E,-1,0,0,0,100,100,0,0,1,2,1,2,100,20,465,1
274 | Style: 哈利,微软雅黑,35,&H00FFFFFF,&HF08B581A,&H00445FE1,&H00445FE1,-1,0,0,0,100,100,0,0,1,2,1,2,0,150,220,1
275 | Style: 期数,Berlin Sans FB,25,&H00FFFFFF,&H000000FF,&H00000000,&H00000000,-1,0,0,0,100,100,0,0,1,2,1,9,10,10,0,1
276 | Style: HamAsuka-屏幕字,方正卡通_GBK,50,&H00FFFFFF,&H000000FF,&H00D08C27,&H00010102,-1,0,0,0,100,100,0,0,1,3.5,3,2,900,10,170,1
277 | Style: HamAsuka-屏幕字 小黑,方正粗黑宋简体,65,&H00000000,&H000000FF,&H00FFFFFF,&H00010102,0,0,0,0,100,100,0,0,1,0,0,2,10,10,170,1
278 | Style: HamAsuka-屏幕字 蓝底,微软雅黑,80,&H00FFFFFF,&H000000FF,&H00A21C14,&H00FFFFFF,-1,0,0,0,100,100,0,0,3,4,0,2,10,10,10,1
279 | Style: HamAsuka-屏幕字 标题,微软雅黑,90,&H00303030,&H0006C6F6,&H00FFFFFF,&H00010102,-1,0,0,0,100,100,0,0,1,0,0,2,10,10,10,1
280 | Style: HamAsuka-屏幕字 问题 白底,微软雅黑,90,&H002C2C2C,&H00B77B1B,&H00FFFFFF,&H00010102,-1,0,0,0,100,100,0,0,3,5,0,2,10,10,10,1
281 | Style: HamAsuka 歌词,微软雅黑,70,&H00FFFFFF,&H00000000,&H00000000,&H00010102,-1,0,0,0,100,100,0,0,1,0,0,2,10,10,10,1
282 | Style: HamAsuka 小窗,微软雅黑,50,&H00FFFFFF,&HF0000000,&H00000000,&H96000000,-1,0,0,0,100,100,0,0,1,1.5,2,9,10,10,300,134
283 | Style: HamAsuka-屏幕字 标题 蓝底,微软雅黑,90,&H00F9F8FB,&H000000FF,&H00AC9769,&H00000000,-1,0,0,0,100,100,0,0,3,5,0,2,10,10,10,1
284 | Style: HamAsuka-屏幕字 标题  黑字,微软雅黑,80,&H00292B2C,&H000000FF,&H00FFFFFF,&H00000000,-1,0,0,0,100,100,0,0,3,5,0,2,10,10,10,1
285 | Style: 毕业曲MV 中文歌词,思源黑体 CN,76,&H0AFFFFFF,&H000000FF,&H0F000000,&H00FFFFFF,-1,0,0,0,100,100,0,0,1,1,0,2,10,10,75,1
286 | Style: 毕业曲MV 日文歌词,思源黑体 CN,58,&H0AFFFFFF,&H000000FF,&H0F000000,&H00FFFFFF,-1,0,0,0,100,100,0,0,1,1,0,2,10,10,15,1
287 | 
288 | [Events]
289 | Format: Layer, Start, End, Style, Name, MarginL, MarginR, MarginV, Effect, Text
290 | Dialogue: 0,0:00:00.00,0:00:05.00,Default,,0,0,0,,'''
291 |     # ADD MORE
292 | 
293 | }
294 | 
295 | 
296 | # if __name__ == "__main__":
297 | #     srt2ass('sub_split_test.srt','sugawaraCN','No','Aggressive')


--------------------------------------------------------------------------------
/transcribe.py:
--------------------------------------------------------------------------------
  1 | # 字幕提取
  2 | import torch
  3 | # pip install faster-whisper
  4 | from faster_whisper import WhisperModel
  5 | 
  6 | import os
  7 | from tqdm import tqdm
  8 | import time
  9 | import pandas as pd
 10 | # pip install pysubs2
 11 | import pysubs2
 12 | from srt2ass import srt2ass
 13 | 
 14 | class Transcribe:
 15 |     def __init__(self,model_name="small",device='cuda') -> None:
 16 |         self.model = WhisperModel(model_name,device=device,compute_type="float16")
 17 |         torch.cuda.empty_cache()
 18 | 
 19 |     def run(self,file_name,audio_binary_io = None,language='ja',
 20 |             beam_size = 5,
 21 |             is_vad_filter=False,
 22 |             min_silence_duration_ms=500,
 23 |             is_split = False,
 24 |             split_method = "Modest",
 25 |             sub_style = "default",
 26 |             initial_prompt= None):
 27 |         '''
 28 |         beam_size：数值越高，在识别时探索的路径越多，这在一定范围内可以帮助提高识别准确性，但是相对的VRAM使用也会更高. 同时，Beam Size在超过5-10后有可能降低精确性，详情请见https://arxiv.org/pdf/2204.05424.pdf                                          
 29 |         is_vad_filter：使用VAD过滤。
 30 |             使用[Silero VAD model](https://github.com/snakers4/silero-vad)以检测并过滤音频中的无声段落（推荐小语种使用）
 31 |             【注意】使用VAD filter有优点亦有缺点，请用户自行根据音频内容决定是否启用. [关于VAD filter](https://github.com/Ayanaminn/N46Whisper/blob/main/FAQ.md)
 32 |         is_split：是否使用空格将文本分割成多行
 33 |             [True,False]
 34 |         split_method：分割方法
 35 |             普通分割（Modest)：当空格后的文本长度超过5个字符，则另起一行
 36 |             全部分割（Aggressive): 只要遇到空格即另起一行
 37 |         sub_style：字幕样式
 38 |             default
 39 |         initial_prompt: 使用提示词能够提高输出质量,详情见： https://platform.openai.com/docs/guides/speech-to-text/prompting
 40 |         '''
 41 |         audio_name = os.path.splitext(os.path.basename(file_name))[0]   
 42 | 
 43 |         # 如果没有传入音频的二进制，则认为是本地文件
 44 |         if audio_binary_io == None:
 45 |             if not os.path.exists(file_name):
 46 |                 raise Exception("File not found")
 47 |             audio = file_name
 48 |         else:
 49 |             audio = audio_binary_io
 50 | 
 51 |         tic = time.time()
 52 | 
 53 |         print("transcribe param")
 54 |         print(f"audio: {audio}")
 55 |         print(f"language: {language}")
 56 |         print(f"is_vad_filter: {is_vad_filter}")
 57 |         print(f"beam_size: {beam_size}")
 58 |         print(f"initial_prompt: {initial_prompt}")
 59 | 
 60 |         if is_vad_filter == False:
 61 |             vad_parameters = None
 62 |         else:
 63 |             vad_parameters = dict(min_silence_duration_ms=min_silence_duration_ms)
 64 |         
 65 |         segments, info = self.model.transcribe(audio = audio,
 66 |                                         beam_size=beam_size,
 67 |                                         language=language,
 68 |                                         vad_filter=is_vad_filter,
 69 |                                         vad_parameters=vad_parameters,
 70 |                                         initial_prompt = initial_prompt,
 71 |                                         word_timestamps=True,
 72 |                                         #condition_on_previous_text=False,
 73 |                                         #no_speech_threshold=0.6,
 74 |                                         )
 75 | 
 76 |         results= []
 77 |         with tqdm(total=round(info.duration, 2), unit=" seconds") as pbar:
 78 |             for s in segments:
 79 |                 segment_dict = {'start':s.start,'end':s.end,'text':s.text}
 80 |                 results.append(segment_dict)
 81 |                 segment_duration = round(s.end - s.start, 2)  
 82 |                 pbar.update(segment_duration)
 83 |         toc = time.time()
 84 |         subs = pysubs2.load_from_whisper(results)
 85 |     
 86 |         # 保存srt文件
 87 |         srt_filename = os.path.join("./temp",audio_name + ".srt") 
 88 |         subs.save(srt_filename)
 89 |         print('生成srt：{} 识别耗时：{}'.format(srt_filename,toc-tic) )
 90 |         
 91 |         # 保存ass文件
 92 |         ass_filename  = srt2ass(srt_filename, sub_style, is_split,split_method)
 93 |         print('生成ass：{}'.format(ass_filename))
 94 |         return srt_filename,ass_filename
 95 | 
 96 | 
 97 | if __name__ == "__main__":
 98 |     test = Transcribe(model_name = r"D:\code\auto-subtitle\models\faster-whisper-large-v3",device="cuda")
 99 |     # 测试直接传入文件地址
100 |     #test.run(file_name="./test.mp3")
101 | 
102 |     # 测试传入二进制
103 |     with open('./file/2.wav', 'rb') as f:
104 |         test.run(file_name="test",
105 |                  audio_binary_io=f,
106 |                  language="zh",
107 |                  #initial_prompt="简体中文",
108 |                  #is_vad_filter=True,
109 |                  #is_split=False
110 |         )
111 | 
112 | 
113 | 
114 | 
115 | 
116 | 


--------------------------------------------------------------------------------
/translation.py:
--------------------------------------------------------------------------------
 1 | # 字幕翻译
 2 | import os
 3 | from tqdm import tqdm
 4 | from typing import Union
 5 | # !pip install openai
 6 | # !pip install pysubs2
 7 | import pysubs2
 8 | from engine_translation.gpt import GPT
 9 | from engine_translation.baidu import Baidu
10 | from engine_translation.tencent import Tencent
11 | import time
12 | 
class translation:
    """Translate a subtitle file line by line with a pluggable engine."""

    def __init__(self, engine: "Union[GPT, Baidu, Tencent]", max_retries=3, retry_delay=10) -> None:
        """Store the engine and the retry policy.

        Args:
            engine: Object exposing ``reset()`` and
                ``run(text, target_language=...)``.
            max_retries: Maximum number of attempts per subtitle line.
            retry_delay: Seconds to wait between two attempts.
        """
        self.engine = engine
        self.max_retries = max_retries
        self.retry_delay = retry_delay

    def _translate_line(self, text, language):
        """Translate one line; return None when every attempt failed.

        Each call gets its own attempt budget, so one failing line does not
        affect the lines that follow it.
        """
        for attempt in range(1, self.max_retries + 1):
            try:
                return self.engine.run(text, target_language=language)
            except Exception as e:
                print("翻译出错：{}，进行重试".format(e))
                # No point in waiting after the last attempt.
                if attempt < self.max_retries:
                    time.sleep(self.retry_delay)
        return None

    def translate_save(self,sub_src,language="中文",keep_origin = True):
        """
        Translate every line of ``sub_src`` and save the result as ASS and SRT.

        Args:
            sub_src: Path of the subtitle file to load.
            language: Target language passed to the engine. "中文", "日语" and
                "英语" map to the file suffixes zh / jp / en; anything else
                uses "other".
            keep_origin: When True the translation is appended to the original
                text after a line break; when False it replaces the text.

        Returns:
            Tuple ``(save_ass_path, save_srt_path)``.
        """
        sub_trans = pysubs2.load(sub_src)
        self.engine.reset()
        for line in tqdm(sub_trans, total=len(sub_trans)):
            line_trans = self._translate_line(line.text, language)
            if line_trans is None:
                # Keep the untranslated line instead of reusing a stale result.
                print("翻译失败，保留原文：{}".format(line.text))
                continue

            if keep_origin:
                line.text += (r'\N'+ line_trans)
            else:
                line.text = line_trans
            print(line.text)

        suffix = {"中文": "zh", "日语": "jp", "英语": "en"}.get(language, "other")
        base_name = os.path.splitext(os.path.basename(sub_src))[0]
        os.makedirs("./temp", exist_ok=True)
        save_ass_path = "./temp/" + base_name + "_" + suffix + ".ass"
        save_srt_path = "./temp/" + base_name + "_" + suffix + ".srt"
        sub_trans.save(save_ass_path)
        sub_trans.save(save_srt_path)
        return save_ass_path,save_srt_path
66 |   
67 |         
if __name__ == '__main__':
    # Manual translation test driven by a local secrets file.
    import yaml
    with open('./engine_translation/secret.yaml', 'r',encoding="utf-8") as secret_file:
        secrets_cfg = yaml.safe_load(secret_file)

    gpt_cfg = secrets_cfg["chatgpt"]
    gpt_engine = GPT(key = gpt_cfg["key"], base_url = gpt_cfg["base_url"])
    baidu_cfg = secrets_cfg["baidu"]
    baidu_engine = Baidu(appid = baidu_cfg["appid"],secretKey = baidu_cfg["secretKey"])
    # gpt_engine.run("まるでおとぎの話 終わり迎えた証")
    # gpt_engine.run("長すぎる旅路から 切り出した一説")

    translator = translation(gpt_engine)
    ass_path, srt_path = translator.translate_save("./test.ass",keep_origin=True)
80 |     


--------------------------------------------------------------------------------
/utils.py:
--------------------------------------------------------------------------------
 1 | 
 2 | import ffmpeg
 3 | import os
 4 | import json
 5 | 
 6 | def extract_audio(video_path, output_audio_path):
 7 |     """
 8 |     从视频文件中提取音频并保存为wav。
 9 |     参数:
10 |     video_path (str): 视频文件的路径。
11 |     output_audio_path (str): 输出音频文件的路径。
12 |     """
13 |     if not os.path.exists(video_path):
14 |         raise "{} not find".format(video_path)
15 |     if  os.path.exists(output_audio_path):
16 |         os.remove(output_audio_path)
17 |     try:
18 |         (
19 |             ffmpeg
20 |             .input(video_path)
21 |             .output(output_audio_path, acodec='mp3', audio_bitrate='320k')
22 |             .run(overwrite_output=True)
23 |         )
24 |     except ffmpeg.Error as e:
25 |         raise e
26 | 
def merge_subtitles_to_video(video_path, subtitle_path, output_video_path):
    """
    Render a subtitle file into a video via ffmpeg's ``subtitles`` filter.

    Args:
        video_path (str): Path of the source video.
        subtitle_path (str): Path of the subtitle file.
        output_video_path (str): Path of the resulting video; an existing file
            at this path is removed first.

    Raises:
        FileNotFoundError: If the video or the subtitle file does not exist.
        RuntimeError: If the ffmpeg run fails.
    """
    # Validate both inputs, video first, before touching the output path.
    for required_input in (video_path, subtitle_path):
        if not os.path.exists(required_input):
            raise FileNotFoundError(f"{required_input} not found")

    if os.path.exists(output_video_path):
        os.remove(output_video_path)

    # The filter argument is built with forward slashes only.
    normalized_subs = subtitle_path.replace("\\", "/")
    print(f"subtitle_path = {normalized_subs}")
    burn_in_filter = f"subtitles={normalized_subs}"
    try:
        stream = ffmpeg.input(video_path)
        stream = stream.output(output_video_path, vf=burn_in_filter)
        stream.run(overwrite_output=True)
    except ffmpeg.Error as e:
        raise RuntimeError(f"Failed to merge subtitles into video: {e}")
53 | 
def clear_folder(folder_path):
    """
    Delete everything inside ``folder_path`` while keeping the folder itself.

    Files and symlinks are removed with ``os.remove``; sub-directories are
    removed recursively. A missing folder is treated as already empty.

    Args:
        folder_path (str): Path of the folder to empty.
    """
    import shutil

    if not os.path.isdir(folder_path):
        print("文件夹不存在，无需清空：{}".format(folder_path))
        return

    for filename in os.listdir(folder_path):
        file_path = os.path.join(folder_path, filename)
        # os.remove cannot delete directories; links are removed, not followed.
        if os.path.isdir(file_path) and not os.path.islink(file_path):
            shutil.rmtree(file_path)
        else:
            os.remove(file_path)
    print("清空文件夹：{}".format(folder_path))
59 | 
60 | 
61 | 
def import_config_file(file):
    """
    Parse a JSON configuration from a file-like object.

    Args:
        file: Object with a ``read()`` method returning JSON text, or None.

    Returns:
        The decoded JSON data, or None when ``file`` is None.

    Raises:
        Any exception raised by ``json.loads`` is propagated unchanged.
    """
    if file is None:
        return None
    raw_config = file.read()
    return json.loads(raw_config)
70 |             
if __name__ == "__main__":
    # Nothing happens when this module is executed directly.
    pass


--------------------------------------------------------------------------------
/uvr.py:
--------------------------------------------------------------------------------
 1 | # https://github.com/karaokenerds/python-audio-separator
 2 | # pip install audio-separator[gpu]
 3 | # pip install audio-separator[cpu]
 4 | 
 5 | from audio_separator.separator import Separator  
 6 | import logging
 7 | LOG_LE = logging.WARN
 8 | 
 9 | class UVR_Client:
10 |     def __init__(self,model_file_dir="./models/uvr5_weights",output_dir='./temp',sample_rate=44000) -> None:
11 |         self.model = Separator(log_level=LOG_LE,
12 |                                model_file_dir=model_file_dir,
13 |                                output_dir=output_dir,
14 |                                sample_rate=sample_rate)
15 |         self.model.load_model('UVR_MDXNET_Main.onnx')
16 | 
17 |     def change_model(self,model_name):
18 |         self.model.load_model(model_name)
19 | 
20 |     def infer(self,audio="E:\\audio_AI\\audio\\test\\感受孤独.flac"):
21 |         rimary_stem_output_path, secondary_stem_output_path = self.model.separate(audio)
22 |         return rimary_stem_output_path,secondary_stem_output_path
23 | 
24 | 
if __name__ == "__main__":
    # Manual check: separate with the default model, then with a second one.
    client = UVR_Client()
    first_result = client.infer()
    print(first_result)
    client.change_model("VR-DeEchoAggressive.pth")
    second_result = client.infer()
    print(second_result)


--------------------------------------------------------------------------------
/web.py:
--------------------------------------------------------------------------------
  1 | import streamlit as st
  2 | import json
  3 | import os
  4 | from transcribe import Transcribe
  5 | from zipfile import ZipFile
  6 | import base64
  7 | import io
  8 | import ffmpeg
  9 | from translation import GPT,Baidu,Tencent,translation
 10 | from utils import extract_audio,merge_subtitles_to_video,clear_folder,import_config_file
 11 | from uvr import UVR_Client
 12 | 
 13 | # 临时文件存放地址
 14 | TEMP = "./temp"
 15 | 
 16 | # def import_config_file(file):
 17 | #     '''
 18 | #     为避免页面刷新重新填写，可以通过配置文件导入，配置文件格式：
 19 | #         config.json
 20 | #         {
 21 | #             "chat_url" : "",
 22 | #             "chat_key": "",
 23 | #             "baidu_appid": "",
 24 | #             "baidu_appkey": "",
 25 | #             "tencent_appid": "",
 26 | #             "tencent_secretKey":""
 27 | #         }
 28 | #     '''
 29 | #     if file is not None:
 30 | #         content = file.read()
 31 | #         try:
 32 | #             # 解析JSON数据
 33 | #             json_data = json.loads(content)
 34 | #             st.success("load config success")
 35 | #         except Exception as e:
 36 | #             st.error("load config error:{}".format(e))
 37 | #         st.session_state.chat_url = json_data.get("chat_url")
 38 | #         st.session_state.chat_key = json_data.get("chat_key")
 39 | #         st.session_state.baidu_appid = json_data.get("baidu_appid")
 40 | #         st.session_state.baidu_appkey = json_data.get("baidu_appkey")
 41 | #         st.session_state.tencent_appid = json_data.get("tencent_appid")
 42 | #         st.session_state.tencent_secretKey = json_data.get("tencent_secretKey")
 43 | 
 44 | def web_page():
 45 |     st.title("字幕生成器")
 46 |     st.caption("")
 47 | 
 48 |     if "transcribe" not in st.session_state:
 49 |         st.session_state['transcribe'] = None
 50 | 
 51 |     # 通过配置文件导入
 52 |     if "config" not in st.session_state:
 53 |         st.session_state['config'] = None
 54 | 
 55 |     uploaded_file = st.file_uploader("上传配置文件（可选）：", type="json")
 56 |     if uploaded_file is not None:
 57 |         try:
 58 |             st.session_state.config =  import_config_file(uploaded_file)
 59 |         except:
 60 |             st.error("load config error")
 61 | 
 62 |     if st.button("清空缓存"):
 63 |         clear_folder("./temp")
 64 |         
 65 |     # st.sidebar.markdown("----")
 66 |     # st.sidebar.markdown("## 使用说明")
 67 |     # st.sidebar.write("1.选择模型，加载模型")
 68 |     # st.sidebar.write("2.根据需求设置配置")
 69 |     # st.sidebar.write("3.上传音频")
 70 |     # st.sidebar.write("4.点击开始转换")
 71 |     # st.sidebar.write("5.下载字幕")
 72 |     # st.sidebar.markdown("----")
 73 |  
 74 |     #st.markdown("## 提取配置")
 75 |     #col1, col2 = st.columns(2)
 76 |     #with col1:
 77 | 
 78 |     st.markdown("## 1 模型")
 79 |     st.markdown("如果未在models中找到模型，则会自动下载到huggingface缓存目录中，也可以手动去[huggingface]((https://huggingface.co/collections/guillaumekln/faster-whisper-64f9c349b3115b4f51434976))下载模型，然后将模型放如models目录下，这里也提供一个[百度云](https://pan.baidu.com/s/1rRcSRhBpizuQo20qowG2UA?pwd=kuon)")
 80 |     if st.session_state.config is not None:
 81 |         #  从配置文件中读取模型列表和默认模型
 82 |         st.session_state.model_name = st.session_state.config.get("model_name")
 83 |         st.session_state.model_list = st.session_state.config.get("model_list")
 84 |         for index,current_model_name in enumerate(st.session_state.model_list):
 85 |             if current_model_name == st.session_state.model_name:
 86 |                 model_index = index
 87 |                 break
 88 |     else:
 89 |         st.session_state.model_list = ["tiny","base","small","medium","large-v2","large-v3",
 90 |                                         "tiny.en","base.en","medium.en","small.en"]
 91 |         st.session_state.model_name = "large-v2"
 92 |         model_index = 5
 93 | 
 94 |     model_name = st.selectbox('模型选择：', st.session_state.model_list, index=model_index)
 95 |     device_list = ["cpu","cuda"]
 96 |     device_name = st.selectbox('设备选择（cpu会相当相当的慢，所有请使用cuda）：', device_list, index=1)
 97 |     
 98 |     if st.button("加载模型：{}，使用：{}".format(model_name,device_name)):
 99 |         with st.spinner('加载中，请稍后。。。'):
100 |             if st.session_state.transcribe is not None:
101 |                 del st.session_state.transcribe
102 |             models_path = "./models" + "/faster-whisper-" + model_name
103 |             #print(models_path)
104 |             try:
105 |                 if os.path.exists(models_path):
106 |                     print("加载模型：{}".format(models_path))
107 |                     st.session_state.transcribe = Transcribe(model_name=models_path,device=device_name)
108 |                 else:
109 |                     print("加载hf模型：{}".format(model_name))
110 |                     st.session_state.transcribe = Transcribe(model_name=model_name,device=device_name)
111 |                 st.success("模型加载成功：{}".format(models_path))    
112 |             except Exception as e:
113 |                 st.error("加载模型失败：{}".format(e))
114 |                 
115 | 
116 |     st.markdown("----")
117 |     st.markdown("## 2 上传媒体")
118 |     if st.session_state.config is not None:
119 |         st.session_state.media_type = st.session_state.config.get("media_type")
120 |         if st.session_state.media_type == "视频":
121 |             media_type_index = 0
122 |         else:
123 |             media_type_index = 1
124 |     else:
125 |         st.session_state.media_type = "视频"
126 |         media_type_index = 0
127 |     st.session_state.media_type = st.radio("选择来源", ("视频", "音频"),horizontal=True,index=media_type_index)
128 |     
129 |     # 保存用于提取转化字幕的音频地址
130 |     if "media_temp" not in st.session_state:
131 |         st.session_state.audio_temp = None
132 |     if "audio_separator_temp" not in st.session_state:
133 |         st.session_state.audio_separator_temp = None
134 |     if "uvr_client" not in st.session_state:
135 |         st.session_state.uvr_client = None
136 | 
137 |     if st.session_state.media_type == "视频":
138 |         if "video_temp" not in st.session_state:
139 |             st.session_state.video_temp = None
140 |         input_file = st.file_uploader("上传视频：", type=["mp4", "avi", "mov", "mkv"])
141 |         if input_file is not None:
142 |             # 上传视频临时保存地址
143 |             temp_input_video = os.path.join(
144 |                 TEMP,
145 |                 os.path.splitext(os.path.basename(input_file.name))[0]+"_temp.mp4"
146 |             )
147 |             if not os.path.exists(temp_input_video):      
148 |                 with open(temp_input_video, "wb") as f:
149 |                     f.write(input_file.read())
150 |             else:
151 |                 print("文件:{} 已存在，无需创建".format(temp_input_video))   
152 | 
153 |             st.session_state.video_temp_name = input_file.name   
154 |             st.session_state.video_temp = temp_input_video
155 | 
156 |             temp_audio_path = os.path.join(
157 |                 TEMP, 
158 |                 os.path.splitext(os.path.basename(input_file.name))[0]+".wav"
159 |             )
160 |             if not os.path.exists(temp_audio_path):
161 |                 with st.spinner('音频提取中，请稍后。。。'):
162 |                     extract_audio(temp_input_video,temp_audio_path)
163 |                 print("音频提取完成")
164 |             else:
165 |                 print("音频文件:{} 已存在，无需提取".format(temp_audio_path))  
166 |             st.session_state.audio_temp = temp_audio_path
167 | 
168 |     elif st.session_state.media_type == "音频":
169 |         input_file = st.file_uploader("上传音频：", type=["mp3", "wav", "m4a"])
170 |         if input_file is not None:
171 |             temp_audio_path = os.path.join(
172 |                 TEMP, 
173 |                 os.path.splitext(os.path.basename(input_file.name))[0]+".wav"
174 |             )
175 |             if not os.path.exists(temp_audio_path):      
176 |                 with open(temp_audio_path, "wb") as f:
177 |                     f.write(input_file.read())
178 |             else:
179 |                 print("文件:{} 已存在，无需创建".format(temp_audio_path))    
180 |             st.session_state.audio_temp = temp_audio_path
181 |     
182 |     if st.session_state.audio_temp is not None:
183 |         st.write("音频：")
184 |         st.audio(st.session_state.audio_temp, format='audio/wav', start_time=0)
185 | 
186 |     if st.button("音频清洁（用于清除背景音，可选）"):
187 |         if st.session_state.audio_temp is None:
188 |             st.error("请先上传媒体")
189 | 
190 |         if st.session_state.uvr_client is None:
191 |             print("加载模型：UVR_modle")        # UVR_modle.load_model('UVR_MDXNET_Main.onnx')
192 |             st.session_state.uvr_client = UVR_Client()
193 | 
194 |         with st.spinner('音频清洁中'):
195 |             rimary_stem_output_path, secondary_stem_output_path = st.session_state.uvr_client.infer(st.session_state.audio_temp)
196 |             st.session_state.audio_separator_temp = os.path.join('./temp',secondary_stem_output_path)
197 |         if st.session_state.audio_separator_temp is not None:
198 |             st.write("清洁音频：")
199 |             st.audio(st.session_state.audio_separator_temp, format='audio/wav', start_time=0)           
200 | 
201 |     st.markdown("----")
202 |     st.markdown("## 3 配置")
203 | 
204 |     language_mapping = {"中文": "zh", "日文": "ja", "英文": "en"}
205 |     language = list(language_mapping.keys())
206 |     selected_language = st.selectbox('选择媒体语言', language,index=1)
207 |     st.session_state.language = language_mapping[selected_language]
208 |   
209 |     if st.session_state.config is not None:
210 |         vad_filter = st.session_state.config.get("vad_filter")
211 |         if vad_filter == "是":
212 |             vad_filter_index = 0
213 |         else:
214 |             vad_filter_index = 1
215 |     else:
216 |         vad_filter_index = 1
217 | 
218 |     vad_filter = st.radio("是使用VAD（过滤音频中的无声段落,whisper模型在识别无声片段，会输出乱七八糟的内容，改项就是解决这个的）", ("是", "否"),horizontal=True,index=vad_filter_index)
219 |     
220 |     if "min_silence_duration_ms" not in st.session_state:
221 |         st.session_state.min_silence_duration_ms = None
222 | 
223 |     if vad_filter == "是":
224 |         st.session_state.is_vad_filter = True
225 |         st.session_state.min_silence_duration_ms = st.number_input("最小静默时长（毫秒）", min_value=0, max_value=10000, value=500, step=100)
226 |     else:
227 |         st.session_state.is_vad_filter = False
228 | 
229 | 
230 |     is_split = st.radio("是否对文本进行分割（当单行显示文本过长时可开启）", ("是", "否"),horizontal=True,index=1)
231 |     if is_split == "是":
232 |         st.session_state.is_split = True
233 |         st.session_state.split_method = st.selectbox('导出格式（Modest：当空格后的文本长度超过5个字符，则另起一行；Aggressive: 只要遇到空格即另起一行）', ["Modest","Aggressive"],index=0)
234 |     else:
235 |         st.session_state.is_split = False
236 |         st.session_state.split_method = "Modest"
237 | 
238 |     
239 |     st.session_state.prompt = st.text_input('请输入提示词：', "",placeholder="简体中文")
240 |     if st.session_state.prompt == "":
241 |         st.session_state.prompt = None
242 | 
243 |     # 是否显示融合字幕后的视频
244 |     if st.session_state.media_type == "视频":
245 |         st.session_state.is_show_video = st.radio("是否显示翻译后的视频", ("是", "否"),horizontal=True,index=0)
246 |     else:
247 |         st.session_state.is_show_video = "否"
248 | 
249 |     # print("-----")
250 |     # print(st.session_state.language)
251 |     # print(st.session_state.is_vad_filter)
252 |     # print(st.session_state.is_split)
253 |     # if st.session_state.is_split == "是":
254 |     #     print(st.session_state.split_method)
255 |     # print("-----")
256 | 
257 |     # 翻译--------------------------------
258 |     #st.markdown("----")
259 |     if st.session_state.config is not None:
260 |         st.session_state.chat_url = st.session_state.config.get("chat_url")
261 |         st.session_state.chat_key = st.session_state.config.get("chat_key")
262 |         st.session_state.chat_model_list = st.session_state.config.get("chat_model_list")
263 |         st.session_state.chat_model_name = st.session_state.config.get("chat_model_name")
264 |         for index,current_model_name in enumerate(st.session_state.chat_model_list):
265 |                 if current_model_name == st.session_state.model_name:
266 |                     chat_model_index = index
267 |                     break
268 | 
269 |         st.session_state.baidu_appid = st.session_state.config.get("baidu_appid")
270 |         st.session_state.baidu_appkey = st.session_state.config.get("baidu_appkey")
271 |        
272 |         st.session_state.tencent_appid = st.session_state.config.get("tencent_appid")
273 |         st.session_state.tencent_secretKey = st.session_state.config.get("tencent_secretKey")
274 | 
275 |  
276 |     else:
277 |         st.session_state.chat_url = "https://api.openai.com/v1"
278 |         st.session_state.chat_key = ""
279 |         st.session_state.chat_model_list = ["gpt-3.5-turbo", "gpt-4","gpt-4-turbo"]
280 |         st.session_state.chat_model_name = "gpt-4-turbo"
281 |         chat_model_index = 2
282 | 
283 |         st.session_state.baidu_appid = ""
284 |         st.session_state.baidu_appkey = ""
285 | 
286 |         st.session_state.tencent_appid = ""
287 |         st.session_state.tencent_secretKey = ""
288 |    
289 |     if "engine" not in st.session_state:
290 |         st.session_state['engine'] = None
291 | 
292 | 
293 |     is_translation= st.radio("翻译器选择（翻译成中文）", ("否", "gpt翻译","百度翻译","腾讯翻译"),horizontal=True,index=0)
294 |     if is_translation == "否":
295 |         st.session_state.engine = None
296 |     elif is_translation == "gpt翻译":
297 |         # 使用gpt模型时
298 |         st.session_state.chat_url = st.text_input('Base URL', st.session_state.chat_url,type='password')
299 |         st.session_state.chat_key =  st.text_input('API Key',st.session_state.chat_key, type='password')
300 |         
301 |         st.session_state.chat_model_name = st.selectbox('Models', st.session_state.chat_model_list,index=chat_model_index)
302 | 
303 |         if st.session_state.chat_key != "":
304 |             st.session_state.engine = GPT(key = st.session_state.chat_key ,
305 |                                         base_url = st.session_state.chat_url,
306 |                                         model = st.session_state.chat_model_name)
307 | 
308 |     elif is_translation == "百度翻译":
309 |         st.write("申请地址：https://fanyi-api.baidu.com/manage/developer")
310 |         st.session_state.baidu_appid = st.text_input('appid', st.session_state.baidu_appid,type='password')
311 |         st.session_state.baidu_appkey =  st.text_input('appkey',st.session_state.baidu_appkey, type='password')
312 |         st.session_state.engine = Baidu(appid = st.session_state.baidu_appid ,secretKey = st.session_state.baidu_appkey)
313 | 
314 |     elif is_translation == "腾讯翻译":
315 |         st.write("申请地址：https://console.cloud.tencent.com/tmt")
316 |         st.session_state.tencent_appid = st.text_input('appid', st.session_state.tencent_appid,type='password')
317 |         st.session_state.tencent_secretKey =  st.text_input('secretKey',st.session_state.tencent_secretKey, type='password')
318 |         st.session_state.engine = Tencent(appid = st.session_state.tencent_appid ,secretKey = st.session_state.tencent_secretKey)
319 | 
320 | 
321 | 
322 |     st.markdown("----")
323 |     if st.button("开始转换"):
324 |         if st.session_state.transcribe is None:
325 |             st.error("请先加载模型")
326 |             return
327 | 
328 |         if st.session_state.audio_separator_temp is not None:
329 |             input_audio = st.session_state.audio_separator_temp
330 |         elif st.session_state.audio_temp is not None:
331 |             input_audio = st.session_state.audio_temp
332 |         else:
333 |             st.error("请先上传媒体")
334 |             return
335 | 
336 |         print("input audio: {}".format(input_audio))
337 | 
338 |         with st.spinner('字幕生成中。。。'):
339 |             srt,ass = st.session_state.transcribe.run(file_name = input_audio,
340 |                                                       audio_binary_io = input_audio,
341 |                                                       language=st.session_state.language,
342 |                                                       is_vad_filter = st.session_state.is_vad_filter,
343 |                                                       min_silence_duration_ms = st.session_state.min_silence_duration_ms,
344 |                                                       is_split = st.session_state.is_split,
345 |                                                       split_method = st.session_state.split_method,
346 |                                                       initial_prompt=st.session_state.prompt
347 |                                                      )
348 |              
349 |         zip_name = os.path.splitext(os.path.basename(st.session_state.audio_temp))[0]   + ".zip"
350 |         zip_name_path = os.path.join("./temp",zip_name)
351 |         zipObj = ZipFile(zip_name_path, "w")
352 |         zipObj.write(srt)
353 |         zipObj.write(ass)
354 |         
355 |         # 如果需要翻译
356 |         if st.session_state.engine is not None:
357 |             with st.spinner('翻译中。。。'):
358 |                 t = translation(st.session_state.engine)
359 |                 translate_ass ,translate_srt = t.translate_save(ass)
360 |                 zipObj.write(translate_ass)
361 |                 zipObj.write(translate_srt)
362 | 
363 |         zipObj.close()
364 | 
365 |         with open(zip_name_path, "rb") as f:
366 |             datazip = f.read()
367 |             b64 = base64.b64encode(datazip).decode()
368 |             href = f"<a href=\"data:file/zip;base64,{b64}\" download='{zip_name}'>\
369 |                     下载字幕： {zip_name}\
370 |                     </a>"
371 |         st.markdown(href, unsafe_allow_html=True)
372 |         st.markdown("后期可以通过[aegisub](http://www.aegisub.org/)对字幕进行修改优化")
373 | 
374 | 
375 |         if st.session_state.media_type == "视频" and st.session_state.is_show_video == "是":
376 |             #print("字幕：{}，{}".format(srt, ass))
377 |             output_video_path = os.path.join(
378 |                 TEMP, 
379 |                 os.path.splitext(os.path.basename(st.session_state.video_temp_name))[0]+"_output.mp4"
380 |             )
381 |             with st.spinner("视频生成中..."):
382 |                 merge_subtitles_to_video(st.session_state.video_temp
383 |                                         ,ass
384 |                                         ,output_video_path)    
385 |             
386 |             if os.path.exists(output_video_path):
387 |                 video_bytes = open(output_video_path, 'rb').read()
388 |                 st.video(video_bytes)
389 | 
390 | 
if __name__ == "__main__":
    # Script entry point: make sure the local working directory used for
    # intermediate and output files exists, then render the Streamlit page.
    # exist_ok=True creates the folder only when it is missing and avoids the
    # check-then-create race of a separate os.path.exists() test.
    os.makedirs('temp', exist_ok=True)
    web_page()
396 | 
397 | 
398 | 
399 |     
400 | 


--------------------------------------------------------------------------------