├── .gitignore ├── README.md ├── engine_translation ├── baidu.py ├── gpt.py └── tencent.py ├── file ├── 1.mp4 ├── config.json.temp ├── log.png ├── test.mp3 └── test_cn.mp4 ├── requirements.txt ├── srt2ass.py ├── transcribe.py ├── translation.py ├── utils.py ├── uvr.py └── web.py /.gitignore: -------------------------------------------------------------------------------- 1 | # Byte-compiled / optimized / DLL files 2 | __pycache__/ 3 | *.py[cod] 4 | *$py.class 5 | 6 | # C extensions 7 | *.so 8 | 9 | # Distribution / packaging 10 | .Python 11 | build/ 12 | develop-eggs/ 13 | dist/ 14 | downloads/ 15 | eggs/ 16 | .eggs/ 17 | lib/ 18 | lib64/ 19 | parts/ 20 | sdist/ 21 | var/ 22 | wheels/ 23 | share/python-wheels/ 24 | *.egg-info/ 25 | .installed.cfg 26 | *.egg 27 | MANIFEST 28 | 29 | # PyInstaller 30 | # Usually these files are written by a python script from a template 31 | # before PyInstaller builds the exe, so as to inject date/other infos into it. 32 | *.manifest 33 | *.spec 34 | 35 | # Installer logs 36 | pip-log.txt 37 | pip-delete-this-directory.txt 38 | 39 | # Unit test / coverage reports 40 | htmlcov/ 41 | .tox/ 42 | .nox/ 43 | .coverage 44 | .coverage.* 45 | .cache 46 | nosetests.xml 47 | coverage.xml 48 | *.cover 49 | *.py,cover 50 | .hypothesis/ 51 | .pytest_cache/ 52 | cover/ 53 | 54 | # Translations 55 | *.mo 56 | *.pot 57 | 58 | # Django stuff: 59 | *.log 60 | local_settings.py 61 | db.sqlite3 62 | db.sqlite3-journal 63 | 64 | # Flask stuff: 65 | instance/ 66 | .webassets-cache 67 | 68 | # Scrapy stuff: 69 | .scrapy 70 | 71 | # Sphinx documentation 72 | docs/_build/ 73 | 74 | # PyBuilder 75 | .pybuilder/ 76 | target/ 77 | 78 | # Jupyter Notebook 79 | .ipynb_checkpoints 80 | 81 | # IPython 82 | profile_default/ 83 | ipython_config.py 84 | 85 | # pyenv 86 | # For a library or package, you might want to ignore these files since the code is 87 | # intended to run in multiple environments; otherwise, check them in: 88 | # .python-version 89 | 90 | # pipenv 91 | # According to pypa/pipenv#598, it is recommended to include Pipfile.lock in version control. 92 | # However, in case of collaboration, if having platform-specific dependencies or dependencies 93 | # having no cross-platform support, pipenv may install dependencies that don't work, or not 94 | # install all needed dependencies. 95 | #Pipfile.lock 96 | 97 | # poetry 98 | # Similar to Pipfile.lock, it is generally recommended to include poetry.lock in version control. 99 | # This is especially recommended for binary packages to ensure reproducibility, and is more 100 | # commonly ignored for libraries. 101 | # https://python-poetry.org/docs/basic-usage/#commit-your-poetrylock-file-to-version-control 102 | #poetry.lock 103 | 104 | # pdm 105 | # Similar to Pipfile.lock, it is generally recommended to include pdm.lock in version control. 106 | #pdm.lock 107 | # pdm stores project-wide configurations in .pdm.toml, but it is recommended to not include it 108 | # in version control. 109 | # https://pdm.fming.dev/#use-with-ide 110 | .pdm.toml 111 | 112 | # PEP 582; used by e.g. github.com/David-OConnor/pyflow and github.com/pdm-project/pdm 113 | __pypackages__/ 114 | 115 | # Celery stuff 116 | celerybeat-schedule 117 | celerybeat.pid 118 | 119 | # SageMath parsed files 120 | *.sage.py 121 | 122 | # Environments 123 | .env 124 | .venv 125 | env/ 126 | venv/ 127 | ENV/ 128 | env.bak/ 129 | venv.bak/ 130 | 131 | # Spyder project settings 132 | .spyderproject 133 | .spyproject 134 | 135 | # Rope project settings 136 | .ropeproject 137 | 138 | # mkdocs documentation 139 | /site 140 | 141 | # mypy 142 | .mypy_cache/ 143 | .dmypy.json 144 | dmypy.json 145 | 146 | # Pyre type checker 147 | .pyre/ 148 | 149 | # pytype static type analyzer 150 | .pytype/ 151 | 152 | # Cython debug symbols 153 | cython_debug/ 154 | 155 | # PyCharm 156 | # JetBrains specific template is maintained in a separate JetBrains.gitignore that can 157 | # be found at https://github.com/github/gitignore/blob/main/Global/JetBrains.gitignore 158 | # and can be added to the global gitignore or merged into this file. For a more nuclear 159 | # option (not recommended) you can uncomment the following to ignore the entire idea folder. 160 | #.idea/ 161 | 162 | # 自定义 163 | temp/*.* 164 | file/config.json 165 | engine_translator/secret.yaml 166 | models 167 | engine_translation/secret.yaml 168 | file/SubtitleEdit-4.0.3-Setup.zip 169 | file/Aegisub-3.2.2-64.exe 170 | file/2.wav 171 | file/test.mp4 -------------------------------------------------------------------------------- /README.md: -------------------------------------------------------------------------------- 1 | # 字幕生成器 2 | 3 | 一个能够自动生成媒体字幕的工具 4 | 5 | 目前功能: 6 | 7 | * 输入视频 8 | * 输入音频 9 | * 输出srt字幕 10 | * 输出ass字幕 11 | * GPT字幕翻译 12 | * 百度字幕翻译 13 | * 腾讯字幕翻译 14 | * 音频清洁 15 | 16 | ## 环境 17 | 18 | * conda 19 | ```bash 20 | conda create -n subtitle python=3.10 21 | conda activate subtitle 22 | ``` 23 | 24 | * torch(CUDA 11.8,其他版本去[官网](https://pytorch.org/get-started/locally/)找) 25 | ```bash 26 | # GPU 27 | pip install torch torchvision torchaudio --index-url https://download.pytorch.org/whl/cu118 28 | 29 | # CPU 30 | pip install torch torchvision torchaudio --index-url https://download.pytorch.org/whl/cpu 31 | ``` 32 | 33 | * 安装ffmpeg(windows)。 34 | 去[官网](https://ffmpeg.org/download.html#build-windows)下载,解压后将bin目录添加到环境变量 35 | 36 | * 安装ffmpeg(ubuntu) 37 | ```bash 38 | apt install ffmpeg 39 | ``` 40 | 41 | * 其他依赖 42 | ``` 43 | pip install -r requirements.txt 44 | 45 | # 腾讯翻译 46 | pip install -i https://mirrors.tencent.com/pypi/simple/ --upgrade tencentcloud-sdk-python 47 | 48 | # 音频清洁 49 | # https://github.com/karaokenerds/python-audio-separator 50 | pip install audio-separator[gpu] 51 | pip install audio-separator[cpu] 52 | ``` 53 | 54 | 55 | ## 模型下载 56 | 57 | 将下载的文件夹放入根目录的`models`文件夹中 58 | 59 | * 语音清洁相关模型 60 | * [百度云](https://pan.baidu.com/s/1wDQ_I1NIL942o1Dm2XU8zg?pwd=kuon),目前只使用了`UVR_MDXNET_Main.onnx`,可以只下载它,但是文件夹目录结构还是要的一样的 61 | 62 | * vad 63 | * [百度云](https://pan.baidu.com/s/1gcEfO8pxqoZKIAW2SyzbKA?pwd=kuon) 64 | 65 | * whisper模型 66 | * 可以使用时候自动下载,会被保存到huggingface的缓存目录中 67 | * [百度云](https://pan.baidu.com/s/1NbutR2cHvHbboUy-QTg5zw?pwd=kuon),这压缩包包含上面的所有模型 68 | * [huggingface](https://huggihttps://huggingface.co/collections/guillaumekln/faster-whisper-64f9c349b3115b4f51434976) 69 | 70 | 示例models目录结构 71 | ```text 72 | │models 73 | | 74 | ├───faster-whisper-large-v3 75 | │ .gitattributes 76 | │ config.json 77 | │ model.bin 78 | │ preprocessor_config.json 79 | │ README.md 80 | │ tokenizer.json 81 | │ vocabulary.json 82 | │ 83 | │ 84 | ├───silero-vad-4.0 85 | │ 86 | └───uvr5_weights 87 | UVR_MDXNET_Main.onnx 88 | 89 | ``` 90 | 91 | ## 运行 92 | 93 | ```bash 94 | streamlit run web.py --server.port 1234 --server.maxUploadSize 1000 95 | ``` 96 | 97 | 演示视频: 98 | 99 | 100 | 101 | 102 | 103 | ## 效果 104 | 105 | 106 | ### 葬送的芙莉蓮 OP 主題曲 -「勇者」/ YOASOBI 107 | 108 | |识别出的歌词|本软件输出| 109 | |---|---| 110 | |まるでおとぎの話 終わり迎えた証|就像童话故事迎来了结局的证明| 111 | |長すぎる旅路から 切り出した一節|从过长的旅程中切出的一节| 112 | |それはかつてこの地に 影を落とした悪を|那是曾经在这片土地上投下阴影的恶| 113 | |打ち取る自由者との 短い旅の記憶 | 是与击败自由者的短暂旅行的记忆| 114 | |物語は終わり 勇者は眠りにつく | 故事结束了 勇者已经入睡| 115 | |穏やかな日常を この地に残して | 留下了平静的日常在这片土地上| 116 | |時の眺めは無情に 人を忘れさせる | 时间的眺望无情地让人忘记| 117 | |そこに生きた奇跡も 錆びついてく | 在那里生活的奇迹也开始生锈了| 118 | |それでも君は 生きてる | 但是你依然活着| 119 | |君の言葉も 願いも 勇気も | 你的话语 你的愿望 你的勇气| 120 | |今は確かに私の中で 生きてる | 现在它们确实在我心中活着| 121 | |同じ道を選んだ それだけだった | 只是选择了相同的道路| 122 | 123 | 124 | ## 参考 125 | 126 | * [faster-whisper](https://github.com/SYSTRAN/faster-whisper) 127 | * [N46Whisper](https://github.com/Ayanaminn/N46Whisper/blob/main/README_CN.md) -------------------------------------------------------------------------------- /engine_translation/baidu.py: -------------------------------------------------------------------------------- 1 | from random import randint 2 | from hashlib import md5 3 | from http.client import HTTPConnection 4 | import json 5 | from urllib import parse 6 | 7 | class Baidu: 8 | def __init__(self,appid,secretKey) -> None: 9 | self.url = '/api/trans/vip/translate' 10 | self.appid = appid 11 | self.secretKey = secretKey 12 | 13 | def reset(self): 14 | pass 15 | 16 | def run(self,text,from_language='auto',target_language='中文'): 17 | 18 | if target_language == "中文": 19 | target_language = 'zh' 20 | elif target_language == "日语": 21 | target_language = 'jp' 22 | elif target_language == "英语": 23 | target_language = 'en' 24 | 25 | salt = randint(32768, 65536) 26 | sign = self.appid + text + str(salt) + self.secretKey 27 | sign = md5(sign.encode()).hexdigest() 28 | get_url = self.url + '?appid=' + self.appid + '&q=' + parse.quote(text) + '&from=' + from_language + '&to=' + target_language + '&salt=' + str( 29 | salt) + '&sign=' + sign 30 | 31 | try: 32 | httpClient = HTTPConnection('api.fanyi.baidu.com') 33 | httpClient.request('GET', get_url) 34 | 35 | response = httpClient.getresponse() 36 | result_all = response.read().decode("utf-8") 37 | result = json.loads(result_all) 38 | 39 | string = '' 40 | for word in result['trans_result']: 41 | if word == result['trans_result'][-1]: 42 | string += word['dst'] 43 | else: 44 | string += word['dst'] + '\n' 45 | 46 | except Exception: 47 | if result['error_code'] == '54003': 48 | string = "翻译:我抽风啦!" 49 | elif result['error_code'] == '52001': 50 | string = '翻译:请求超时,请重试' 51 | elif result['error_code'] == '52002': 52 | string = '翻译:系统错误,请重试' 53 | elif result['error_code'] == '52003': 54 | string = '翻译:APPID 或 密钥 不正确' 55 | elif result['error_code'] == '54001': 56 | string = '翻译:APPID 或 密钥 不正确' 57 | elif result['error_code'] == '54004': 58 | string = '翻译:账户余额不足' 59 | elif result['error_code'] == '54005': 60 | string = '翻译:请降低长query的发送频率,3s后再试' 61 | elif result['error_code'] == '58000': 62 | string = '翻译:客户端IP非法,注册时错误填入服务器地址,请前往开发者信息-基本信息修改,服务器地址必须为空' 63 | elif result['error_code'] == '90107': 64 | string = '翻译:认证未通过或未生效' 65 | else: 66 | string = '翻译:%s,%s' % (result['error_code'], result['error_msg']) 67 | raise Exception(string) 68 | 69 | finally: 70 | if httpClient: 71 | httpClient.close() 72 | 73 | return string 74 | 75 | 76 | if __name__ == '__main__': 77 | t = Baidu(appid="",secretKey="") 78 | res = t.run( "まるでおとぎの話 終わり迎えた証") 79 | print(res) 80 | -------------------------------------------------------------------------------- /engine_translation/gpt.py: -------------------------------------------------------------------------------- 1 | from openai import OpenAI 2 | 3 | class GPT(): 4 | def __init__(self,key,base_url = "https://api.openai.com/v1",model="gpt-3.5-turbo",temperature=0.6) -> None: 5 | self.client = OpenAI( 6 | api_key = key, 7 | base_url = base_url 8 | ) 9 | 10 | # if model not in ["gpt-3.5-turbo","gpt-4"]: 11 | # raise Exception("model not supported") 12 | 13 | self.model = model 14 | self.temperature = temperature 15 | self.prompt = "You are a language expert.Your task is to translate the input subtitle text, sentence by sentence, into the user specified target language.However, please utilize the context to improve the accuracy and quality of translation.Please be aware that the input text could contain typos and grammar mistakes, utilize the context to correct the translation.Please return only translated content and do not include the origin text.Please do not use any punctuation around the returned text.Please do not translate people's name and leave it as original language.\"" 16 | self.reset() 17 | 18 | def reset(self): 19 | """ 20 | 清空历史记录 21 | """ 22 | self.messages = [ 23 | { 24 | "role": "system", 25 | "content": f'{self.prompt}' 26 | } 27 | ] 28 | 29 | def run(self,text,target_language="zh-hans"): 30 | """ 31 | target_language : ["zh-hans","english"] 32 | """ 33 | # if target_language not in ["中文","英语","日语"]: 34 | # raise Exception("target language not supported") 35 | 36 | # if target_language == "中文": 37 | # target_language = 'zh' 38 | # elif target_language == "日语": 39 | # target_language = 'jp' 40 | # elif target_language == "英语": 41 | # target_language = 'en' 42 | 43 | new_message = { 44 | "role":"user", 45 | "content": f"Original text:`{text}`. Target language: {target_language}" 46 | } 47 | self.messages.append(new_message) 48 | try: 49 | completion = self.client.chat.completions.create( 50 | model=self.model, 51 | messages= self.messages, 52 | temperature=self.temperature, 53 | stream = False 54 | ) 55 | 56 | content = ( 57 | completion.choices[0].message.content.encode("utf8").decode() 58 | ) 59 | # total_tokens = completion.usage.total_tokens 60 | 61 | except Exception as e: 62 | self.messages.pop() 63 | raise Exception(e) 64 | # 将其保存成历史 65 | self.messages.append({"role": "assistant", "content": content}) 66 | # print("{}".format(self.messages)) 67 | return content 68 | 69 | 70 | if __name__ == '__main__': 71 | # 翻译测试 72 | import yaml 73 | with open('./secret.yaml', 'r',encoding="utf-8") as file: 74 | config = yaml.safe_load(file) 75 | key = config["chatgpt"]["key"] 76 | base_url = config["chatgpt"]["base_url"] 77 | 78 | eng = GPT(key=key ,base_url = base_url ,model="gpt-4") 79 | print(eng.run("まるでおとぎの話 終わり迎えた証")) 80 | # eng.run("長すぎる旅路から 切り出した一説") 81 | -------------------------------------------------------------------------------- /engine_translation/tencent.py: -------------------------------------------------------------------------------- 1 | import json 2 | # pip install -i https://mirrors.tencent.com/pypi/simple/ --upgrade tencentcloud-sdk-python 3 | from tencentcloud.common import credential 4 | from tencentcloud.common.profile.client_profile import ClientProfile 5 | from tencentcloud.common.profile.http_profile import HttpProfile 6 | from tencentcloud.common.exception.tencent_cloud_sdk_exception import TencentCloudSDKException 7 | from tencentcloud.tmt.v20180321 import tmt_client, models 8 | 9 | class Tencent: 10 | def __init__(self,appid,secretKey) -> None: 11 | self.appid = appid 12 | self.secretKey = secretKey 13 | 14 | def reset(self): 15 | pass 16 | 17 | def run(self,text,from_language='auto',target_language='中文'): 18 | 19 | if target_language == "中文": 20 | target_language = 'zh' 21 | elif target_language == "日语": 22 | target_language = 'jp' 23 | elif target_language == "英语": 24 | target_language = 'en' 25 | 26 | try: 27 | cred = credential.Credential(self.appid, self.secretKey) 28 | httpProfile = HttpProfile() 29 | httpProfile.endpoint = "tmt.tencentcloudapi.com" 30 | clientProfile = ClientProfile() 31 | clientProfile.httpProfile = httpProfile 32 | client = tmt_client.TmtClient(cred, "ap-chengdu", clientProfile) 33 | req = models.TextTranslateRequest() 34 | params = { 35 | "SourceText": text, 36 | "Source": from_language, 37 | "Target": target_language, 38 | "DocumentType": 'txt', # pdf,docx,pptx,xlsx,txt,xml,html,markdown,properties 39 | 'ProjectId': 0, 40 | "UntranslateTencentdText": "RBA" 41 | } 42 | req.from_json_string(json.dumps(params)) 43 | resp = client.TextTranslate(req).TargetText 44 | return resp 45 | 46 | except TencentCloudSDKException as err: 47 | raise err 48 | 49 | 50 | if __name__ == '__main__': 51 | import yaml 52 | with open('./secret.yaml', 'r',encoding="utf-8") as file: 53 | config = yaml.safe_load(file) 54 | secretId = config["tencent"]["secretId"] 55 | secretKey = config["tencent"]["secretKey"] 56 | t = Tencent(appid=secretId,secretKey=secretKey) 57 | print(t.run( "まるでおとぎの話 終わり迎えた証",from_language='jp',target_language='中文')) 58 | 59 | 60 | -------------------------------------------------------------------------------- /file/1.mp4: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/lissettecarlr/auto-subtitle/386b533f4b7db1f8a57f4879d35506eae9d0b88f/file/1.mp4 -------------------------------------------------------------------------------- /file/config.json.temp: -------------------------------------------------------------------------------- 1 | { 2 | "chat_url" : "https://api/v1", 3 | "chat_key": "sk-", 4 | "chat_model_list":["gpt-3.5-turbo", "gpt-4"], 5 | "chat_model_name":"gpt-4", 6 | "baidu_appid": "", 7 | "baidu_appkey": "", 8 | "tencent_appid": "", 9 | "tencent_secretKey":"", 10 | "model_name":"large-v3", 11 | "model_list":["tiny","base","small","medium","large-v2","large-v3","tiny.en","base.en","medium.en","small.en"], 12 | "media_type":"视频", 13 | "vad_filter":"是" 14 | } 15 | 16 | -------------------------------------------------------------------------------- /file/log.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/lissettecarlr/auto-subtitle/386b533f4b7db1f8a57f4879d35506eae9d0b88f/file/log.png -------------------------------------------------------------------------------- /file/test.mp3: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/lissettecarlr/auto-subtitle/386b533f4b7db1f8a57f4879d35506eae9d0b88f/file/test.mp3 -------------------------------------------------------------------------------- /file/test_cn.mp4: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/lissettecarlr/auto-subtitle/386b533f4b7db1f8a57f4879d35506eae9d0b88f/file/test_cn.mp4 -------------------------------------------------------------------------------- /requirements.txt: -------------------------------------------------------------------------------- 1 | streamlit 2 | openai 3 | regex 4 | faster-whisper 5 | pysubs2 6 | ffmpeg-python 7 | -------------------------------------------------------------------------------- /srt2ass.py: -------------------------------------------------------------------------------- 1 | # -*- coding: utf-8 -*- 2 | # 3 | # python-srt2ass: https://github.com/ewwink/python-srt2ass 4 | # by: ewwink 5 | # modified by: 一堂宁宁 Lenshyuu227 6 | 7 | import sys 8 | import os 9 | import regex as re 10 | import codecs 11 | 12 | 13 | def fileopen(input_file): 14 | # use correct codec to encode the input file 15 | encodings = ["utf-32", "utf-16", "utf-8", "cp1252", "gb2312", "gbk", "big5"] 16 | srt_src = '' 17 | for enc in encodings: 18 | try: 19 | with codecs.open(input_file, mode="r", encoding=enc) as fd: 20 | # return an instance of StreamReaderWriter 21 | srt_src = fd.read() 22 | break 23 | except: 24 | # print enc + ' failed' 25 | continue 26 | return [srt_src, enc] 27 | 28 | 29 | def srt2ass(input_file,sub_style, is_split:bool, split_method:str): 30 | if '.ass' in input_file: 31 | return input_file 32 | 33 | if not os.path.isfile(input_file): 34 | print(input_file + ' not exist') 35 | return 36 | 37 | src = fileopen(input_file) 38 | srt_content = src[0] 39 | # encoding = src[1] # Will not encode so do not need to pass codec para 40 | src = '' 41 | utf8bom = '' 42 | 43 | if u'\ufeff' in srt_content: 44 | srt_content = srt_content.replace(u'\ufeff', '') 45 | utf8bom = u'\ufeff' 46 | 47 | srt_content = srt_content.replace("\r", "") 48 | lines = [x.strip() for x in srt_content.split("\n") if x.strip()] 49 | subLines = '' 50 | dlgLines = '' # dialogue line 51 | lineCount = 0 52 | output_file = '.'.join(input_file.split('.')[:-1]) 53 | output_file += '.ass' 54 | 55 | for ln in range(len(lines)): 56 | line = lines[ln] 57 | if line.isdigit() and re.match('-?\d\d:\d\d:\d\d', lines[(ln+1)]): 58 | if dlgLines: 59 | subLines += dlgLines + "\n" 60 | dlgLines = '' 61 | lineCount = 0 62 | continue 63 | else: 64 | if re.match('-?\d\d:\d\d:\d\d', line): 65 | line = line.replace('-0', '0') 66 | if sub_style =='default': 67 | dlgLines += 'Dialogue: 0,' + line + ',default,,0,0,0,,' 68 | elif sub_style =='ikedaCN': 69 | dlgLines += 'Dialogue: 0,' + line + ',池田字幕1080p,,0,0,0,,' 70 | elif sub_style == 'sugawaraCN': 71 | dlgLines += 'Dialogue: 0,' + line + ',中字 1080P,,0,0,0,,' 72 | elif sub_style == 'kaedeCN': 73 | dlgLines += 'Dialogue: 0,' + line + ',den SR红色,,0,0,0,,' 74 | elif sub_style == 'taniguchiCN': 75 | dlgLines += 'Dialogue: 0,' + line + ',正文_1080P,,0,0,0,,' 76 | elif sub_style == 'asukaCN': 77 | dlgLines += 'Dialogue: 0,' + line + ',DEFAULT1,,0,0,0,,' 78 | else: 79 | if lineCount < 2: 80 | dlg_string = line 81 | if is_split == True and split_method == 'Modest': 82 | # do not split if space proceed and followed by non-ASC-II characters 83 | # do not split if space followed by word that less than 5 characters 84 | split_string = re.sub(r'(?<=[^\x00-\x7F])\s+(?=[^\x00-\x7F])(?=\w{5})', r'|', dlg_string) 85 | # print(split_string) 86 | if len(split_string.split('|')) > 1: 87 | dlgLines += (split_string.replace('|', "(adjust_required)\n" + dlgLines)) + "(adjust_required)" 88 | else: 89 | dlgLines += line 90 | elif is_split == True and split_method == 'Aggressive': 91 | # do not split if space proceed and followed by non-ASC-II characters 92 | # split at all the rest spaces 93 | split_string = re.sub(r'(?<=[^\x00-\x7F])\s+(?=[^\x00-\x7F])', r'|', dlg_string) 94 | if len(split_string.split('|')) > 1: 95 | dlgLines += (split_string.replace('|',"(adjust_required)\n" + dlgLines)) + "(adjust_required)" 96 | else: 97 | dlgLines += line 98 | else: 99 | dlgLines += line 100 | else: 101 | dlgLines += "\n" + line 102 | lineCount += 1 103 | ln += 1 104 | 105 | 106 | subLines += dlgLines + "\n" 107 | 108 | subLines = re.sub(r'\d(\d:\d{2}:\d{2}),(\d{2})\d', '\\1.\\2', subLines) 109 | subLines = re.sub(r'\s+-->\s+', ',', subLines) 110 | # replace style 111 | # subLines = re.sub(r'<([ubi])>', "{\\\\\g<1>1}", subLines) 112 | # subLines = re.sub(r'', "{\\\\\g<1>0}", subLines) 113 | # subLines = re.sub(r'', "{\\\\c&H\\3\\2\\1&}", subLines) 114 | # subLines = re.sub(r'', "", subLines) 115 | 116 | if sub_style == 'default': 117 | head_name = 'head_str_default' 118 | elif sub_style == 'ikedaCN': 119 | head_name = 'head_str_ikeda' 120 | elif sub_style == 'sugawaraCN': 121 | head_name = 'head_str_sugawara' 122 | elif sub_style == 'kaedeCN': 123 | head_name = 'head_str_kaede' 124 | elif sub_style == "taniguchiCN": 125 | head_name = 'head_str_taniguchi' 126 | elif sub_style == 'asukaCN': 127 | head_name = 'head_str_asuka' 128 | 129 | head_str = STYLE_DICT.get(head_name) 130 | output_str = utf8bom + head_str + '\n' + subLines 131 | # encode again for head string 132 | output_str = output_str.encode('utf8') 133 | 134 | with open(output_file, 'wb') as output: 135 | output.write(output_str) 136 | 137 | output_file = output_file.replace('\\', '\\\\') 138 | output_file = output_file.replace('/', '//') 139 | return output_file 140 | 141 | 142 | # if len(sys.argv) > 1: 143 | # for name in sys.argv[1:]: 144 | # srt2ass(name,sub_style=) 145 | 146 | 147 | STYLE_DICT = { 148 | 'head_str_default':'''[Script Info] 149 | ; This is an Advanced Sub Station Alpha v4+ script. 150 | ; The script is generated by N46Whisper 151 | Title: 152 | ScriptType: v4.00+ 153 | Collisions: Normal 154 | PlayDepth: 0 155 | 156 | [V4+ Styles] 157 | Format: Name, Fontname, Fontsize, PrimaryColour, SecondaryColour, OutlineColour, BackColour, Bold, Italic, Underline, StrikeOut, ScaleX, ScaleY, Spacing, Angle, BorderStyle, Outline, Shadow, Alignment, MarginL, MarginR, MarginV, Encoding 158 | Style: default,Meiryo,90,&H00FFFFFF,&H00FFFFFF,&H00000000,&H00050506,-1,0,0,0,100,100,5,0,1,3.5,0,2,135,135,10,1 159 | [Events] 160 | Format: Layer, Start, End, Style, Actor, MarginL, MarginR, MarginV, Effect, Text''', 161 | 'head_str_ikeda': '''[Script Info] 162 | ; This is an Advanced Sub Station Alpha v4+ script. 163 | ; The script is generated by N46Whisper 164 | Title: 165 | ScriptType: v4.00+ 166 | Collisions: Normal 167 | PlayDepth: 0 168 | 169 | [V4+ Styles] 170 | Format: Name, Fontname, Fontsize, PrimaryColour, SecondaryColour, OutlineColour, BackColour, Bold, Italic, Underline, StrikeOut, ScaleX, ScaleY, Spacing, Angle, BorderStyle, Outline, Shadow, Alignment, MarginL, MarginR, MarginV, Encoding 171 | Style: Default,Arial,20,&H00FFFFFF,&H000000FF,&H00000000,&H00000000,0,0,0,0,100,100,0,0,1,2,2,2,10,10,10,1 172 | Style: 池田字幕1080p,思源黑体,71,&H00FFFFFF,&H000000FF,&H00008A11,&H00000000,-1,0,0,0,100,100,1.49999,0,1,1.99999,1,2,8,8,5,1 173 | Style: 池田字幕1080p - 不透明背景,思源黑体,71,&H00FFFFFF,&H000000FF,&H64202021,&H00000000,-1,0,0,0,100,100,1.49999,0,3,1.99999,0,2,8,8,5,1 174 | Style: staff1080p,思源黑体,55,&H00FFFFFF,&H00FFFFFF,&H34000000,&H00000000,-1,0,0,0,100,100,3,0,1,2.5,0,7,16,13,4,1 175 | Style: 注释1080p,思源宋体 CN,55,&H00FFFFFF,&H000000FF,&H00000000,&H00000000,-1,0,0,0,100,100,0,0,1,2,1,8,10,10,10,1 176 | Style: 多美左上遮罩,Arial,48,&H00FFFFFF,&H000000FF,&H00000000,&H00000000,0,0,0,0,100,100,0,0,1,0,0,1,8,8,11,1 177 | Style: 多美紫色遮罩,Arial,48,&H00F05384,&H000000FF,&H00000000,&H00000000,0,0,0,0,100,100,0,0,1,0,0,1,8,8,11,1 178 | Style: 多美紫色屏字,仓耳渔阳体 W03,86,&H00FFFFFF,&H000000FF,&H00000000,&H00000000,0,0,0,0,100,100,0,0,1,0,0,8,8,8,11,1 179 | Style: 多美右上屏字,方正兰亭圆_GBK_细,60,&H00FFFFFF,&H000000FF,&H00000000,&H00000000,0,0,0,0,94,100,6,0,1,0,3,9,8,45,100,1 180 | Style: 屏字-黑,汉仪正圆-55S,71,&H00FFFFFF,&H000000FF,&H00000000,&H00000000,0,0,0,0,100,100,0,0,1,0,0,8,8,8,11,1 181 | Style: 免责,汉仪正圆-85W,56,&H00AE577B,&H000000FF,&H00FFFFFF,&H9D000000,0,0,0,0,100,100,1,0,1,1.5,2,8,10,10,10,1 182 | 183 | [Events] 184 | Format: Layer, Start, End, Style, Actor, MarginL, MarginR, MarginV, Effect, Text 185 | Dialogue: 0,0:00:00.30,0:00:03.00,staff1080p,,0,0,0,,{'''+r'''\fad(150,300)}特蕾纱熊猫观察会'''+r'''\N片源:'''+r'''\N翻译:'''+r'''\N时间:'''+r'''\N校压: 186 | Dialogue: 0,0:00:00.30,0:00:50.30,免责,,0,0,0,Banner;7;0;50,片源来自互联网,仅作内部学习交流之用,严禁用于商业用途,严禁二次上传、修改,严禁转载。任何自行传播导致的法律问题均与字幕组无关。DO NOT distribute the content on the internet.''', 187 | 'head_str_sugawara':'''[Script Info] 188 | ; This is an Advanced Sub Station Alpha v4+ script. 189 | ; The script is generated by N46Whisper 190 | Title: 191 | ScriptType: v4.00+ 192 | Collisions: Normal 193 | PlayDepth: 0 194 | 195 | [V4+ Styles] 196 | Format: Name, Fontname, Fontsize, PrimaryColour, SecondaryColour, OutlineColour, BackColour, Bold, Italic, Underline, StrikeOut, ScaleX, ScaleY, Spacing, Angle, BorderStyle, Outline, Shadow, Alignment, MarginL, MarginR, MarginV, Encoding 197 | Style: 中字 1080P,思源黑体 CN Medium,90,&H00FFFFFF,&H00FFFFFF,&H008F51CA,&H00A860F2,-1,0,0,0,100,100,5,0,1,3.5,0,2,135,135,10,1 198 | Style: staff 1080P,思源宋体 CN Medium,70,&H00FFFFFF,&H000000FF,&H008F51CA,&H00000000,0,0,0,0,100,100,0,0,1,4,2,7,10,10,10,1 199 | Style: 标注 1080P,思源黑体 CN Medium,70,&H00FFFFFF,&HFFFFFFFF,&H00000000,&H7F000000,-1,0,0,0,100,100,0,0,1,3,1.5,8,0,0,15,1 200 | Style: 中字 720P,思源黑体 CN Medium,60,&H00FFFFFF,&H00FFFFFF,&H008F51CA,&H00A860F2,-1,0,0,0,100,100,5,0,1,3,0,2,135,135,10,1 201 | Style: staff 720P,思源宋体 CN Medium,50,&H00FFFFFF,&H000000FF,&H008F51CA,&H00000000,0,0,0,0,100,100,0,0,1,3,2,7,10,10,10,1 202 | Style: 标注 720P,思源黑体 CN Medium,50,&H00FFFFFF,&HFFFFFFFF,&H00000000,&H7F000000,-1,0,0,0,100,100,0,0,1,3,1.5,8,0,0,15,1 203 | Style: staff msg,思源宋体 CN Medium,25,&H00FFFFFF,&H000000FF,&H008F51CA,&H00000000,0,0,0,0,100,100,0,0,1,4,2,7,10,10,10,1 204 | Style: 中字 msg,思源黑体 CN Medium,25,&H00FFFFFF,&H00FFFFFF,&H008F51CA,&H00A860F2,-1,0,0,0,100,100,5,0,1,4,0,2,135,135,10,1 205 | Style: 标注 msg,思源黑体 CN Medium,25,&H00FFFFFF,&HFFFFFFFF,&H00000000,&H7F000000,-1,0,0,0,100,100,0,0,1,3,1.5,8,0,0,15,1 206 | Style: 歌词日语 1080P,Swei Spring Sugar CJKtc,60,&H00FFFFFF,&H000000FF,&H009B46A5,&H5A9B46A5,0,0,0,0,100,100,0,0,1,2,0,2,10,10,30,1 207 | Style: 歌词中文 1080P,Swei Spring Sugar CJKtc,90,&H00FFFFFF,&H000000FF,&H009B46A5,&H5F9B46A5,-1,0,0,0,100,100,0,0,1,2,0,2,10,10,100,1 208 | Style: 歌词中文 720P,Swei Spring Sugar CJKtc,60,&H00FFFFFF,&H000000FF,&H009B46A5,&H5F9B46A5,-1,0,0,0,100,100,0,0,1,2,0,2,10,10,70,1 209 | Style: 歌词日语 720P,Swei Spring Sugar CJKtc,40,&H00FFFFFF,&H000000FF,&H009B46A5,&H5A9B46A5,0,0,0,0,100,100,0,0,1,1,0,2,10,10,15,1 210 | [Events] 211 | Format: Layer, Start, End, Style, Actor, MarginL, MarginR, MarginV, Effect, Text 212 | Dialogue: 0,0:00:00.00,0:00:05.24,staff 1080P,,0,0,0,,{'''+r'''\fad(1200,50)\pos(15.2,0.4)}菅原咲月字幕组'''+r'''\N片源:'''+r'''\N翻译:'''+r'''\N时间:'''+r'''\N校压:''', 213 | 'head_str_kaede':'''[Script Info] 214 | ; This is an Advanced Sub Station Alpha v4+ script. 215 | ; The script is generated by N46Whisper 216 | Title: 217 | ScriptType: v4.00+ 218 | Collisions: Normal 219 | PlayDepth: 0 220 | 221 | [V4+ Styles] 222 | Format: Name, Fontname, Fontsize, PrimaryColour, SecondaryColour, OutlineColour, BackColour, Bold, Italic, Underline, StrikeOut, ScaleX, ScaleY, Spacing, Angle, BorderStyle, Outline, Shadow, Alignment, MarginL, MarginR, MarginV, Encoding 223 | Style: staff,微软雅黑,60,&H00FFFFFF,&H00923782,&H0076137B,&H00540D67,-1,0,0,0,100,100,0,0,1,3,0,7,15,15,15,1 224 | Style: den SR红色,微软雅黑,70,&H0AFFFFFF,&H004B4B9E,&H322828E0,&H640A0A72,-1,0,0,0,100,100,0,0,1,3,0,2,15,15,70,1 225 | Style: 注释,微软雅黑,68,&H00FFFFFF,&H000000FF,&H3D000000,&H00FFFFFF,-1,0,0,0,100,100,0,0,1,4.5,0,8,23,23,23,1 226 | Style: 红色,微软雅黑,75,&H00FFFFFF,&H000000FF,&H004243CB,&H00000000,-1,0,0,0,100,100,0,0,1,3,0,2,15,15,15,1 227 | Style: den - 中文歌词,微软雅黑,70,&H0AFFFFFF,&H004B4B9E,&H322828E0,&H640A0A72,-1,0,0,0,100,100,0,0,1,3,0,2,15,15,70,1 228 | Style: den - 日文歌词,微软雅黑,50,&H0AFFFFFF,&H00F9F9F9,&H32000001,&H640A0A72,-1,0,0,0,100,100,0,0,1,1,0,2,15,15,9,1 229 | [Events] 230 | Format: Layer, Start, End, Style, Actor, MarginL, MarginR, MarginV, Effect, Text 231 | Dialogue: 0,0:00:00.00,0:00:05.00,staff,,0,0,0,,{'''+r'''\fad(300,300)}「三番目の楓」'''+r'''\N片源:'''+r'''\N翻译:'''+r'''\N时间:'''+r'''\N校压:''', 232 | 'head_str_taniguchi':'''[Script Info] 233 | ; This is an Advanced Sub Station Alpha v4+ script. 234 | ; The script is generated by N46Whisper 235 | Title: 236 | ScriptType: v4.00+ 237 | Collisions: Normal 238 | PlayDepth: 0 239 | 240 | [V4+ Styles] 241 | Format: Name, Fontname, Fontsize, PrimaryColour, SecondaryColour, OutlineColour, BackColour, Bold, Italic, Underline, StrikeOut, ScaleX, ScaleY, Spacing, Angle, BorderStyle, Outline, Shadow, Alignment, MarginL, MarginR, MarginV, Encoding 242 | Style: Default,Arial,20,&H00FFFFFF,&H000000FF,&H00000000,&H00000000,0,0,0,0,100,100,0,0,1,2,2,2,10,10,10,1 243 | Style: 正文_1080P,思源黑体 CN Bold,75,&H00FFFFFF,&H000000FF,&H0077234B,&HA00000FF,-1,0,0,0,100,100,3,0,1,3,2,2,10,10,15,1 244 | Style: staff_1080P,思源宋体 CN Heavy,60,&H00FFFFFF,&H000000FF,&H0077234B,&HA00000FF,-1,0,0,0,100,100,2,0,1,2,1,7,30,10,30,1 245 | 246 | [Events] 247 | Format: Layer, Start, End, Style, Name, MarginL, MarginR, MarginV, Effect, Text 248 | Dialogue: 0,0:00:01.00,0:00:10.00,staff_1080P,,0,0,0,,{'''+r'''\fad(300,1000)}泪痣愛季応援団 '''+r'''\N源:'''+r'''\N制作: 249 | Dialogue: 0,0:00:08.95,0:03:29.40,staff_1080P,,0,0,0,,{'''+r'''\fad(1000,1000)'''+r'''\pos(30,30)'''+r'''\bord0'''+r'''\shad0'''+r'''\c&HFFFFFF&'''+r'''\1a&H3C&}泪痣愛季応援団 250 | Dialogue: 0,0:00:00.00,0:00:05.00,正文_1080P,,0,0,0,,谷口爱季字幕组''', 251 | 'head_str_asuka':'''[Script Info] 252 | ; The script is generated by N46Whisper 253 | ; http://www.aegisub.org/ 254 | Title: Default Aegisub file 255 | ScriptType: v4.00+ 256 | WrapStyle: 0 257 | ScaledBorderAndShadow: yes 258 | YCbCr Matrix: None 259 | 260 | [Aegisub Project Garbage] 261 | 262 | [V4+ Styles] 263 | Format: Name, Fontname, Fontsize, PrimaryColour, SecondaryColour, OutlineColour, BackColour, Bold, Italic, Underline, StrikeOut, ScaleX, ScaleY, Spacing, Angle, BorderStyle, Outline, Shadow, Alignment, MarginL, MarginR, MarginV, Encoding 264 | Style: Default,Arial,20,&H00FFFFFF,&H000000FF,&H00000000,&H00000000,0,0,0,0,100,100,0,0,1,2,2,2,10,10,10,1 265 | Style: DEFAULT1,微软雅黑,65,&H00FFFFFF,&HF08B581A,&H007E672E,&H0084561E,-1,0,0,0,100,100,0,0,1,2,1,2,20,20,5,1 266 | Style: STAFF,Microsoft YaHei,50,&H00FFFFFF,&HF08B581A,&H007E672E,&HF084561E,-1,0,0,0,100,100,0,0,1,2.5,3,7,30,30,3,134 267 | Style: 名单1,方正粗倩_GBK,45,&H00E7D793,&H00E9C116,&H004C3F00,&H0016161D,-1,0,0,0,100,100,0,0,1,3,2,2,10,10,10,1 268 | Style: 名单2,方正粗黑简体,45,&H00FAF9EC,&H00493F15,&H008A4D1F,&H000A0A0B,-1,0,0,0,100,100,0,0,1,3,1.5,2,10,10,10,1 269 | Style: 中文歌词,方正粗黑简体,50,&H00FFFFFF,&HF0000000,&H00000000,&H96000000,-1,0,0,0,100,100,0,0,1,1.5,2,2,10,10,4,134 270 | Style: 日文歌词,方正粗黑简体,40,&H00FFFFFF,&HF0000000,&H00000000,&H96000000,-1,0,0,0,100,100,0,0,1,1.5,2,2,10,10,10,134 271 | Style: 屏幕字/注释,微软雅黑,50,&H00FFFFFF,&HF0000000,&H00000000,&H96000000,-1,0,0,0,100,100,0,0,1,1.5,2,2,10,10,10,134 272 | Style: purple1,文鼎特圆简,26,&H00670067,&H00FFFFFF,&H00FFFFFF,&H00FFFFFF,0,0,0,0,100,100,0,0,1,4.6,0,2,10,10,10,1 273 | Style: 鸟,微软雅黑,35,&H00FFFFFF,&HF08B581A,&H00F3B70F,&H0084561E,-1,0,0,0,100,100,0,0,1,2,1,2,100,20,465,1 274 | Style: 哈利,微软雅黑,35,&H00FFFFFF,&HF08B581A,&H00445FE1,&H00445FE1,-1,0,0,0,100,100,0,0,1,2,1,2,0,150,220,1 275 | Style: 期数,Berlin Sans FB,25,&H00FFFFFF,&H000000FF,&H00000000,&H00000000,-1,0,0,0,100,100,0,0,1,2,1,9,10,10,0,1 276 | Style: HamAsuka-屏幕字,方正卡通_GBK,50,&H00FFFFFF,&H000000FF,&H00D08C27,&H00010102,-1,0,0,0,100,100,0,0,1,3.5,3,2,900,10,170,1 277 | Style: HamAsuka-屏幕字 小黑,方正粗黑宋简体,65,&H00000000,&H000000FF,&H00FFFFFF,&H00010102,0,0,0,0,100,100,0,0,1,0,0,2,10,10,170,1 278 | Style: HamAsuka-屏幕字 蓝底,微软雅黑,80,&H00FFFFFF,&H000000FF,&H00A21C14,&H00FFFFFF,-1,0,0,0,100,100,0,0,3,4,0,2,10,10,10,1 279 | Style: HamAsuka-屏幕字 标题,微软雅黑,90,&H00303030,&H0006C6F6,&H00FFFFFF,&H00010102,-1,0,0,0,100,100,0,0,1,0,0,2,10,10,10,1 280 | Style: HamAsuka-屏幕字 问题 白底,微软雅黑,90,&H002C2C2C,&H00B77B1B,&H00FFFFFF,&H00010102,-1,0,0,0,100,100,0,0,3,5,0,2,10,10,10,1 281 | Style: HamAsuka 歌词,微软雅黑,70,&H00FFFFFF,&H00000000,&H00000000,&H00010102,-1,0,0,0,100,100,0,0,1,0,0,2,10,10,10,1 282 | Style: HamAsuka 小窗,微软雅黑,50,&H00FFFFFF,&HF0000000,&H00000000,&H96000000,-1,0,0,0,100,100,0,0,1,1.5,2,9,10,10,300,134 283 | Style: HamAsuka-屏幕字 标题 蓝底,微软雅黑,90,&H00F9F8FB,&H000000FF,&H00AC9769,&H00000000,-1,0,0,0,100,100,0,0,3,5,0,2,10,10,10,1 284 | Style: HamAsuka-屏幕字 标题 黑字,微软雅黑,80,&H00292B2C,&H000000FF,&H00FFFFFF,&H00000000,-1,0,0,0,100,100,0,0,3,5,0,2,10,10,10,1 285 | Style: 毕业曲MV 中文歌词,思源黑体 CN,76,&H0AFFFFFF,&H000000FF,&H0F000000,&H00FFFFFF,-1,0,0,0,100,100,0,0,1,1,0,2,10,10,75,1 286 | Style: 毕业曲MV 日文歌词,思源黑体 CN,58,&H0AFFFFFF,&H000000FF,&H0F000000,&H00FFFFFF,-1,0,0,0,100,100,0,0,1,1,0,2,10,10,15,1 287 | 288 | [Events] 289 | Format: Layer, Start, End, Style, Name, MarginL, MarginR, MarginV, Effect, Text 290 | Dialogue: 0,0:00:00.00,0:00:05.00,Default,,0,0,0,,''' 291 | # ADD MORE 292 | 293 | } 294 | 295 | 296 | # if __name__ == "__main__": 297 | # srt2ass('sub_split_test.srt','sugawaraCN','No','Aggressive') -------------------------------------------------------------------------------- /transcribe.py: -------------------------------------------------------------------------------- 1 | # 字幕提取 2 | import torch 3 | # pip install faster-whisper 4 | from faster_whisper import WhisperModel 5 | 6 | import os 7 | from tqdm import tqdm 8 | import time 9 | import pandas as pd 10 | # pip install pysubs2 11 | import pysubs2 12 | from srt2ass import srt2ass 13 | 14 | class Transcribe: 15 | def __init__(self,model_name="small",device='cuda') -> None: 16 | self.model = WhisperModel(model_name,device=device,compute_type="float16") 17 | torch.cuda.empty_cache() 18 | 19 | def run(self,file_name,audio_binary_io = None,language='ja', 20 | beam_size = 5, 21 | is_vad_filter=False, 22 | min_silence_duration_ms=500, 23 | is_split = False, 24 | split_method = "Modest", 25 | sub_style = "default", 26 | initial_prompt= None): 27 | ''' 28 | beam_size:数值越高,在识别时探索的路径越多,这在一定范围内可以帮助提高识别准确性,但是相对的VRAM使用也会更高. 同时,Beam Size在超过5-10后有可能降低精确性,详情请见https://arxiv.org/pdf/2204.05424.pdf 29 | is_vad_filter:使用VAD过滤。 30 | 使用[Silero VAD model](https://github.com/snakers4/silero-vad)以检测并过滤音频中的无声段落(推荐小语种使用) 31 | 【注意】使用VAD filter有优点亦有缺点,请用户自行根据音频内容决定是否启用. [关于VAD filter](https://github.com/Ayanaminn/N46Whisper/blob/main/FAQ.md) 32 | is_split:是否使用空格将文本分割成多行 33 | [True,False] 34 | split_method:分割方法 35 | 普通分割(Modest):当空格后的文本长度超过5个字符,则另起一行 36 | 全部分割(Aggressive): 只要遇到空格即另起一行 37 | sub_style:字幕样式 38 | default 39 | initial_prompt: 使用提示词能够提高输出质量,详情见: https://platform.openai.com/docs/guides/speech-to-text/prompting 40 | ''' 41 | audio_name = os.path.splitext(os.path.basename(file_name))[0] 42 | 43 | # 如果没有传入音频的二进制,则认为是本地文件 44 | if audio_binary_io == None: 45 | if not os.path.exists(file_name): 46 | raise Exception("File not found") 47 | audio = file_name 48 | else: 49 | audio = audio_binary_io 50 | 51 | tic = time.time() 52 | 53 | print("transcribe param") 54 | print(f"audio: {audio}") 55 | print(f"language: {language}") 56 | print(f"is_vad_filter: {is_vad_filter}") 57 | print(f"beam_size: {beam_size}") 58 | print(f"initial_prompt: {initial_prompt}") 59 | 60 | if is_vad_filter == False: 61 | vad_parameters = None 62 | else: 63 | vad_parameters = dict(min_silence_duration_ms=min_silence_duration_ms) 64 | 65 | segments, info = self.model.transcribe(audio = audio, 66 | beam_size=beam_size, 67 | language=language, 68 | vad_filter=is_vad_filter, 69 | vad_parameters=vad_parameters, 70 | initial_prompt = initial_prompt, 71 | word_timestamps=True, 72 | #condition_on_previous_text=False, 73 | #no_speech_threshold=0.6, 74 | ) 75 | 76 | results= [] 77 | with tqdm(total=round(info.duration, 2), unit=" seconds") as pbar: 78 | for s in segments: 79 | segment_dict = {'start':s.start,'end':s.end,'text':s.text} 80 | results.append(segment_dict) 81 | segment_duration = round(s.end - s.start, 2) 82 | pbar.update(segment_duration) 83 | toc = time.time() 84 | subs = pysubs2.load_from_whisper(results) 85 | 86 | # 保存srt文件 87 | srt_filename = os.path.join("./temp",audio_name + ".srt") 88 | subs.save(srt_filename) 89 | print('生成srt:{} 识别耗时:{}'.format(srt_filename,toc-tic) ) 90 | 91 | # 保存ass文件 92 | ass_filename = srt2ass(srt_filename, sub_style, is_split,split_method) 93 | print('生成ass:{}'.format(ass_filename)) 94 | return srt_filename,ass_filename 95 | 96 | 97 | if __name__ == "__main__": 98 | test = Transcribe(model_name = r"D:\code\auto-subtitle\models\faster-whisper-large-v3",device="cuda") 99 | # 测试直接传入文件地址 100 | #test.run(file_name="./test.mp3") 101 | 102 | # 测试传入二进制 103 | with open('./file/2.wav', 'rb') as f: 104 | test.run(file_name="test", 105 | audio_binary_io=f, 106 | language="zh", 107 | #initial_prompt="简体中文", 108 | #is_vad_filter=True, 109 | #is_split=False 110 | ) 111 | 112 | 113 | 114 | 115 | 116 | -------------------------------------------------------------------------------- /translation.py: -------------------------------------------------------------------------------- 1 | # 字幕翻译 2 | import os 3 | from tqdm import tqdm 4 | from typing import Union 5 | # !pip install openai 6 | # !pip install pysubs2 7 | import pysubs2 8 | from engine_translation.gpt import GPT 9 | from engine_translation.baidu import Baidu 10 | from engine_translation.tencent import Tencent 11 | import time 12 | 13 | class translation : 14 | def __init__(self,engine:Union[GPT,Baidu]) -> None: 15 | self.engine = engine 16 | self.max_retries = 3 17 | 18 | def translate_save(self,sub_src,language="中文",keep_origin = True): 19 | """ 20 | keep_origin : 是否保存原文 21 | """ 22 | retry_count = 0 23 | sub_trans = pysubs2.load(sub_src) 24 | total_lines = len(sub_trans) 25 | self.engine.reset() 26 | for line in tqdm(sub_trans,total = total_lines): 27 | # print(line.text) 28 | # try: 29 | # line_trans = self.engine.run(line.text,target_language=language) 30 | # except Exception as e: 31 | # print("翻译出错:{},进行重试".format(e)) 32 | # time.sleep(1) 33 | # self.engine.run(line.text,target_language=language) 34 | while retry_count < self.max_retries: 35 | try: 36 | line_trans = self.engine.run(line.text, target_language=language) 37 | retry_count = 0 38 | break # 翻译成功,跳出循环 39 | except Exception as e: 40 | print("翻译出错:{},进行重试".format(e)) 41 | time.sleep(10) 42 | retry_count += 1 43 | 44 | if keep_origin: 45 | line.text += (r'\N'+ line_trans) 46 | else: 47 | line.text = line_trans 48 | print(line.text) 49 | 50 | 51 | if language == "中文": 52 | language = 'zh' 53 | elif language == "日语": 54 | language = 'jp' 55 | elif language == "英语": 56 | language = 'en' 57 | else: 58 | language = "other" 59 | save_ass_path = "./temp/" + os.path.splitext(os.path.basename(sub_src))[0]+ "_"+ language +".ass" 60 | save_srt_path = "./temp/" + os.path.splitext(os.path.basename(sub_src))[0]+ "_" + language +".srt" 61 | # print(save_ass_path) 62 | # print(save_srt_path) 63 | sub_trans.save(save_ass_path) 64 | sub_trans.save(save_srt_path) 65 | return save_ass_path,save_srt_path 66 | 67 | 68 | if __name__ == '__main__': 69 | # 翻译测试 70 | import yaml 71 | with open('./engine_translation/secret.yaml', 'r',encoding="utf-8") as file: 72 | config = yaml.safe_load(file) 73 | eng = GPT(key = config["chatgpt"]["key"], base_url = config["chatgpt"]["base_url"]) 74 | eng2 = Baidu(appid = config["baidu"]["appid"],secretKey = config["baidu"]["secretKey"]) 75 | # eng.run("まるでおとぎの話 終わり迎えた証") 76 | # eng.run("長すぎる旅路から 切り出した一説") 77 | 78 | t = translation(eng) 79 | p1 ,p2 = t.translate_save("./test.ass",keep_origin=True) 80 | -------------------------------------------------------------------------------- /utils.py: -------------------------------------------------------------------------------- 1 | 2 | import ffmpeg 3 | import os 4 | import json 5 | 6 | def extract_audio(video_path, output_audio_path): 7 | """ 8 | 从视频文件中提取音频并保存为wav。 9 | 参数: 10 | video_path (str): 视频文件的路径。 11 | output_audio_path (str): 输出音频文件的路径。 12 | """ 13 | if not os.path.exists(video_path): 14 | raise "{} not find".format(video_path) 15 | if os.path.exists(output_audio_path): 16 | os.remove(output_audio_path) 17 | try: 18 | ( 19 | ffmpeg 20 | .input(video_path) 21 | .output(output_audio_path, acodec='mp3', audio_bitrate='320k') 22 | .run(overwrite_output=True) 23 | ) 24 | except ffmpeg.Error as e: 25 | raise e 26 | 27 | def merge_subtitles_to_video(video_path, subtitle_path, output_video_path): 28 | """ 29 | 将字幕文件合并到视频文件中。 30 | 参数: 31 | video_path (str): 视频文件的路径。 32 | subtitle_path (str): 字幕文件的路径。 33 | output_video_path (str): 合并字幕后的输出视频文件的路径。 34 | """ 35 | if not os.path.exists(video_path): 36 | raise FileNotFoundError(f"{video_path} not found") 37 | if not os.path.exists(subtitle_path): 38 | raise FileNotFoundError(f"{subtitle_path} not found") 39 | if os.path.exists(output_video_path): 40 | os.remove(output_video_path) 41 | 42 | subtitle_path = subtitle_path.replace("\\", "/") 43 | print("subtitle_path = {}".format(subtitle_path)) 44 | try: 45 | ( 46 | ffmpeg 47 | .input(video_path) 48 | .output(output_video_path, vf=f"subtitles={subtitle_path}") 49 | .run(overwrite_output=True) 50 | ) 51 | except ffmpeg.Error as e: 52 | raise RuntimeError(f"Failed to merge subtitles into video: {e}") 53 | 54 | def clear_folder(folder_path): 55 | for filename in os.listdir(folder_path): 56 | file_path = os.path.join(folder_path, filename) 57 | os.remove(file_path) 58 | print("清空文件夹:{}".format(folder_path)) 59 | 60 | 61 | 62 | def import_config_file(file): 63 | if file is not None: 64 | content = file.read() 65 | try: 66 | json_data = json.loads(content) 67 | return json_data 68 | except Exception as e: 69 | raise e 70 | 71 | if __name__ == "__main__": 72 | pass -------------------------------------------------------------------------------- /uvr.py: -------------------------------------------------------------------------------- 1 | # https://github.com/karaokenerds/python-audio-separator 2 | # pip install audio-separator[gpu] 3 | # pip install audio-separator[cpu] 4 | 5 | from audio_separator.separator import Separator 6 | import logging 7 | LOG_LE = logging.WARN 8 | 9 | class UVR_Client: 10 | def __init__(self,model_file_dir="./models/uvr5_weights",output_dir='./temp',sample_rate=44000) -> None: 11 | self.model = Separator(log_level=LOG_LE, 12 | model_file_dir=model_file_dir, 13 | output_dir=output_dir, 14 | sample_rate=sample_rate) 15 | self.model.load_model('UVR_MDXNET_Main.onnx') 16 | 17 | def change_model(self,model_name): 18 | self.model.load_model(model_name) 19 | 20 | def infer(self,audio="E:\\audio_AI\\audio\\test\\感受孤独.flac"): 21 | rimary_stem_output_path, secondary_stem_output_path = self.model.separate(audio) 22 | return rimary_stem_output_path,secondary_stem_output_path 23 | 24 | 25 | if __name__ == "__main__": 26 | uvr = UVR_Client() 27 | print(uvr.infer()) 28 | uvr.change_model("VR-DeEchoAggressive.pth") 29 | print(uvr.infer()) -------------------------------------------------------------------------------- /web.py: -------------------------------------------------------------------------------- 1 | import streamlit as st 2 | import json 3 | import os 4 | from transcribe import Transcribe 5 | from zipfile import ZipFile 6 | import base64 7 | import io 8 | import ffmpeg 9 | from translation import GPT,Baidu,Tencent,translation 10 | from utils import extract_audio,merge_subtitles_to_video,clear_folder,import_config_file 11 | from uvr import UVR_Client 12 | 13 | # 临时文件存放地址 14 | TEMP = "./temp" 15 | 16 | # def import_config_file(file): 17 | # ''' 18 | # 为避免页面刷新重新填写,可以通过配置文件导入,配置文件格式: 19 | # config.json 20 | # { 21 | # "chat_url" : "", 22 | # "chat_key": "", 23 | # "baidu_appid": "", 24 | # "baidu_appkey": "", 25 | # "tencent_appid": "", 26 | # "tencent_secretKey":"" 27 | # } 28 | # ''' 29 | # if file is not None: 30 | # content = file.read() 31 | # try: 32 | # # 解析JSON数据 33 | # json_data = json.loads(content) 34 | # st.success("load config success") 35 | # except Exception as e: 36 | # st.error("load config error:{}".format(e)) 37 | # st.session_state.chat_url = json_data.get("chat_url") 38 | # st.session_state.chat_key = json_data.get("chat_key") 39 | # st.session_state.baidu_appid = json_data.get("baidu_appid") 40 | # st.session_state.baidu_appkey = json_data.get("baidu_appkey") 41 | # st.session_state.tencent_appid = json_data.get("tencent_appid") 42 | # st.session_state.tencent_secretKey = json_data.get("tencent_secretKey") 43 | 44 | def web_page(): 45 | st.title("字幕生成器") 46 | st.caption("") 47 | 48 | if "transcribe" not in st.session_state: 49 | st.session_state['transcribe'] = None 50 | 51 | # 通过配置文件导入 52 | if "config" not in st.session_state: 53 | st.session_state['config'] = None 54 | 55 | uploaded_file = st.file_uploader("上传配置文件(可选):", type="json") 56 | if uploaded_file is not None: 57 | try: 58 | st.session_state.config = import_config_file(uploaded_file) 59 | except: 60 | st.error("load config error") 61 | 62 | if st.button("清空缓存"): 63 | clear_folder("./temp") 64 | 65 | # st.sidebar.markdown("----") 66 | # st.sidebar.markdown("## 使用说明") 67 | # st.sidebar.write("1.选择模型,加载模型") 68 | # st.sidebar.write("2.根据需求设置配置") 69 | # st.sidebar.write("3.上传音频") 70 | # st.sidebar.write("4.点击开始转换") 71 | # st.sidebar.write("5.下载字幕") 72 | # st.sidebar.markdown("----") 73 | 74 | #st.markdown("## 提取配置") 75 | #col1, col2 = st.columns(2) 76 | #with col1: 77 | 78 | st.markdown("## 1 模型") 79 | st.markdown("如果未在models中找到模型,则会自动下载到huggingface缓存目录中,也可以手动去[huggingface]((https://huggingface.co/collections/guillaumekln/faster-whisper-64f9c349b3115b4f51434976))下载模型,然后将模型放如models目录下,这里也提供一个[百度云](https://pan.baidu.com/s/1rRcSRhBpizuQo20qowG2UA?pwd=kuon)") 80 | if st.session_state.config is not None: 81 | # 从配置文件中读取模型列表和默认模型 82 | st.session_state.model_name = st.session_state.config.get("model_name") 83 | st.session_state.model_list = st.session_state.config.get("model_list") 84 | for index,current_model_name in enumerate(st.session_state.model_list): 85 | if current_model_name == st.session_state.model_name: 86 | model_index = index 87 | break 88 | else: 89 | st.session_state.model_list = ["tiny","base","small","medium","large-v2","large-v3", 90 | "tiny.en","base.en","medium.en","small.en"] 91 | st.session_state.model_name = "large-v2" 92 | model_index = 5 93 | 94 | model_name = st.selectbox('模型选择:', st.session_state.model_list, index=model_index) 95 | device_list = ["cpu","cuda"] 96 | device_name = st.selectbox('设备选择(cpu会相当相当的慢,所有请使用cuda):', device_list, index=1) 97 | 98 | if st.button("加载模型:{},使用:{}".format(model_name,device_name)): 99 | with st.spinner('加载中,请稍后。。。'): 100 | if st.session_state.transcribe is not None: 101 | del st.session_state.transcribe 102 | models_path = "./models" + "/faster-whisper-" + model_name 103 | #print(models_path) 104 | try: 105 | if os.path.exists(models_path): 106 | print("加载模型:{}".format(models_path)) 107 | st.session_state.transcribe = Transcribe(model_name=models_path,device=device_name) 108 | else: 109 | print("加载hf模型:{}".format(model_name)) 110 | st.session_state.transcribe = Transcribe(model_name=model_name,device=device_name) 111 | st.success("模型加载成功:{}".format(models_path)) 112 | except Exception as e: 113 | st.error("加载模型失败:{}".format(e)) 114 | 115 | 116 | st.markdown("----") 117 | st.markdown("## 2 上传媒体") 118 | if st.session_state.config is not None: 119 | st.session_state.media_type = st.session_state.config.get("media_type") 120 | if st.session_state.media_type == "视频": 121 | media_type_index = 0 122 | else: 123 | media_type_index = 1 124 | else: 125 | st.session_state.media_type = "视频" 126 | media_type_index = 0 127 | st.session_state.media_type = st.radio("选择来源", ("视频", "音频"),horizontal=True,index=media_type_index) 128 | 129 | # 保存用于提取转化字幕的音频地址 130 | if "media_temp" not in st.session_state: 131 | st.session_state.audio_temp = None 132 | if "audio_separator_temp" not in st.session_state: 133 | st.session_state.audio_separator_temp = None 134 | if "uvr_client" not in st.session_state: 135 | st.session_state.uvr_client = None 136 | 137 | if st.session_state.media_type == "视频": 138 | if "video_temp" not in st.session_state: 139 | st.session_state.video_temp = None 140 | input_file = st.file_uploader("上传视频:", type=["mp4", "avi", "mov", "mkv"]) 141 | if input_file is not None: 142 | # 上传视频临时保存地址 143 | temp_input_video = os.path.join( 144 | TEMP, 145 | os.path.splitext(os.path.basename(input_file.name))[0]+"_temp.mp4" 146 | ) 147 | if not os.path.exists(temp_input_video): 148 | with open(temp_input_video, "wb") as f: 149 | f.write(input_file.read()) 150 | else: 151 | print("文件:{} 已存在,无需创建".format(temp_input_video)) 152 | 153 | st.session_state.video_temp_name = input_file.name 154 | st.session_state.video_temp = temp_input_video 155 | 156 | temp_audio_path = os.path.join( 157 | TEMP, 158 | os.path.splitext(os.path.basename(input_file.name))[0]+".wav" 159 | ) 160 | if not os.path.exists(temp_audio_path): 161 | with st.spinner('音频提取中,请稍后。。。'): 162 | extract_audio(temp_input_video,temp_audio_path) 163 | print("音频提取完成") 164 | else: 165 | print("音频文件:{} 已存在,无需提取".format(temp_audio_path)) 166 | st.session_state.audio_temp = temp_audio_path 167 | 168 | elif st.session_state.media_type == "音频": 169 | input_file = st.file_uploader("上传音频:", type=["mp3", "wav", "m4a"]) 170 | if input_file is not None: 171 | temp_audio_path = os.path.join( 172 | TEMP, 173 | os.path.splitext(os.path.basename(input_file.name))[0]+".wav" 174 | ) 175 | if not os.path.exists(temp_audio_path): 176 | with open(temp_audio_path, "wb") as f: 177 | f.write(input_file.read()) 178 | else: 179 | print("文件:{} 已存在,无需创建".format(temp_audio_path)) 180 | st.session_state.audio_temp = temp_audio_path 181 | 182 | if st.session_state.audio_temp is not None: 183 | st.write("音频:") 184 | st.audio(st.session_state.audio_temp, format='audio/wav', start_time=0) 185 | 186 | if st.button("音频清洁(用于清除背景音,可选)"): 187 | if st.session_state.audio_temp is None: 188 | st.error("请先上传媒体") 189 | 190 | if st.session_state.uvr_client is None: 191 | print("加载模型:UVR_modle") # UVR_modle.load_model('UVR_MDXNET_Main.onnx') 192 | st.session_state.uvr_client = UVR_Client() 193 | 194 | with st.spinner('音频清洁中'): 195 | rimary_stem_output_path, secondary_stem_output_path = st.session_state.uvr_client.infer(st.session_state.audio_temp) 196 | st.session_state.audio_separator_temp = os.path.join('./temp',secondary_stem_output_path) 197 | if st.session_state.audio_separator_temp is not None: 198 | st.write("清洁音频:") 199 | st.audio(st.session_state.audio_separator_temp, format='audio/wav', start_time=0) 200 | 201 | st.markdown("----") 202 | st.markdown("## 3 配置") 203 | 204 | language_mapping = {"中文": "zh", "日文": "ja", "英文": "en"} 205 | language = list(language_mapping.keys()) 206 | selected_language = st.selectbox('选择媒体语言', language,index=1) 207 | st.session_state.language = language_mapping[selected_language] 208 | 209 | if st.session_state.config is not None: 210 | vad_filter = st.session_state.config.get("vad_filter") 211 | if vad_filter == "是": 212 | vad_filter_index = 0 213 | else: 214 | vad_filter_index = 1 215 | else: 216 | vad_filter_index = 1 217 | 218 | vad_filter = st.radio("是使用VAD(过滤音频中的无声段落,whisper模型在识别无声片段,会输出乱七八糟的内容,改项就是解决这个的)", ("是", "否"),horizontal=True,index=vad_filter_index) 219 | 220 | if "min_silence_duration_ms" not in st.session_state: 221 | st.session_state.min_silence_duration_ms = None 222 | 223 | if vad_filter == "是": 224 | st.session_state.is_vad_filter = True 225 | st.session_state.min_silence_duration_ms = st.number_input("最小静默时长(毫秒)", min_value=0, max_value=10000, value=500, step=100) 226 | else: 227 | st.session_state.is_vad_filter = False 228 | 229 | 230 | is_split = st.radio("是否对文本进行分割(当单行显示文本过长时可开启)", ("是", "否"),horizontal=True,index=1) 231 | if is_split == "是": 232 | st.session_state.is_split = True 233 | st.session_state.split_method = st.selectbox('导出格式(Modest:当空格后的文本长度超过5个字符,则另起一行;Aggressive: 只要遇到空格即另起一行)', ["Modest","Aggressive"],index=0) 234 | else: 235 | st.session_state.is_split = False 236 | st.session_state.split_method = "Modest" 237 | 238 | 239 | st.session_state.prompt = st.text_input('请输入提示词:', "",placeholder="简体中文") 240 | if st.session_state.prompt == "": 241 | st.session_state.prompt = None 242 | 243 | # 是否显示融合字幕后的视频 244 | if st.session_state.media_type == "视频": 245 | st.session_state.is_show_video = st.radio("是否显示翻译后的视频", ("是", "否"),horizontal=True,index=0) 246 | else: 247 | st.session_state.is_show_video = "否" 248 | 249 | # print("-----") 250 | # print(st.session_state.language) 251 | # print(st.session_state.is_vad_filter) 252 | # print(st.session_state.is_split) 253 | # if st.session_state.is_split == "是": 254 | # print(st.session_state.split_method) 255 | # print("-----") 256 | 257 | # 翻译-------------------------------- 258 | #st.markdown("----") 259 | if st.session_state.config is not None: 260 | st.session_state.chat_url = st.session_state.config.get("chat_url") 261 | st.session_state.chat_key = st.session_state.config.get("chat_key") 262 | st.session_state.chat_model_list = st.session_state.config.get("chat_model_list") 263 | st.session_state.chat_model_name = st.session_state.config.get("chat_model_name") 264 | for index,current_model_name in enumerate(st.session_state.chat_model_list): 265 | if current_model_name == st.session_state.model_name: 266 | chat_model_index = index 267 | break 268 | 269 | st.session_state.baidu_appid = st.session_state.config.get("baidu_appid") 270 | st.session_state.baidu_appkey = st.session_state.config.get("baidu_appkey") 271 | 272 | st.session_state.tencent_appid = st.session_state.config.get("tencent_appid") 273 | st.session_state.tencent_secretKey = st.session_state.config.get("tencent_secretKey") 274 | 275 | 276 | else: 277 | st.session_state.chat_url = "https://api.openai.com/v1" 278 | st.session_state.chat_key = "" 279 | st.session_state.chat_model_list = ["gpt-3.5-turbo", "gpt-4","gpt-4-turbo"] 280 | st.session_state.chat_model_name = "gpt-4-turbo" 281 | chat_model_index = 2 282 | 283 | st.session_state.baidu_appid = "" 284 | st.session_state.baidu_appkey = "" 285 | 286 | st.session_state.tencent_appid = "" 287 | st.session_state.tencent_secretKey = "" 288 | 289 | if "engine" not in st.session_state: 290 | st.session_state['engine'] = None 291 | 292 | 293 | is_translation= st.radio("翻译器选择(翻译成中文)", ("否", "gpt翻译","百度翻译","腾讯翻译"),horizontal=True,index=0) 294 | if is_translation == "否": 295 | st.session_state.engine = None 296 | elif is_translation == "gpt翻译": 297 | # 使用gpt模型时 298 | st.session_state.chat_url = st.text_input('Base URL', st.session_state.chat_url,type='password') 299 | st.session_state.chat_key = st.text_input('API Key',st.session_state.chat_key, type='password') 300 | 301 | st.session_state.chat_model_name = st.selectbox('Models', st.session_state.chat_model_list,index=chat_model_index) 302 | 303 | if st.session_state.chat_key != "": 304 | st.session_state.engine = GPT(key = st.session_state.chat_key , 305 | base_url = st.session_state.chat_url, 306 | model = st.session_state.chat_model_name) 307 | 308 | elif is_translation == "百度翻译": 309 | st.write("申请地址:https://fanyi-api.baidu.com/manage/developer") 310 | st.session_state.baidu_appid = st.text_input('appid', st.session_state.baidu_appid,type='password') 311 | st.session_state.baidu_appkey = st.text_input('appkey',st.session_state.baidu_appkey, type='password') 312 | st.session_state.engine = Baidu(appid = st.session_state.baidu_appid ,secretKey = st.session_state.baidu_appkey) 313 | 314 | elif is_translation == "腾讯翻译": 315 | st.write("申请地址:https://console.cloud.tencent.com/tmt") 316 | st.session_state.tencent_appid = st.text_input('appid', st.session_state.tencent_appid,type='password') 317 | st.session_state.tencent_secretKey = st.text_input('secretKey',st.session_state.tencent_secretKey, type='password') 318 | st.session_state.engine = Tencent(appid = st.session_state.tencent_appid ,secretKey = st.session_state.tencent_secretKey) 319 | 320 | 321 | 322 | st.markdown("----") 323 | if st.button("开始转换"): 324 | if st.session_state.transcribe is None: 325 | st.error("请先加载模型") 326 | return 327 | 328 | if st.session_state.audio_separator_temp is not None: 329 | input_audio = st.session_state.audio_separator_temp 330 | elif st.session_state.audio_temp is not None: 331 | input_audio = st.session_state.audio_temp 332 | else: 333 | st.error("请先上传媒体") 334 | return 335 | 336 | print("input audio: {}".format(input_audio)) 337 | 338 | with st.spinner('字幕生成中。。。'): 339 | srt,ass = st.session_state.transcribe.run(file_name = input_audio, 340 | audio_binary_io = input_audio, 341 | language=st.session_state.language, 342 | is_vad_filter = st.session_state.is_vad_filter, 343 | min_silence_duration_ms = st.session_state.min_silence_duration_ms, 344 | is_split = st.session_state.is_split, 345 | split_method = st.session_state.split_method, 346 | initial_prompt=st.session_state.prompt 347 | ) 348 | 349 | zip_name = os.path.splitext(os.path.basename(st.session_state.audio_temp))[0] + ".zip" 350 | zip_name_path = os.path.join("./temp",zip_name) 351 | zipObj = ZipFile(zip_name_path, "w") 352 | zipObj.write(srt) 353 | zipObj.write(ass) 354 | 355 | # 如果需要翻译 356 | if st.session_state.engine is not None: 357 | with st.spinner('翻译中。。。'): 358 | t = translation(st.session_state.engine) 359 | translate_ass ,translate_srt = t.translate_save(ass) 360 | zipObj.write(translate_ass) 361 | zipObj.write(translate_srt) 362 | 363 | zipObj.close() 364 | 365 | with open(zip_name_path, "rb") as f: 366 | datazip = f.read() 367 | b64 = base64.b64encode(datazip).decode() 368 | href = f"\ 369 | 下载字幕: {zip_name}\ 370 | " 371 | st.markdown(href, unsafe_allow_html=True) 372 | st.markdown("后期可以通过[aegisub](http://www.aegisub.org/)对字幕进行修改优化") 373 | 374 | 375 | if st.session_state.media_type == "视频" and st.session_state.is_show_video == "是": 376 | #print("字幕:{},{}".format(srt, ass)) 377 | output_video_path = os.path.join( 378 | TEMP, 379 | os.path.splitext(os.path.basename(st.session_state.video_temp_name))[0]+"_output.mp4" 380 | ) 381 | with st.spinner("视频生成中..."): 382 | merge_subtitles_to_video(st.session_state.video_temp 383 | ,ass 384 | ,output_video_path) 385 | 386 | if os.path.exists(output_video_path): 387 | video_bytes = open(output_video_path, 'rb').read() 388 | st.video(video_bytes) 389 | 390 | 391 | if __name__ == "__main__": 392 | # 如果本地没有temp文件夹则建立 393 | if not os.path.exists('temp'): 394 | os.makedirs('temp') 395 | web_page() 396 | 397 | 398 | 399 | 400 | --------------------------------------------------------------------------------