├── .github
│   ├── ISSUE_TEMPLATE
│   │   ├── bug_report.yml
│   │   ├── config.yml
│   │   └── feature_request.md
│   └── workflows
│       └── stale.yml
├── .gitignore
├── GSV_API_launcher.py
├── LICENSE
├── README.md
├── Sava_Utils
│   ├── __init__.py
│   ├── base_componment.py
│   ├── edit_panel.py
│   ├── extern_extensions
│   │   ├── __init__.py
│   │   └── wav2srt.py
│   ├── i18nAuto
│   │   ├── __init__.py
│   │   └── translations
│   │       ├── __init__.py
│   │       ├── en_US.py
│   │       ├── fr_FR.py
│   │       ├── ja_JP.py
│   │       ├── ko_KR.py
│   │       └── zh_CN.py
│   ├── librosa_load.py
│   ├── man
│   │   ├── __init__.py
│   │   ├── en_US
│   │   │   ├── README.py
│   │   │   ├── __init__.py
│   │   │   ├── changelog.py
│   │   │   ├── help.py
│   │   │   ├── help_custom.py
│   │   │   ├── issues.py
│   │   │   └── title.py
│   │   ├── fr_FR
│   │   │   ├── README.py
│   │   │   ├── __init__.py
│   │   │   ├── changelog.py
│   │   │   ├── help.py
│   │   │   ├── help_custom.py
│   │   │   ├── issues.py
│   │   │   └── title.py
│   │   ├── ja_JP
│   │   │   ├── README.py
│   │   │   ├── __init__.py
│   │   │   ├── changelog.py
│   │   │   ├── help.py
│   │   │   ├── issues.py
│   │   │   └── title.py
│   │   └── zh_CN
│   │       ├── README.py
│   │       ├── __init__.py
│   │       ├── changelog.py
│   │       ├── help.py
│   │       ├── help_custom.py
│   │       ├── issues.py
│   │       └── title.py
│   ├── polyphone.py
│   ├── settings.py
│   ├── subtitle.py
│   ├── subtitle_translation.py
│   ├── translator
│   │   ├── __init__.py
│   │   └── ollama.py
│   ├── tts_projects
│   │   ├── __init__.py
│   │   ├── bv2.py
│   │   ├── custom.py
│   │   ├── gsv.py
│   │   └── mstts.py
│   └── utils.py
├── Srt-AI-Voice-Assistant.py
├── check_i18n.sh
├── create_build_script.sh
├── create_built-in_manual.sh
├── docs
│   ├── en_US
│   │   ├── README.md
│   │   ├── changelog.md
│   │   ├── help.md
│   │   ├── help_custom.md
│   │   ├── issues.md
│   │   └── title.md
│   ├── fr_FR
│   │   ├── README.md
│   │   ├── changelog.md
│   │   ├── help.md
│   │   ├── help_custom.md
│   │   ├── issues.md
│   │   └── title.md
│   ├── ja_JP
│   │   ├── README.md
│   │   ├── changelog.md
│   │   ├── help.md
│   │   ├── issues.md
│   │   └── title.md
│   └── zh_CN
│       ├── README.md
│       ├── changelog.md
│       ├── help.md
│       ├── help_custom.md
│       ├── issues.md
│       └── title.md
├── requirements.txt
├── tools
│   ├── __init__.py
│   ├── put_extensions_here
│   ├── slicer2.py
│   └── wav2srt.py
└── 启动Srt-AI-Voice-Assistant.bat
/.github/ISSUE_TEMPLATE/bug_report.yml:
--------------------------------------------------------------------------------
1 | name: "Issue report"
2 | description: Something is not working as expected.
3 | body:
4 | - type: checkboxes
5 | attributes:
6 | label: Self-Checks
7 | options:
8 | - label: I have already read the documentation and couldn't find any useful information to solve my problem. | 我已经阅读完内置说明,但仍无法解决问题。
9 | required: true
10 | - label: I have searched for the existing issues. | 我已经查看过现有的issue。
11 | required: true
12 |
13 | - type: textarea
14 | attributes:
15 | label: Describe the problem
16 | description: A clear and concise description of the problem. | 请清晰地描述遇到的问题。
17 | validations:
18 | required: true
19 |
20 | - type: textarea
21 | attributes:
22 | label: System Info
23 | description: OS and any relevant environment details? Which TTS project are you using, and which version? | 您的系统环境?您正在使用哪一个TTS项目?以及它的版本?
24 | placeholder: e.g. I'm using the integrated package./I'm running it and using GPT-SoVITS-v3 on WSL2 with Python 3.12. | 我在使用本项目提供的整合包/我在wsl2下使用GSV并用python3.12运行本项目
25 | validations:
26 | required: false
27 |
28 | - type: textarea
29 | attributes:
30 | label: How To Reproduce | 请您提供问题的复现方法
31 | description: Include detailed steps and screenshots (both this application's console and the corresponding TTS API's console). | 请提供复现问题的步骤,并提供本项目控制台报错截图和对应TTS项目的API的控制台截图。
32 | validations:
33 | required: true
34 |
35 | - type: textarea
36 | attributes:
37 | label: (Optional) Additional context
38 | placeholder: Add any other context about the problem here. | (可选)补充说明
39 | validations:
40 | required: false
41 |
--------------------------------------------------------------------------------
/.github/ISSUE_TEMPLATE/config.yml:
--------------------------------------------------------------------------------
1 | blank_issues_enabled: true
2 |
--------------------------------------------------------------------------------
/.github/ISSUE_TEMPLATE/feature_request.md:
--------------------------------------------------------------------------------
1 | ---
2 | name: Feature request
3 | about: Suggest an idea for this project
4 | title: ''
5 | labels: enhancement
6 | assignees: ''
7 |
8 | ---
9 |
10 | **Is your feature request related to a problem? Please describe.**
11 | A clear and concise description of what the problem is. Ex. I'm always frustrated when [...]
12 |
13 | **Describe the solution you'd like**
14 | A clear and concise description of what you want to happen.
15 |
16 | **Describe alternatives you've considered**
17 | A clear and concise description of any alternative solutions or features you've considered.
18 |
19 | **Additional context**
20 | Add any other context or screenshots about the feature request here.
21 |
--------------------------------------------------------------------------------
/.github/workflows/stale.yml:
--------------------------------------------------------------------------------
1 | # This workflow warns and then closes issues that have had no activity for a specified amount of time (PR handling is disabled below).
2 | # You can adjust the behavior by modifying this file.
3 | # For more information, see:
4 | # https://github.com/actions/stale
5 | name: Mark stale issues
6 |
7 | on:
8 | schedule:
9 | - cron: '0 12 * * *'
10 |
11 | jobs:
12 | stale:
13 | runs-on: ubuntu-latest
14 | permissions:
15 | issues: write
16 | pull-requests: write
17 |
18 | steps:
19 | - uses: actions/stale@v9
20 | with:
21 | repo-token: ${{ secrets.GITHUB_TOKEN }}
22 | days-before-pr-stale: -1
23 | days-before-pr-close: -1
24 | stale-issue-label: "stale"
25 | days-before-issue-stale: 14
26 | days-before-issue-close: 0
27 | close-issue-message: 'This issue has been automatically closed due to 14 days of inactivity. If it still needs to be addressed, please reopen this issue.'
28 | remove-stale-when-updated: true
29 | enable-statistics: false
30 |
31 |
--------------------------------------------------------------------------------
/.gitignore:
--------------------------------------------------------------------------------
1 | *.bat
2 | *.sh
3 | __pycache__
4 | SAVAdata
5 | dist
6 | build
7 | *.spec
8 | *.exe
9 | *.7z
--------------------------------------------------------------------------------
/GSV_API_launcher.py:
--------------------------------------------------------------------------------
1 | import os
2 | import subprocess
3 | import json
4 | import time
5 | import yaml
6 |
7 | MAX_P = 3  # maximum number of GSV API processes to launch at once
8 | sava_config = json.load(open("SAVAdata/config.json", encoding="utf-8"))
9 | apath = "api.py" if sava_config['gsv_fallback'] else "api_v2.py"
10 | process_tab = dict()
11 | if __name__ == "__main__":
12 | os.makedirs('SAVAdata/temp', exist_ok=True)
13 | count = 0
14 | for i in [os.path.join('SAVAdata/presets', x) for x in os.listdir('SAVAdata/presets') if os.path.isdir(os.path.join('SAVAdata/presets', x))]:
15 | preset = json.load(open(os.path.join(i, 'info.json'), encoding="utf-8"))
16 | gsv_yml = {
17 | "custom": {
18 | "device": "cuda",
19 | "is_half": False,
20 | "version": "v2",
21 | "t2s_weights_path": preset["gpt_path"],
22 | "vits_weights_path": preset["sovits_path"],
23 | "cnhuhbert_base_path": "GPT_SoVITS/pretrained_models/chinese-hubert-base",
24 | "bert_base_path": "GPT_SoVITS/pretrained_models/chinese-roberta-wwm-ext-large",
25 | }
26 | }
27 | yml_temp_path = os.path.join('SAVAdata/temp', f'{preset["name"]}.yml')
28 | with open(yml_temp_path, 'w') as f:
29 | yaml.dump(gsv_yml, f)
30 | # launch one API process per preset; skip ports that are already in use
31 | if preset["port"] not in process_tab:
32 | command = f'"{sava_config["gsv_pydir"]}" "{os.path.join(sava_config["gsv_dir"], apath)}" -c {os.path.abspath(yml_temp_path)} -p {preset["port"]}'  # double quotes inside the f-string: nesting the same quote type is a SyntaxError before Python 3.12
33 | process_tab[preset["port"]] = subprocess.Popen(command, cwd=sava_config['gsv_dir'], shell=True)
34 | count += 1
35 | if count >= MAX_P:
36 | break
37 | while True:  # keep the launcher process alive after spawning the API processes
38 | time.sleep(200)
39 |
--------------------------------------------------------------------------------
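
The launcher above expects each folder under `SAVAdata/presets` to contain an `info.json` providing at least `name`, `gpt_path`, `sovits_path`, and `port`. As a minimal sketch of such a file (the paths are hypothetical, and presets saved from the UI may carry additional fields):

```python
import json
import os

# Hypothetical preset folder; real presets are normally created by the application itself.
preset_dir = "SAVAdata/presets/my_speaker"
os.makedirs(preset_dir, exist_ok=True)

info = {
    "name": "my_speaker",                               # names the generated .yml file
    "gpt_path": "GPT_weights_v2/my_speaker.ckpt",       # becomes t2s_weights_path
    "sovits_path": "SoVITS_weights_v2/my_speaker.pth",  # becomes vits_weights_path
    "port": 9880,                                       # one API process is spawned per distinct port
}
with open(os.path.join(preset_dir, "info.json"), "w", encoding="utf-8") as f:
    json.dump(info, f, ensure_ascii=False, indent=2)
```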
/README.md:
--------------------------------------------------------------------------------
1 | # Srt-AI-Voice-Assistant
2 | **English** | [**简体中文**](./docs/zh_CN/README.md) | [**Français**](./docs/fr_FR/README.md) | [**Other**](./docs/) | **Documents in other languages are AI-translated and provided for reference only.**
3 | ### This project can use multiple AI-TTS to dub your subtitle or text files, and it provides convenient auxiliary functions such as audio/video transcription and subtitle translation.
4 | If you encounter problems or want to request a feature, please go to [Issues](https://github.com/YYuX-1145/Srt-AI-Voice-Assistant/issues).
5 | ## Features
6 | - ✅ Open-source, Friendly WebUI interface, Run locally and Accessible via LAN
7 | - ✅ Support multiple TTS projects: BV2, GSV, CosyVoice2, AzureTTS, and you can even customize your APIs!
8 | - ✅ Save personalized settings and presets
9 | - ✅ Batch mode
10 | - ✅ Subtitle editing
11 | - ✅ Subtitle translation
12 | - ✅ Regenerating Specific Lines
13 | - ✅ Support multi-speaker dubbing
14 | - ✅ Re-export subtitles
15 | - ✅ Extended functions: subtitle transcription for audio/video
16 | - ✅ I18n
17 |
18 | ## [Download the packaged version only](https://github.com/YYuX-1145/Srt-AI-Voice-Assistant/releases)
19 | * Use this version only when there are dependency conflicts or installation issues.
20 |
21 | ## [Download the integrated package with GPT-SoVITS (From Hugging Face)](https://huggingface.co/YYuX/GPT-SoVITS-SAVA-windows-package/tree/main)
22 | * The GPT-SoVITS integrated package includes the packaged version without removing any built-in or pretrained models, and its finetuning and training code is the same as in the official repository.
23 | * Note: the packaged version included in the GPT-SoVITS integrated package may not be the latest; overwrite it to update.
--------------------------------------------------------------------------------
/Sava_Utils/__init__.py:
--------------------------------------------------------------------------------
1 | import logging
2 | import os
3 | import json
4 |
5 | current_path = os.environ.get("current_path")
6 | log_colors = {
7 | "DEBUG": "white",
8 | "INFO": "green",
9 | "WARNING": "yellow",
10 | "ERROR": "red",
11 | "CRITICAL": "bold_red",
12 | }
13 | logger = logging.getLogger("SAVA")
14 | logger.setLevel(logging.INFO)
15 | try:
16 | import colorlog
17 |
18 | handler = colorlog.StreamHandler()
19 | handler.setFormatter(
20 | colorlog.ColoredFormatter(
21 | fmt="%(log_color)s[%(levelname)s][%(asctime)s]:%(funcName)s: %(message)s",
22 | datefmt="%Y-%m-%d_%H:%M:%S",
23 | log_colors=log_colors,
24 | )
25 | )
26 | logger.addHandler(handler)
27 | except ImportError:
28 | handler = logging.StreamHandler()
29 | handler.setLevel(logging.INFO)
30 | formatter = logging.Formatter("[%(levelname)s][%(asctime)s]:%(funcName)s: %(message)s")
31 | handler.setFormatter(formatter)
32 | logger.addHandler(handler)
33 |
34 | from .i18nAuto import I18n
35 |
36 | config_path = os.path.join(current_path, "SAVAdata", "config.json")
37 | try:
38 | if os.path.isfile(config_path):
39 | x = json.load(open(config_path, encoding="utf-8"))
40 | i18n = I18n(x.get("language"))
41 | else:
42 | x = dict()
43 | i18n = I18n()
44 | from .settings import Settings
45 | config = Settings.from_dict(x)
46 | del x
47 | except Exception as e:
48 | i18n = I18n()
49 | logger.warning(f"{i18n('Failed to load settings, reset to default')}: {e}")
50 | from .settings import Settings
51 | config = Settings()
52 | from .man import Man
53 |
54 | MANUAL = Man(language=config.language)
55 |
56 | import argparse
57 |
58 | parser = argparse.ArgumentParser(add_help=False)
59 | parser.add_argument("-p", "--server_port", type=int, help="server_port")
60 | parser.add_argument("-share", dest="share", action="store_true", default=False, help="set share True")
61 | parser.add_argument("-server_mode", dest="server_mode", action="store_true", default=False, help="activate server mode")
62 | args, unknown = parser.parse_known_args()
63 |
64 | # from .settings import load_cfg
65 | # config=load_cfg()
66 |
67 | config.server_mode = args.server_mode or config.server_mode
68 | if config.server_mode:
69 | logger.warning(i18n("Server Mode has been enabled!"))
70 |
71 | from .utils import clear_cache
72 | if config.clear_tmp:
73 | clear_cache()
74 |
--------------------------------------------------------------------------------
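
The parser above deliberately uses `parse_known_args`, so unrecognized flags do not abort startup. A self-contained sketch of that behavior (the sample argument list is invented for illustration):

```python
# A minimal, runnable sketch of the same parsing logic.
import argparse

parser = argparse.ArgumentParser(add_help=False)
parser.add_argument("-p", "--server_port", type=int, help="server_port")
parser.add_argument("-share", dest="share", action="store_true", default=False)
parser.add_argument("-server_mode", dest="server_mode", action="store_true", default=False)

# Unknown flags are tolerated instead of raising, which is the point of parse_known_args.
args, unknown = parser.parse_known_args(["-p", "8080", "-server_mode", "--some-unknown"])
print(args.server_port, args.server_mode, unknown)  # 8080 True ['--some-unknown']
```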
/Sava_Utils/base_componment.py:
--------------------------------------------------------------------------------
1 | from abc import ABC, abstractmethod
2 |
3 |
4 | class Base_Componment(ABC):
5 | _instances = {}
6 |
7 | def __init__(self, config=None):
8 | self.ui = False
9 | self.server_mode = False
10 | if config is not None:
11 | self.update_cfg(config)
12 | super().__init__()
13 |
14 | def getUI(self, *args, **kwargs):
15 | if not self.ui:
16 | self.ui = True
17 | return self._UI(*args, **kwargs)
18 | else:
19 | raise RuntimeError("getUI() may only be called once per component.")
20 |
21 | def update_cfg(self, config):
22 | self.server_mode = config.server_mode
23 |
24 | @abstractmethod
25 | def _UI(self):
26 | raise NotImplementedError
27 |
28 | def __new__(cls, *args, **kwargs):
29 | if cls not in cls._instances:
30 | cls._instances[cls] = super().__new__(cls)
31 | return cls._instances[cls]
32 |
--------------------------------------------------------------------------------
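
A minimal sketch of how a component subclasses this base (the `Demo` class and its config object are hypothetical; real components such as `WAV2SRT` below follow the same shape). Run from the repo root with the project's dependencies installed, since importing `Sava_Utils` pulls in its package `__init__`, which reads the `current_path` env var:

```python
import os
from types import SimpleNamespace

os.environ.setdefault("current_path", os.getcwd())  # Sava_Utils.__init__ expects this env var

from Sava_Utils.base_componment import Base_Componment


class Demo(Base_Componment):
    def _UI(self):
        # Real components build their gradio widgets here;
        # getUI() ensures this runs at most once.
        return "demo-ui"


cfg = SimpleNamespace(server_mode=False)
a, b = Demo(cfg), Demo(cfg)
assert a is b        # __new__ keeps one instance per subclass
print(a.getUI())     # first call builds and returns the UI
# Calling a.getUI() again would raise, since self.ui is already True.
```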
/Sava_Utils/extern_extensions/__init__.py:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/YYuX-1145/Srt-AI-Voice-Assistant/66fb0108fa6b2e34a160a41d105182866fd704f2/Sava_Utils/extern_extensions/__init__.py
--------------------------------------------------------------------------------
/Sava_Utils/extern_extensions/wav2srt.py:
--------------------------------------------------------------------------------
1 | import gradio as gr
2 | from ..import i18n
3 | from ..utils import rc_bg,kill_process
4 | from ..base_componment import Base_Componment
5 | import os
6 |
7 |
8 | current_path = os.environ.get("current_path")
9 | OUT_DIR_DEFAULT=os.path.join(current_path,"SAVAdata","output")
10 |
11 |
12 | class WAV2SRT(Base_Componment):
13 | def __init__(self, config):
14 | self.gsv_pydir = ""
15 | self.gsv_dir = ""
16 | super().__init__(config)
17 |
18 | def update_cfg(self, config):
19 | self.gsv_pydir = config.gsv_pydir
20 | self.gsv_dir = config.gsv_dir
21 | super().update_cfg(config)
22 |
23 | def _UI(self, file_main, file_tr):
24 | available = False
25 | if os.path.exists(os.path.join(current_path, "tools", "wav2srt.py")):
26 | available = True
27 | with gr.TabItem(i18n('Audio/Video Transcribe')):
28 | with gr.Row():
29 | self.wav2srt_pid = gr.State(value=-1)
30 | with gr.Column():
31 | self.wav2srt_input = gr.File(label=i18n('Upload File'), file_count="multiple", interactive=True)
32 | self.wav2srt_out_dir = gr.Textbox(value="Default", label=i18n('Save Path(Folder Path), Default: SAVAdata\\output'), visible=not self.server_mode, interactive=not self.server_mode)
33 | self.wav2srt_pydir = gr.Textbox(value='Auto', label=i18n('Python Interpreter Path, align with GSV by default'), visible=not self.server_mode, interactive=not self.server_mode)
34 | self.wav2srt_engine = gr.Radio(choices=["funasr", "whisper"], value="funasr", label=i18n('Select ASR model. Funasr supports only Chinese(but much more faster) while Faster-Whisper has multi-language support'), interactive=True)
35 | self.wav2srt_min_length = gr.Slider(label=i18n('(ms)Minimum length of each segment'), minimum=0, maximum=90000, step=100, value=5000)
36 | self.wav2srt_min_interval = gr.Slider(label=i18n('(ms)Minium slice interval'), minimum=0, maximum=5000, step=10, value=300)
37 | self.wav2srt_sil = gr.Slider(label=i18n('(ms)Minium silence length'), minimum=0, maximum=2000, step=100, value=1000)
38 | self.wav2srt_args = gr.Textbox(value="", label=i18n('Other Parameters'), interactive=True)
39 | with gr.Column():
40 | gr.Markdown(i18n('WAV2SRT_INFO'))
41 | self.wav2srt_output = gr.File(label=i18n('Output File'), file_count="multiple", interactive=False)
42 | self.wav2srt_output_status = gr.Textbox(
43 | label=i18n('Output Info'),
44 | value="",
45 | interactive=False,
46 | )
47 | with gr.Row():
48 | self.wav2srt_run = gr.Button(value=i18n('Start'), variant="primary", interactive=True)
49 | self.wav2srt_terminate = gr.Button(value=i18n('Stop'), variant="secondary", interactive=True)
50 | self.wav2srt_terminate.click(kill_process, inputs=[self.wav2srt_pid])
51 | self.wav2srt_send2main = gr.Button(value=i18n('Send output files to Main Page'), variant="secondary", interactive=True)
52 | self.wav2srt_send2main.click(send, inputs=[self.wav2srt_output], outputs=[file_main])
53 | self.wav2srt_send2tr = gr.Button(value=i18n('Send output files to Translator'), variant="secondary", interactive=True)
54 | self.wav2srt_send2tr.click(send, inputs=[self.wav2srt_output], outputs=[file_tr])
55 | self.wav2srt_run.click(
56 | self.run_wav2srt,
57 | inputs=[self.wav2srt_input, self.wav2srt_out_dir, self.wav2srt_pydir, self.wav2srt_engine, self.wav2srt_min_length, self.wav2srt_min_interval, self.wav2srt_sil, self.wav2srt_args],
58 | outputs=[self.wav2srt_pid, self.wav2srt_output_status, self.wav2srt_output],
59 | max_batch_size=2,
60 | )
61 | return available
62 |
63 | def run_wav2srt(self,inputs,out_dir,pydir,engine,min_length,min_interval,max_sil_kept,args):
64 | if self.server_mode:
65 | pydir=""
66 | out_dir=""
67 | if inputs in [None,[]]:
68 | gr.Warning(i18n('Please upload audio or video!'))
69 | return -1,i18n('Please upload audio or video!'),None
70 | pydir=pydir.strip('"')
71 | if pydir in [None,"",'Auto']:
72 | if self.gsv_pydir not in [None,""]:
73 | pydir=self.gsv_pydir
74 | else:
75 | gr.Warning(i18n('Please specify Python Interpreter!'))
76 | return -1,i18n('Please specify Python Interpreter!'),None
77 | if out_dir in ['',None,'Default']:
78 | out_dir=OUT_DIR_DEFAULT
79 | output_list=[]
80 | out_dir=out_dir.strip('"')
81 | msg=""
82 | for file in inputs:
83 | msg+=f"{i18n('Processing')}: {os.path.basename(file.name)}\n"
84 | output_path=f"{os.path.join(out_dir,os.path.basename(file.name))}.srt"
85 | command=f'"{pydir}" tools\\wav2srt.py -input_dir "{file.name}" -output_dir "{output_path}" -engine {engine} --min_length {int(min_length)} --min_interval {int(min_interval)} --max_sil_kept {int(max_sil_kept)} {args}'
86 | x=rc_bg(command=command,dir=self.gsv_dir if self.gsv_dir and os.path.isdir(self.gsv_dir) else current_path)
87 | pid=next(x)  # first value from rc_bg is the child PID, so the Stop button can kill it
88 | yield pid,msg,output_list
89 | exit_code=next(x)  # generator resumes once the process exits
90 | if exit_code==0:
91 | msg+=f"{i18n('Done!')} {os.path.basename(file.name)}\n"
92 | output_list.append(output_path)
93 | else:
94 | msg+=f"{i18n('Tasks are terminated due to an error in')} {os.path.basename(file.name)}\n"
95 | break
96 | yield -1,msg,output_list
97 | msg+=f"{i18n('Finished')}\n"
98 | yield -1,msg,output_list
99 |
100 |
101 | def send(fp_list):
102 | return [i.name for i in fp_list] if fp_list is not None else fp_list
103 |
--------------------------------------------------------------------------------
/Sava_Utils/i18nAuto/__init__.py:
--------------------------------------------------------------------------------
1 | import locale
2 |
3 | class I18n():
4 | def __init__(self, language=None):
5 | if language in ["Auto", None]:
6 | language = locale.getdefaultlocale()[0]
7 | self.language = language
8 | ls=dict()
9 | try:
10 | exec(f"from .translations.{language} import i18n_dict",globals(),ls)
11 | self.language_map=ls["i18n_dict"]
12 | except ImportError:  # no translation module for this locale; fall back to raw keys
13 | self.language_map=dict()
14 |
15 | def __call__(self, key):
16 | return self.language_map.get(key, key)
17 |
18 | def __repr__(self):
19 | return f"Using Language: {self.language}"
20 |
--------------------------------------------------------------------------------
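
Lookup is a plain call, and unknown keys fall through unchanged, which is why the English UI needs no exhaustive en_US table. A small illustration, assuming the zh_CN table shown later in this dump and the `current_path` env var that `Sava_Utils.__init__` requires:

```python
import os

os.environ.setdefault("current_path", os.getcwd())

from Sava_Utils.i18nAuto import I18n

i18n = I18n("zh_CN")
print(i18n("Start Translating"))   # -> "开始翻译" (present in zh_CN's i18n_dict)
print(i18n("Some unseen string"))  # -> "Some unseen string" (missing keys echo back)
print(i18n)                        # -> "Using Language: zh_CN"
```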
/Sava_Utils/i18nAuto/translations/__init__.py:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/YYuX-1145/Srt-AI-Voice-Assistant/66fb0108fa6b2e34a160a41d105182866fd704f2/Sava_Utils/i18nAuto/translations/__init__.py
--------------------------------------------------------------------------------
/Sava_Utils/i18nAuto/translations/en_US.py:
--------------------------------------------------------------------------------
1 | i18n_dict={
2 | #GSV
3 | "DICT_LANGUAGE":{
4 | "Chinese": "all_zh",
5 | "Cantonese": "all_yue",
6 | "English": "en",
7 | "Japanese": "all_ja",
8 | "Korean": "all_ko",
9 | "Chinese-English Mix": "zh",
10 | "Cantonese-English Mix": "yue",
11 | "Japanese-English Mix": "ja",
12 | "Korean-English Mix": "ko",
13 | "Multi-Language Mix": "auto",
14 | "Multi-Language Mix (Cantonese)": "auto_yue"
15 | },
16 | "CUT_METHOD":{
17 | "No cutting": "cut0",
18 | "Slice once every 4 sentences": "cut1",
19 | "Slice per 50 characters": "cut2",
20 | "Slice by Chinese punct": "cut3",
21 | "Slice by English punct": "cut4",
22 | "Slice by every punct": "cut5"
23 | },
24 |
25 | #MSTTS
26 | "MSTTS_NOTICE":"""Microsoft TTS needs Internet Connection. You should fill in your key and specify the server region before gengerating audios. Please pay attention to the monthly free quota.
[【To Get Your Key】](https://learn.microsoft.com/en-US/azure/ai-services/speech-service/get-started-text-to-speech)""",
27 |
28 | #Subtitle Translation
29 | "OLLAMA_NOTICE":"⚠️LLMs use much VRAM while they're running and do not forget to select and unload the corresponding model after usage in order to free up VRAM.",
30 |
31 | # Polyphone Editor
32 | "POLYPHONE_NOTICE": "⚠️This feature allows you to modify the polyphonic character configuration of GPT-SoVITS. Changes will take effect after saving and restarting the API.⚠️",
33 |
34 | #EXTENSIONS
35 |
36 | #WAV2SRT
37 | "WAV2SRT_INFO":"""
38 | This function can be directly used in the GPT-SoVITS integrated package; otherwise, you need to install the corresponding dependencies yourself.
39 |
40 | # Other Parameters:
41 | `--whisper_size` Default:large-v3 | Specifies the model when using faster-whisper.
42 | `--threshold` Default:-40 | Audio quieter than this volume is treated as silence and becomes a candidate cut point.
43 | `--hop_size` Default:20 | Hop size used to compute the volume curve; smaller values give higher precision at a higher computational cost (note: higher precision does not always mean better results).
44 | """
45 | }
--------------------------------------------------------------------------------
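
Tying WAV2SRT_INFO back to the `run_wav2srt` method earlier in this dump: the UI values are baked into a command line, and the extra flags documented above are appended verbatim from the "Other Parameters" textbox. A hedged sketch of the resulting command string (interpreter and file paths are invented for illustration; the numeric values are the UI sliders' defaults):

```python
# Shape of the command run_wav2srt assembles; all paths here are hypothetical.
pydir = r"D:\GPT-SoVITS\runtime\python.exe"       # Python interpreter, usually the GSV one
wav = r"D:\clips\interview.wav"                   # uploaded input file
out = r"SAVAdata\output\interview.wav.srt"        # derived output path
extra = "--whisper_size large-v3 --threshold -40 --hop_size 20"  # flags documented in WAV2SRT_INFO

command = (
    f'"{pydir}" tools\\wav2srt.py -input_dir "{wav}" -output_dir "{out}" '
    f"-engine whisper --min_length 5000 --min_interval 300 --max_sil_kept 1000 {extra}"
)
print(command)
```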
/Sava_Utils/i18nAuto/translations/ja_JP.py:
--------------------------------------------------------------------------------
1 | #this file is translated by AI. Just for reference.
2 | i18n_dict = {
3 | # functions in main
4 | "You need to load custom API functions!": "カスタムAPI関数を読み込む必要があります!",
5 | "Please upload the subtitle file!": "字幕ファイルをアップロードしてください!",
6 | "The current mode does not allow batch processing!": "現在のモードではバッチ処理は許可されていません!",
7 | "Synthesizing single-speaker task": "単一話者のタスクを合成中です",
8 | "All subtitle syntheses have failed, please check the API service!": "すべての字幕合成に失敗しました。APIサービスを確認してください!",
9 | "Done! Time used": "完了!使用時間",
10 | "There is no subtitle in the current workspace": "現在のワークスペースに字幕はありません",
11 | "Warning: No speaker has been assigned": "警告:話者が割り当てられていません",
12 | "Using default speaker": "現在、選択されたデフォルトの話者を使用しています",
13 | "Speaker archive not found": "話者のアーカイブが見つかりません",
14 | "Synthesizing multi-speaker task, the current speaker is": "複数話者のタスクを合成中です。現在の話者は",
15 | "Synthesis for the single speaker has failed !": "単一話者のすべての音声合成に失敗しました!",
16 | "Done!": "完了!",
17 | "Failed subtitle id": "失敗した字幕のID",
18 | "error message received": "受け取ったエラーメッセージは",
19 | "Please go to the settings page to specify the corresponding environment path and do not forget to save it!": "設定ページに移動して、対応する環境パスを指定し、保存するのを忘れないでください!",
20 | " has been launched, please ensure the configuration is correct.": "が起動しました。設定ファイルが正しいことを確認してください。",
21 | "API downgraded to v1, functionality is limited.": "APIがv1にダウングレードされました。機能が制限されています。",
22 | "You must specify the speakers while using multi-speaker dubbing!": "複数話者のダビングを使用する場合は、話者を指定する必要があります!",
23 | "Audio re-generation was successful! Click the button.": "音声の再生成に成功しました!<音声を再構成>ボタンをクリックしてください。",
24 | "Audio re-generation failed!": "音声の再生成に失敗しました!",
25 | "Reassemble successfully!": "再構成が完了しました!",
26 | "This function has been disabled!": "この機能は無効になっています!",
27 | "Please enter a valid name!": "有効な名前を入力してください!",
28 | "Saved successfully": "保存に成功しました",
29 |
30 | # UI in main
31 | "Subtitle Dubbing": "字幕ダビング",
32 | "File content": "ファイル内容の表示",
33 | "Create Multi-Speaker Dubbing Project": "複数話者のダビングプロジェクトを作成する",
34 | "Custom API": "カスタムAPI",
35 | "Frame rate of Adobe Premiere project, only applicable to csv files exported from Pr": "Adobe Premiereプロジェクトのフレームレート。Prからエクスポートされたcsvファイルにのみ適用されます",
36 | "API Launcher": "APIサービスを起動する",
37 | "Number of threads for sending requests": "リクエストを送信するスレッド数",
38 | "Voice time offset (seconds)": "音声の時間オフセット(秒) すべての音声の時間を遅らせるまたは早める",
39 | "Upload file (Batch mode only supports one speaker at a time)": "ファイルをアップロードする(バッチモードでは一度に1人の話者のみサポートされます)",
40 | "Output Info": "出力情報",
41 | "Output File": "出力ファイル",
42 | "Editing area *Note: DO NOT clear temporary files while using this function.": "編集エリア *注意:この機能を使用中は一時ファイルを削除しないでください。",
43 | "History": "合成履歴",
44 | "Load": "読み込む",
45 | "Reassemble Audio": "音声を再構成する",
46 | "Export Subtitles": "字幕をエクスポートする",
47 | "Select All": "すべて選択",
48 | "Reverse Selection": "選択を反転する",
49 | "Clear Selection": "選択を解除する",
50 | "Apply Timestamp modifications": "タイムスタンプの変更を適用する",
51 | "Copy": "コピー",
52 | "Merge": "結合",
53 | "Delete": "削除",
54 | "Multi-speaker dubbing": "複数話者のダビング",
55 | "Select/Create Speaker": "話者を選択/作成する",
56 | "TTS Project": "話者が所属するプロジェクト",
57 | "Start Multi-speaker Synthesizing": "複数話者のダビングを生成する",
58 | "Auxiliary Functions": "ツール",
59 | "Extended Contents": "外部拡張コンテンツ",
60 | "Settings": "設定",
61 | "Readme": "説明書",
62 | "Issues": "よくあるエラー",
63 | "Help & User guide": "ヘルプとユーザーガイド",
64 |
65 | # utils
66 | "An error occurred": "エラーが発生しました",
67 | "Server Mode has been enabled!": "サーバーモードが有効になりました!",
68 | "Temporary files cleared successfully!": "一時ファイルを正常に削除しました!",
69 | "There are no temporary files.": "現在、一時ファイルはありません。",
70 | "Execute command": "コマンドを実行する",
71 | "No running processes": "実行中のプロセスはありません",
72 | "Process terminated.": "プロセスが終了しました。",
73 | "": "<複数のファイル>",
74 | "Failed to read file": "字幕ファイルの読み取りに失敗しました",
75 | "Error: File too large": "エラー:ファイルが大きすぎます",
76 | "Unknown format. Please ensure the extension name is correct!": "未知の形式です。拡張子が正しいことを確認してください!",
77 | "Creating a multi-speaker project can only upload one file at a time!": "複数話者のダビングプロジェクトを作成する場合は、一度に1つのファイルのみアップロードできます!",
78 |
79 | # edit_panel
80 | "Not available!": "利用できません!",
81 | "Must not be empty!": "空にすることはできません!",
82 | "No subtitles selected.": "字幕が選択されていません",
83 | "Please select both the start and end points!": "開始点と終了点の両方を選択してください!",
84 | "Input format mismatch": "入力形式が一致しません。",
85 |
86 | # subtitle.py
87 | "Subtitles have not been synthesized yet!": "まだ字幕が合成されていません!",
88 | "The following subtitles are delayed due to the previous audio being too long.": "以下の字幕は、前の音声が長すぎるために遅延しています。",
89 | "Failed to synthesize the following subtitles or they were not synthesized": "以下の字幕の合成に失敗したか、まだ合成されていません",
90 |
91 | # Settings
92 | "Failed to load settings, reset to default": "設定の読み込みに失敗しました。デフォルトにリセットします",
93 | "Error, Invalid Path": "エラー、無効なパスです",
94 | "Env detected": "環境が検出されました",
95 | "Restarting...": "再起動中...",
96 | "An error occurred. Please restart manually!": "エラーが発生しました。手動で再起動してください!",
97 | "Settings saved successfully!": "設定を正常に保存しました!",
98 | "Settings have been disabled!": "設定が無効になりました",
99 | "Click Apply & Save for these settings to take effect.": "これらの設定を有効にするには、<適用して保存>をクリックしてください。",
100 | "General": "一般設定",
101 | "The port used by this program, 0=auto. When conflicts prevent startup, use -p parameter to specify the port.": "このプログラムが使用するポート 0=自動。起動時に競合が発生する場合は、-pパラメータでポートを指定してください。",
102 | "Enable LAN access. Restart to take effect.": "LANアクセスを有効にする。再起動して設定を有効にします。",
103 | "Overwrite history records with files of the same name instead of creating a new project.": "同名のファイルで履歴レコードを上書きし、新しいプロジェクトを作成しないでください。",
104 | "Clear temporary files on each startup": "起動時に一時ファイルを削除します",
105 | "Concurrency Count": "同時に処理できるリクエスト数",
106 | "Server Mode can only be enabled by modifying configuration file or startup parameters.": "サーバーモードは、設定ファイルまたは起動パラメータを変更することでのみ有効にできます。",
107 | "Minimum voice interval (seconds)": "音声の最小間隔(秒)",
108 | "Sampling rate of output audio, 0=Auto": "出力音声のサンプリングレート、0=自動",
109 | "Edit Panel Row Count (Requires a restart)": "編集パネルの行数(再起動が必要です)",
110 | "Theme (Requires a restart)": "テーマを選択する(再起動後に有効になります。)",
111 | "Clear temporary files": "一時ファイルをすぐに削除する",
112 | "Python Interpreter Path for BV2": "BV2環境のPythonインタープリターのパスを設定する",
113 | "Root Path of BV2": "BV2プロジェクトのルートパス",
114 | "Start Parameters": "起動パラメータ",
115 | "Downgrade API version to v1": "APIバージョンをv1にダウングレードする",
116 | "Python Interpreter Path for GSV": "GSV環境のPythonインタープリターのパスを設定する",
117 | "Root Path of GSV": "GSVプロジェクトのルートパス",
118 | "Server Region": "サーバーの地域",
119 | "KEY Warning: Key is stored in plaintext. DO NOT send the key to others or share your configuration file!": "キー 警告:キーは平文で保存されています。キーを他人に送信したり、設定ファイルを共有しないでください!",
120 | "Select required languages, separated by commas or spaces.": "必要な言語を選択し、カンマまたはスペースで区切ってください。",
121 | "Translation Module": "翻訳モジュールの設定",
122 | "Default Request Address for Ollama": "Ollamaのデフォルトのリクエストアドレス",
123 | "Apply & Save": "現在の設定を適用して保存する",
124 | "Restart UI": "UIを再起動する",
125 |
126 | # TTS
127 | "An error has occurred. Please check if the API is running correctly. Details": "エラーが発生しました。APIが正しく動作しているか確認してください。エラー詳細",
128 | "Advanced Parameters": "高度な合成パラメータ",
129 | "Generate Audio": "生成する",
130 |
131 | # BV2
132 | "Select Speaker ID or Speaker Name": "話者IDを選択するか、名前を入力してください",
133 |
134 | # GSV(AR)
135 | "Returned Message": "返されたメッセージ",
136 | "Select TTS Project": "TTSプロジェクトを選択する",
137 | "Inference text language": "合成する言語",
138 | "Main Reference Audio": "主参考音声",
139 | "Auxiliary Reference Audios": "補助参考音声",
140 | "Transcription of Main Reference Audio": "主参考音声の文字起こし",
141 | "Transcription | Pretrained Speaker (Cosy)": "参考音声の文字起こし|Cosyの事前学習済み音色",
142 | "Language of Main Reference Audio": "参考音声の言語",
143 | "Model Path": "モデルのパス",
144 | "Switch Models": "モデルを切り替える",
145 | "Fragment Interval(sec)": "分割間隔(秒)",
146 | "How to cut": "分割方法",
147 | "(Optional) Description": "説明情報(オプション)",
148 | "Presets": "プリセット",
149 | "You must upload Main Reference Audio": "主参考音声を指定する必要があります",
150 | "Preset saved successfully": "プリセットを正常に保存しました",
151 | "Failed to switch model": "モデルの切り替えに失敗しました",
152 | "Preset has been loaded.": "プリセットを読み込みました。",
153 | "Models are not switched. If you need to switch, please manually click the button.": "現在、モデルは切り替えられていません。強制的に切り替える場合は、手動でボタンをクリックしてください。",
154 | "Please specify the model path!": "モデルのパスを指定してください!",
155 | "Switching Models...": "モデルを切り替え中...",
156 | "Model Paths seem to be invalid, which could lead to errors!": "モデルのパスが無効な可能性があり、切り替えエラーが発生する場合があります!",
157 | "You have incorrectly entered a folder path!": "フォルダのパスを誤って入力しました!!!",
158 | "Models switched successfully": "モデルが正常に切り替わりました",
159 | "Error details": "エラーの詳細",
160 | "Successfully deleted": "削除に成功しました",
161 | "Please select a valid preset!": "有効なプリセットを選択してください!",
162 | "No preset available": "現在、プリセットはありません",
163 | "Partial auxiliary reference audio is missing!": "補助参照オーディオが一部欠落しています!",
164 |
165 | "DICT_LANGUAGE": {
166 | "中国語": "all_zh",
167 | "広東語": "all_yue",
168 | "英語": "en",
169 | "日本語": "all_ja",
170 | "韓国語": "all_ko",
171 | "中英混合": "zh",
172 | "粵英混合": "yue",
173 | "日英混合": "ja",
174 | "韓英混合": "ko",
175 | "多言語混合": "auto",
176 | "多言語混合(広東語)": "auto_yue"
177 | },
178 | "CUT_METHOD": {
179 | "切らない": "cut0",
180 | "4文でまとめて切る": "cut1",
181 | "50字でまとめて切る": "cut2",
182 | "中国語の句読点。で切る": "cut3",
183 | "英語の句読点.で切る": "cut4",
184 | "句読点で切る": "cut5"
185 | },
186 |
187 | #MSTTS
188 | "Please fill in your key to get MSTTS speaker list.": "Microsoft TTSの話者リストを取得するには、キーを入力してください!",
189 | "Can not get speaker list of MSTTS. Details": "Microsoft TTSの話者リストをダウンロードできません。エラーの詳細",
190 | "Failed to obtain access token from Microsoft.": "Microsoftからアクセストークンの取得に失敗しました。",
191 | "Failed to obtain access token from Microsoft. Check your API key, server status, and network connection. Details": "Microsoftからアクセストークンの取得に失敗しました。APIキー、サーバーの状態、およびネットワーク接続を確認してください。エラーの詳細",
192 | "Can not access Microsoft TTS service. Check your API key, server status, and network connection. Details": "Microsoft TTSにアクセスできません。APIキー、サーバーの状態、およびネットワーク接続を確認してください。エラーの詳細",
193 | "Refresh speakers list": "話者リストを更新",
194 | "Choose Language": "言語を選択",
195 | "Choose Your Speaker": "話者を選択",
196 | "Style": "話し方のスタイル",
197 | "Role": "役割プレイ",
198 | "Speed": "話すスピード",
199 | "Pitch": "音程",
200 | "MSTTS_NOTICE": """Microsoft TTSを使用するにはインターネット接続が必要です。設定ページにサービスエリアとキーを入力してから使用してください。毎月の無料枠にご注意ください。
[【キーの取得方法:リンクを開いてから 前提条件 をよく読んでください】](https://learn.microsoft.com/ja-JP/azure/ai-services/speech-service/get-started-text-to-speech)""",
201 | "Please Select Your Speaker!": "話者を選択してください!",
202 | "Please fill in your key!": "キーを設定してください!",
203 |
204 | #custom api
205 | "Choose Custom API Code File": "カスタムAPIコードファイルを選択",
206 | "No custom API code file found.": "現在、カスタムAPIのプリセットはありません",
207 | "Please select a valid custom API code file!": "有効なAPI設定ファイルを選択してください!",
208 |
209 | #Subtitle Translation
210 | "Start Translating": "翻訳を開始",
211 | "Translating": "翻訳中",
212 | "Failed to translate": "翻訳に失敗しました",
213 | "Subtitle Translation": "字幕の翻訳",
214 | "Upload your subtitle files (multiple allowed).": "字幕をアップロード(複数可)",
215 | "Send output files to Main Page": "出力ファイルをメインページに送信",
216 | "Send output files to Translator": "出力ファイルを翻訳ページに送信",
217 | "Specify Target Language": "対象言語を選択",
218 | "File Output Path": "ファイル出力パス",
219 | "Select Translator": "翻訳機を選択",
220 |
221 | #Ollama
222 | "Failed to get model list from Ollama": "Ollamaからモデルリストの取得に失敗しました",
223 | "You must specify the model!": "モデルを指定しなければなりません!",
224 | "Select Your Model": "モデルを選択",
225 | "Unload Model": "モデルをアンロード",
226 | "OLLAMA_NOTICE": "⚠️LLMを実行するとVRAMを多く消費します。使用が終わったら、対応するモデルを選択してアンロードしてVRAMを解放してください!⚠️",
227 |
228 | #EXTENSIONS
229 |
230 | #WAV2SRT
231 | "Audio/Video Transcribe": "音声/ビデオを字幕に変換",
232 | "Upload File": "ファイルをアップロード",
233 | "Save Path(Folder Path), Default: SAVAdata\\output": "保存パス(フォルダパス)、デフォルト: SAVAdata\\output",
234 | "Python Interpreter Path, align with GSV by default": "Pythonインタープリタのパス、デフォルトでGSVと一致",
235 | "Select ASR model. Funasr supports only Chinese(but much more faster) while Faster-Whisper has multi-language support": "ASRモデルを選択。funasrは中国語のみサポートしますが、はるかに高速です。faster whisperは多言語をサポートしています",
236 | "(ms)Minimum length of each segment": "(ms)各セグメントの最小長さ",
237 | "(ms)Minium slice interval": "(ms)最短のスライス間隔",
238 | "(ms)Minium silence length": "(ms)切り取った後の最大無音長",
239 | "Other Parameters": "その他のパラメータ",
240 | "Start": "開始",
241 | "Stop": "停止",
242 | "Please upload audio or video!": "音声ファイルをアップロードしてください!",
243 | "Please specify Python Interpreter!": "インタープリタを指定してください!",
244 | "Processing": "処理中",
245 | "Tasks are terminated due to an error in": "タスクがエラーで終了: ",
246 | "Finished": "タスク終了",
247 | "WAV2SRT_INFO":"""
248 | この機能は直接GPT-SoVITS統合パッケージで使用できます。それ以外の場合は、独自に対応する依存関係をインストールする必要があります。
249 | # その他のパラメータ:
250 | `--whisper_size` デフォルト:large-v3 | faster whisperを使用するときにモデルを指定
251 | `--threshold` デフォルト:-40 | 音量がこの値未満の場合、無音の候補切り取り点とみなします
252 | `--hop_size` デフォルト:20 | 音量曲線の計算方法。値が小さいほど精度が高くなり、計算量も増えます(精度が高いほど必ずしも良い結果になるとは限りません)
253 | """
254 | }
--------------------------------------------------------------------------------
/Sava_Utils/i18nAuto/translations/ko_KR.py:
--------------------------------------------------------------------------------
1 | #this file is translated by AI. Just for reference.
2 | i18n_dict={
3 | #functions in main
4 | "You need to load custom API functions!":"사용자 지정 API 함수를 로드해야 합니다!",
5 | "Please upload the subtitle file!":"자막 파일을 업로드하세요!",
6 | "The current mode does not allow batch processing!":"현재 모드에서는批量 처리가 허용되지 않습니다!",
7 | "Synthesizing single-speaker task":"단일 화자 작업을 합성 중입니다",
8 | "All subtitle syntheses have failed, please check the API service!":"모든 자막 합성이 실패했습니다. API 서비스를 확인하세요!",
9 | "Done! Time used":"완료! 사용된 시간",
10 | "There is no subtitle in the current workspace":"현재 작업 공간에 자막이 없습니다",
11 | "Warning: No speaker has been assigned":"경고: 어떤 화자도 할당되지 않았습니다",
12 | "Using default speaker":"현재 선택된 기본 화자를 사용합니다",
13 | "Speaker archive not found":"화자 아카이브를 찾을 수 없습니다",
14 | "Synthesizing multi-speaker task, the current speaker is":"다중 화자 작업을 합성 중이며, 현재 화자는",
15 | "Synthesis for the single speaker has failed !":"단일 화자의 모든 음성 합성이 실패했습니다!",
16 | "Done!":"완료!",
17 | "Failed subtitle id":"실패한 자막 id",
18 | "error message received":"받은 에러 메시지가",
19 | "Please go to the settings page to specify the corresponding environment path and do not forget to save it!":"설정 페이지로 가서 해당 환경 경로를 지정하고 저장하지 마세요!",
20 | " has been launched, please ensure the configuration is correct.":"가 시작되었습니다. 구성 파일이 정확한지 확인하세요.",
21 | "API downgraded to v1, functionality is limited.":"API가 v1로 다운그레이드되어 기능이 제한됩니다.",
22 | "You must specify the speakers while using multi-speaker dubbing!":"다중 역할 대본딩을 사용할 때는 반드시 화자를 지정해야 합니다!",
23 | "Audio re-generation was successful! Click the button.":"다시 합성이 성공했습니다! <재조립 음성> 버튼을 클릭하세요.",
24 | "Audio re-generation failed!":"다시 합성 실패!",
25 | "Reassemble successfully!":"다시 합성 완료!",
26 | "This function has been disabled!":"현재 기능이 비활성화되어 있습니다!",
27 | "Please enter a valid name!":"유효한 이름을 입력하세요!",
28 | "Saved successfully":"성공적으로 저장했습니다",
29 |
30 | #UI in main
31 | "Subtitle Dubbing":"자막 대본딩",
32 | "File content":"파일 내용 보기",
33 | "Create Multi-Speaker Dubbing Project":"다중 역할 프로젝트 만들기",
34 | "Custom API":"사용자 지정 API",
35 | "Frame rate of Adobe Premiere project, only applicable to csv files exported from Pr":"Adobe Premiere 프로젝트의 프레임速率, Pr에서 내보낸 csv 파일에만 적용됩니다",
36 | "API Launcher":"API 서비스 시작",
37 | "Number of threads for sending requests":"요청 보내는 스레드 수",
38 | "Voice time offset (seconds)":"음성 시간 오프셋(초) 모든 음성의 시간을 연기 또는 앞당김",
39 | "Upload file (Batch mode only supports one speaker at a time)":"파일 업로드(배치 모드는 한 번에 한 명의 화자만 지원합니다)",
40 | "Output Info":"출력 정보",
41 | "Output File":"출력 파일",
42 | "Editing area *Note: DO NOT clear temporary files while using this function.":"편집 영역 *Note: 이 기능을 사용하는 동안 임시 파일을 지우지 마세요.",
43 | "History":"합성 이력",
44 | "Load":"로드",
45 | "Reassemble Audio":"재조립 음성",
46 | "Export Subtitles":"자막 내보내기",
47 | "Select All":"모두 선택",
48 | "Reverse Selection":"반전 선택",
49 | "Clear Selection":"선택 지우기",
50 | "Apply Timestamp modifications":"시간 코드 적용",
51 | "Copy":"복사",
52 | "Merge":"병합",
53 | "Delete":"삭제",
54 | "Multi-speaker dubbing":"다중 역할 대본딩",
55 | "Select/Create Speaker":"선택/화자 만들기",
56 | "TTS Project":"화자가 속한 프로젝트",
57 | "Start Multi-speaker Synthesizing":"다중 역할 대본딩 생성",
58 | "Auxiliary Functions":"보조 기능",
59 | "Extended Contents":"외부 확장 내용",
60 | "Settings":"설정",
61 | "Readme":"개요",
62 | "Issues":"일반적인 오류",
63 | "Help & User guide":"사용 가이드",
64 |
65 | #utils
66 | "An error occurred":"오류가 발생했습니다",
67 | "Server Mode has been enabled!":"서비스 모드가 활성화되었습니다!",
68 | "Temporary files cleared successfully!":"임시 파일이 성공적으로 지워졌습니다!",
69 | "There are no temporary files.":"현재 임시 파일이 없습니다.",
70 | "Execute command":"명령 실행",
71 | "No running processes":"실행 중인 프로세스가 없습니다",
72 | "Process terminated.":"프로세스가 종료되었습니다.",
73 | "":"<여러 파일>",
74 | "Failed to read file":"자막 파일 읽기 실패",
75 | "Error: File too large":"오류: 파일이 너무 큽니다",
76 | "Unknown format. Please ensure the extension name is correct!":"알 수 없는 형식입니다. 확장자 이름이 정확한지 확인하세요!",
77 | "Creating a multi-speaker project can only upload one file at a time!":"다중 역할 대본딩 프로젝트를 만들 때는 한 번에 하나의 파일만 업로드할 수 있습니다!",
78 |
79 | #edit_panel
80 | "Not available!":"사용할 수 없습니다!",
81 | "Must not be empty!":"빈 칸일 수 없습니다!",
82 | "No subtitles selected.":"어떤 자막도 선택되지 않았습니다",
83 | "Please select both the start and end points!":"시작점과 종료점을 모두 선택하세요!",
84 | "Input format mismatch":"입력 형식 불일치",
85 |
86 | #subtitle.py
87 | "Subtitles have not been synthesized yet!":"아직 어떤 자막도 합성되지 않았습니다!",
88 | "The following subtitles are delayed due to the previous audio being too long.":"다음 자막은 이전 음성이 너무 길기 때문에 지연되었습니다",
89 | "Failed to synthesize the following subtitles or they were not synthesized":"다음 자막의 합성에 실패했거나 합성되지 않았습니다",
90 |
91 | #Settings
92 | "Failed to load settings, reset to default":"설정 로드 실패, 기본값으로 복원",
93 | "Error, Invalid Path":"오류, 유효하지 않은 경로",
94 | "Env detected":"환경이 감지되었습니다",
95 | "Restarting...":"다시 시작 중...",
96 | "An error occurred. Please restart manually!":"오류가 발생했습니다. 수동으로 다시 시작하세요!",
97 | "Settings saved successfully!":"설정이 성공적으로 저장되었습니다!",
98 | "Settings have been disabled!":"설정이 비활성화되었습니다",
99 | "Click Apply & Save for these settings to take effect.":"적용을 클릭하면 이러한 설정이 적용됩니다.",
100 | "General":"일반 설정",
101 | "The port used by this program, 0=auto. When conflicts prevent startup, use -p parameter to specify the port.":"이 프로그램에서 사용하는 포트 0=자동. 충돌로 인해 시작이 불가능한 경우 -p 매개변수를 사용하여 포트를 지정합니다.",
102 | "Enable LAN access. Restart to take effect.":"LAN 액세스 활성화. 재시작하여 적용됩니다.",
103 | "Overwrite history records with files of the same name instead of creating a new project.":"동일한 이름의 파일로 이력 기록을 덮어쓰고 새 프로젝트를 생성하지 않습니다.",
104 | "Clear temporary files on each startup":"매번 시작할 때 임시 파일을 지웁니다",
105 | "Concurrency Count":"동시에 처리할 수 있는 요청 수",
106 | "Server Mode can only be enabled by modifying configuration file or startup parameters.":"서비스 모드는 구성 파일 또는 시작 매개변수를 수정하여만 활성화할 수 있습니다.",
107 | "Minimum voice interval (seconds)":"음성 최소 간격(초)",
108 | "Sampling rate of output audio, 0=Auto":"출력 음성 샘플링速率, 0=자동",
109 | "Edit Panel Row Count (Requires a restart)":"편집 창 행 수, 재시작 필요",
110 | "Theme (Requires a restart)":"테마 선택, 재시작 후 적용됩니다.",
111 | "Clear temporary files":"즉시 임시 파일 지우기",
112 | "Python Interpreter Path for BV2":"BV2 환경 경로 설정",
113 | "Root Path of BV2":"BV2 프로젝트 루트 디렉토리",
114 | "Start Parameters":"시작 매개변수",
115 | "Downgrade API version to v1":"api_v1을 사용하고 v2가 아닙니다",
116 | "Python Interpreter Path for GSV":"GSV 환경 경로 설정",
117 | "Root Path of GSV":"GSV 프로젝트 루트 디렉토리",
118 | "Server Region":"서비스 지역",
119 | "KEY Warning: Key is stored in plaintext. DO NOT send the key to others or share your configuration file!":"키 경고: 키는 평문으로 저장됩니다. 키를 다른 사람에게 보내거나 설정 파일을 공유하지 마세요!",
120 | "Select required languages, separated by commas or spaces.":"필요한 언어를 선택하세요. 쉼표 또는 공백으로 구분합니다.",
121 | "Translation Module":"번역 모듈 설정",
122 | "Default Request Address for Ollama":"Ollama의 기본 요청 주소",
123 | "Apply & Save":"적용하고 현재 설정 저장",
124 | "Restart UI":"UI 다시 시작",
125 |
126 | #TTS
127 | "An error has occurred. Please check if the API is running correctly. Details":"에러가 발생했습니다. API가 정확하게 실행되는지 확인하세요. 에러 내용",
128 | "Advanced Parameters":"고급 합성 매개변수",
129 | "Generate Audio":"생성",
130 |
131 | #BV2
132 | "Select Speaker ID or Speaker Name":"화자 ID 선택 또는 이름 입력",
133 |
134 | #GSV(AR)
135 | "Returned Message":"반환된 메시지",
136 | "Select TTS Project":"TTS 프로젝트 선택",
137 | "Inference text language":"합성할 언어",
138 | "Main Reference Audio":"주 참조 오디오",
139 | "Auxiliary Reference Audios":"보조 참조 오디오",
140 | "Transcription of Main Reference Audio":"주 참조 오디오 텍스트",
141 | "Transcription | Pretrained Speaker (Cosy)":"참조 오디오 텍스트|Cosy 사전 훈련된 음색",
142 | "Language of Main Reference Audio":"참조 오디오 언어",
143 | "Model Path":"모델 경로",
144 | "Switch Models":"모델 전환",
145 | "Fragment Interval(sec)":"분절 간격(초)",
146 | "How to cut":"어떻게 자를지",
147 | "(Optional) Description":"설명 정보, 선택 사항",
148 | "Presets":"사전 설정",
149 | "You must upload Main Reference Audio":"주 참조 오디오를 지정해야 합니다",
150 | "Preset saved successfully":"사전 설정이 성공적으로 저장되었습니다",
151 | "Failed to switch model":"모델 전환 실패",
152 | "Preset has been loaded.":"사전 설정이 로드되었습니다.",
153 | "Models are not switched. If you need to switch, please manually click the button.":"현재 모델이 전환되지 않았습니다. 강제로 전환이 필요한 경우 수동으로 버튼을 클릭하세요",
154 | "Please specify the model path!":"모델 경로를 지정하세요!",
155 | "Switching Models...":"모델 전환 중...",
156 | "Model Paths seem to be invalid, which could lead to errors!":"모델 경로가 유효하지 않을 수 있으며, 이는 전환 오류로 이어질 수 있습니다!",
157 | "You have incorrectly entered a folder path!":"폴더 경로를 잘못 입력했습니다!!!",
158 | "Models switched successfully":"모델이 전환되었습니다",
159 | "Error details":"에러 내용",
160 | "Successfully deleted":"삭제 성공",
161 | "Please select a valid preset!":"유효한 사전 설정을 선택하세요!",
162 | "No preset available":"현재 사전 설정이 없습니다",
163 | "Partial auxiliary reference audio is missing!":"보조 참조 오디오가 일부 누락되어 있습니다!",
164 |
165 | "DICT_LANGUAGE":{
166 | "中文": "all_zh",
167 | "粤语": "all_yue",
168 | "英文": "en",
169 | "日文": "all_ja",
170 | "韩文": "all_ko",
171 | "中英混合": "zh",
172 | "粤英混合": "yue",
173 | "日英混合": "ja",
174 | "韩英混合": "ko",
175 | "多语种混合": "auto",
176 | "多语种混合(粤语)": "auto_yue",
177 | },
178 | "CUT_METHOD":{
179 | "不切": "cut0",
180 | "凑四句一切": "cut1",
181 | "凑50字一切": "cut2",
182 | "按中文句号。切": "cut3",
183 | "按英文句号.切": "cut4",
184 | "按标点符号切": "cut5",
185 | },
186 |
187 | #MSTTS
188 | "Please fill in your key to get MSTTS speaker list.":"Microsoft TTS 화자 목록을 가져오려면 먼저 키를 입력해야 합니다!",
189 | "Can not get speaker list of MSTTS. Details":"Microsoft TTS 화자 목록을 다운로드할 수 없습니다. 에러 내용",
190 | "Failed to obtain access token from Microsoft.":"Microsoft에서 액세스 토큰을 가져오지 못했습니다.",
191 | "Failed to obtain access token from Microsoft. Check your API key, server status, and network connection. Details":"Microsoft에서 액세스 토큰을 가져오지 못했습니다. 키, 서버 상태 및 네트워크 연결을 확인하세요. 에러 내용",
192 | "Can not access Microsoft TTS service. Check your API key, server status, and network connection. Details":"Microsoft TTS 에러. 키, 서버 상태 및 네트워크 연결을 확인하세요. 에러 내용",
193 | "Refresh speakers list":"화자 목록 새로 고침",
194 | "Choose Language":"언어 선택",
195 | "Choose Your Speaker":"당신의 화자를 선택하세요",
196 | "Style":"화자 스타일",
197 | "Role":"역할 연기",
198 | "Speed":"말하는 속도",
199 | "Pitch":"음조",
200 | "MSTTS_NOTICE":"""Microsoft TTS를 사용하려면 인터넷 연결이 필요합니다. 먼저 설정 페이지로 가서 서비스 지역과 키를 입력해야 합니다. 매월 무료 한도에 유의하세요.
[【키 가져오기: 링크를 열고 나면 필수 조건을 꼼꼼히 읽어주세요 】](https://learn.microsoft.com/zh-cn/azure/ai-services/speech-service/get-started-text-to-speech)""",
201 | "Please Select Your Speaker!":"당신의 화자를 선택하세요!",
202 | "Please fill in your key!":"키를 구성하세요!",
203 |
204 | #custom api
205 | "Choose Custom API Code File":"사용자 지정 API 코드 파일 선택",
206 | "No custom API code file found.":"현재 사용자 지정 API 사전 설정이 없습니다",
207 | "Please select a valid custom API code file!":"유효한 API 구성 파일을 선택하세요!",
208 |
209 | #Subtitle Translation
210 | "Start Translating":"번역 시작",
211 | "Translating":"번역 중",
212 | "Failed to translate":"번역 실패",
213 | "Subtitle Translation":"자막 번역",
214 | "Upload your subtitle files (multiple allowed).":"자막 업로드(여러 개 허용)",
215 | "Send output files to Main Page":"메인 페이지로 출력 파일 보내기",
216 | "Send output files to Translator":"번역 페이지로 출력 파일 보내기",
217 | "Specify Target Language":"목표 언어 선택",
218 | "File Output Path":"파일 출력 경로",
219 | "Select Translator":"번역기 선택",
220 |
221 | #Ollama
222 | "Failed to get model list from Ollama":"Ollama에서 모델 목록을 가져오지 못했습니다",
223 | "You must specify the model!":"모델을 지정해야 합니다!",
224 | "Select Your Model":"모델 선택",
225 | "Unload Model":"모델 언로드",
226 | "OLLAMA_NOTICE":"⚠️LLM은 실행 중에 많은 VRAM을 사용합니다. 사용이 끝나면 해당 모델을 선택하고 언로드하여 비디오 메모리를 해제하지 마세요!⚠️",
227 |
228 | #EXTENSIONS
229 |
230 | #WAV2SRT
231 | "Audio/Video Transcribe":"오디오/비디오 자막 변환",
232 | "Upload File":"파일 업로드",
233 | "Save Path(Folder Path), Default: SAVAdata\\output":"저장 경로, 폴더 이름 입력, 기본값: SAVAdata\\output",
234 | "Python Interpreter Path, align with GSV by default":"Python 인터프리터 경로, 기본적으로 GSV와 일치",
235 | "Select ASR model. Funasr supports only Chinese(but much more faster) while Faster-Whisper has multi-language support":"ASR 모델 선택, funasr은 중국어만 지원하지만 더 빠르고 정확하며, faster whisper는 다중 언어 지원",
236 | "(ms)Minimum length of each segment":"(ms)각 분절의 최소 길이",
237 | "(ms)Minium slice interval":"(ms)최소 슬라이스 간격",
238 | "(ms)Minium silence length":"(ms)자른 후 최대 정지 시간",
239 | "Other Parameters":"다른 매개변수",
240 | "Start":"시작",
241 | "Stop":"중지",
242 | "Please upload audio or video!":"오디오 파일을 업로드하세요!",
243 | "Please specify Python Interpreter!":"인터프리터를 지정하세요!",
244 | "Processing":"처리 중",
245 | "Tasks are terminated due to an error in":"작업 중 에러로 인해 작업이 종료됨:",
246 | "Finished":"작업 종료",
247 | "WAV2SRT_INFO":"""
248 | 이 기능은 GPT-SoVITS 통합 팩에 직접 사용할 수 있으며, 그렇지 않으면 해당 종속성을 직접 설치해야 합니다.
249 | # 다른 매개변수:
250 | `--whisper_size` 기본값:large-v3 | faster whisper를 사용할 때 모델 지정
251 | `--threshold` 기본값:-40 | 음량이 이 값보다 작으면 정지된 상태로 간주하여 대체 분할 지점으로 간주
252 | `--hop_size` 기본값:20 | 음량 곡선을 계산하는 방법. 값이 작을수록 정확도가 높아지고 계산량이 증가합니다(정확도가 높다고 해서 항상 효과가 좋은 것은 아닙니다)
253 | """,
254 | }
--------------------------------------------------------------------------------
/Sava_Utils/i18nAuto/translations/zh_CN.py:
--------------------------------------------------------------------------------
1 | i18n_dict = {
2 | # functions in main
3 | "You need to load custom API functions!": "需要加载自定义API函数!",
4 | "Please upload the subtitle file!": "请上传字幕文件!",
5 | "The current mode does not allow batch processing!": "当前不允许批量处理!",
6 | "Synthesizing single-speaker task": "正在合成单说话人任务",
7 | "All subtitle syntheses have failed, please check the API service!": "所有的字幕合成都出错了,请检查API服务!",
8 | "Done! Time used": "完成!所用时间",
9 | "There is no subtitle in the current workspace": "当前没有字幕",
10 | "Warning: No speaker has been assigned": "警告:没有指派任何说话人",
11 | "No subtitles are going to be resynthesized.": "没有需要重新合成的字幕!",
12 | "Using default speaker": "当前使用选定的默认说话人",
13 | "Speaker archive not found": "找不到说话人存档",
14 | "Synthesizing multi-speaker task, the current speaker is": "正在合成多说话人任务,当前说话人为",
15 | "Synthesis for the single speaker has failed !": "单一说话人的全部语音合成失败了!",
16 | "Failed to execute ffmpeg.": "执行ffmpeg命令失败!",
17 | "Done!": "完成!",
18 | "Failed subtitle id": "出错字幕id",
19 | "error message received": "接收的报错信息为",
20 | "Please go to the settings page to specify the corresponding environment path and do not forget to save it!": "请前往设置页面指定环境路径并保存!",
21 | " has been launched, please ensure the configuration is correct.": "已启动,请确保其配置文件无误。",
22 | "API downgraded to v1, functionality is limited.": "API降级至v1,功能受限。",
23 | "You must specify the speakers while using multi-speaker dubbing!": "使用多角色配音时,必须指定说话人!",
24 | "Audio re-generation was successful! Click the button.": "重新合成成功!点击重新拼接内容。",
25 | "Audio re-generation failed!": "重新合成失败!",
26 | "Reassemble successfully!": "重新合成完毕!",
27 | "This function has been disabled!": "当前功能已被禁用!",
28 | "Please enter a valid name!": "请输入有效的名称!",
29 | "Saved successfully": "保存成功",
30 | # UI in main
31 | "Subtitle Dubbing": "字幕配音",
32 | "File content": "文件内容展示",
33 | "Speaker Map": "说话人映射表",
34 | "Enable Marking Mode": "启用标记模式",
35 | "Speaker map is empty.": "映射表是空的",
36 | "Original Speaker": "原始说话人",
37 | "Target Speaker": "映射后的目标说话人",
38 | "Select Original Speaker": "选择原始说话人",
39 | "Select Target Speaker": "选择目标说话人",
40 | "From Upload File": "从上传文件创建",
41 | "From Workspace": "从当前字幕创建",
42 | "Identify Original Speakers": "识别原始说话人",
43 | "Apply to current Workspace": "应用到当前字幕",
44 | "Create Multi-Speaker Dubbing Project": "创建多角色项目",
45 | "Custom API": "自定义API",
46 | "Other Parameters": "其他参数",
47 | "Frame rate of Adobe Premiere project, only applicable to csv files exported from Pr": "Pr项目帧速率,仅适用于Pr导出的csv文件",
48 | "API Launcher": "启动API服务",
49 | "Number of threads for sending requests": "请求线程数",
50 | "Voice time offset (seconds)": "语音时间偏移(秒) 延后或提前所有语音的时间",
51 | "Upload file (Batch mode only supports one speaker at a time)": "上传文件(批量模式只支持单个同一说话人)",
52 | "Output Info": "输出信息",
53 | "Output File": "输出文件",
54 | "Editing area *Note: DO NOT clear temporary files while using this function.": "编辑区域 *Note:请勿在使用本功能时清除临时文件。",
55 | "History": "合成历史",
56 | "Load": "加载",
57 | "Reassemble Audio": "重新拼接",
58 | "Export Subtitles": "导出字幕",
59 | "Select All": "全选",
60 | "Reverse Selection": "反选",
61 | "Clear Selection": "清除选择",
62 | "Apply Timestamp modifications": "应用时间码",
63 | "Copy": "复制",
64 | "Merge": "合并",
65 | "Delete": "删除",
66 | "Continue Generation": "继续生成",
67 | "Find What": "查找目标",
68 | "Replace With": "替换为",
69 | "Enable Regular Expression": "使用正则表达式",
70 | "Find and Replace": "查找和替换",
71 | "Replace All": "全部替换",
72 | "You must enter the text to find.": "你必须输入查找目标文本!",
73 | "Multi-speaker dubbing": "多角色配音",
74 | "Select/Create Speaker": "选定/创建说话人",
75 | "TTS Project": "说话人所属项目",
76 | "Start Multi-speaker Synthesizing": "生成多角色配音",
77 | "Auxiliary Functions": "辅助功能",
78 | "Extended Contents": "外部扩展内容",
79 | "Settings": "设置",
80 | "Readme": "简介",
81 | "Issues": "常见错误",
82 | "Help & User guide": "使用指南",
83 | # utils
84 | "After completing the generation of the next audio, the task will be aborted.": "完成下一条音频生成后中止任务...",
85 | "No running tasks.": "没有进行中的任务",
86 | "An error occurred": "出现错误",
87 | "Server Mode has been enabled!": "服务模式已启用!",
88 | "Temporary files cleared successfully!": "成功清除临时文件!",
89 | "There are no temporary files.": "目前没有临时文件!",
90 | "Execute command": "执行命令",
91 | "No running processes": "没有运行的进程",
92 | "Process terminated.": "已终止进程",
93 | "": "<多个文件>",
94 | "Failed to read file": "读取字幕文件出错",
95 | "Error: File too large": "错误:文件过大",
96 | "Unknown format. Please ensure the extension name is correct!": "未知的格式,请确保扩展名正确!",
97 | "Creating a multi-speaker project can only upload one file at a time!": "创建多角色配音工程只能上传有且只有一个文件!",
98 | # edit_panel
99 | "Not available!": "不可用!",
100 | "Must not be empty!": "不得为空!",
101 | "No subtitles selected.": "未选中任何字幕",
102 | "Please select both the start and end points!": "请选择起点和终点!",
103 | "Input format mismatch": "输入格式不匹配",
104 | # subtitle.py
105 | "Subtitles have not been synthesized yet!": "还未合成任何字幕!",
106 | "The following subtitles are delayed due to the previous audio being too long.": "以下字幕由于之前的音频过长而被延迟",
107 | "Failed to synthesize the following subtitles or they were not synthesized": "以下字幕合成失败或未合成",
108 | # Settings
109 | "Failed to load settings, reset to default": "设置加载失败,恢复默认",
110 | "Error, Invalid Path": "错误,无效的路径",
111 | "Env detected": "已检测到环境",
112 | "Restarting...": "正在重启...",
113 | "An error occurred. Please restart manually!": "出现错误,请手动重启!",
114 | "Settings saved successfully!": "成功保存设置!",
115 | "Settings have been disabled!": "设置已被禁用",
116 | "Click Apply & Save for these settings to take effect.": "点击应用后,这些设置才会生效。",
117 | "General": "通用设置",
118 | "The port used by this program, 0=auto. When conflicts prevent startup, use -p parameter to specify the port.": "本程序所使用的端口 0=自动。当冲突无法启动时,使用参数-p来指定启动端口",
119 | "Enable LAN access. Restart to take effect.": "开启局域网访问,重启生效",
120 | "Overwrite history records with files of the same name instead of creating a new project.": "同名文件覆盖历史记录而不是新建工程",
121 | "Clear temporary files on each startup": "每次启动时清除临时文件",
122 | "Concurrency Count": "可同时处理多少请求",
123 | "Server Mode can only be enabled by modifying configuration file or startup parameters.": "服务模式,只能通过修改配置文件或启动参数开启",
124 | "Minimum voice interval (seconds)": "语音最小间隔(秒)",
125 | "Maximum audio acceleration ratio (requires ffmpeg)": "音频最大加速倍率,尝试加速音频以和起止时间同步。要求安装ffmpeg",
126 | "Sampling rate of output audio, 0=Auto": "输出音频采样率,0=自动",
127 | "Remove inhalation and silence at the beginning and the end of the audio": "去除音频开头结尾的吸气声和静音",
128 | "Edit Panel Row Count (Requires a restart)": "编辑栏行数,重启生效",
129 | "Export subtitles with speaker name. Fill in your template to enable.": "导出字幕时导出角色名,填写模版视为启用",
130 | "Theme (Requires a restart)": "选择主题,重启后生效,部分主题可能需要科学上网",
131 | "Clear temporary files": "立即清除临时文件",
132 | "Storage Management": "储存空间管理",
133 | "List Archives": "列出所有存档",
134 | "No Archives Found. Click the button to refresh.": "没有存档,点击<列出所有存档>按钮刷新",
135 | "Submodule Settings": "子模块设置",
136 | "Python Interpreter Path for BV2": "设置BV2环境路径",
137 | "Root Path of BV2": "BV2项目根目录",
138 | "Start Parameters": "启动参数",
139 | "Downgrade API version to v1": "使用api_v1而不是v2",
140 | "Python Interpreter Path for GSV": "设置GSV环境路径",
141 | "Root Path of GSV": "GSV项目根目录",
142 | "Server Region": "服务区域",
143 | "KEY Warning: Key is stored in plaintext. DO NOT send the key to others or share your configuration file!": "密钥 警告:密钥明文保存,请勿将密钥发送给他人或者分享设置文件!",
144 | "Select required languages, separated by commas or spaces.": "筛选需要的语言,用逗号或空格隔开",
145 | "Translation Module": "翻译模块设置",
146 | "Default Request Address for Ollama": "Ollama默认请求地址",
147 | "Apply & Save": "应用并保存当前设置",
148 | "Restart UI": "重启UI",
149 | # TTS
150 | "An error has occurred. Please check if the API is running correctly. Details": "发生错误,请检查API是否正确运行。报错内容",
151 | "Advanced Parameters": "高级合成参数",
152 | "Generate Audio": "生成",
153 | # BV2
154 | "Select Speaker ID or Speaker Name": "选择说话人id或输入名称",
155 | # GSV(AR)
156 | "Returned Message": "返回信息",
157 | "Select TTS Project": "选择TTS项目",
158 | "Inference text language": "要合成的语言",
159 | "Reference Audio": "参考音频",
160 | "Main Reference Audio": "主参考音频",
161 | "Auxiliary Reference Audios": "辅参考音频",
162 | "Transcription of Main Reference Audio": "主参考音频文本",
163 | "Transcription | Pretrained Speaker (Cosy)": "参考音频文本|Cosy预训练音色",
164 | "Language of Main Reference Audio": "参考音频语言",
165 | "Model Path": "模型路径",
166 | "Switch Models": "模型切换",
167 | "Fragment Interval(sec)": "分段间隔(秒)",
168 | "How to cut": "怎么切",
169 | "(Optional) Description": "描述信息,可选",
170 | "Presets": "预设",
171 | "You must upload Main Reference Audio": "你必须指定主参考音频",
172 | "Preset saved successfully": "预设保存成功",
173 | "Failed to switch model": "模型切换失败",
174 | "Preset has been loaded.": "预设加载完毕",
175 | "Models are not switched. If you need to switch, please manually click the button.": "当前未切换模型,若需要强制切换请手动点击按钮",
176 | "Please specify the model path!": "请指定模型路径!",
177 | "Switching Models...": "正在切换模型...",
178 | "Model Paths seem to be invalid, which could lead to errors!": "模型路径可能无效,会导致切换错误!",
179 | "You have incorrectly entered a folder path!": "你错误地填写成了文件夹的路径!!!",
180 | "Models switched successfully": "模型已切换",
181 | "GSV root path has been not configured or does not exist.": "GSV项目根目录未配置或不存在!",
182 | "Error details": "报错内容",
183 | "Successfully deleted": "删除成功",
184 | "Please select a valid preset!": "请选择一个有效的预设!",
185 | "No preset available": "当前没有预设",
186 | "Partial auxiliary reference audio is missing!": "辅助参考音频存在丢失!",
187 | "DICT_LANGUAGE": {
188 | "中文": "all_zh",
189 | "粤语": "all_yue",
190 | "英文": "en",
191 | "日文": "all_ja",
192 | "韩文": "all_ko",
193 | "中英混合": "zh",
194 | "粤英混合": "yue",
195 | "日英混合": "ja",
196 | "韩英混合": "ko",
197 | "多语种混合": "auto",
198 | "多语种混合(粤语)": "auto_yue",
199 | },
200 | "CUT_METHOD": {
201 | "不切": "cut0",
202 | "凑四句一切": "cut1",
203 | "凑50字一切": "cut2",
204 | "按中文句号。切": "cut3",
205 | "按英文句号.切": "cut4",
206 | "按标点符号切": "cut5",
207 | },
208 | # MSTTS
209 | "Please fill in your key to get MSTTS speaker list.": "要获取微软TTS说话人列表,你必须先填写密钥!",
210 | "Can not get speaker list of MSTTS. Details": "无法下载微软TTS说话人列表。报错内容",
211 | "Failed to obtain access token from Microsoft.": "获取微软token出错",
212 | "Failed to obtain access token from Microsoft. Check your API key, server status, and network connection. Details": "获取微软token出错,检查密钥、服务器状态和网络连接。报错内容",
213 | "Can not access Microsoft TTS service. Check your API key, server status, and network connection. Details": "微软TTS出错,检查密钥、服务器状态和网络连接。报错内容",
214 | "Refresh speakers list": "刷新说话人列表",
215 | "Choose Language": "选择语言",
216 | "Choose Your Speaker": "选择你的说话人",
217 | "Style": "说话风格",
218 | "Role": "角色扮演",
219 | "Speed": "语速",
220 | "Pitch": "音调",
221 | "MSTTS_NOTICE": """使用微软TTS需要联网,请先前往设置页填入服务区和密钥才可以使用。请注意每个月的免费额度。
[【关于获取密钥:打开链接后请仔细阅读 先决条件 】](https://learn.microsoft.com/zh-cn/azure/ai-services/speech-service/get-started-text-to-speech)""",
222 | "Please Select Your Speaker!": "请选择你的说话人!",
223 | "Please fill in your key!": "请配置密钥!",
224 | # custom api
225 | "Choose Custom API Code File": "选择自定义API代码文件",
226 | "No custom API code file found.": "当前没有自定义API预设",
227 | "Please select a valid custom API code file!": "请选择有效的API配置文件!",
228 | # Subtitle Translation
229 | "Start Translating": "开始翻译",
230 | "Translating": "正在翻译",
231 | "Failed to translate": "翻译失败",
232 | "Subtitle Translation": "字幕翻译",
233 | "Upload your subtitle files (multiple allowed).": "上传字幕(可多个)",
234 | "Send output files to Main Page": "发送至主页面",
235 | "Send output files to Translator": "发送至翻译页面",
236 | "Specify Target Language": "选择目标语言",
237 | "File Output Path": "文件输出路径",
238 | "Select Translator": "选择翻译器",
239 | # Ollama
240 | "Failed to get model list from Ollama": "Ollama获取模型列表失败",
241 | "You must specify the model!": "你必须指定模型!",
242 | "The language model has probably made a mistake": "模型很有可能犯了错",
243 | "Select Your Model": "选择模型",
244 | "Unload Model": "卸载模型",
245 | "OLLAMA_NOTICE": "⚠️LLM在运行时会占用较多VRAM。使用完毕后不要忘了选择并卸载对应模型以释放显存!⚠️",
246 | "Custom prompt (enabled when filled in)": "自定义提示词(填写视为启用)",
247 | "History Message Limit": "上下文消息限制",
248 | # Polyphone Editor
249 | "POLYPHONE_NOTICE": "⚠️本功能可修改GPT-SoVITS多音字配置,保存修改后重启API生效⚠️",
250 | "Polyphone Editor": "多音字编辑",
251 | "Overwrite instead of Append": "覆写而不是追加",
252 | "Read": "读取",
253 | "Save": "保存",
254 | # EXTENSIONS
255 | # WAV2SRT
256 | "Audio/Video Transcribe": "音视频转字幕",
257 | "Upload File": "上传文件",
258 | "Save Path(Folder Path), Default: SAVAdata\\output": "保存路径,填文件夹名,默认为SAVAdata\\output",
259 | "Python Interpreter Path, align with GSV by default": "Python解释器路径,默认和GSV一致",
260 | "Select ASR model. Funasr supports only Chinese(but much more faster) while Faster-Whisper has multi-language support": "选择ASR模型,funasr只支持中文但更快更准,faster whisper支持多语言",
261 | "(ms)Minimum length of each segment": "(ms)每段最小多长",
262 | "(ms)Minium slice interval": "(ms)最短切割间隔",
263 | "(ms)Minium silence length": "(ms)切完后静音最多留多长",
264 | "Other Parameters": "其他参数",
265 | "Start": "开始",
266 | "Stop": "停止",
267 | "Please upload audio or video!": "请上传音频文件!",
268 | "Please specify Python Interpreter!": "请指定解释器!",
269 | "Processing": "正在进行",
270 | "Tasks are terminated due to an error in": "任务出错,终止:",
271 | "Finished": "任务结束",
272 | "WAV2SRT_INFO": """
273 | 本功能可直接用于GPT-SoVITS整合包,否则需要自己安装对应依赖。
274 | # 其他参数:
275 | `--whisper_size` 默认:large-v3 | 使用faster whisper时指定模型
276 | `--threshold` 默认:-40 | 音量小于这个值视作静音的备选切割点
277 | `--hop_size` 默认:20 | 怎么算音量曲线,越小精度越大计算量越高(不是精度越大效果越好)
278 | """,
279 | }
280 |
--------------------------------------------------------------------------------
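The dictionary above is consumed through the `i18nAuto` package's `i18n()` calls seen elsewhere in the code. The exact implementation lives in `Sava_Utils/i18nAuto` and is not shown here; below is a minimal sketch of the contract callers appear to rely on (unknown keys fall back to the key itself, so untranslated strings still render):

```python
# Minimal sketch of the lookup contract; the real i18nAuto implementation may differ.
translations = {"Read": "读取", "Save": "保存"}  # excerpt from the table above

def i18n(key: str) -> str:
    # Missing entries degrade gracefully to the English key.
    return translations.get(key, key)

assert i18n("Read") == "读取"
assert i18n("Not translated") == "Not translated"
```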
/Sava_Utils/librosa_load.py:
--------------------------------------------------------------------------------
1 | import numpy as np
2 | import soundfile as sf
3 | import soxr
4 |
5 | # obtained from librosa
6 |
7 |
8 | def to_mono(y):
9 | if y.ndim > 1:
10 | y = np.mean(y, axis=tuple(range(y.ndim - 1)))
11 | return y
12 |
13 |
14 | def fix_length(data, *, size, axis=-1, **kwargs):
15 | kwargs.setdefault("mode", "constant")
16 |
17 | n = data.shape[axis]
18 |
19 | if n > size:
20 | slices = [slice(None)] * data.ndim
21 | slices[axis] = slice(0, size)
22 | return data[tuple(slices)]
23 |
24 | elif n < size:
25 | lengths = [(0, 0)] * data.ndim
26 | lengths[axis] = (0, size - n)
27 | return np.pad(data, lengths, **kwargs)
28 |
29 | return data
30 |
31 |
32 | def resample(
33 | y: np.ndarray,
34 | *,
35 | orig_sr: float,
36 | target_sr: float,
37 | res_type: str = "soxr_hq",
38 | fix: bool = True,
39 | scale: bool = False,
40 | axis: int = -1,
41 | **kwargs,
42 | ):
43 | ratio = float(target_sr) / orig_sr
44 | n_samples = int(np.ceil(y.shape[axis] * ratio))
45 | y_hat = np.apply_along_axis(
46 | soxr.resample,
47 | axis=axis,
48 | arr=y,
49 | in_rate=orig_sr,
50 | out_rate=target_sr,
51 | quality=res_type,
52 | )
53 |
54 | if fix:
55 | y_hat = fix_length(y_hat, size=n_samples, **kwargs)
56 |
57 | if scale:
58 | y_hat /= np.sqrt(ratio)
59 |
60 | return np.asarray(y_hat, dtype=y.dtype)
61 |
62 |
63 | def get_rms(
64 | y,
65 | frame_length=2048,
66 | hop_length=512,
67 | pad_mode="constant",
68 | ):
69 | padding = (int(frame_length // 2), int(frame_length // 2))
70 | y = np.pad(y, padding, mode=pad_mode)
71 |
72 | axis = -1
73 | # put our new within-frame axis at the end for now
74 | out_strides = y.strides + tuple([y.strides[axis]])
75 | # Reduce the shape on the framing axis
76 | x_shape_trimmed = list(y.shape)
77 | x_shape_trimmed[axis] -= frame_length - 1
78 | out_shape = tuple(x_shape_trimmed) + tuple([frame_length])
79 | xw = np.lib.stride_tricks.as_strided(y, shape=out_shape, strides=out_strides)
80 | if axis < 0:
81 | target_axis = axis - 1
82 | else:
83 | target_axis = axis + 1
84 | xw = np.moveaxis(xw, -1, target_axis)
85 | # Downsample along the target axis
86 | slices = [slice(None)] * xw.ndim
87 | slices[axis] = slice(0, None, hop_length)
88 | x = xw[tuple(slices)]
89 |
90 | # Calculate power
91 | power = np.mean(np.abs(x) ** 2, axis=-2, keepdims=True)
92 |
93 | return np.sqrt(power)
94 |
95 |
96 | def remove_opening_silence(audio, sr, padding_begin=0.1, padding_fin=0.2, threshold_db=-27):
97 | # Padding(sec) is actually margin of safety
98 | hop_length = 512
99 | rms_list = get_rms(audio, hop_length=hop_length).squeeze(0)
100 | threshold = 10 ** (threshold_db / 20.0)
101 | for i, rms in enumerate(rms_list):
102 | if rms >= threshold:
103 | break
104 | for j, rms in enumerate(reversed(rms_list)):
105 | if rms >= threshold:
106 | break
107 | cutting_point1 = max(i * hop_length - int(padding_begin * sr), 0)
108 | cutting_point2 = min((rms_list.shape[-1] - j) * hop_length + int(padding_fin * sr), audio.shape[-1])
109 | audio = audio[cutting_point1:cutting_point2]
110 | return audio
111 |
112 |
113 | def load_audio(filepath, sr=None):
114 | y, sr_native = sf.read(filepath)
115 | y = to_mono(y)
116 | if sr != sr_native and sr not in [None, 0]:
117 | y = resample(y, orig_sr=sr_native, target_sr=sr)
118 | return y, sr
119 | else:
120 | return y, sr_native
121 |
--------------------------------------------------------------------------------
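The helpers in `librosa_load.py` above reimplement a small slice of librosa on top of `soundfile` and `soxr`. A minimal usage sketch, assuming those packages are installed and `input.wav` is a placeholder path:

```python
# Hypothetical usage of the helpers defined in Sava_Utils/librosa_load.py.
from Sava_Utils.librosa_load import load_audio, remove_opening_silence

# Load as mono and resample to 32 kHz (passing sr=0 or None keeps the native rate).
y, sr = load_audio("input.wav", sr=32000)

# Trim leading/trailing silence below -27 dB, keeping small safety margins
# (0.1 s before the first voiced frame, 0.2 s after the last one).
y = remove_opening_silence(y, sr, padding_begin=0.1, padding_fin=0.2, threshold_db=-27)
```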
/Sava_Utils/man/__init__.py:
--------------------------------------------------------------------------------
1 | import locale
2 | from .. import logger
3 |
4 |
5 | class Man:
6 | def __init__(self, language=None):
7 | if language in ["Auto", None]:
8 | language = locale.getdefaultlocale()[0]
9 | ls = dict()
10 | for x in ['README', 'changelog', 'title', 'help_custom', 'issues', 'help']:
11 | try:
12 | exec(f"from .{language} import {x}", globals(), ls)
13 | except ImportError:
14 | exec(f"from .en_US import {x}", globals(), ls)
15 | logger.info(f"Manual <{x}> does not support {language}.")
16 | self.Manual_dict = {
17 | "readme": ls["README"].README,
18 | "changelog": ls["changelog"].changelog,
19 | "title": ls["title"].title,
20 | "help_custom": ls["help_custom"].help_custom,
21 | "issues": ls["issues"].issues,
22 | "help": ls["help"].help,
23 | }
24 |
25 | def getInfo(self, key):
26 | return self.Manual_dict[key]
27 |
--------------------------------------------------------------------------------
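`Man.__init__` above resolves each manual page with `exec`-based imports so that any page missing for the detected locale (e.g. `help_custom` under `ja_JP`) falls back to `en_US`. A behavior-equivalent sketch of that fallback using `importlib` instead of `exec`; the module layout is as shown above, while the function name is hypothetical:

```python
import importlib

def load_manual_page(language: str, page: str):
    """Return Sava_Utils.man.<language>.<page>, falling back to en_US."""
    for lang in (language, "en_US"):
        try:
            return importlib.import_module(f"Sava_Utils.man.{lang}.{page}")
        except ImportError:
            continue  # page not available for this locale, try the fallback
    raise ImportError(f"manual page <{page}> not found for {language} or en_US")
```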
/Sava_Utils/man/en_US/README.py:
--------------------------------------------------------------------------------
1 | README = r"""
2 | # Srt-AI-Voice-Assistant
3 | ### This project can use multiple AI-TTS engines to dub your subtitle or text files.
It also provides various convenient auxiliary functions, including audio/video transcription and subtitle translation.
4 | If you have encountered problems or want to create a feature request, please go to [Issues](https://github.com/YYuX-1145/Srt-AI-Voice-Assistant/issues) .
5 | ## Features
6 | - ✅ Open source, friendly WebUI, runs locally, and is accessible via LAN
7 | - ✅ Supports multiple TTS projects: BV2, GSV, CosyVoice2, AzureTTS, and you can even plug in your own custom APIs!
8 | - ✅ Save personalized settings and presets
9 | - ✅ Batch mode
10 | - ✅ Subtitle editing
11 | - ✅ Subtitle translation
12 | - ✅ Regenerating Specific Lines
13 | - ✅ Support multi-speaker dubbing
14 | - ✅ Re-export subtitles
15 | - ✅ Extended functions: subtitle transcription for audio/video
16 | - ✅ I18n
17 |
18 | ## [Download the packaged version only](https://github.com/YYuX-1145/Srt-AI-Voice-Assistant/releases)
19 | * Use this version only when there are dependency conflicts or installation issues.
20 |
21 | ## [Download the integrated package with GPT-SoVITS (From Hugging Face)](https://huggingface.co/YYuX/GPT-SoVITS-SAVA-windows-package/tree/main)
22 | * The GPT-SoVITS integrated package includes the packaged version, without removing any built-in or pretrained models, and its finetuning and training code is the same as the official repository's.
23 | * Note: the packaged version included in the GPT-SoVITS integrated package may not be the latest; overwrite it to update.
24 | """
25 |
--------------------------------------------------------------------------------
/Sava_Utils/man/en_US/__init__.py:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/YYuX-1145/Srt-AI-Voice-Assistant/66fb0108fa6b2e34a160a41d105182866fd704f2/Sava_Utils/man/en_US/__init__.py
--------------------------------------------------------------------------------
/Sava_Utils/man/en_US/changelog.py:
--------------------------------------------------------------------------------
1 | changelog = r"""
2 | ## ChangeLog
3 |
4 | ### V4-2503 Update:
5 | #### To make versions clearer, version numbers are now assigned in addition to release dates.
6 | #### After this update, the synthesis history and saved speakers from the previous version need to be recreated; otherwise, errors may occur!
7 | 1. Subtitle editing
8 | 2. Subtitle translation
9 | 3. Various details improved and bugs fixed
10 | 4. Supports CosyVoice2 (reusing GSV panel)
11 | 5. (4.0.1) Batch mode
12 | 6. (4.1) Server mode
13 | 7. (4.2) I18n
14 | 8. (4.3) Automatic audio acceleration & silence removing; Creating multi-speaker dubbing project from labeled texts.
15 | 9. (4.3.1) Add Find and Replace; add a one-click regeneration button.
16 | 10. (4.4) Polyphone editing for GPT-SoVITS and automatic model detection; Allow custom prompt for Ollama; Export subtitles with speaker names using customizable templates
17 |
18 | ### 250214 Update:
19 | 1. Supports reading historical projects
20 | 2. Supports multi-speaker dubbing
21 |
22 | ### 250123 Update:
23 | 1. Supports re-exporting SRT subtitle files that match the actual start and end timestamps after synthesis; also supports reading TXT text files for synthesis, in which case paragraphs are split by sentences.
24 | 2. For future extensibility and simplicity, the single-script-file design, which made downloads more convenient, had to be abandoned. The code will be refactored step by step starting from this version.
25 | 3. Added some documentation.
26 |
27 | ### 240811 Update:
28 | 1. Notifies users of the error message
29 | 2. Automatic detection of TTS-Project envs
30 | 3. Compatibility with api-v1 restored
31 | 4. A major feature update: Support regenerating specific lines if you're not satisfied with them.
32 | """
33 |
--------------------------------------------------------------------------------
/Sava_Utils/man/en_US/help.py:
--------------------------------------------------------------------------------
1 | help = r"""
2 | # User Guide
3 |
4 | ## 0. Service Configuration and Usage
5 | #### This project can call 2 local projects: Bert-VITS2, GPT-SoVITS
6 | #### And 1 online service: Microsoft TTS
7 | * **For Local TTS Projects**:
8 |
9 | * Fill in and save the project root path and the corresponding python interpreter path in the settings page.
10 | * **A Simpler method**: Place the program in the root directory of the integrated package, then click the corresponding button on the first page to start the API service!
11 |
12 | * **For Microsoft TTS**:
13 |
14 | * Follow the tutorial to register an account and fill in the API key on the settings page.
15 | * Note the monthly free quota!
16 |
17 | ## 1. Getting Started
18 | ### This project supports dubbing for subtitles or plain text.
19 | * **For subtitles**:
20 |
21 | * When a subtitle is too long, subsequent subtitles will be delayed accordingly. You can set the minimum voice interval in the settings.
22 |
23 | * **For plain text**:
24 |
25 | * The text will be split into subtitle entries based on ending punctuation and line breaks.
26 |
27 | * After generation, you can export subtitles with actual audio timestamps in the editing page.
28 |
29 | ### A. Single Speaker Scenario
30 | * **I.** Upload subtitle or text files in the right panel of the `Subtitle Dubbing` page.
31 |
32 | * **II.** Select your project and adjust parameters in the middle panel.
33 |
34 | * **III.** Click `Generate Audio` Button at the bottom and wait.
35 |
36 | * **IV.** Download your audio.
37 |
38 | ### B. Multi-Speaker Scenario
39 | * **I.** Upload subtitle/text files in the right panel of `Subtitle Dubbing`.
40 | * Marking mode: The content of the file should be as follows: `Speaker:Content`, e.g. `Jerry: Hello.` The mapping table can convert the original speaker in the text file into the corresponding target speaker.
41 |
42 | * **II.** Click `Create Multi-Speaker Dubbing Project` below the file display.
43 |
44 | * **III.** Create speakers:
45 | * **a.** Expand the Multi-Speaker Dubbing section at the bottom of the editing page.
46 | * **b.** Select the target project.
47 | * **c.** In the Select/Create Speaker box, enter a speaker name.
48 | * **d.** Adjust parameters (including port numbers) and click 💾 to save. Duplicate names will overwrite existing speakers.
49 |
50 | * **IV.** Select a speaker from the dropdown, check corresponding subtitles, then click ✅ to apply. Speaker info will appear in Column 4.
51 |
52 | * **V.** The last assigned speaker becomes the default speaker (applies to unassigned subtitles in multi-speaker projects).
53 |
54 | * **VI.** Click Generate Multi-Speaker Dubbing to start generation.
55 | * ⚠️ If the current language differs from the one used when a GSV speaker was created, that speaker will not work properly.
56 |
57 | ### Regenerating Specific Lines
58 | * **I.** Locate the target subtitle using the slider in the editing page.
59 |
60 | * **II.** Modify the text if needed. Changes are auto-saved after regeneration.
61 |
62 | * **III.** Click 🔄 to regenerate a single line:
63 |
64 | * Uses project parameters if unassigned.
65 | * Uses speaker-specific parameters if assigned.
66 | * Multi-speaker projects must have assigned speakers.
67 |
68 | * **IV.** After making changes to the subtitles, you can also click `Continue Generation` to regenerate the audios of the changed subtitles or those that failed to be synthesized.
69 |
70 | * **V.** Click `Reassemble Audio` to recompose full audio.
71 |
72 | ### C. Re-editing Historical Projects
73 | * Select a project from the synthesis history in the top panel, then click the `Load` button.
74 | * The rest is self-explanatory.
75 |
76 | ### D. Subtitle Editing
77 | #### 1. Copy
78 | * Copy selected subtitles.
79 |
80 | #### 2. Delete
81 | * Delete selected subtitles.
82 |
83 | #### 3. Merge
84 | * Select no less than 2 subtitles as start/end points.
85 | * Subtitles from the starting point to the ending point will be merged.
86 |
87 | ⚠️ Changes aren't saved to disk immediately, so you can reload the project to undo them.
88 |
89 | #### 4. Modify Timestamps
90 | * Edit start/end times in SRT format.
91 | * Click `Apply Timestamps` to save changes.
92 |
93 | ⚠️ Unapplied changes will be lost during navigation.
94 |
95 | ## 2. Troubleshooting
96 | * When reporting issues:
97 | Describe the problem in detail and list steps taken before the error occurred.
98 | * Go to [GitHub-issues](https://github.com/YYuX-1145/Srt-AI-Voice-Assistant/issues) to report a problem or ask for help (Issue templates will guide proper reporting).
99 | """
100 |
--------------------------------------------------------------------------------
/Sava_Utils/man/en_US/help_custom.py:
--------------------------------------------------------------------------------
1 | help_custom = r"""
2 | ## Security Warning: This feature will execute external code!
3 | ### Please inspect the code content before running it; executing untrusted code may put your computer at risk!
4 | ### The author bears no responsibility for any consequences!
5 |
6 | ### Place code files containing Python functions in the SAVAdata/presets directory, and they will be callable.
7 | * Here is an example code for Gradio API.
8 | ```
9 | def custom_api(text): #return: audio content
10 | from gradio_client import Client
11 | client = Client("http://127.0.0.1:7860/")
12 | result = client.predict(
13 | text, # str in '输入文本内容' Textbox component
14 | "神里绫华", # str (Option from: [('神里绫华', '神里绫华')]) in 'Speaker' Dropdown component
15 | 0.1, # int | float (numeric value between 0 and 1) in 'SDP Ratio' Slider component
16 | 0.5, # int | float (numeric value between 0.1 and 2) in 'Noise' Slider component
17 | 0.5, # int | float (numeric value between 0.1 and 2) in 'Noise_W' Slider component
18 | 1, # int | float (numeric value between 0.1 and 2) in 'Length' Slider component
19 | "auto", # str (Option from: [('ZH', 'ZH'), ('JP', 'JP'), ('EN', 'EN'), ('mix', 'mix'), ('auto', 'auto')]) in 'Language' Dropdown component
20 | "", # str (filepath on your computer (or URL) of file) in 'Audio prompt' Audio component
21 | "", # str in 'Text prompt' Textbox component
22 | "", # str in 'Prompt Mode' Radio component
23 | "", # str in '辅助文本' Textbox component
24 | 0, # int | float (numeric value between 0 and 1) in 'Weight' Slider component
25 | fn_index=0
26 | )
27 | with open(result[1],'rb') as file:
28 | data=file.read()
29 | return data
30 | ```
31 | **Please note: The input value `text` of the function must be the text to be synthesized, and the return value is the binary content of the audio file!**
32 | """
33 |
--------------------------------------------------------------------------------
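The manual above fixes only the contract of `custom_api`: text in, audio bytes out. Besides the Gradio-client example it shows, any HTTP TTS endpoint can be wrapped the same way. A minimal sketch against a hypothetical endpoint (the URL and JSON fields are placeholders, not a documented API):

```python
def custom_api(text):  # return: audio content (bytes)
    import requests

    # Placeholder endpoint and payload; adapt them to your TTS service.
    resp = requests.post(
        "http://127.0.0.1:9880/tts",
        json={"text": text, "text_lang": "zh"},
        timeout=120,
    )
    resp.raise_for_status()
    return resp.content  # binary audio file content, as the manual requires
```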
/Sava_Utils/man/en_US/issues.py:
--------------------------------------------------------------------------------
1 | issues = r"""
2 | # Typical Issues
3 | ## 1. GPT-SoVITS Error: 404 NOT FOUND
4 | ```
5 | /tts 404 NOT FOUND
6 | ```
7 | * Typical cause of this error: using code that does not follow the official standard
8 | * Please ensure that you are using the official integrated package or the latest code from the official repository.
9 |
10 | ### Solution:
11 | * Manually pull the official repository code.
12 | * Download the integrated package provided in README. (stable but updates may be slow)
13 |
14 | ## 2. No connection could be made because the target machine actively refused it.
15 | ```
16 | No connection could be made because the target machine actively refused it.
17 | ```
18 | You need to check:
19 | * Is the API service already started and running?
20 | * Please wait for the API to fully start before performing operations.
21 | * Do not close the API console!
22 | * Is the port correctly filled?
23 |
24 | ## 3. 400 Bad Request
25 | ```
26 | 400 Bad Request
27 | ```
28 | Check the red error logs in this program's console; usually, the API will return the cause of the error.
29 | If no error message is received, please report this issue.
30 | * Typical error causes: reference audio outside the 3-10 second range; model path does not exist.
31 |
32 | ## 4. The following subtitles are delayed due to the previous audio being too long.
33 | ```
34 | The following subtitles are delayed due to the previous audio being too long.
35 | ```
36 | * Your subtitle timing intervals are not proper.
37 | * Consider increasing the value of the setting `Maximum audio acceleration ratio` (setting it to a value greater than 1 to enable the feature) and enable `Remove inhalation and silence`.
38 | * There is a minimum voice interval option in the settings (default 0.3 seconds) to prevent voices from overlapping in such cases. If not needed, it can be set to 0.
39 |
40 | ## 5. GPT-SoVITS Output Audio Has Duration But It's Silent
41 | ```
42 | GPT-SoVITS Output Audio Has Duration But It's Silent
43 | ```
44 | * Your GPU does not support fp16 (half precision).
45 | * Manually modify the value of `is_half` to `false` in `GPT_SoVITS\configs\tts_infer.yaml`.
46 | """
47 |
--------------------------------------------------------------------------------
/Sava_Utils/man/en_US/title.py:
--------------------------------------------------------------------------------
1 | title = r"""
2 | Version 4.4.2-2505, Compatible with HiyoriUI, GPT-SoVITS, CosyVoice, F5-TTS(API in GSV format) and Microsoft TTS
3 | GitHub: [Check for updates manually](https://github.com/YYuX-1145/Srt-AI-Voice-Assistant/releases) | [Install Extensions](https://github.com/YYuX-1145/Srt-AI-Voice-Assistant/tree/main/tools)
4 | """
5 |
--------------------------------------------------------------------------------
/Sava_Utils/man/fr_FR/README.py:
--------------------------------------------------------------------------------
1 | README = r"""
2 | # Srt-AI-Voice-Assistant
3 | ### Ce projet peut utiliser plusieurs systèmes de synthèse vocale IA pour doubler vos fichiers de sous-titres ou de texte.
Il propose également diverses fonctions auxiliaires pratiques, comme la transcription audio/vidéo et la traduction de sous-titres.
4 | Si vous rencontrez des problèmes ou souhaitez faire une demande de fonctionnalité, veuillez vous rendre sur [Issues](https://github.com/YYuX-1145/Srt-AI-Voice-Assistant/issues).
5 | ## Fonctionnalités
6 | - ✅ Open-source, interface WebUI conviviale, exécution locale et accessible via le réseau local
7 | - ✅ Prend en charge plusieurs projets TTS : BV2, GSV, CosyVoice2, AzureTTS, et vous pouvez même personnaliser vos API !
8 | - ✅ Enregistrement de paramètres et de presets personnalisés
9 | - ✅ Mode par lots
10 | - ✅ Édition de sous-titres
11 | - ✅ Traduction de sous-titres
12 | - ✅ Régénération de lignes spécifiques
13 | - ✅ Prend en charge le doublage avec plusieurs locuteurs
14 | - ✅ Réexportation de sous-titres
15 | - ✅ Fonctions étendues : transcription de sous-titres pour audio/vidéo
16 | - ✅ I18n
17 |
18 | ## [Télécharger seulement la version empaquetée](https://github.com/YYuX-1145/Srt-AI-Voice-Assistant/releases)
19 | * Utilisez cette version seulement en cas de conflits de dépendances ou de problèmes d'installation.
20 |
21 | ## [Télécharger le package intégré avec GPT-SoVITS (Hugging Face)](https://huggingface.co/YYuX/GPT-SoVITS-SAVA-windows-package/tree/main)
22 | * Le package intégré GPT-SoVITS inclut la version empaquetée, sans supprimer aucun modèle intégré ou pré-entraîné, et son code d'entraînement et d'inférence est identique à celui du dépôt officiel.
23 | * Note : La version empaquetée incluse dans le package intégré GPT-SoVITS peut ne pas être la version la plus récente ; remplacez-la pour la mettre à jour.
24 | """
25 |
--------------------------------------------------------------------------------
/Sava_Utils/man/fr_FR/__init__.py:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/YYuX-1145/Srt-AI-Voice-Assistant/66fb0108fa6b2e34a160a41d105182866fd704f2/Sava_Utils/man/fr_FR/__init__.py
--------------------------------------------------------------------------------
/Sava_Utils/man/fr_FR/changelog.py:
--------------------------------------------------------------------------------
1 | changelog = r"""
2 | ## Journal des modifications
3 |
4 | ### Mise à jour V4-2503 :
5 | #### Afin de rendre les versions plus claires, des numéros de version sont attribués en plus des dates de publication.
6 | #### Après cette mise à jour, l'historique de synthèse et les locuteurs enregistrés de la version précédente doivent être recréés ; sinon, des erreurs peuvent se produire !
7 | 1. Édition des sous-titres
8 | 2. Traduction des sous-titres
9 | 3. Amélioration de divers détails et correction d'erreurs
10 | 4. Prise en charge de CosyVoice2 (réutilisation du panneau GSV)
11 | 5. (4.0.1) Mode par lots
12 | 6. (4.1) Mode serveur
13 | 7. (4.2) I18n
14 | 8. (4.3) Accélération automatique de l'audio et suppression du silence; Création de projets de doublage à plusieurs locuteurs à partir de textes étiquetés.
15 | 9. (4.3.1) Ajouter la fonction de Recherche et de Remplacement; ajouter un bouton de régénération en un clic.
16 | 10. (4.4) Permet l'édition des caractères polyphoniques pour GPT-SoVITS ainsi que la détection automatique des modèles; Autorise les invites personnalisées pour Ollama; Permet d'exporter des sous-titres avec les noms des locuteurs selon un modèle personnalisable.
17 |
18 | ### Mise à jour du 140225 :
19 | 1. Prise en charge de la lecture de projets historiques
20 | 2. Prise en charge du doublage avec plusieurs locuteurs
21 |
22 | ### Mise à jour du 230125 :
23 | 1. Prise en charge de la réexportation de fichiers de sous-titres SRT correspondant aux horodatages de début et de fin réels après synthèse ; prise en charge également de la lecture de fichiers texte TXT pour la synthèse, auquel cas les paragraphes sont divisés par phrases.
24 | 2. Afin d'améliorer l'extensibilité à l'avenir et la simplicité, la conception d'un fichier de script unique, qui rendait les téléchargements plus pratiques, a dû être abandonnée. Le code sera refactorisé progressivement à partir de cette version.
25 | 3. Ajout de certaines documentations.
26 |
27 | ### Mise à jour du 110824 :
28 | 1. Notification des utilisateurs du message d'erreur
29 | 2. Détection automatique des environnements TTS-Project
30 | 3. Restauration de la compatibilité avec l'api-v1
31 | 4. Une mise à jour majeure de fonctionnalité : la régénération de lignes spécifiques si vous n'en êtes pas satisfait.
32 | """
33 |
--------------------------------------------------------------------------------
/Sava_Utils/man/fr_FR/help.py:
--------------------------------------------------------------------------------
1 | help = r"""
2 | # Guide de l'utilisateur
3 |
4 | ## 0. Configuration et utilisation du service
5 | #### Ce projet peut appeler deux projets locaux : Bert-VITS2, GPT-SoVITS
6 | #### Et un service en ligne : Microsoft TTS
7 | * **Pour les projets TTS locaux** :
8 |
9 | * Remplissez et enregistrez le chemin racine du projet et le chemin de l'interpréteur Python correspondant sur la page des paramètres.
10 | * **Méthode plus simple** : Placez le programme dans le répertoire racine du paquet intégré, puis cliquez sur le bouton correspondant sur la première page pour démarrer le service API !
11 |
12 | * **Pour Microsoft TTS** :
13 |
14 | * Suivez le tutoriel pour vous inscrire à un compte et saisissez la clé API sur la page des paramètres.
15 | * Prenez note du quota mensuel gratuit !
16 |
17 | ## 1. Démarrage
18 | ### Ce projet peut doubler pour les sous-titres et les textes bruts.
19 | * **Pour les sous-titres** :
20 |
21 | * Lorsqu'un sous-titre est trop long, les sous-titres suivants seront retardés en conséquence. Et vous pouvez définir l'intervalle de parole minimum dans les paramètres.
22 |
23 | * **Pour le texte brut** :
24 |
25 | * Le texte sera divisé en entrées de sous-titres en fonction des ponctuations de fin et des retours à la ligne.
26 |
27 | * Après la génération, vous pouvez exporter les sous-titres avec les horodatages audio réels sur la page d'édition.
28 |
29 | ### A. Scénario avec un seul locuteur
30 | * **I.** Téléchargez les fichiers de sous-titres ou de texte dans le panneau de droite de la page `Doublage de sous-titres`.
31 | * Mode de balisage : Le contenu du fichier doit être le suivant : `Locuteur : Contenu`, e.g. `Vincent:Bonjour.` Le tableau de correspondance peut convertir le locuteur d'origine dans le fichier de texte en locuteur cible correspondant.
32 |
33 | * **II.** Sélectionnez votre projet et ajustez les paramètres dans le panneau central.
34 |
35 | * **III.** Cliquez sur le bouton `Produire l'audio` en bas et attendez.
36 |
37 | * **IV.** Téléchargez votre audio.
38 |
39 | ### B. Scénario avec plusieurs locuteurs
40 | * **I.** Téléchargez les fichiers de sous-titres/texte dans le panneau de droite de `Doublage de sous-titres`.
41 |
42 | * **II.** Cliquez sur `Créer un projet de doublage avec plusieurs locuteurs` en dessous de l'affichage du fichier.
43 |
44 | * **III.** Créez des locuteurs :
45 | * **a.** Dépliez la section Doublage avec plusieurs locuteurs en bas de la page d'édition.
46 | * **b.** Sélectionnez le projet cible.
47 | * **c.** Dans la boîte de sélection/création de locuteur, saisissez un nom de locuteur.
48 | * **d.** Ajustez les paramètres (y compris les numéros de port) et cliquez sur 💾 pour enregistrer. Les noms dupliqués écraseront les locuteurs existants.
49 |
50 | * **IV.** Sélectionnez un locuteur dans la liste déroulante, cochez les sous-titres correspondants, puis cliquez sur ✅ pour appliquer. Les informations du locuteur apparaîtront dans la colonne 4.
51 |
52 | * **V.** Le dernier locuteur attribué devient le locuteur par défaut (s'applique aux sous-titres non attribués dans les projets avec plusieurs locuteurs).
53 |
54 | * **VI.** Cliquez sur `Lancer la synthèse à plusieurs locuteurs` pour commencer la génération.
55 | * ⚠️ Si vous utilisez et créez des locuteurs GSV dans une autre langue, les locuteurs GSV ne fonctionneront pas correctement.
56 |
57 | ### Regénérer des lignes spécifiques
58 | * **I.** Localisez le sous-titre cible à l'aide du curseur sur la page d'édition.
59 |
60 | * **II.** Modifiez le texte si nécessaire. Les modifications sont enregistrées automatiquement après la régénération.
61 |
62 | * **III.** Cliquez sur 🔄 pour régénérer une seule ligne :
63 |
64 | * Utilise les paramètres du projet s'il n'est pas attribué.
65 | * Utilise les paramètres spécifiques du locuteur s'il est attribué.
66 | * Les projets avec plusieurs locuteurs doivent avoir des locuteurs attribués.
67 |
68 | * **IV.** Après avoir apporté des modifications aux sous-titres, vous pouvez également cliquer sur `Continuer la Génération` pour régénérer la voix des sous-titres modifiés ou dont la synthèse n'a pas été réussie.
69 |
70 | * **V.** Cliquez sur `Reconstituer l'audio` pour recomposer l'audio complet.
71 |
72 | ### C. Rééditer des projets historiques
73 | * Sélectionnez un projet de l'historique de synthèse dans le panneau supérieur. Ensuite, cliquez sur le bouton `Charger`.
74 | * Le reste est évident.
75 |
76 | ### D. Édition des sous-titres
77 | #### 1. Copier
78 | * Copier les sous-titres sélectionnés.
79 |
80 | #### 2. Supprimer
81 | * Supprimer les sous-titres sélectionnés.
82 |
83 | #### 3. Fusionner
84 | * Sélectionnez au moins 2 sous-titres comme points de départ/fin.
85 | * Les sous-titres du point de départ au point de fin seront fusionnés.
86 |
87 | ⚠️ Les modifications ne sont pas enregistrées automatiquement sur le disque immédiatement, vous pouvez donc recharger le projet pour annuler.
88 |
89 | #### 4. Modifier les horodatages
90 | * Éditez les heures de début/fin au format SRT.
91 | * Cliquez sur `Appliquer les horodatages` pour enregistrer les modifications.
92 |
93 | ⚠️ Les modifications non appliquées seront perdues lors de la navigation.
94 |
95 | ## 2. Dépannage
96 | * Lorsque vous trouvez un problème :
97 | Décrivez le problème en détail et répertoriez les étapes effectuées pour reproduire l'erreur.
98 | * Visitez [GitHub-issues](https://github.com/YYuX-1145/Srt-AI-Voice-Assistant/issues) pour rapporter un problème ou demander de l'aide (les modèles de Issue vous guideront pour signaler correctement).
99 | """
100 |
--------------------------------------------------------------------------------
/Sava_Utils/man/fr_FR/help_custom.py:
--------------------------------------------------------------------------------
1 | help_custom = r"""
2 | ## Security Warning: This feature will execute external code!
3 | ### Please inspect the code content before running it; executing untrusted code may put your computer at risk!
4 | ### The author bear no responsibility for any consequences!
5 |
6 | ### Place code files containing Python functions in the SAVAdata/presets directory, and they will be callable.
7 | * Here is an example code for Gradio API.
8 | ```
9 | def custom_api(text): #return: audio content
10 | from gradio_client import Client
11 | client = Client("http://127.0.0.1:7860/")
12 | result = client.predict(
13 | text, # str in '输入文本内容' Textbox component
14 | "神里绫华", # str (Option from: [('神里绫华', '神里绫华')]) in 'Speaker' Dropdown component
15 | 0.1, # int | float (numeric value between 0 and 1) in 'SDP Ratio' Slider component
16 | 0.5, # int | float (numeric value between 0.1 and 2) in 'Noise' Slider component
17 | 0.5, # int | float (numeric value between 0.1 and 2) in 'Noise_W' Slider component
18 | 1, # int | float (numeric value between 0.1 and 2) in 'Length' Slider component
19 | "auto", # str (Option from: [('ZH', 'ZH'), ('JP', 'JP'), ('EN', 'EN'), ('mix', 'mix'), ('auto', 'auto')]) in 'Language' Dropdown component
20 | "", # str (filepath on your computer (or URL) of file) in 'Audio prompt' Audio component
21 | "", # str in 'Text prompt' Textbox component
22 | "", # str in 'Prompt Mode' Radio component
23 | "", # str in '辅助文本' Textbox component
24 | 0, # int | float (numeric value between 0 and 1) in 'Weight' Slider component
25 | fn_index=0
26 | )
27 | with open(result[1],'rb') as file:
28 | data=file.read()
29 | return data
30 | ```
31 | **Please note: The input value `text` of the function must be the text to be synthesized, and the return value is the binary content of the audio file!**
32 | """
33 |
--------------------------------------------------------------------------------
/Sava_Utils/man/fr_FR/issues.py:
--------------------------------------------------------------------------------
1 | issues = r"""
2 | # Problèmes typiques
3 | ## 1. Erreur GPT-SoVITS : 404 NOT FOUND
4 | ```
5 | /tts 404 NOT FOUND
6 | ```
7 | * Cause typique de cette erreur : utilisation d'un code non conforme au standard officiel.
8 | * Veuillez vous assurer que vous utilisez le package intégré officiel ou le code le plus récent du dépôt officiel.
9 |
10 | ### Solution :
11 | * Téléchargez manuellement le code du dépôt officiel.
12 | * Téléchargez le package intégré fourni dans le README. (stable mais les mises à jour peuvent être lentes)
13 |
14 | ## 2. Impossible d'établir de connexion car l'ordinateur cible a expressément refusé celle-ci.
15 | ```
16 | Impossible d'établir de connexion car l'ordinateur cible a expressément refusé celle-ci.
17 | ```
18 | Vous devez vérifier :
19 | * Le service API est-il déjà démarré et en cours d'exécution ?
20 | * Veuillez attendre que l'API soit entièrement démarrée avant d'effectuer des opérations.
21 | * Ne fermez pas la console de l'API !
22 | * Le port est-il correctement renseigné ?
23 |
24 | ## 3. 400 Bad Request
25 | ```
26 | 400 Bad Request
27 | ```
28 | Vérifiez les journaux d'erreur en rouge dans la console de ce programme ; généralement, l'API renverra la cause de l'erreur.
29 | Si aucun message d'erreur n'est reçu, veuillez signaler ce problème.
30 | * Cause d'erreur typique : Audio de référence en dehors de la plage de 3 à 10 secondes ; le chemin du modèle n'existe pas.
31 |
32 | ## 4. Les sous-titres suivants sont retardés en raison de la longueur excessive de l'audio précédent.
33 | ```
34 | Les sous-titres suivants sont retardés en raison de la longueur excessive de l'audio précédent.
35 | ```
36 | * Vos intervalles de temps des sous-titres ne sont pas appropriés.
37 | * Envisagez d'augmenter la valeur du paramètre `rapport maximal d'accélération audio` (en le fixant à
38 | une valeur supérieure à 1 pour activer la fonction) et activez `Supprimer l'inhalation et le silence`.
39 | * Il existe une option d'intervalle vocal minimum dans les paramètres (par défaut 0,3 seconde) pour éviter que les voix ne se chevauchent dans de tels cas. Si elle n'est pas nécessaire, elle peut être mise à 0.
40 |
41 | ## 5. Le fichier audio de sortie de GPT-SoVITS a une durée mais est silencieux.
42 | ```
43 | Le fichier audio de sortie de GPT-SoVITS a une durée mais est silencieux.
44 | ```
45 | * Votre carte graphique ne prend pas en charge le fp16 (demi-précision).
46 | * Modifiez manuellement la valeur de `is_half` en `false` dans `GPT_SoVITS\configs\tts_infer.yaml`.
47 | """
48 |
--------------------------------------------------------------------------------
/Sava_Utils/man/fr_FR/title.py:
--------------------------------------------------------------------------------
1 | title = r"""
2 | Version 4.4.2-2505, Compatible avec HiyoriUI, GPT-SoVITS, CosyVoice, F5-TTS (API au format GSV) et Microsoft TTS
3 | GitHub : [Vérifier manuellement les mises à jour](https://github.com/YYuX-1145/Srt-AI-Voice-Assistant/releases) | [Installer des extensions](https://github.com/YYuX-1145/Srt-AI-Voice-Assistant/tree/main/tools)
4 | """
5 |
--------------------------------------------------------------------------------
/Sava_Utils/man/ja_JP/README.py:
--------------------------------------------------------------------------------
1 | README = r"""
2 | # Srt-AI-Voice-Assistant
3 | **This file was translated by AI and is for reference only.**
4 | ### このプロジェクトは、複数のAI音声合成(TTS)を使用して、字幕ファイルやテキストファイルにダビングすることができます。
また、音声/動画の文字起こしや字幕翻訳など、様々な便利な補助機能を提供します。
5 | 問題に遭遇した場合や新機能のリクエストがある場合は、[Issues](https://github.com/YYuX-1145/Srt-AI-Voice-Assistant/issues) をご利用ください。
6 |
7 | ## 機能
8 | - ✅ オープンソースで、使いやすいWebUIインターフェース。ローカルで実行でき、LAN経由でアクセス可能です。
9 | - ✅ 複数のTTSプロジェクトをサポート:BV2、GSV、CosyVoice2、AzureTTSなど、独自のAPIもカスタマイズできます!
10 | - ✅ パーソナル設定とプリセットを保存できます。
11 | - ✅ バッチモードが利用可能です。
12 | - ✅ 字幕編集機能があります。
13 | - ✅ 字幕翻訳機能を備えています。
14 | - ✅ 特定の行を再生成できます。
15 | - ✅ 複数話者によるダビングをサポートします。
16 | - ✅ 字幕を再エクスポートできます。
17 | - ✅ 拡張機能:音声/動画の字幕文字起こし
18 | - ✅ 国際化(I18n)対応
19 |
20 | ## [パッケージ版のみをダウンロード](https://github.com/YYuX-1145/Srt-AI-Voice-Assistant/releases)
21 | * 依存関係の競合やインストール問題がある場合のみ、このバージョンを使用してください。
22 |
23 | ## [GPT-SoVITS付きの統合パッケージをダウンロード(Hugging Faceから)](https://huggingface.co/YYuX/GPT-SoVITS-SAVA-windows-package/tree/main)
24 | * GPT-SoVITS統合パッケージにはパッケージ版が含まれており、組み込みモデルや事前学習モデルは削除されていません。コードは公式リポジトリと同じです。
25 | * 注意:GPT-SoVITS統合パッケージに含まれるパッケージ版は最新バージョンでない可能性があります。上書きして更新してください。
26 | """
27 |
--------------------------------------------------------------------------------
/Sava_Utils/man/ja_JP/__init__.py:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/YYuX-1145/Srt-AI-Voice-Assistant/66fb0108fa6b2e34a160a41d105182866fd704f2/Sava_Utils/man/ja_JP/__init__.py
--------------------------------------------------------------------------------
/Sava_Utils/man/ja_JP/changelog.py:
--------------------------------------------------------------------------------
1 | changelog = r"""
2 | ## 変更履歴
3 | **This file was translated by AI and is for reference only.**
4 | ### V4-2503アップデート:
5 | #### バージョンをより明確にするため、リリース日付に加えてバージョン番号が付けられます。
6 | #### このアップデート後、前のバージョンの合成履歴と保存された話者は再作成する必要があります。そうしないと、エラーが発生する可能性があります!
7 | 1. 字幕編集
8 | 2. 字幕翻訳
9 | 3. 様々な詳細が改善され、バグが修正されました
10 | 4. CosyVoice2をサポート(GSVパネルを再利用)
11 | 5. (4.0.1) バッチモード
12 | 6. (4.1) サーバーモード
13 | 7. (4.2) 国際化(I18n)
8. (4.3) 音声の自動加速と無音除去;ラベル付きテキストから複数話者プロジェクトを作成。
9. (4.3.1) 検索と置換の追加;ワンクリック再生成ボタンの追加。
10. (4.4) GPT-SoVITSの多音字編集とモデル自動検出;Ollamaのカスタムプロンプト対応;カスタマイズ可能なテンプレートで話者名付き字幕をエクスポート。
14 |
15 | ### 250214アップデート:
16 | 1. 過去のプロジェクトの読み込みをサポート
17 | 2. 複数話者によるダビングをサポート
18 |
19 | ### 250123アップデート:
20 | 1. 合成後の実際の開始と終了タイムスタンプに一致するSRT字幕ファイルの再エクスポートをサポートします。また、合成のためのTXTテキストファイルの読み込みもサポートし、この場合、段落は文で分割されます。
21 | 2. 将来的な拡張性と簡素化を高めるため、ダウンロードをより便利にする単一のスクリプトファイルの設計を断念せざるを得ません。このバージョンからコードの再設計が始まりました。
22 | 3. いくつかのドキュメントを追加しました。
23 |
24 | ### 240811アップデート:
25 | 1. エラーメッセージをユーザーに通知
26 | 2. TTSプロジェクトの環境を自動検出
27 | 3. api-v1との互換性を回復
28 | 4. メジャーな機能アップデート:特定の行に不満がある場合、それらを再生成する機能をサポート
29 | """
30 |
--------------------------------------------------------------------------------
/Sava_Utils/man/ja_JP/help.py:
--------------------------------------------------------------------------------
1 | help = r"""
2 | # ユーザーガイド
3 | **This file was translated by AI and is for reference only.**
4 | ## 0. サービスの設定と使用方法
5 | #### このプロジェクトは、2つのローカルプロジェクト(Bert-VITS2、GPT-SoVITS)と1つのオンラインサービス(Microsoft TTS)を呼び出すことができます。
6 | * **ローカルのTTSプロジェクトについて**:
7 | * 設定ページでプロジェクトのルートパスと対応するPythonインタープリターのパスを入力して保存します。
8 | * **簡単な方法**:プログラムを統合パッケージのルートディレクトリに配置し、最初のページの対応するボタンをクリックしてAPIサービスを起動します!
9 |
10 | * **Microsoft TTSについて**:
11 | * チュートリアルに従ってアカウントを登録し、設定ページにAPIキーを入力します。
12 | * 月次の無料クォータに注意してください!
13 |
14 | ## 1. 使い始める
15 | ### このプロジェクトは、字幕または平文の吹き替えをサポートしています。
16 | * **字幕の場合**:
17 | * 実際に有効に使用されるのは開始時間のみです。字幕が長すぎる場合、後続の字幕はそれに応じて遅延します。また、設定で最小音声間隔を設定することができます。
18 |
19 | * **平文の場合**:
20 | * テキストは、終了句読点と改行に基づいて字幕エントリに分割されます。
21 |
22 | * 生成後、編集ページで実際の音声タイムスタンプ付きの字幕をエクスポートすることができます。
23 |
24 | ### A. 単一話者のシナリオ
25 | * **I.** `字幕吹き替え`ページの右パネルで字幕またはテキストファイルをアップロードします。
26 | * **II.** 中央のパネルでプロジェクトを選択し、パラメータを調整します。
27 | * **III.** 下部の`音声を生成`ボタンをクリックして待ちます。
28 | * **IV.** 音声をダウンロードします。
29 |
30 | ### B. 複数話者のシナリオ
31 | * **I.** `字幕吹き替え`の右パネルで字幕/テキストファイルをアップロードします。
32 | * **II.** ファイル表示の下にある`複数話者吹き替えプロジェクトを作成`をクリックします。
33 | * **III.** 話者を作成します:
34 | * **a.** 編集ページの下部にある「複数話者吹き替え」セクションを展開します。
35 | * **b.** ターゲットプロジェクトを選択します。
36 | * **c.** 「話者を選択/作成」ボックスに話者名を入力します。
37 | * **d.** パラメータ(ポート番号を含む)を調整し、💾をクリックして保存します。重複する名前は既存の話者を上書きします。
38 | * **IV.** ドロップダウンから話者を選択し、対応する字幕にチェックを入れてから、✅をクリックして適用します。話者情報が4列目に表示されます。
39 | * **V.** 最後に割り当てられた話者がデフォルトの話者になります(複数話者プロジェクトで割り当てられていない字幕に適用されます)。
40 | * **VI.** `複数話者吹き替えを生成`をクリックして生成を開始します。
41 | * ⚠️ 異なる言語でGSV話者を使用および作成している場合、GSV話者は正常に動作しません。
42 |
43 | ### 特定の行を再生成する
44 | * **I.** 編集ページのスライダーを使用してターゲット字幕を見つけます。
45 | * **II.** 必要に応じてテキストを変更します。再生成後、変更内容は自動的に保存されます。
46 | * **III.** 🔄をクリックして1行を再生成します:
47 | * 割り当てられていない場合は、プロジェクトのパラメータを使用します。
48 | * 割り当てられている場合は、話者固有のパラメータを使用します。
49 | * 複数話者プロジェクトでは、話者を割り当てる必要があります。
50 | * **IV.** `音声を再組み立て`をクリックして、完全な音声を再構成します。
51 |
52 | ### C. 過去のプロジェクトを再編集する
53 | * 上部パネルの合成履歴からプロジェクトを選択し、`読み込み`ボタンをクリックします。
54 | * 残りの手順は自明です。
55 |
56 | ### D. 字幕編集
57 | #### 1. コピー
58 | * 選択した字幕をコピーします。
59 |
60 | #### 2. 削除
61 | * 選択した字幕を削除します。
62 |
63 | #### 3. マージ
64 | * 少なくとも2つの字幕を開始/終了点として選択します。
65 | * 開始点から終了点までの字幕がマージされます。
66 |
67 | ⚠️ 変更はすぐにディスクに自動保存されないため、プロジェクトを再読み込みすることで元に戻すことができます。
68 |
69 | #### 4. タイムスタンプを変更する
70 | * SRT形式で開始/終了時間を編集します。
71 | * `タイムスタンプを適用`をクリックして変更を保存します。
72 |
73 | ⚠️ 適用されていない変更は、ナビゲーション中に失われます。
74 |
75 | ## 2. トラブルシューティング
76 | * 問題を報告する際は、問題を詳細に説明し、エラーが発生する前に行った手順を列挙してください。
77 | * [GitHub-issues](https://github.com/YYuX-1145/Srt-AI-Voice-Assistant/issues)にアクセスして、問題を報告またはヘルプを求めてください(Issueテンプレートが適切な報告方法をガイドします)。
78 | """
79 |
--------------------------------------------------------------------------------
/Sava_Utils/man/ja_JP/issues.py:
--------------------------------------------------------------------------------
1 | issues = r"""
2 | # 典型的な問題
3 | **This file was translated by AI and is for reference only.**
4 | ## 1. GPT-SoVITSエラー: 404 NOT FOUND
5 | ```
6 | /tts 404 NOT FOUND
7 | ```
8 | * このエラーの典型的な原因: 非公式標準のコードを使用している
9 | * 公式の統合パッケージまたは公式リポジトリの最新のコードを使用していることを確認してください。
10 |
11 | ### 解決策:
12 | * 公式リポジトリのコードを手動で取得してください。
13 | * READMEに記載されている統合パッケージをダウンロードしてください。(安定していますが、更新が遅い場合があります)
14 |
15 | ## 2. ターゲットマシンがアクティブに接続を拒否したため、接続を行うことができませんでした。
16 | ```
17 | ターゲットマシンがアクティブに接続を拒否したため、接続を行うことができませんでした。
18 | ```
19 | 以下を確認する必要があります。
20 | * APIサービスが既に起動して実行中ですか?
21 | * 操作を行う前に、APIが完全に起動するのを待ってください。
22 | * APIコンソールを閉じないでください!
23 | * ポートは正しく入力されていますか?
24 |
25 | ## 3. 400 Bad Request
26 | ```
27 | 400 Bad Request
28 | ```
29 | このプログラムのコンソールの赤色のエラーログを確認してください。通常、APIはエラーの原因を返します。
30 | エラーメッセージが受信されない場合は、この問題を報告してください。
31 | * 典型的なエラー原因: 参照音声が3-10秒の範囲外;モデルパスが存在しません。
32 |
33 | ## 4. 前の音声が長すぎるため、以下の字幕が遅延しています。
34 | ```
35 | 前の音声が長すぎるため、以下の字幕が遅延しています。
36 | ```
37 | * 字幕のタイミング間隔が適切ではありません。
38 | * 設定項目の`音声最大加速倍率`の値を上げ(1より大きい値に設定すると有効になります)、`吸気音と無音の除去`を有効にすることを検討してください。
39 | * 設定には最小音声間隔オプションがあります(デフォルトは0.3秒)。このような場合に音声が重ならないようにするためのものです。必要なければ0に設定できます。
40 |
41 | ## 5. GPT-SoVITSの出力音声には長さがあるが、無音です。
42 | ```
43 | GPT-SoVITSの出力音声には長さがあるが、無音です。
44 | ```
45 | * あなたのGPUはfp16をサポートしていません。
46 | * `GPT_SoVITS\configs\tts_infer.yaml` の `is_half` の値を `false` に手動で変更してください。
47 | """
48 |
--------------------------------------------------------------------------------
/Sava_Utils/man/ja_JP/title.py:
--------------------------------------------------------------------------------
1 | title = r"""
2 | バージョン4.4.2-2505、HiyoriUI、GPT-SoVITS、CosyVoice、F5-TTS(GSV形式のAPI)およびMicrosoft TTSと互換性があります。
3 | GitHub: [手動で更新を確認](https://github.com/YYuX-1145/Srt-AI-Voice-Assistant/releases) | [拡張機能をインストール](https://github.com/YYuX-1145/Srt-AI-Voice-Assistant/tree/main/tools)
4 | """
5 |
--------------------------------------------------------------------------------
/Sava_Utils/man/zh_CN/README.py:
--------------------------------------------------------------------------------
1 | README = r"""
2 | # Srt-AI-Voice-Assistant
3 | ### 本项目可利用多个AI-TTS为你的字幕或文本文件配音。
并提供包括字幕识别、翻译在内的多种便捷的辅助功能。
4 |
5 | 如遇到bug或者有什么建议,可以在 [Issues](https://github.com/YYuX-1145/Srt-AI-Voice-Assistant/issues) 上反馈
6 |
7 | ## 特性
8 | - ✅ 代码开源,界面友好,本地运行,可局域网访问
9 | - ✅ 支持多个TTS项目:BV2,GSV,CosyVoice2,AzureTTS,以及你可以自定义API!
10 | - ✅ 保存个性化设置和预设
11 | - ✅ 批量模式
12 | - ✅ 字幕编辑
13 | - ✅ 字幕批量翻译
14 | - ✅ 单句重新抽卡
15 | - ✅ 支持多角色配音
16 | - ✅ 字幕重新导出
17 | - ✅ 扩展功能:音视频字幕转录
18 | - ✅ I18n
19 |
20 | ## [仅下载本体(打包版)](https://github.com/YYuX-1145/Srt-AI-Voice-Assistant/releases)
21 | * 当依赖冲突或无法正常安装时使用此版本
22 |
23 |
24 | ## [下载配合GPT-SoVITS的整合包(Hugging Face)](https://huggingface.co/YYuX/GPT-SoVITS-SAVA-windows-package/tree/main)
25 | * 整合包内预装打包版本体,内置模型不删减,训练和推理代码和官方仓库一致
26 | * 注意:包内自带的程序可能不是最新版本,覆盖掉以完成更新
27 | """
28 |
--------------------------------------------------------------------------------
/Sava_Utils/man/zh_CN/__init__.py:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/YYuX-1145/Srt-AI-Voice-Assistant/66fb0108fa6b2e34a160a41d105182866fd704f2/Sava_Utils/man/zh_CN/__init__.py
--------------------------------------------------------------------------------
/Sava_Utils/man/zh_CN/changelog.py:
--------------------------------------------------------------------------------
1 | changelog = r"""
2 | ## 重大的更新历史
3 |
4 | ### V4-2503更新:
5 | #### 为了让版本更具辨识度,除了标注发布日期外,还分配了版本号。
6 | #### 本次更新后,上一个版本的合成历史和保存的说话人需要重新创建,否则会报错!
7 | 1.字幕编辑
8 | 2.字幕批量翻译
9 | 3.各项细节提升和bug修复
10 | 4.支持CosyVoice2(复用GSV的面板)
11 | 5.(4.0.1)批量模式
12 | 6.(4.1)服务模式
13 | 7.(4.2)I18n
14 | 8.(4.3)新增自动语速和自动去静音功能;现在可从标记文件快速生成多说话人工程
15 | 9.(4.3.1)加入查找和替换;加入一键重新生成按钮
16 | 10.(4.4)可编辑GSV多音字,自动检测模型;ollama允许自定义提示词;可利用自定义模版导出带说话人名称的字幕
17 |
18 | ### 250214更新:
19 | 1.支持读取历史工程
20 | 2.支持多说话人配音
21 | 3.更完善的文档
22 |
23 | ### 250123更新:
24 | 1.支持在合成完毕后导出符合实际情况的srt字幕文件,同时也支持通过读取txt纯文本文件来进行合成,在这种情况下会按每句来切分段落。
25 |
26 | 2.为了未来的扩展性和简洁性,我不得不放弃了单脚本文件的设计,即使对于下载而言更加方便。代码从现版本逐步开始重构。
27 |
28 | 3.加入一些文档说明
29 |
30 | ### 240811更新:
31 | 1.增加错误提示
32 | 2.自动检测项目路径
33 | 3.再次兼容api-v1。
34 | 4.重大功能更新:支持重新抽卡合成
35 | """
36 |
--------------------------------------------------------------------------------
/Sava_Utils/man/zh_CN/help.py:
--------------------------------------------------------------------------------
1 | help = r"""
2 | # 使用指南
3 |
4 | ## 0.配置和使用服务
5 | #### 本项目可调用2个本地项目:Bert-VITS2、GPT-SoVITS
6 | #### 和1个在线项目:微软TTS
7 | * 本地项目只需要在设置页中填写并保存项目和解释器路径,或者**以更简单的方式:将程序放于整合包根目录内即可** ,然后点击第一页右下角对应按钮即可一键启动API服务!
8 | * 对于微软TTS,需要按教程注册账号并将密钥填写在设置页内。请注意每月的免费额度!
9 |
10 | ## 1.开始使用
11 | ### 本项目可以为字幕或者纯文本配音。
12 | * 当前一个字幕过长时,后一个字幕将在其后顺延。你可以在设置里设置最小语音间隔。
13 | * 对于纯文本,将按照结束标点符号和换行切割成每一条字幕。
14 | * 完成生成后,可以在编辑页面导出符合音频实际起止时间的字幕。
15 | ### A.单一说话人的情形
16 | * 1.在`字幕配音`上半页右侧上传字幕或者纯文本文件。
17 | * 2.在中间选择你的项目,调整参数。
18 | * 3.点击下方的`生成`,等待片刻。
19 | * 4.下载你的音频。
20 |
21 | ### B.多说话人的情况
22 | * 1.在`字幕配音`上半页右侧上传字幕或者纯文本文件。
23 | * 1.5. 标记模式:文件内容应如下:`说话人:内容` e.g.`淳平:对不起,让您久等了。`
24 | * 2.点击左侧文件展示下方的按钮`生成多角色项目`
25 | * 3.创建数个说话人:
26 | - a.展开位于编辑页最下方的`多角色配音`栏
27 | - b.选择目标项目
28 | - c.`在选择/创建说话人`框中,输入说话人的名字
29 | - d.调整上方对应参数。全部的参数,包括端口号将作为说话人的配置。然后点击`💾`创建说话人。同名的说话人会覆盖。
30 | * 4.在下拉列表里选中你的说话人,然后勾选对应的字幕,再点击下方的`✅`来应用说话人。你将在第4列文本看到说话人信息。
31 | * 5.上一次点击`✅`时选中的说话人会`自动`应用为`默认说话人`(仅多说话人项目生效,未指派说话人的情况下就使用默认说话人),即使你没有选择任何一条字幕。
32 | * 6.点击`生成多角色配音`,将会开始为所有指定说话人的字幕生成音频
33 | * ⚠️如果你正在使用和创建GSV的说话人时不同的语言,GSV的说话人会无法使用。
34 | * 注:gsv的预设创建同理。在切换预设时,会自动加载模型。
35 |
36 | ### 如果对某条语音不满意?
37 | * 1.在下半编辑页中通过滑条找到目标字幕
38 | * 2.可以修改文本内容。重新抽卡完成后,字幕内容会存档。
39 | * 3.点击`🔄️`重新生成单条语音。如果你通过单说话人创建工程,在未指派说话人时,参数以当前创建工程所使用的项目的面板为准。若指派了说话人,则按说话人的参数合成。
40 | * 4.通过多说话人创建的工程必须指派说话人。
41 | * 5.在对字幕进行更改后,也可以点击`继续生成`来重新生成经过更改或未成功合成的语音。
42 | * 6.点击`重新拼接内容`,重新合成音频。
43 |
44 | ### C.历史工程的再编辑
45 | * 编辑页上侧栏的合成历史中选择对应工程,然后点击加载。
46 | * 然后应该也不用多说了吧?
47 |
48 | ### D.字幕编辑
49 | #### 1.复制
50 | * 复制选中的字幕。
51 | #### 2.删除
52 | * 删除选中的字幕。
53 | #### 3.合并
54 | * 你需要至少选择2个字幕作为合并的起点和终点。
55 | * 只有选中字幕的id的最大和最小值作为实际有效输入。
56 | #### 以上更改不会立即存档,因此可以通过重新加载当前工程来撤销操作。
57 |
58 | #### 4.更改时间码
59 | * 按srt的时间格式修改字幕的起止时间。
60 | * 必须点击`应用时间`后,本页的时间码才会被保存,并存档。
61 | * 如果在未保存的情况下进行翻页等其他操作,更改将会丢失。
62 |
63 | ## 2.我遇到了无法解决的错误
64 | ### 您需要:
65 | * 详细地描述问题,并指出问题发生前,您做了哪些操作。
66 | * 推荐在评论区和[GitHub-issues](https://github.com/YYuX-1145/Srt-AI-Voice-Assistant/issues)反馈。Github-issue的模版会指引您更清晰地反馈问题。
67 | """
68 |
--------------------------------------------------------------------------------
/Sava_Utils/man/zh_CN/help_custom.py:
--------------------------------------------------------------------------------
1 | help_custom = r"""
2 | ## 安全警告:此功能会执行外部代码!
3 | ### 运行前请务必检查代码内容,运行不受信任的代码可能会导致电脑受到攻击!
4 | ### 作者不对此产生的后果负任何责任!!
5 |
6 | ### 将装有python函数的代码文件放在`SAVAdata/presets`下即可被调用
7 | ```
8 | def custom_api(text):#return: audio content
9 | from gradio_client import Client
10 | client = Client("http://127.0.0.1:7860/")
11 | result = client.predict(
12 | text, # str in '输入文本内容' Textbox component
13 | "神里绫华", # str (Option from: [('神里绫华', '神里绫华')]) in 'Speaker' Dropdown component
14 | 0.1, # int | float (numeric value between 0 and 1) in 'SDP Ratio' Slider component
15 | 0.5, # int | float (numeric value between 0.1 and 2) in 'Noise' Slider component
16 | 0.5, # int | float (numeric value between 0.1 and 2) in 'Noise_W' Slider component
17 | 1, # int | float (numeric value between 0.1 and 2) in 'Length' Slider component
18 | "auto", # str (Option from: [('ZH', 'ZH'), ('JP', 'JP'), ('EN', 'EN'), ('mix', 'mix'), ('auto', 'auto')]) in 'Language' Dropdown component
19 | "", # str (filepath on your computer (or URL) of file) in 'Audio prompt' Audio component
20 | "", # str in 'Text prompt' Textbox component
21 | "", # str in 'Prompt Mode' Radio component
22 | "", # str in '辅助文本' Textbox component
23 | 0, # int | float (numeric value between 0 and 1) in 'Weight' Slider component
24 | fn_index=0
25 | )
26 | with open(result[1],'rb') as file:
27 | data=file.read()
28 | return data
29 | ```
30 | 以上是接入Gradio的一个示例代码,请注意:函数的输入值必须是要合成的文本`text`,返回值是音频文件的二进制内容!
31 |
32 | """
33 |
--------------------------------------------------------------------------------
/Sava_Utils/man/zh_CN/issues.py:
--------------------------------------------------------------------------------
1 | issues = r"""
2 | # 常见的错误
3 |
4 | ## 1.GPT-SoVITS错误提示404 NOT FOUND
5 | ```
6 | /tts 404 NOT FOUND
7 | ```
8 | * 典型的错误原因:使用了非官方标准的代码
9 | (例如刘悦的整合包,其拓展功能多来自官方项目其他人贡献的Pull Requests,且对API代码进行了~~意义不明的~~更改)
10 | * 请您确保使用了花儿不哭官方整合包或者官方仓库的最新代码。
11 | ### 解决方法:
12 | * 1.~~手动拉取官方仓库代码~~ 无视,因为该类整合包受众不会拉代码
13 | * 2.下载本项目readme里提供的整合包(稳定但更新慢)
14 | * 3.前往下面的视频链接获取花儿不哭的官方整合包
15 | ### 【不要用了半天整合包,却不知道对应项目的创始人是谁!!!】官方视频/教程/整合包指路 :[耗时两个月自主研发的低成本AI音色克隆软件,免费送给大家!【GPT-SoVITS】](https://www.bilibili.com/video/BV12g4y1m7Uw/)
16 |
17 | ## 2.目标计算机积极拒绝连接
18 | ```
19 | 目标计算机积极拒绝连接
20 | ```
21 |
22 | 您需要检查:
23 | * API服务是否已经启动以及正在运行?
24 | * 请等待API启动完毕后再进行操作。
25 | * 不要关闭API控制台!
26 | * 端口是否正确填写?
27 |
28 | ## 3. 400 Bad Request
29 | ```
30 | 400 Bad Request
31 | ```
32 | 查看本程序控制台红色错误日志,通常api会返回报错原因。
33 | 如果没有接收到任何错误信息,可反馈问题。
34 | * 典型的错误原因:参考音频在3-10秒外;填写的模型路径不存在;
35 |
36 | ## 4.音频过长,语音延迟
37 | ```
38 | 序号合集为 ['xxx'] 的字幕由于之前的音频过长而被延迟
39 | ```
40 | * 你的字幕时间间隔不合理。
41 | * 考虑将设置项`音频最大加速倍率`调高(大于1视为启用)并打开`去除吸气声和静音`
42 | * 设置里有最小语音间隔,默认0.3秒。以防止这种情况下语音糊在一起,若不需要可调至0。
43 |
44 | ## 5.GPT-SoVITS输出音频有时长但没有声音
45 | ```
46 | GPT-SoVITS输出音频有时长但没有声音
47 | ```
48 | * 你的显卡不支持半精度
49 | * 手动修改`GPT_SoVITS\configs\tts_infer.yaml`中的`is_half`为`false`
50 | """
51 |
--------------------------------------------------------------------------------
/Sava_Utils/man/zh_CN/title.py:
--------------------------------------------------------------------------------
1 | title = r"""
2 | 版本V4.4.2-2505,支持HiyoriUI,GPT-SoVITS,CosyVoice,F5-TTS(GSV格式API),微软在线TTS
3 | 仓库地址: [前往此处获取更新](https://github.com/YYuX-1145/Srt-AI-Voice-Assistant/releases) | [获取额外内容](https://github.com/YYuX-1145/Srt-AI-Voice-Assistant/tree/main/tools)
4 | """
5 |
--------------------------------------------------------------------------------
/Sava_Utils/polyphone.py:
--------------------------------------------------------------------------------
1 | from .base_componment import Base_Componment
2 | from . import i18n
3 | import gradio as gr
4 | import re
5 | import numpy as np
6 | import os
7 |
8 | PATH = {"ZH": "GPT_SoVITS/text/g2pw/polyphonic.rep", "EN": "GPT_SoVITS/text/engdict-hot.rep"}
9 | CACHE = {"ZH": "GPT_SoVITS/text/g2pw/polyphonic.pickle", "EN": "GPT_SoVITS/text/engdict_cache.pickle"}
10 |
11 | ZH_SINGLE_PY_PATTERN = re.compile(r"[a-z]+[1-5]")
12 | ZH_FORMAT_PATTERN = re.compile(r"^[a-z]+[1-5](?:\s+[a-z]+[1-5])*$")
13 | # Raw: 一丝不苟: ['yi1', 'si1', 'bu4', 'gou3']
14 | # Userinput: yi1 si1 bu4 gou3
15 | EN_FORMAT_PATTERN = re.compile(r"^[A-Z]+[0-2]{0,1}(?:\s+[A-Z]+[0-2]{0,1})*$")
16 | # CHATGPT CH AE1 T JH IY1 P IY1 T IY1
17 | PATTERN = {"ZH": ZH_FORMAT_PATTERN, "EN": EN_FORMAT_PATTERN}
18 |
19 |
20 | def read_fn_zh(x: str):
21 | key, content_raw = x.split(":")
22 | items = ZH_SINGLE_PY_PATTERN.findall(content_raw)
23 | result = ' '.join(items)
24 | return key.strip(), result
25 | READ_FN = {"ZH": read_fn_zh, "EN": lambda x: [i.strip() for i in x.split(' ', 1)]}
26 | WRITE_FN = {"ZH": lambda x, y: f"{x}: {str(y.split())}\n", "EN": lambda x, y: f"{x} {y}\n"}
27 |
28 |
29 | class Polyphone(Base_Componment):
30 | def __init__(self, config):
31 | super().__init__(config)
32 |
33 | def update_cfg(self, config):
34 | self.gsv_dir = config.gsv_dir
35 | return super().update_cfg(config)
36 |
37 | def _UI(self, *args):
38 | with gr.TabItem(i18n('Polyphone Editor')):
39 | if self.server_mode:
40 | gr.Markdown(i18n('This function has been disabled!'))
41 | return
42 | gr.Markdown(i18n('POLYPHONE_NOTICE'))
43 | self.language = gr.Dropdown(label=i18n('Choose Language'), value=list(PATH.keys())[1], choices=list(PATH.keys()), interactive=True)
44 | self.tab = gr.DataFrame(datatype=["str", "str"], col_count=(2, 'fixed'), type="numpy", interactive=True, show_search='search')
45 | self.overwrite = gr.Checkbox(value=False, label=i18n('Overwrite instead of Append'))
46 | self.language.change(lambda: np.array([['', '']], dtype=str), outputs=[self.tab])
47 | with gr.Row():
48 | self.readbtn = gr.Button(value=i18n('Read'), variant="primary")
49 | self.readbtn.click(self.read_file, inputs=[self.language], outputs=[self.tab])
50 | self.writebtn = gr.Button(value=i18n('Save'), variant="primary")
51 | self.writebtn.click(self.save_file, inputs=[self.language, self.tab, self.overwrite])
52 |
53 | def read_file(self, lang):
54 | if self.gsv_dir in [None, ""] or not os.path.isdir(self.gsv_dir):
55 | gr.Warning(i18n('GSV root path has been not configured or does not exist.'))
56 | return None
57 | rows = []
58 | try:
59 | with open(os.path.join(self.gsv_dir, PATH[lang]), 'r', encoding='utf-8') as f:
60 | for line in f:
61 | rows.append(READ_FN[lang](line))
62 | except Exception as e:
63 | gr.Warning(f"Error: {str(e)}")
64 | if len(rows) == 0:
65 | rows.append(['', ''])
66 | return np.array(rows, dtype=str)
67 |
68 | def save_file(self, lang, map, overwrite):
69 | try:
70 | if overwrite:
71 | content = {}
72 | else:
73 | x = self.read_file(lang)
74 | content = {i[0]: i[-1] for i in x if i[0]}
75 | for i in map:
76 | if i[0]:
77 | i[-1] = i[-1].strip()
78 | if PATTERN[lang].match(i[-1]):
79 | content[i[0]] = i[-1]
80 | else:
81 | gr.Info(f"{i18n('Input format mismatch')}: {i[-1]}")
82 | with open(os.path.join(self.gsv_dir, PATH[lang]), 'w', encoding='utf-8') as f:
83 | for key, value in content.items():
84 | f.write(WRITE_FN[lang](key, value))
85 | cachedir = os.path.join(self.gsv_dir, CACHE[lang])
86 | if os.path.isfile(cachedir):
87 | os.remove(cachedir)
88 | gr.Info(i18n('Done!'))
89 | except Exception as e:
90 | gr.Warning(f"Error: {str(e)}")
91 |
--------------------------------------------------------------------------------
/Sava_Utils/subtitle.py:
--------------------------------------------------------------------------------
1 | import os
2 | import re
3 | import gradio as gr
4 | import numpy as np
5 | import soundfile as sf
6 | import datetime
7 | import pickle
8 | import shutil
9 | import Sava_Utils
10 | import copy
11 | from . import logger, i18n
12 | from .librosa_load import load_audio
13 |
14 | current_path = os.environ.get("current_path")
15 | MAX_TIMESTAMP = 18000
16 | SRT_TIME_Pattern = re.compile(r"\d+:\d+:\d+,\d+")
17 |
18 |
19 | def compare_index_lt(i1, i2):
20 | l1 = list(map(int, i1.split("-")))
21 | l2 = list(map(int, i2.split("-")))
22 | while len(l1) < len(l2):
23 | l1.append(0)
24 | while len(l2) < len(l1):
25 | l2.append(0)
26 | return l1 < l2
27 |
28 |
29 | def to_time(time_raw: float):
30 | hours, r = divmod(time_raw, 3600)
31 | minutes, r = divmod(r, 60)
32 | seconds, milliseconds = divmod(r, 1)
33 | return f"{int(hours):02d}:{int(minutes):02d}:{int(seconds):02d},{int(milliseconds*1000):03d}"
34 |
35 |
36 | class Base_subtitle:
37 | def __init__(self, index: int, start_time, end_time, text: str, ntype: str, fps=30):
38 | self.index: str = str(index)
39 | self.start_time_raw: str = start_time
40 | self.end_time_raw: str = end_time
41 | self.start_time = 0.0
42 | self.end_time = 0.0
43 | self.text: str = text.strip()
44 | # def normalize(self,ntype:str,fps=30):
45 | if ntype == "prcsv":
46 | self.start_time = self.to_float_prcsv_time(self.start_time_raw, fps)
47 | self.end_time = self.to_float_prcsv_time(self.end_time_raw, fps)
48 | elif ntype == "srt":
49 | self.start_time = self.to_float_srt_time(self.start_time_raw)
50 | self.end_time = self.to_float_srt_time(self.end_time_raw)
51 | else:
52 | raise ValueError
53 | # 5h=5*60*60s=18000s
54 | assert self.start_time < MAX_TIMESTAMP, 'too long'
55 | assert self.end_time < MAX_TIMESTAMP, 'too long'
56 |
57 | def to_float_prcsv_time(self, time: str, fps: int):
58 | h, m, s, fs = (time.replace(";", ":")).split(":") # seconds
59 | result = int(h) * 3600 + int(m) * 60 + int(s) + round(int(fs) / fps, 2)
60 | return result
61 |
62 | def to_float_srt_time(self, time: str):
63 | h, m, s = time.split(":")
64 | s = s.replace(",", ".")
65 | result = int(h) * 3600 + int(m) * 60 + round(float(s), 2)
66 | return result
67 |
68 | def reset_srt_time(self, timestamp):
69 | if timestamp != self.get_srt_time():
70 | st, et = timestamp.split("-->")
71 | st = st.strip()
72 | et = et.strip()
73 | if SRT_TIME_Pattern.fullmatch(st) and SRT_TIME_Pattern.fullmatch(et):
74 | start_time_new = self.to_float_srt_time(st)
75 | end_time_new = self.to_float_srt_time(et)
76 | if start_time_new < MAX_TIMESTAMP and end_time_new < MAX_TIMESTAMP:
77 | self.start_time_raw = st
78 | self.start_time = start_time_new
79 | self.end_time_raw = et
80 | self.end_time = end_time_new
81 | else:
82 | raise ValueError(f"too long: {st} --> {et}")
83 | else:
84 | raise ValueError(f"{i18n('Input format mismatch')}: {st} --> {et}")
85 |
86 | def __str__(self) -> str:
87 | return f"id:{self.index},start:{self.start_time_raw}({self.start_time}),end:{self.end_time_raw}({self.end_time}),text:{self.text}"
88 |
89 | def __lt__(self, other) -> bool:
90 | return compare_index_lt(self.index, other.index)
91 |
92 |
93 | class Subtitle(Base_subtitle):
94 | def __init__(self, index: int, start_time, end_time, text: str, ntype: str, fps=30, speaker=None):
95 | super().__init__(index, start_time, end_time, text, ntype, fps)
96 | self.is_success = None
97 | self.is_delayed = False
98 | self.real_st = 0
99 | self.real_et = 0 # frames
100 | self.speaker = speaker
101 | self.copy_count = 0
102 |
103 | def add_offset(self, offset=0):
104 | self.start_time += offset
105 | if self.start_time < 0:
106 | self.start_time = 0.0
107 | self.end_time += offset
108 | if self.end_time < 0:
109 | self.end_time = 0.0
110 |
111 | def get_srt_time(self):
112 | return f"{to_time(self.start_time)} --> {to_time(self.end_time)}"
113 |
114 | def copy(self):
115 | x = copy.deepcopy(self)
116 | self.copy_count += 1
117 | x.copy_count = 0
118 | x.index = f"{self.index}-{self.copy_count}"
119 | x.is_success = None
120 | return x
121 |
122 | def __str__(self) -> str:
123 | return f"id:{self.index},start:{self.start_time_raw}({self.start_time}),end:{self.end_time_raw}({self.end_time}),text:{self.text}.State: is_success:{self.is_success},is_delayed:{self.is_delayed}"
124 |
125 |
126 | class Subtitles:
127 | def __init__(self, proj: str = None, dir: str = None) -> None:
128 | self.subtitles: list[Subtitle] = []
129 | self.proj = proj
130 | self.dir = dir
131 | self.sr = 32000
132 | self.default_speaker = None
133 |
134 | def dump(self):
135 | assert self.dir is not None
136 | with open(os.path.join(self.get_abs_dir(), "st.pkl"), 'wb') as f:
137 | pickle.dump(self, f)
138 |
139 | def set_proj(self, proj: str):
140 | self.proj = proj
141 |
142 | def set_dir_name(self, dir_name: str):
143 | count = 1
144 | self.dir = dir_name
145 | while os.path.exists(os.path.join(current_path, "SAVAdata", "workspaces", self.dir)):
146 | if Sava_Utils.config.overwrite_workspace:
147 | shutil.rmtree(os.path.join(current_path, "SAVAdata", "workspaces", self.dir))
148 | break
149 | self.dir = f"{dir_name}({count})"
150 | count+=1
151 | os.makedirs(os.path.join(current_path, "SAVAdata", "workspaces", self.dir), exist_ok=True)
152 | self.dump()
153 |
154 | def get_abs_dir(self):
155 | return os.path.join(current_path, "SAVAdata", "workspaces", self.dir)
156 |
157 | def audio_join(self, sr=None): # -> tuple[int,np.array]
158 | assert self.dir is not None
159 | abs_path = self.get_abs_dir()
160 | audiolist = []
161 | delayed_list = []
162 | failed_list = []
163 | fl = [i for i in os.listdir(abs_path) if i.endswith(".wav")]
164 | if len(fl) == 0:
165 | gr.Warning(i18n('Subtitles have not been synthesized yet!'))
166 | return None
167 | if sr in [None, 0]:
168 | wav, sr = load_audio(os.path.join(abs_path, fl[0]), sr=sr)
169 | self.sr = sr
170 | interval = int(Sava_Utils.config.min_interval * sr)
171 | del fl
172 | ptr = 0
173 | for id, i in enumerate(self.subtitles):
174 | start_frame = int(i.start_time * sr)
175 | if ptr <= start_frame:
176 | silence_len = start_frame - ptr
177 | audiolist.append(np.zeros(silence_len))
178 | ptr += silence_len
179 | self.subtitles[id].is_delayed = False
180 | elif start_frame != 0 and ptr > start_frame:
181 | self.subtitles[id].is_delayed = True
182 | delayed_list.append(self.subtitles[id].index)
183 | f_path = os.path.join(abs_path, f"{i.index}.wav")
184 | if os.path.exists(f_path):
185 | wav, sr = load_audio(f_path, sr=sr)
186 | dur = wav.shape[-1] # frames
187 | self.subtitles[id].real_st = ptr
188 | ptr += dur
189 | audiolist.append(wav)
190 | self.subtitles[id].real_et = ptr
191 | ptr += interval
192 | audiolist.append(np.zeros(interval))
193 | # self.subtitles[id].is_success = True
194 | else:
195 | failed_list.append(self.subtitles[id].index)
196 | if delayed_list != []:
197 | # logger.warning(f"{i18n('The following subtitles are delayed due to the previous audio being too long.')}:{delayed_list}")
198 | gr.Warning(f"{i18n('The following subtitles are delayed due to the previous audio being too long.')}:{delayed_list}")
199 | if failed_list != []:
200 | logger.warning(f"{i18n('Failed to synthesize the following subtitles or they were not synthesized')}:{failed_list}")
201 | gr.Warning(f"{i18n('Failed to synthesize the following subtitles or they were not synthesized')}:{failed_list}")
202 | audio_content = np.concatenate(audiolist)
203 | self.dump()
204 | sf.write(os.path.join(current_path, "SAVAdata", "output", f"{self.dir}.wav"), audio_content, sr)
205 | return sr, audio_content
206 |
207 | def get_state(self, idx):
208 | if self.subtitles[idx].is_success:
209 | if self.subtitles[idx].is_delayed:
210 | return "delayed"
211 | return "ok"
212 | elif self.subtitles[idx].is_success is None:
213 | return "None"
214 | return "failed"
215 |
216 | def append(self, subtitle: Subtitle):
217 | self.subtitles.append(subtitle)
218 |
219 | def sort(self, begin=0, end=0, partial=False):
220 | if not partial:
221 | self.subtitles.sort()
222 | else:
223 | if end > len(self.subtitles):
224 | end = len(self.subtitles)
225 | self.subtitles[begin:end] = sorted(self.subtitles[begin:end])
226 |
227 | def __iter__(self):
228 | return iter(self.subtitles)
229 |
230 | def __getitem__(self, index):
231 | return self.subtitles[index]
232 |
233 | def pop(self, index):
234 | self.subtitles.pop(index)
235 |
236 | def insert(self, index, item):
237 | self.subtitles.insert(index, item)
238 |
239 | def __len__(self):
240 | return len(self.subtitles)
241 |
242 | def export(self, fp=None, open_explorer=True, raw=False):
243 | if len(self.subtitles) == 0:
244 | gr.Info(i18n('There is no subtitle in the current workspace'))
245 | return None
246 | idx = 0
247 | srt_content = []
248 | for i in self.subtitles:
249 | idx += 1
250 | if raw or (i.real_st == 0 and i.real_et == 0):
251 | if SRT_TIME_Pattern.fullmatch(i.start_time_raw) and SRT_TIME_Pattern.fullmatch(i.end_time_raw):
252 | start = i.start_time_raw
253 | end = i.end_time_raw
254 | else:
255 | start = to_time(i.start_time)
256 | end = to_time(i.end_time)
257 | else:
258 | start = to_time(i.real_st / self.sr)
259 | end = to_time(i.real_et / self.sr)
260 | srt_content.append(str(idx) + "\n")
261 | srt_content.append(f"{start} --> {end}" + "\n")
262 | if Sava_Utils.config.export_spk_pattern and i.speaker:
263 | srt_content.append(Sava_Utils.config.export_spk_pattern.replace(r"{#NAME}", i.speaker).replace(r"{#TEXT}", i.text.strip()) + "\n")
264 | else:
265 | srt_content.append(i.text + "\n")
266 | srt_content.append("\n")
267 | if fp is None:
268 | file_path = os.path.join(current_path, "SAVAdata", "output", f"{self.dir if self.dir else datetime.datetime.now().strftime('%Y-%m-%d_%H-%M-%S')}.srt")
269 | else:
270 | file_path = fp
271 | os.makedirs(os.path.dirname(file_path), exist_ok=True)
272 | with open(file_path, "w", encoding="utf-8") as f:
273 | f.writelines(srt_content)
274 | if open_explorer and not Sava_Utils.config.server_mode:
275 | os.system(f'explorer /select, {file_path}')
276 | return [file_path]
277 |
--------------------------------------------------------------------------------
/Sava_Utils/subtitle_translation.py:
--------------------------------------------------------------------------------
1 | import gradio as gr
2 | import os
3 | import Sava_Utils
4 | from . import i18n,logger
5 | from .utils import read_file
6 | from .base_componment import Base_Componment
7 | from .translator.ollama import Ollama
8 |
9 |
10 | LANGUAGE = ["中文", "English", "日本語", "한국어", "Français"]
11 | TRANSLATORS = {"ollama": Ollama()}
12 | current_path = os.environ.get("current_path")
13 |
14 |
15 | def start_translation(in_files, language, batch_size, output_dir, *args, translator=None):
16 | output_list = []
17 | message = ""
18 | if in_files is None:
19 | gr.Info(i18n('Please upload the subtitle file!'))
20 | return i18n('Please upload the subtitle file!'), output_list
21 | for in_file in in_files:
22 | subtitle_list = read_file(in_file.name)
23 | tasks = TRANSLATORS[translator].construct_tasks(subtitle_list,int(batch_size))
24 | try:
25 | result,msg = TRANSLATORS[translator].api(tasks, language, *args, file_name=os.path.basename(in_file.name))
26 | if msg:
27 | message += f"{os.path.basename(in_file.name)}: {msg}\n"
28 | for sub, txt in zip(subtitle_list, result):
29 | sub.text = txt
30 | output_path = os.path.join(output_dir, f"{os.path.basename(in_file.name)[:-4]}_translated_to_{language}.srt")
31 | subtitle_list.export(fp=output_path, open_explorer=False, raw=True)
32 | output_list.append(output_path)
33 | except Exception as e:
34 | err = f"{i18n('Failed to translate')} {in_file.name} :{str(e)}"
35 | gr.Warning(err)
36 | message += err + "\n"
37 | continue
38 |
39 | # os.system(f'explorer {output_dir}')
40 | return message.strip() if message else "OK", output_list
41 |
42 |
43 | class Translation_module(Base_Componment):
44 | def __init__(self, config):
45 | self.ui = False
46 | self.config = config
47 | self.menu = []
48 |
49 | def update_cfg(self, config):
50 | self.config = config
51 | for i in TRANSLATORS.values():
52 | i.update_cfg(config=config)
53 | super().update_cfg(config)
54 |
55 | def getUI(self, *args):
56 | if not self.ui:
57 | self.ui = True
58 | self._UI(*args)
59 | else:
60 |             raise RuntimeError("UI has already been created.")
61 |
62 | def _UI(self, file_main):
63 | with gr.TabItem(i18n('Subtitle Translation')):
64 | with gr.Row():
65 | with gr.Column():
66 | self.translation_upload = gr.File(label=i18n('Upload your subtitle files (multiple allowed).'), file_count="multiple", file_types=[".srt", ".csv", ".txt"])
67 | self.result = gr.Text(interactive=False, value="", label=i18n('Output Info'))
68 | self.translation_output = gr.File(label=i18n('Output File'), file_count="multiple", interactive=False)
69 | self.send_btn = gr.Button(value=i18n('Send output files to Main Page'), interactive=True)
70 | self.send_btn.click(lambda x: [i.name for i in x] if x is not None else x, inputs=[self.translation_output], outputs=[file_main])
71 | with gr.Column():
72 | self.translation_target_language = gr.Dropdown(label=i18n('Specify Target Language'), choices=LANGUAGE, value=LANGUAGE[1], interactive=True)
73 | self.batch_size = gr.Number(label="Batch Size", value=5, minimum=1, interactive=True)
74 | self.output_dir = gr.Text(value=os.path.join(current_path, "SAVAdata", "output"), label=i18n('File Output Path'), interactive=not Sava_Utils.config.server_mode, visible=not Sava_Utils.config.server_mode, max_lines=1)
75 | self.translator = gr.Radio(label=i18n('Select Translator'), choices=[i for i in TRANSLATORS.keys()], value="ollama")
76 | Base_args = [self.translation_upload, self.translation_target_language, self.batch_size, self.output_dir]
77 | with gr.Column():
78 | v = True
79 | for i in TRANSLATORS.keys():
80 | x = gr.Column(visible=v)
81 | with x:
82 | TRANSLATORS[i].update_cfg(config=self.config)
83 | TRANSLATORS[i].getUI(*Base_args, output_info=self.result, output_files=self.translation_output)
84 | v = False
85 | self.menu.append(x)
86 |             self.translator.change(lambda x: [gr.update(visible=x == i) for i in TRANSLATORS.keys()], inputs=[self.translator], outputs=self.menu)
87 |
--------------------------------------------------------------------------------
/Sava_Utils/translator/__init__.py:
--------------------------------------------------------------------------------
1 | from abc import ABC, abstractmethod
2 | from ..base_componment import Base_Componment
3 | import re
4 |
5 | class Traducteur(Base_Componment):
6 | def __init__(self, name, config=None):
7 | self.name = name
8 | self.args = []
9 | super().__init__(config)
10 |
11 | def update_cfg(self, config):
12 | super().update_cfg(config)
13 |
14 | def construct_tasks(self, subtitles, batch_size: int = 1):
15 | tasks = [[]]
16 | for idx, item in enumerate(subtitles):
17 | tasks[-1].append(re.sub(r'\n+', '\n', item.text).strip())
18 | if (idx + 1) % batch_size == 0:
19 | tasks.append([])
20 | if len(tasks[-1]) == 0:
21 | tasks.pop(-1)
22 | return tasks
23 |
24 | @abstractmethod
25 | def api(self, *args, file_name: str = "", **kwargs):
26 | raise NotImplementedError
27 |
--------------------------------------------------------------------------------
/Sava_Utils/translator/ollama.py:
--------------------------------------------------------------------------------
1 | import requests
2 | import gradio as gr
3 | import json
4 | import re
5 | import subprocess
6 | from . import Traducteur
7 | from ..utils import rc_open_window
8 | from .. import logger, i18n
9 | from tqdm import tqdm
10 |
11 |
12 | class Ollama(Traducteur):
13 | def __init__(self, config=None):
14 | self.models = []
15 | super().__init__("ollama", config)
16 |
17 | def update_cfg(self, config):
18 | self.ollama_url = config.ollama_url
19 | super().update_cfg(config)
20 |
21 | def get_models(self, url):
22 | try:
23 | if self.server_mode:
24 |                 result = subprocess.run(["ollama", "list"], capture_output=True, text=True)  # argument-list form runs without a shell on any OS
25 | lines = result.stdout.strip().split("\n")[1:]
26 | self.models = [i.split()[0] for i in lines]
27 | # print(self.models)
28 | return gr.update(choices=self.models, value=self.models[0] if len(self.models) != 0 else None)
29 | if url in [None, "", "Default"]:
30 | url = self.ollama_url
31 | response = requests.get(f'{url}/api/tags')
32 | response.raise_for_status()
33 | self.models.clear()
34 | for item in json.loads(response.content)["models"]:
35 | self.models.append(item["name"])
36 | except Exception as e:
37 | gr.Warning(f"{i18n('Failed to get model list from Ollama')}: {str(e)}")
38 | logger.error(f"{i18n('Failed to get model list from Ollama')}: {str(e)}")
39 | return gr.update(choices=self.models, value=self.models[0] if len(self.models) != 0 else None)
40 |
41 | def unload_model(self, model):
42 | if model in [None, [], ""] or self.server_mode:
43 | gr.Warning(i18n('You must specify the model!'))
44 | return None
45 | rc_open_window(f"ollama stop {model} && exit")
46 |
47 | def api(self, tasks, target_lang, model_name, url, custom_prompt, num_history, file_name: str = ""):
48 | num_history = int(num_history)
49 | if url in [None, "", "Default"] or self.server_mode:
50 | url = self.ollama_url
51 | if model_name in [None, [], ""]:
52 | raise ValueError(i18n('You must specify the model!'))
53 | ret = []
54 | msg = ""
55 | request_data = {
56 | "model": model_name,
57 | "messages": [],
58 | "stream": False,
59 | }
60 | for task in tqdm(tasks, desc=f"{i18n('Translating')}: {file_name}", total=len(tasks)):
61 | text = "\n\n".join(task)
62 | if custom_prompt:
63 | prompt = custom_prompt + '\n' + text
64 | else:
65 | prompt = f"Please translate the following content into {target_lang}. Strictly preserve the original paragraph structure. Do not include any additional comments or explanations---return only the translated text:\n{text}"
66 | data = {"role": "user", "content": prompt}
67 | request_data["messages"].append(data)
68 | response = requests.post(url=f'{url}/api/chat', json=request_data)
69 | response.raise_for_status()
70 | response_dict = json.loads(response.content)["message"]
71 |             result = re.sub(r'<think>.*?</think>', '', response_dict["content"], flags=re.DOTALL).strip()  # strip <think> reasoning blocks emitted by thinking models
72 |
73 | request_data["messages"].append(response_dict)
74 | if len(request_data["messages"]) > 2*num_history:
75 | request_data["messages"].pop(0)
76 | request_data["messages"].pop(0)
77 |
78 | #print(request_data)
79 | batch = result.split("\n\n")
80 | d = len(task) - len(batch)
81 | if d:
82 | msg += f"{i18n('The language model has probably made a mistake')} @{len(ret)+1}-{len(ret)+len(task)}\n"
83 | if d > 0:
84 | batch += ["" for _ in range(d)]
85 | else:
86 | batch = batch[: len(task)]
87 | ret += batch
88 | return ret, msg
89 |
90 | def _UI(self, *inputs, output_info, output_files):
91 | from ..subtitle_translation import start_translation
92 |
93 | if self.server_mode:
94 | self.get_models("")
95 | with gr.Column():
96 | gr.Markdown(i18n('OLLAMA_NOTICE'))
97 | self.select_model = gr.Dropdown(label=i18n('Select Your Model'), choices=self.models, allow_custom_value=False)
98 | self.api_url = gr.Text(value="Default", interactive=not self.server_mode, label="URL", max_lines=1)
99 | with gr.Row():
100 | self.unload_model_btn = gr.Button(value=i18n('Unload Model'), visible=not self.server_mode, interactive=not self.server_mode)
101 | self.unload_model_btn.click(self.unload_model, inputs=[self.select_model])
102 | if not self.server_mode:
103 | self.refresh_model_btn = gr.Button(value="🔄️")
104 | self.refresh_model_btn.click(self.get_models, inputs=[self.api_url], outputs=[self.select_model])
105 | self.prompt = gr.Text(label=i18n('Custom prompt (enabled when filled in)'), value='', placeholder="Directly translate the following content to English:", interactive=True)
106 | self.num_history = gr.Slider(label=i18n('History Message Limit'), value=2, minimum=0, maximum=10, step=1)
107 | self.translate_btn = gr.Button(value=i18n('Start Translating'), variant="primary")
108 | self.translate_btn.click(lambda *args: start_translation(*args, translator="ollama"), inputs=[*inputs, self.select_model, self.api_url, self.prompt, self.num_history], outputs=[output_info, output_files])
109 |
--------------------------------------------------------------------------------
/Sava_Utils/tts_projects/__init__.py:
--------------------------------------------------------------------------------
1 | from ..base_componment import Base_Componment
2 | from abc import ABC, abstractmethod
3 |
4 |
5 | class TTSProjet(Base_Componment):
6 |
7 | def __init__(self, name, config):
8 | self.name = name
9 | self.args = []
10 | super().__init__(config)
11 |
12 | @abstractmethod
13 | def api(self, *args, **kwargs):
14 | raise NotImplementedError
15 |
16 | @abstractmethod
17 | def save_action(self, *args, **kwargs):
18 | raise NotImplementedError
19 |
20 | def before_gen_action(self, *args, **kwargs):
21 | pass
22 |
23 | @abstractmethod
24 | def arg_filter(self, *args):
25 | raise NotImplementedError
26 |
--------------------------------------------------------------------------------
/Sava_Utils/tts_projects/bv2.py:
--------------------------------------------------------------------------------
1 | from . import TTSProjet
2 | import requests
3 | import gradio as gr
4 | from ..utils import positive_int
5 | from .. import logger, i18n
6 |
7 |
8 | class BV2(TTSProjet):
9 | def __init__(self, config):
10 | super().__init__("bv2", config)
11 |
12 | def api(
13 | self,
14 | text,
15 | mid,
16 | spk_name,
17 | sid,
18 | lang,
19 | length,
20 | noise,
21 | noisew,
22 | sdp,
23 | emotion,
24 | split,
25 | style_text,
26 | style_weight,
27 | port,
28 | ):
29 | try:
30 | API_URL = f'http://127.0.0.1:{port}/voice'
31 | data_json = {"model_id": mid, "speaker_name": spk_name, "speaker_id": sid, "language": lang, "length": length, "noise": noise, "noisew": noisew, "sdp_ratio": sdp, "emotion": emotion, "auto_translate": False, "auto_split": split, "style_text": style_text, "style_weight": style_weight, "text": text}
32 | # print(data_json)
33 | response = requests.get(url=API_URL, params=data_json)
34 | response.raise_for_status()
35 | return response.content
36 | except Exception as e:
37 | err = f"{i18n('An error has occurred. Please check if the API is running correctly. Details')}:{e}"
38 | logger.error(err)
39 | return None
40 |
41 | def save_action(self, *args, text: str = None):
42 | language, port, mid, sid, speaker_name, sdp_ratio, noise_scale, noise_scale_w, length_scale, emotion_text = args
43 | sid, port, mid = positive_int(sid, port, mid)
44 | if speaker_name is not None and speaker_name != "":
45 | audio = self.api(text=text, mid=mid, spk_name=speaker_name, sid=None, lang=language, length=length_scale, noise=noise_scale, noisew=noise_scale_w, sdp=sdp_ratio, split=False, style_text=None, style_weight=0, port=port, emotion=emotion_text)
46 | else:
47 | audio = self.api(text=text, mid=mid, spk_name=None, sid=sid, lang=language, length=length_scale, noise=noise_scale, noisew=noise_scale_w, sdp=sdp_ratio, split=False, style_text=None, style_weight=0, port=port, emotion=emotion_text)
48 | return audio
49 |
50 | def switch_spk(self, choice):
51 | if choice == "Speaker_ID":
52 | return gr.update(label="Speaker_ID", value=0, visible=True, interactive=True), gr.update(label="Speaker_Name", visible=False, value="", interactive=True)
53 | else:
54 | return gr.update(label="Speaker_ID", value=0, visible=False, interactive=True), gr.update(label="Speaker_Name", visible=True, value="", interactive=True)
55 |
56 | def _UI(self):
57 | with gr.TabItem("Bert-VITS2-HiyoriUI"):
58 | with gr.Row():
59 | with gr.Column():
60 | self.spkchoser = gr.Radio(label=i18n('Select Speaker ID or Speaker Name'), choices=['Speaker_ID', 'Speaker_Name'], value="Speaker_ID")
61 | with gr.Row():
62 | self.model_id = gr.Number(label="Model_id", value=0, visible=True, interactive=True)
63 | self.spkid = gr.Number(label="Speaker_ID", value=0, visible=True, interactive=True)
64 | self.speaker_name = gr.Textbox(label="Speaker_Name", visible=False, interactive=True)
65 | self.language1 = gr.Dropdown(choices=['ZH', 'JP', 'EN', 'AUTO'], value='ZH', label="Language", interactive=True, allow_custom_value=False)
66 | with gr.Accordion(label=i18n('Advanced Parameters'), open=False):
67 | self.sdp_ratio = gr.Slider(minimum=0, maximum=1, value=0.2, step=0.1, label="SDP Ratio")
68 | self.noise_scale = gr.Slider(minimum=0.1, maximum=2, value=0.6, step=0.1, label="Noise Scale")
69 | self.noise_scale_w = gr.Slider(minimum=0.1, maximum=2, value=0.8, step=0.1, label="Noise Scale W")
70 | self.length_scale = gr.Slider(minimum=0.1, maximum=2, value=1, step=0.1, label="Length Scale")
71 | self.emo_text = gr.Textbox(label="text prompt", interactive=True, value="")
72 | with gr.Row():
73 | self.api_port1 = gr.Number(label="API Port", value=5000, visible=not self.server_mode, interactive=not self.server_mode)
74 | self.spkchoser.change(self.switch_spk, inputs=[self.spkchoser], outputs=[self.spkid, self.speaker_name])
75 | self.gen_btn1 = gr.Button(value=i18n('Generate Audio'), variant="primary", visible=True)
76 | BV2_ARGS = [
77 | self.language1,
78 | self.api_port1,
79 | self.model_id,
80 | self.spkid,
81 | self.speaker_name,
82 | self.sdp_ratio,
83 | self.noise_scale,
84 | self.noise_scale_w,
85 | self.length_scale,
86 | self.emo_text,
87 | ]
88 | return BV2_ARGS
89 |
90 | def arg_filter(self, *args):
91 | in_file, fps, offset, max_workers, language, port, mid, spkid, speaker_name, sdp_ratio, noise_scale, noise_scale_w, length_scale, emo_text = args
92 | pargs = (language, port, mid, spkid, speaker_name, sdp_ratio, noise_scale, noise_scale_w, length_scale, emo_text)
93 | kwargs = {'in_files': in_file, 'fps': fps, 'offset': offset, 'proj': "bv2", 'max_workers': max_workers}
94 | return pargs, kwargs
95 |
--------------------------------------------------------------------------------
/Sava_Utils/tts_projects/custom.py:
--------------------------------------------------------------------------------
1 | from . import TTSProjet
2 | import requests
3 | import gradio as gr
4 | from .. import logger, i18n, MANUAL
5 | import time
6 | import os
7 |
8 | current_path = os.environ.get("current_path")
9 |
10 |
11 | class Custom(TTSProjet):
12 | def __init__(self, config):
13 | self.custom_api_list = []
14 | self.refresh_custom_api_list()
15 | super().__init__("custom", config)
16 |
17 | def api(self, func, text):
18 | return func(text)
19 |
20 | def _UI(self):
21 | with gr.TabItem(i18n('Custom API')):
22 | with gr.Column():
23 | gr.Markdown(value=MANUAL.getInfo("help_custom"))
24 | self.choose_custom_api = gr.Dropdown(label=i18n('Choose Custom API Code File'), choices=self.custom_api_list, value=self.custom_api_list[0] if self.custom_api_list != [] else '', allow_custom_value=False, scale=4)
25 | with gr.Row():
26 | self.gen_btn4 = gr.Button(value=i18n('Generate Audio'), variant="primary", scale=8)
27 | self.refresh_custom_btn = gr.Button(value="🔄️", scale=1, min_width=40)
28 | self.refresh_custom_btn.click(self.refresh_custom_api_list, outputs=[self.choose_custom_api])
29 | return []
30 |
31 | def before_gen_action(self, custom_api_path, temp_namesp, **kwargs):
32 | # print(args)
33 | logger.info(f"Exec: custom_api_path {custom_api_path}")
34 | with open(os.path.join(current_path, "SAVAdata", "presets", custom_api_path), "r", encoding="utf-8") as f:
35 | code = f.read()
36 | exec(code, temp_namesp)
37 |
38 | def save_action(self, custom_api_path, temp_namesp, text):
39 | return self.api(temp_namesp["custom_api"], text)
40 |
41 | def refresh_custom_api_list(self):
42 | self.custom_api_list = ['None']
43 | try:
44 | preset_dir = os.path.join(current_path, "SAVAdata", "presets")
45 | if os.path.isdir(preset_dir):
46 | self.custom_api_list += [i for i in os.listdir(preset_dir) if i.endswith(".py")]
47 | else:
48 | logger.info(i18n('No custom API code file found.'))
49 | except Exception as e:
50 | self.custom_api_list = ['None']
51 | err = f"Error: {e}"
52 | logger.error(err)
53 | gr.Warning(err)
54 | time.sleep(0.1)
55 | return gr.update(value="None", choices=self.custom_api_list)
56 |
57 | def arg_filter(self, *args):
58 | input_file, fps, offset, workers, custom_api = args
59 | if custom_api in [None, 'None', '']:
60 | gr.Info(i18n('Please select a valid custom API code file!'))
61 | raise Exception(i18n('Please select a valid custom API code file!'))
62 | kwargs = {'in_files': input_file, 'fps': fps, 'offset': offset, 'proj': "custom", 'max_workers': workers}
63 | return (custom_api, dict()), kwargs #
64 |
--------------------------------------------------------------------------------
/Sava_Utils/tts_projects/mstts.py:
--------------------------------------------------------------------------------
1 | from . import TTSProjet
2 | import os
3 | import re
4 | import json
5 | import requests
6 | import gradio as gr
7 | from .. import logger, i18n
8 | from xml.etree import ElementTree
9 |
10 | current_path = os.environ.get("current_path")
11 |
12 |
13 | class MSTTS(TTSProjet):
14 | def __init__(self, config):
15 | self.ms_access_token = None
16 | self.ms_speaker_info = None
17 | self.cfg_ms_region = None
18 | self.cfg_ms_key = None
19 | self.ms_lang_option = ""
20 | super().__init__("mstts", config)
21 | self.ms_refresh()
22 |
23 | def update_cfg(self, config):
24 | self.cfg_ms_region = config.ms_region
25 | self.cfg_ms_key = config.ms_key
26 | self.ms_lang_option = config.ms_lang_option
27 | super().update_cfg(config)
28 |
29 | def getms_speakers(self):
30 | # if not os.path.exists(os.path.join(current_path,"SAVAdata", "ms_speaker_info.json")):
31 | if not os.path.exists(os.path.join(current_path, "SAVAdata", "ms_speaker_info_raw.json")):
32 | try:
33 | assert self.cfg_ms_key not in [None, ""], i18n('Please fill in your key to get MSTTS speaker list.')
34 | headers = {"Ocp-Apim-Subscription-Key": self.cfg_ms_key}
35 | url = f"https://{self.cfg_ms_region}.tts.speech.microsoft.com/cognitiveservices/voices/list"
36 | data = requests.get(url=url, headers=headers)
37 | data.raise_for_status()
38 | info = json.loads(data.content)
39 | with open(
40 | os.path.join(current_path, "SAVAdata", "ms_speaker_info_raw.json"),
41 | "w",
42 | encoding="utf-8",
43 | ) as f:
44 | json.dump(info, f, indent=2, ensure_ascii=False)
45 | except Exception as e:
46 | err = f"{i18n('Can not get speaker list of MSTTS. Details')}: {e}"
47 | gr.Warning(err)
48 | logger.error(err)
49 | self.ms_speaker_info = {}
50 | return None
51 | dataraw = json.load(open(os.path.join(current_path, "SAVAdata", "ms_speaker_info_raw.json"), encoding="utf-8")) # list
52 | classified_info = {}
53 | # target_language=["zh","ja","en","ko","fr"]
54 | target_language = re.split(r'(?<=[,,])| ', self.ms_lang_option)
55 | target_language = [x.strip() for x in target_language if x.strip()]
56 | if len(target_language) == 0:
57 | target_language = [""]
58 | for i in dataraw:
59 | if any(lan in i["Locale"] for lan in target_language):
60 | if i["Locale"] not in classified_info:
61 | classified_info[i["Locale"]] = {}
62 | classified_info[i["Locale"]][i["LocalName"]] = i
63 |         with open(os.path.join(current_path, "SAVAdata", "ms_speaker_info.json"), "w", encoding="utf-8") as f:
64 |             json.dump(classified_info, f, indent=2, ensure_ascii=False)
65 |         self.ms_speaker_info = json.load(open(os.path.join(current_path, "SAVAdata", "ms_speaker_info.json"), encoding="utf-8"))
66 |
67 | def getms_token(self):
68 | fetch_token_url = f"https://{self.cfg_ms_region}.api.cognitive.microsoft.com/sts/v1.0/issueToken"
69 | headers = {"Ocp-Apim-Subscription-Key": self.cfg_ms_key}
70 | try:
71 | response = requests.post(fetch_token_url, headers=headers)
72 | response.raise_for_status()
73 | self.ms_access_token = str(response.text)
74 | except Exception as e:
75 | err = f"{i18n('Failed to obtain access token from Microsoft. Check your API key, server status, and network connection. Details')}: {e}"
76 | gr.Warning(err)
77 | logger.error(err)
78 | self.ms_access_token = None
79 |
80 | def api(self, language, speaker, style, role, rate, pitch, text, **kwargs):
81 | xml_body = ElementTree.Element("speak", version="1.0")
82 | xml_body.set("xmlns", "http://www.w3.org/2001/10/synthesis")
83 | xml_body.set("xmlns:mstts", "https://www.w3.org/2001/mstts")
84 | xml_body.set("xml:lang", "zh-CN")
85 | voice = ElementTree.SubElement(xml_body, "voice")
86 | voice.set("name", self.ms_speaker_info[language][speaker]["ShortName"]) # Short name
87 | express = ElementTree.SubElement(voice, "express-as")
88 | express.set("style", style)
89 | express.set("role", role)
90 | prosody = ElementTree.SubElement(express, "prosody")
91 | prosody.set("rate", f"{int((rate - 1) * 100)}%")
92 | prosody.set("pitch", f"{int((pitch- 1) * 100)}%")
93 | prosody.text = text
94 | body = ElementTree.tostring(xml_body)
95 | try:
96 | if self.ms_access_token is None:
97 | self.getms_token()
98 | assert self.ms_access_token is not None, i18n('Failed to obtain access token from Microsoft.')
99 | headers = {
100 | "X-Microsoft-OutputFormat": "riff-48khz-16bit-mono-pcm",
101 | "Content-Type": "application/ssml+xml",
102 | "Authorization": "Bearer " + self.ms_access_token,
103 | "User-Agent": "py_sava",
104 | }
105 | response = requests.post(
106 | url=f"https://{self.cfg_ms_region}.tts.speech.microsoft.com/cognitiveservices/v1",
107 | headers=headers,
108 | data=body,
109 | )
110 | response.raise_for_status()
111 | return response.content
112 | except Exception as e:
113 |             err = f"{i18n('An error has occurred. Please check if the API is running correctly. Details')}: {e}"
114 | logger.error(err)
115 | return None
116 |
117 | def _UI(self):
118 | with gr.TabItem("Azure-TTS(Microsoft)"):
119 | with gr.Column():
120 | self.ms_refresh_btn = gr.Button(value=i18n('Refresh speakers list'), variant="secondary")
121 | if self.ms_speaker_info == {}:
122 | self.ms_languages = gr.Dropdown(label=i18n('Choose Language'), value=None, choices=[], allow_custom_value=False, interactive=True)
123 | self.ms_speaker = gr.Dropdown(label=i18n('Choose Your Speaker'), value=None, choices=[], allow_custom_value=False, interactive=True)
124 | else:
125 | choices = list(self.ms_speaker_info.keys())
126 | self.ms_languages = gr.Dropdown(label=i18n('Choose Language'), value=choices[0], choices=choices, allow_custom_value=False, interactive=True)
127 | choices = list(self.ms_speaker_info[choices[0]].keys())
128 | self.ms_speaker = gr.Dropdown(label=i18n('Choose Your Speaker'), value=None, choices=choices, allow_custom_value=False, interactive=True)
129 | del choices
130 | with gr.Row():
131 | self.ms_style = gr.Dropdown(label=i18n('Style'), value=None, choices=[], allow_custom_value=False, interactive=True)
132 | self.ms_role = gr.Dropdown(label=i18n('Role'), value=None, choices=[], allow_custom_value=False, interactive=True)
133 | self.ms_speed = gr.Slider(minimum=0.2, maximum=2, step=0.01, label=i18n('Speed'), value=1, interactive=True)
134 | self.ms_pitch = gr.Slider(minimum=0.5, maximum=1.5, step=0.01, label=i18n('Pitch'), value=1, interactive=True)
135 | gr.Markdown(value=i18n('MSTTS_NOTICE'))
136 | self.gen_btn3 = gr.Button(value=i18n('Generate Audio'), variant="primary", visible=True)
137 | self.ms_refresh_btn.click(self.ms_refresh, outputs=[self.ms_languages])
138 | self.ms_languages.change(self.display_ms_spk, inputs=[self.ms_languages], outputs=[self.ms_speaker])
139 | self.ms_speaker.change(self.display_style_role, inputs=[self.ms_languages, self.ms_speaker], outputs=[self.ms_style, self.ms_role])
140 | MSTTS_ARGS = [self.ms_languages, self.ms_speaker, self.ms_style, self.ms_role, self.ms_speed, self.ms_pitch]
141 | return MSTTS_ARGS
142 |
143 | def save_action(self, *args, text: str = None):
144 | language, speaker, style, role, rate, pitch = args
145 | audio = self.api(language, speaker, style, role, rate, pitch, text)
146 | return audio
147 |
148 | def before_gen_action(self, *args, **kwargs):
149 | self.update_cfg(kwargs.get("config"))
150 | if self.ms_access_token is None:
151 | self.getms_token()
152 | assert self.ms_access_token is not None, i18n('Failed to obtain access token from Microsoft.')
153 |
154 | def arg_filter(self, *args):
155 | input_file, fps, offset, workers, ms_language, ms_speaker, ms_style, ms_role, ms_speed, ms_pitch = args
156 | if ms_speaker in [None, "", []]:
157 | gr.Info(i18n('Please Select Your Speaker!'))
158 | raise Exception(i18n('Please Select Your Speaker!'))
159 | if self.cfg_ms_key == "":
160 | gr.Warning(i18n('Please fill in your key!'))
161 | raise Exception(i18n('Please fill in your key!'))
162 | pargs = (ms_language, ms_speaker, ms_style, ms_role, ms_speed, ms_pitch)
163 | kwargs = {'in_files': input_file, 'fps': fps, 'offset': offset, 'proj': "mstts", 'max_workers': workers}
164 | return pargs, kwargs
165 |
166 | def ms_refresh(self): # language
167 | self.getms_speakers()
168 | if self.ms_speaker_info == {}:
169 | return gr.update(value=None, choices=[], allow_custom_value=False)
170 | choices = list(self.ms_speaker_info.keys())
171 | return gr.update(value=choices[0], choices=choices, allow_custom_value=False)
172 |
173 | def display_ms_spk(self, language): # speaker
174 | if language in [None, ""]:
175 | return gr.update(value=None, choices=[], allow_custom_value=False)
176 | choices = list(self.ms_speaker_info[language].keys())
177 | return gr.update(value=choices[0], choices=choices, allow_custom_value=False)
178 |
179 | def display_style_role(self, language, speaker):
180 | if language in [None, ""] or speaker in [None, ""]:
181 | return gr.update(value=None, choices=[], allow_custom_value=False), gr.update(value=None, choices=[], allow_custom_value=False)
182 | choices1 = ["Default"] + self.ms_speaker_info[language][speaker].get("StyleList", [])
183 | choices2 = ["Default"] + self.ms_speaker_info[language][speaker].get("RolePlayList", [])
184 | return (gr.update(value=choices1[0], choices=choices1, allow_custom_value=False), gr.update(value=choices2[0], choices=choices2, allow_custom_value=False))
185 |
--------------------------------------------------------------------------------
/Sava_Utils/utils.py:
--------------------------------------------------------------------------------
1 | import os
2 | import time
3 | import subprocess
4 | from . import logger, i18n
5 | from .librosa_load import get_rms
6 | import gradio as gr
7 | import numpy as np
8 | import csv
9 | import re
10 | import shutil
11 | import platform
12 | import Sava_Utils
13 |
14 |
15 | current_path = os.environ.get("current_path")
16 | system = platform.system()
17 | LABELED_TXT_PATTERN = re.compile(r'^([^::]{1,20})[::](.+)')
18 |
19 |
20 | class Flag:
21 | def __init__(self):
22 | self.stop = False
23 | self.using = False
24 |
25 | def set(self):
26 | if self.using:
27 | self.stop = True
28 | return i18n('After completing the generation of the next audio, the task will be aborted.')
29 | else:
30 | return i18n('No running tasks.')
31 |
32 | def clear(self):
33 | self.stop = False
34 | self.using = False
35 |
36 | def is_set(self):
37 | return self.stop
38 |
39 | def __enter__(self):
40 | self.stop = False
41 | self.using = True
42 | return self
43 |
44 | def __exit__(self, exception_type, exception_value, traceback):
45 | self.clear()
46 |
47 |
48 | def positive_int(*a):
49 | r = [max(0,int(x)) for x in a]
50 | return r if len(r)>1 else r[0]
51 |
52 |
53 | def clear_cache():
54 | dir = os.path.join(current_path, "SAVAdata", "temp")
55 | if os.path.exists(dir):
56 | shutil.rmtree(dir)
57 | logger.info(i18n('Temporary files cleared successfully!'))
58 | gr.Info(i18n('Temporary files cleared successfully!'))
59 | else:
60 | logger.info(i18n('There are no temporary files.'))
61 | gr.Info(i18n('There are no temporary files.'))
62 |
63 |
64 | def rc_open_window(command, dir=current_path):
65 | if system != "Windows":
66 | gr.Warning("This function is only available on Windows!")
67 | logger.warning("This function is only available on Windows!")
68 | return
69 | command = f'start cmd /k "{command}"'
70 | subprocess.Popen(command, cwd=dir, shell=True)
71 | logger.info(f"{i18n('Execute command')}:{command}")
72 | time.sleep(0.1)
73 |
74 |
75 | def rc_bg(command, dir=current_path, get_id=True):
76 | process = subprocess.Popen(command, cwd=dir, shell=True)
77 | logger.info(f"{i18n('Execute command')}:{command}")
78 | if get_id:
79 | yield process.pid
80 | yield process.wait()
81 |
82 |
83 | def kill_process(pid):
84 | if pid < 0:
85 | gr.Info(i18n('No running processes'))
86 | return None
87 | if system == "Windows":
88 | command = f"taskkill /t /f /pid {pid}"
89 | else:
90 | command = f"pkill --parent {pid} && kill {pid} " # not tested on real machine yet!!!
91 | subprocess.run(command, shell=True)
92 | logger.info(f"{i18n('Execute command')}:{command}")
93 | gr.Info(i18n('Process terminated.'))
94 |
95 |
96 | def file_show(files):
97 | if files in [None, []]:
98 | return ""
99 | if len(files) > 1:
100 | return i18n('')
101 | else:
102 | file = files[0]
103 | try:
104 | with open(file.name, "r", encoding="utf-8") as f:
105 | text = f.read()
106 | return text
107 | except Exception as error:
108 | return error
109 |
110 |
111 | from .subtitle import Base_subtitle, Subtitle, Subtitles
112 | from .edit_panel import *
113 |
114 |
115 | def read_srt(filename, offset):
116 |     subtitle_list = Subtitles()  # define before the try block so the except path can still return it
117 |     try:
118 |         with open(filename, "r", encoding="utf-8") as f:
119 |             file = f.readlines()
120 |         indexlist = []
121 | filelength = len(file)
122 | pattern = re.compile(r"\d+")
123 | for i in range(0, filelength):
124 | if " --> " in file[i]:
125 | if pattern.fullmatch(file[i - 1].strip().replace("\ufeff", "")):
126 | indexlist.append(i) # get line id
127 | listlength = len(indexlist)
128 | id = 1
129 | for i in range(0, listlength - 1):
130 | st, et = file[indexlist[i]].split(" --> ")
131 | # id = int(file[indexlist[i] - 1].strip().replace("\ufeff", ""))
132 | text = "".join(file[x] for x in range(indexlist[i] + 1, indexlist[i + 1] - 2))
133 | st = Subtitle(id, st, et, text, ntype="srt")
134 | st.add_offset(offset=offset)
135 | subtitle_list.append(st)
136 | id += 1
137 | st, et = file[indexlist[-1]].split(" --> ")
138 | # id = int(file[indexlist[-1] - 1].strip().replace("\ufeff", ""))
139 | text = "".join(file[x] for x in range(indexlist[-1] + 1, filelength))
140 | st = Subtitle(id, st, et, text, ntype="srt")
141 | st.add_offset(offset=offset)
142 | subtitle_list.append(st)
143 | except Exception as e:
144 | err = f"{i18n('Failed to read file')}: {str(e)}"
145 | logger.error(err)
146 | gr.Warning(err)
147 | return subtitle_list
148 |
149 |
150 | def read_prcsv(filename, fps, offset):
151 |     subtitle_list = Subtitles()  # define before the try block so the except path can still return it
152 |     try:
153 |         with open(filename, "r", encoding="utf-8", newline="") as csvfile:
154 |             reader = list(csv.reader(csvfile))
155 |             lenth = len(reader)
156 | stid = 1
157 | for index in range(1, lenth):
158 | if reader[index] == []:
159 | continue
160 | st = Subtitle(
161 | stid,
162 | reader[index][0],
163 | reader[index][1],
164 | reader[index][2],
165 | ntype="prcsv",
166 | fps=fps,
167 | )
168 | st.add_offset(offset=offset)
169 | subtitle_list.append(st)
170 | stid += 1
171 | except Exception as e:
172 | err = f"{i18n('Failed to read file')}: {str(e)}"
173 | logger.error(err)
174 | gr.Warning(err)
175 | return subtitle_list
176 |
177 |
178 | def read_txt(filename):
179 |     # REF_DUR = 2
180 |     subtitle_list = Subtitles()  # define before the try block so the except path can still return it
181 |     try:
182 |         with open(filename, "r", encoding="utf-8") as f:
183 |             text = f.read()
184 |         sentences = re.split(r"(?<=[!?。!?])(?=[^!?。!?()()[\]【】'\"“”]|$)|\n|(?<=[.])(?=\s|$)", text)
185 |         sentences = [s.strip() for s in sentences if s.strip()]
186 | idx = 1
187 | for s in sentences:
188 | subtitle_list.append(Subtitle(idx, "00:00:00,000", "00:00:00,000", s, ntype="srt"))
189 | idx += 1
190 | except Exception as e:
191 | err = f"{i18n('Failed to read file')}: {str(e)}"
192 | logger.error(err)
193 | gr.Warning(err)
194 | return subtitle_list
195 |
196 |
197 | def read_labeled_txt(filename: str, spk_dict: dict):
198 | try:
199 | idx = 1
200 | subtitle_list = Subtitles()
201 | subtitle_list.append(Subtitle(idx, "00:00:00,000", "00:00:00,000", "", ntype="srt"))
202 | with open(filename, 'r', encoding='utf-8') as f:
203 | for line in f:
204 | if line.startswith("#") or line.strip() == "":
205 | continue
206 | match = LABELED_TXT_PATTERN.match(line.strip())
207 | if match:
208 | speaker = match.group(1).strip()
209 | speaker = spk_dict.get(speaker, speaker)
210 | if speaker in ['', 'None']:
211 | speaker = None
212 | subtitle_list.append(Subtitle(idx, "00:00:00,000", "00:00:00,000", match.group(2).strip(), ntype="srt", speaker=speaker))
213 | idx += 1
214 | else:
215 | subtitle_list[-1].text += ',' + line
216 | if not subtitle_list[0].text:
217 | subtitle_list.pop(0)
218 | return subtitle_list
219 | except Exception as e:
220 | err = f"{i18n('Failed to read file')}: {str(e)}"
221 | logger.error(err)
222 | gr.Warning(err)
223 | return subtitle_list
224 |
225 |
226 | def get_speaker_map_from_file(in_files):
227 | speakers = set()
228 | if in_files in [[], None] or len(in_files) > 1:
229 | gr.Info(i18n('Creating a multi-speaker project can only upload one file at a time!'))
230 | return speakers, dict()
231 | yield speakers, dict()
232 | filename = in_files[0].name
233 | subtitles = read_labeled_file(filename, spk_dict={}, fps=30, offset=0)
234 | for i in subtitles:
235 | if i.speaker:
236 | speakers.add(i.speaker)
237 | else:
238 | speakers.add("None")
239 | speakers_dict = {i:i for i in speakers}
240 | yield speakers,speakers_dict
241 |
242 | def get_speaker_map_from_sub(subtitles:Subtitles):
243 | speakers = set()
244 | if subtitles is None or len(subtitles) == 0:
245 | gr.Info(i18n('There is no subtitle in the current workspace'))
246 | return speakers, dict()
247 | yield speakers,dict()
248 | for i in subtitles:
249 | if i.speaker:
250 | speakers.add(i.speaker)
251 | else:
252 | speakers.add("None")
253 | speakers_dict = {i: i for i in speakers}
254 | yield speakers, speakers_dict
255 |
256 |
257 | def modify_spkmap(map: dict, k: str, v: str):
258 | value = v.strip()
259 | map[k]= value if value!='None' else None
260 |
261 |
262 | def read_file(file_name, fps=30, offset=0):
263 | if Sava_Utils.config.server_mode:
264 | assert os.stat(file_name).st_size < 65536, i18n('Error: File too large') # 64KB
265 | if file_name[-4:].lower() == ".csv":
266 | subtitle_list = read_prcsv(file_name, fps, offset)
267 | elif file_name[-4:].lower() == ".srt":
268 | subtitle_list = read_srt(file_name, offset)
269 | elif file_name[-4:].lower() == ".txt":
270 | subtitle_list = read_txt(file_name)
271 | else:
272 | raise ValueError(i18n('Unknown format. Please ensure the extension name is correct!'))
273 | assert len(subtitle_list) != 0, "Empty file???"
274 | return subtitle_list
275 |
276 |
277 | def read_labeled_file(file_name, spk_dict, fps=30, offset=0):
278 | if file_name[-4:].lower() == ".txt":
279 | subtitle_list = read_labeled_txt(file_name, spk_dict)
280 | else:
281 | try:
282 | subtitle_list = read_file(file_name, fps, offset)
283 | except Exception as e:
284 | gr.Warning(str(e))
285 | return Subtitles()
286 | for i in subtitle_list:
287 | match = LABELED_TXT_PATTERN.match(i.text.strip())
288 | if match:
289 | speaker = match.group(1).strip()
290 | speaker = spk_dict.get(speaker, speaker)
291 | if speaker in ['', 'None']:
292 | speaker = None
293 | i.speaker = speaker
294 | i.text = match.group(2).strip()
295 | return subtitle_list
296 |
297 |
298 | def create_multi_speaker(in_files, use_labled_text_mode, spk_dict, fps, offset):
299 | if in_files in [[], None] or len(in_files) > 1:
300 | gr.Info(i18n('Creating a multi-speaker project can only upload one file at a time!'))
301 | return getworklist(), *load_page(Subtitles()), Subtitles()
302 | in_file = in_files[0]
303 | try:
304 | if not use_labled_text_mode:
305 | subtitle_list = read_file(in_file.name, fps, offset)
306 | else:
307 | subtitle_list = read_labeled_file(in_file.name, spk_dict, fps, offset)
308 | assert len(subtitle_list) != 0, "Empty???"
309 | except Exception as e:
310 | what = str(e)
311 | gr.Warning(what)
312 | return getworklist(), *load_page(Subtitles()), Subtitles()
313 | subtitle_list.set_dir_name(os.path.basename(in_file.name).replace(".", "-"))
314 | return getworklist(value=subtitle_list.dir), *load_page(subtitle_list), subtitle_list
315 |
316 |
317 | def remove_silence(audio, sr, padding_begin=0.1, padding_fin=0.15, threshold_db=-27):
318 | # Padding(sec) is actually margin of safety
319 | hop_length = 512
320 | rms_list = get_rms(audio, hop_length=hop_length).squeeze(0)
321 | threshold = 10 ** (threshold_db / 20.0)
322 | x = rms_list > threshold
323 | i = np.argmax(x)
324 | j = rms_list.shape[-1] - 1 - np.argmax(x[::-1])
325 | if not np.any(x) or i==j:
326 | return audio
327 | cutting_point1 = max(i * hop_length - int(padding_begin * sr), 0)
328 | cutting_point2 = min(j * hop_length + int(padding_fin * sr), audio.shape[-1])
329 | #print(audio.shape[-1],cutting_point1,cutting_point2)
330 | return audio[cutting_point1:cutting_point2]
331 |
--------------------------------------------------------------------------------
/check_i18n.sh:
--------------------------------------------------------------------------------
1 | IFS=$'\n'
2 | for file in $(find -type f -name '*.py');do
3 | for line in $(grep -oP "i18n\('\K[^']*(?='\))" $file);do
4 | if ! grep -Fq "\"$line\"" './Sava_Utils/i18nAuto/translations/zh_CN.py';then
5 | echo $line
6 | fi
7 | done
8 | done
--------------------------------------------------------------------------------
/create_build_script.sh:
--------------------------------------------------------------------------------
1 | #ex: $1 = pyinstaller
2 | echo -n "$1 " > build_sava.bat
3 | for i in $(find ./Sava_Utils/man -type f -name '*.py' );do
4 | module_name=$(basename $i .py)
5 | if [ $module_name != "__init__" ];then
6 | dn=$(dirname $i)
7 | dn=$(basename $dn)
8 | module_name="Sava_Utils.man.$dn.$module_name"
9 | echo -n "--hidden-import=$module_name " >> build_sava.bat
10 | fi
11 | done
12 | for i in $(find ./Sava_Utils/i18nAuto/translations -type f -name '*.py');do
13 | module_name=$(basename $i .py)
14 | if [ $module_name != "__init__" ];then
15 | module_name="Sava_Utils.i18nAuto.translations.$module_name"
16 | echo -n "--hidden-import=$module_name " >> build_sava.bat
17 | fi
18 | done
19 | echo '-F Srt-AI-Voice-Assistant.py' >> build_sava.bat
20 | echo 'pause' >> build_sava.bat
21 |
--------------------------------------------------------------------------------
/create_built-in_manual.sh:
--------------------------------------------------------------------------------
1 | sed '2d' ./README.md > ./docs/en_US/README.md
2 | for item in $(find ./docs -type f -name '*.md');do
3 | language="$(awk -F '/' '{print($(NF-1))}' <<< $item)"
4 | name="$(basename $item .md)"
5 | mkdir -p "./Sava_Utils/man/$language"
6 | cat <(echo -e "$name = r\"\"\"") $item <(echo -e "\n\"\"\"") > ./Sava_Utils/man/$language/$name.py
7 | #echo $language $name
8 | done
--------------------------------------------------------------------------------
/docs/en_US/README.md:
--------------------------------------------------------------------------------
1 | # Srt-AI-Voice-Assistant
2 | ### This project can use multiple AI TTS engines to dub your subtitle or text files, and it provides convenient auxiliary functions such as audio/video transcription and subtitle translation.
3 | If you have encountered problems or want to create a feature request, please go to [Issues](https://github.com/YYuX-1145/Srt-AI-Voice-Assistant/issues) .
4 | ## Features
5 | - ✅ Open-source, Friendly WebUI interface, Run locally and Accessible via LAN
6 | - ✅ Support multiple TTS projects: BV2, GSV, CosyVoice2, AzureTTS, and you can even customize your APIs!
7 | - ✅ Save personalized settings and presets
8 | - ✅ Batch mode
9 | - ✅ Subtitle editing
10 | - ✅ Subtitle translation
11 | - ✅ Regenerating Specific Lines
12 | - ✅ Support multi-speaker dubbing
13 | - ✅ Re-export subtitles
14 | - ✅ Extended functions: subtitle transcription for audio/video
15 | - ✅ I18n
16 |
17 | ## [Download the packaged version only](https://github.com/YYuX-1145/Srt-AI-Voice-Assistant/releases)
18 | * Use this version only when there are dependency conflicts or installation issues.
19 |
20 | ## [Download the integrated package with GPT-SoVITS (From Hugging Face)](https://huggingface.co/YYuX/GPT-SoVITS-SAVA-windows-package/tree/main)
21 | * The GPT-SoVITS integrated package includes the packaged version, without removing any built-in or pretrained models, and its finetuning and training code is the same as in the official repository.
22 | * Note: the packaged version included in the GPT-SoVITS integrated package may not be the latest; overwrite it to update.
--------------------------------------------------------------------------------
/docs/en_US/changelog.md:
--------------------------------------------------------------------------------
1 | ## ChangeLog
2 |
3 | ### V4-2503 Update:
4 | #### To make versions clearer, version numbers are now assigned in addition to release dates.
5 | #### After this update, the synthesis history and saved speakers from the previous version need to be recreated; otherwise, errors may occur!
6 | 1. Subtitle editing
7 | 2. Subtitle translation
8 | 3. Various details improved and bugs fixed
9 | 4. Supports CosyVoice2 (reusing GSV panel)
10 | 5. (4.0.1) Batch mode
11 | 6. (4.1) Server mode
12 | 7. (4.2) I18n
13 | 8. (4.3) Automatic audio acceleration & silence removing; Creating multi-speaker dubbing project from labeled texts.
14 | 9. (4.3.1) Add Find and Replace; add a one-click regeneration button.
15 | 10. (4.4) Polyphone editing for GPT-SoVITS and automatic model detection; Allow custom prompt for Ollama; Export subtitles with speaker names using customizable templates
16 |
17 | ### 250214 Update:
18 | 1. Supports reading historical projects
19 | 2. Supports multi-speaker dubbing
20 |
21 | ### 250123 Update:
22 | 1. Supports re-exporting SRT subtitle files that match the actual start and end timestamps after synthesis; also supports reading TXT text files for synthesis, in which case paragraphs are split into sentences.
23 | 2. To improve future extensibility and simplicity, the single-script-file design, which made downloads more convenient, had to be abandoned. The code will be refactored step by step starting from this version.
24 | 3. Added some documentation.
25 |
26 | ### 240811 Update:
27 | 1. Notifies users of error messages
28 | 2. Automatic detection of TTS-Project envs
29 | 3. Compatibility with api-v1 restored
30 | 4. A major feature update: Support regenerating specific lines if you're not satisfied with them.
--------------------------------------------------------------------------------
/docs/en_US/help.md:
--------------------------------------------------------------------------------
1 | # User Guide
2 |
3 | ## 0. Service Configuration and Usage
4 | #### This project can call 2 local projects: Bert-VITS2, GPT-SoVITS
5 | #### And 1 online service: Microsoft TTS
6 | * **For Local TTS Projects**:
7 |
8 | * Fill in and save the project root path and the corresponding Python interpreter path on the settings page.
9 | * **A Simpler method**: Place the program in the root directory of the integrated package, then click the corresponding button on the first page to start the API service!
10 |
11 | * **For Microsoft TTS**:
12 |
13 | * Follow the tutorial to register an account and fill in the API key on the settings page.
14 | * Note the monthly free quota!
15 |
16 | ## 1. Getting Started
17 | ### This project supports dubbing for subtitles or plain text.
18 | * **For subtitles**:
19 |
20 | * When a subtitle is too long, subsequent subtitles will be delayed accordingly. You can set the minimum speech interval in the settings.
21 |
22 | * **For plain text**:
23 |
24 | * The text will be split into subtitle entries based on ending punctuation and line breaks.
25 |
26 | * After generation, you can export subtitles with actual audio timestamps in the editing page.
27 |
28 | ### A. Single Speaker Scenario
29 | * **I.** Upload subtitle or text files in the right panel of the `Subtitle Dubbing` page.
30 |
31 | * **II.** Select your project and adjust parameters in the middle panel.
32 |
33 | * **III.** Click `Generate Audio` Button at the bottom and wait.
34 |
35 | * **IV.** Download your audio.
36 |
37 | ### B. Multi-Speaker Scenario
38 | * **I.** Upload subtitle/text files in the right panel of `Subtitle Dubbing`.
39 | * Marking mode: each line of the file should look like `Speaker:Content`, e.g. `Jerry: Hello.` The mapping table can convert the original speaker names in the text file into the corresponding target speakers (an illustrative parsing sketch is included at the end of this guide).
40 |
41 | * **II.** Click `Create Multi-Speaker Dubbing Project` below the file display.
42 |
43 | * **III.** Create speakers:
44 | * **a.** Expand the Multi-Speaker Dubbing section at the bottom of the editing page.
45 | * **b.** Select the target project.
46 | * **c.** In the Select/Create Speaker box, enter a speaker name.
47 | * **d.** Adjust parameters (including port numbers) and click 💾 to save. Duplicate names will overwrite existing speakers.
48 |
49 | * **IV.** Select a speaker from the dropdown, check corresponding subtitles, then click ✅ to apply. Speaker info will appear in Column 4.
50 |
51 | * **V.** The last assigned speaker becomes the default speaker (applies to unassigned subtitles in multi-speaker projects).
52 |
53 | * **VI.** Click Generate Multi-Speaker Dubbing to start generation.
54 | * ⚠️ If the display language you are using differs from the one under which a GSV speaker was created, that GSV speaker will not work properly.
55 |
56 | ### Regenerating Specific Lines
57 | * **I.** Locate the target subtitle using the slider in the editing page.
58 |
59 | * **II.** Modify the text if needed. Changes are auto-saved after regeneration.
60 |
61 | * **III.** Click 🔄 to regenerate a single line:
62 |
63 | * Uses project parameters if unassigned.
64 | * Uses speaker-specific parameters if assigned.
65 | * Multi-speaker projects must have assigned speakers.
66 |
67 | * **IV.** After making changes to the subtitles, you can also click `Continue Generation` to regenerate the audio for lines that were changed or that failed to synthesize.
68 |
69 | * **V.** Click `Reassemble Audio` to recompose the full audio.
70 |
71 | ### C. Re-editing Historical Projects
72 | * Select a project from the synthesis history in the top panel, then click the `Load` button.
73 | * The rest is self-explanatory.
74 |
75 | ### D. Subtitle Editing
76 | #### 1. Copy
77 | * Copy selected subtitles.
78 |
79 | #### 2. Delete
80 | * Delete selected subtitles.
81 |
82 | #### 3. Merge
83 | * Select at least 2 subtitles as start/end points.
84 | * Subtitles from the starting point to the ending point will be merged.
85 |
86 | ⚠️ Changes aren't saved to disk immediately, so you can reload the project to undo them.
87 |
88 | #### 4. Modify Timestamps
89 | * Edit start/end times in SRT format.
90 | * Click `Apply Timestamps` to save changes.
91 |
92 | ⚠️ Unapplied changes will be lost during navigation.
93 |
94 | ## 2. Troubleshooting
95 | * When reporting issues:
96 | Describe the problem in detail and list the steps you took before the error occurred.
97 | * Go to [GitHub-issues](https://github.com/YYuX-1145/Srt-AI-Voice-Assistant/issues) to report a problem or ask for help (Issue templates will guide proper reporting).
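98 | 
99 | ## Appendix: Marking-Mode Format (Reference Sketch)
100 | The marking mode described in section B expects one `Speaker:Content` pair per line. The following is an illustrative, self-contained parser, not the application's actual implementation; the `speaker_map` argument stands in for the mapping table and is hypothetical.
101 | ```
102 | def parse_marked_lines(lines, speaker_map=None):
103 |     """Split `Speaker:Content` lines; both ASCII ':' and full-width ':' are accepted."""
104 |     speaker_map = speaker_map or {}
105 |     entries = []
106 |     for line in lines:
107 |         line = line.strip()
108 |         if not line:
109 |             continue
110 |         for sep in (":", ":"):
111 |             if sep in line:
112 |                 speaker, content = line.split(sep, 1)
113 |                 break
114 |         else:
115 |             speaker, content = "", line  # unmarked line: left unassigned
116 |         speaker = speaker_map.get(speaker.strip(), speaker.strip())
117 |         entries.append((speaker, content.strip()))
118 |     return entries
119 | 
120 | print(parse_marked_lines(["Jerry: Hello.", "Tom:Hi!"], {"Tom": "Narrator"}))
121 | # -> [('Jerry', 'Hello.'), ('Narrator', 'Hi!')]
122 | ```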
--------------------------------------------------------------------------------
/docs/en_US/help_custom.md:
--------------------------------------------------------------------------------
1 | ## Security Warning: This feature will execute external code!
2 | ### Please inspect the code content before running it; executing untrusted code may put your computer at risk!
3 | ### The author bears no responsibility for any consequences!
4 |
5 | ### Place code files containing Python functions in the SAVAdata/presets directory, and they will be callable.
6 | * Here is example code for calling a Gradio API.
7 | ```
8 | def custom_api(text): #return: audio content
9 | from gradio_client import Client
10 | client = Client("http://127.0.0.1:7860/")
11 | result = client.predict(
12 | text, # str in '输入文本内容' Textbox component
13 | "神里绫华", # str (Option from: [('神里绫华', '神里绫华')]) in 'Speaker' Dropdown component
14 | 0.1, # int | float (numeric value between 0 and 1) in 'SDP Ratio' Slider component
15 | 0.5, # int | float (numeric value between 0.1 and 2) in 'Noise' Slider component
16 | 0.5, # int | float (numeric value between 0.1 and 2) in 'Noise_W' Slider component
17 | 1, # int | float (numeric value between 0.1 and 2) in 'Length' Slider component
18 | "auto", # str (Option from: [('ZH', 'ZH'), ('JP', 'JP'), ('EN', 'EN'), ('mix', 'mix'), ('auto', 'auto')]) in 'Language' Dropdown component
19 | "", # str (filepath on your computer (or URL) of file) in 'Audio prompt' Audio component
20 | "", # str in 'Text prompt' Textbox component
21 | "", # str in 'Prompt Mode' Radio component
22 | "", # str in '辅助文本' Textbox component
23 | 0, # int | float (numeric value between 0 and 1) in 'Weight' Slider component
24 | fn_index=0
25 | )
26 | with open(result[1],'rb') as file:
27 | data=file.read()
28 | return data
29 | ```
30 | **Please note: The input value `text` of the function must be the text to be synthesized, and the return value is the binary content of the audio file!**
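31 | 
32 | * The Gradio client is just one option. Below is a second, minimal sketch of the same contract over plain HTTP; the endpoint URL and JSON field are placeholders for whatever your own TTS service expects, not a real API.
33 | ```
34 | def custom_api(text):  # return: audio content (bytes)
35 |     import requests
36 |     # Hypothetical endpoint and payload -- adapt them to your own service.
37 |     resp = requests.post(
38 |         "http://127.0.0.1:9880/tts",
39 |         json={"text": text},
40 |         timeout=120,
41 |     )
42 |     resp.raise_for_status()
43 |     return resp.content  # binary content of the audio file
44 | ```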
--------------------------------------------------------------------------------
/docs/en_US/issues.md:
--------------------------------------------------------------------------------
1 | # Typical Issues
2 | ## 1. GPT-SoVITS Error: 404 NOT FOUND
3 | ```
4 | /tts 404 NOT FOUND
5 | ```
6 | * Typical cause of this error: using code that does not follow the official standard
7 | * Please ensure that you are using the official integrated package or the latest code from the official repository.
8 |
9 | ### Solution:
10 | * Manually pull the official repository code.
11 | * Download the integrated package provided in the README. (stable, but updates may be slow)
12 |
13 | ## 2. No connection could be made because the target machine actively refused it.
14 | ```
15 | No connection could be made because the target machine actively refused it.
16 | ```
17 | You need to check:
18 | * Is the API service already started and running?
19 | * Please wait for the API to fully start before performing operations.
20 | * Do not close the API console!
21 | * Is the port correctly filled?
22 |
23 | ## 3. 400 Bad Request
24 | ```
25 | 400 Bad Request
26 | ```
27 | Check the red error logs in this program's console; usually, the API will return the cause of the error.
28 | If no error message is received, please report this issue.
29 | * Typical error causes: reference audio outside the 3-10 second range; model path does not exist.
30 |
31 | ## 4. The following subtitles are delayed due to the previous audio being too long.
32 | ```
33 | The following subtitles are delayed due to the previous audio being too long.
34 | ```
35 | * Your subtitle timing intervals are not appropriate.
36 | * Consider increasing the value of the `Maximum audio acceleration ratio` setting (a value greater than 1 enables the feature) and enabling `Remove inhalation and silence`.
37 | * There is a minimum voice interval option in the settings (default 0.3 seconds) to prevent voices from overlapping in such cases. If not needed, set it to 0.
38 |
39 | ## 5. GPT-SoVITS Output Audio Has Duration But It's Silent
40 | ```
41 | GPT-SoVITS Output Audio Has Duration But It's Silent
42 | ```
43 | * Your GPU does not support fp16.
44 | * Manually modify the value of `is_half` to `false` in `GPT_SoVITS\configs\tts_infer.yaml`.
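45 | 
46 | * If you prefer patching the file from a script instead of editing it by hand, here is a minimal sketch (plain text replacement only; the path is the one mentioned above):
47 | ```
48 | from pathlib import Path
49 | 
50 | # Flip every `is_half: true` entry in the GPT-SoVITS inference config to false.
51 | cfg = Path("GPT_SoVITS/configs/tts_infer.yaml")
52 | text = cfg.read_text(encoding="utf-8")
53 | cfg.write_text(text.replace("is_half: true", "is_half: false"), encoding="utf-8")
54 | ```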
--------------------------------------------------------------------------------
/docs/en_US/title.md:
--------------------------------------------------------------------------------
1 | Version 4.4.2-2505, compatible with HiyoriUI, GPT-SoVITS, CosyVoice, F5-TTS (API in GSV format), and Microsoft TTS
2 | GitHub: [Check for updates manually](https://github.com/YYuX-1145/Srt-AI-Voice-Assistant/releases) | [Install Extensions](https://github.com/YYuX-1145/Srt-AI-Voice-Assistant/tree/main/tools)
--------------------------------------------------------------------------------
/docs/fr_FR/README.md:
--------------------------------------------------------------------------------
1 | # Srt-AI-Voice-Assistant
2 | ### Ce projet peut utiliser plusieurs systèmes de synthèse vocale IA pour doubler vos fichiers de sous-titres ou de texte.
Il propose également diverses fonctions auxiliaires pratiques, comme la transcription audio/vidéo et la traduction de sous-titres.
3 | Si vous rencontrez des problèmes ou souhaitez faire une demande de fonctionnalité, veuillez vous rendre sur [Issues](https://github.com/YYuX-1145/Srt-AI-Voice-Assistant/issues).
4 | ## Fonctionnalités
5 | - ✅ Open-source, interface WebUI conviviale, exécution locale et accessible via le réseau local
6 | - ✅ Prend en charge plusieurs projets TTS : BV2, GSV, CosyVoice2, AzureTTS, et vous pouvez même personnaliser vos API !
7 | - ✅ Enregistrement de paramètres et de presets personnalisés
8 | - ✅ Mode par lots
9 | - ✅ Édition de sous-titres
10 | - ✅ Traduction de sous-titres
11 | - ✅ Régénération de lignes spécifiques
12 | - ✅ Prend en charge le doublage avec plusieurs locuteurs
13 | - ✅ Réexportation de sous-titres
14 | - ✅ Fonctions étendues : transcription de sous-titres pour audio/vidéo
15 | - ✅ I18n
16 |
17 | ## [Télécharger seulement la version empaquetée](https://github.com/YYuX-1145/Srt-AI-Voice-Assistant/releases)
18 | * Utilisez cette version seulement en cas de conflits de dépendances ou de problèmes d'installation.
19 |
20 | ## [Télécharger le package intégré avec GPT-SoVITS (Hugging Face)](https://huggingface.co/YYuX/GPT-SoVITS-SAVA-windows-package/tree/main)
21 | * Le package intégré GPT-SoVITS inclut la version empaquetée, sans supprimer aucun modèle intégré ou pré-entraîné, et son code d'entraînement et d'inférence est identique à celui du dépôt officiel.
22 | * Note : La version empaquetée incluse dans le package intégré GPT-SoVITS peut ne pas être la version la plus récente ; remplacez-la pour la mettre à jour.
--------------------------------------------------------------------------------
/docs/fr_FR/changelog.md:
--------------------------------------------------------------------------------
1 | ## Journal des modifications
2 |
3 | ### Mise à jour V4-2503 :
4 | #### Afin de rendre les versions plus claires, des numéros de version sont attribués en plus des dates de publication.
5 | #### Après cette mise à jour, l'historique de synthèse et les locuteurs enregistrés de la version précédente doivent être recréés ; sinon, des erreurs peuvent se produire !
6 | 1. Édition des sous-titres
7 | 2. Traduction des sous-titres
8 | 3. Amélioration de divers détails et correction d'erreurs
9 | 4. Prise en charge de CosyVoice2 (réutilisation du panneau GSV)
10 | 5. (4.0.1) Mode par lots
11 | 6. (4.1) Mode serveur
12 | 7. (4.2) I18n
13 | 8. (4.3) Accélération automatique de l'audio et suppression du silence; Création de projets de doublage à plusieurs locuteurs à partir de textes étiquetés.
14 | 9. (4.3.1) Ajouter la fonction de Recherche et de Remplacement; ajouter un bouton de régénération en un clic.
15 | 10. (4.4) Permet l'édition des caractères polyphoniques pour GPT-SoVITS ainsi que la détection automatique des modèles; Autorise les invites personnalisées pour Ollama; Permet d'exporter des sous-titres avec les noms des locuteurs selon un modèle personnalisable.
16 |
17 | ### Mise à jour du 140225 :
18 | 1. Prise en charge de la lecture de projets historiques
19 | 2. Prise en charge du doublage avec plusieurs locuteurs
20 |
21 | ### Mise à jour du 230125 :
22 | 1. Prise en charge de la réexportation de fichiers de sous-titres SRT correspondant aux horodatages de début et de fin réels après synthèse ; prise en charge également de la lecture de fichiers texte TXT pour la synthèse, auquel cas les paragraphes sont divisés par phrases.
23 | 2. Afin d'améliorer l'extensibilité à l'avenir et la simplicité, la conception d'un fichier de script unique, qui rendait les téléchargements plus pratiques, a dû être abandonnée. Le code sera refactorisé progressivement à partir de cette version.
24 | 3. Ajout de certaines documentations.
25 |
26 | ### Mise à jour du 110824 :
27 | 1. Notification des utilisateurs du message d'erreur
28 | 2. Détection automatique des environnements TTS-Project
29 | 3. Restauration de la compatibilité avec l'api-v1
30 | 4. Une mise à jour majeure de fonctionnalité : la régénération de lignes spécifiques si vous n'en êtes pas satisfait.
--------------------------------------------------------------------------------
/docs/fr_FR/help.md:
--------------------------------------------------------------------------------
1 | # Guide de l'utilisateur
2 |
3 | ## 0. Configuration et utilisation du service
4 | #### Ce projet peut appeler deux projets locaux : Bert-VITS2, GPT-SoVITS
5 | #### Et un service en ligne : Microsoft TTS
6 | * **Pour les projets TTS locaux** :
7 |
8 | * Remplissez et enregistrez le chemin racine du projet et le chemin de l'interpréteur Python correspondant sur la page des paramètres.
9 | * **Méthode plus simple** : Placez le programme dans le répertoire racine du paquet intégré, puis cliquez sur le bouton correspondant sur la première page pour démarrer le service API !
10 |
11 | * **Pour Microsoft TTS** :
12 |
13 | * Suivez le tutoriel pour vous inscrire à un compte et saisissez la clé API sur la page des paramètres.
14 | * Prenez note du quota mensuel gratuit !
15 |
16 | ## 1. Démarrage
17 | ### Ce projet peut doubler pour les sous-titres et les textes bruts.
18 | * **Pour les sous-titres** :
19 |
20 | * Lorsqu'un sous-titre est trop long, les sous-titres suivants seront retardés en conséquence. Et vous pouvez définir l'intervalle de parole minimum dans les paramètres.
21 |
22 | * **Pour le texte brut** :
23 |
24 | * Le texte sera divisé en entrées de sous-titres en fonction des ponctuations de fin et des retours à la ligne.
25 |
26 | * Après la génération, vous pouvez exporter les sous-titres avec les horodatages audio réels sur la page d'édition.
27 |
28 | ### A. Scénario avec un seul locuteur
29 | * **I.** Téléchargez les fichiers de sous-titres ou de texte dans le panneau de droite de la page `Doublage de sous-titres`.
30 | * Mode de balisage : le contenu du fichier doit être le suivant : `Locuteur : Contenu`, p. ex. `Vincent:Bonjour.` Le tableau de correspondance peut convertir le locuteur d'origine du fichier texte en locuteur cible correspondant.
31 |
32 | * **II.** Sélectionnez votre projet et ajustez les paramètres dans le panneau central.
33 |
34 | * **III.** Cliquez sur le bouton `Produire l'audio` en bas et attendez.
35 |
36 | * **IV.** Téléchargez votre audio.
37 |
38 | ### B. Scénario avec plusieurs locuteurs
39 | * **I.** Téléchargez les fichiers de sous-titres/texte dans le panneau de droite de `Doublage de sous-titres`.
40 |
41 | * **II.** Cliquez sur `Créer un projet de doublage avec plusieurs locuteurs` en dessous de l'affichage du fichier.
42 |
43 | * **III.** Créez des locuteurs :
44 | * **a.** Dépliez la section Doublage avec plusieurs locuteurs en bas de la page d'édition.
45 | * **b.** Sélectionnez le projet cible.
46 | * **c.** Dans la boîte de sélection/création de locuteur, saisissez un nom de locuteur.
47 | * **d.** Ajustez les paramètres (y compris les numéros de port) et cliquez sur 💾 pour enregistrer. Les noms dupliqués écraseront les locuteurs existants.
48 |
49 | * **IV.** Sélectionnez un locuteur dans la liste déroulante, cochez les sous-titres correspondants, puis cliquez sur ✅ pour appliquer. Les informations du locuteur apparaîtront dans la colonne 4.
50 |
51 | * **V.** Le dernier locuteur attribué devient le locuteur par défaut (s'applique aux sous-titres non attribués dans les projets avec plusieurs locuteurs).
52 |
53 | * **VI.** Cliquez sur `Lancer la synthèse à plusieurs locuteurs` pour commencer la génération.
54 | * ⚠️ Si la langue d'affichage utilisée diffère de celle sous laquelle un locuteur GSV a été créé, ce locuteur GSV ne fonctionnera pas correctement.
55 |
56 | ### Regénérer des lignes spécifiques
57 | * **I.** Localisez le sous-titre cible à l'aide du curseur sur la page d'édition.
58 |
59 | * **II.** Modifiez le texte si nécessaire. Les modifications sont enregistrées automatiquement après la régénération.
60 |
61 | * **III.** Cliquez sur 🔄 pour régénérer une seule ligne :
62 |
63 | * Utilise les paramètres du projet s'il n'est pas attribué.
64 | * Utilise les paramètres spécifiques du locuteur s'il est attribué.
65 | * Les projets avec plusieurs locuteurs doivent avoir des locuteurs attribués.
66 |
67 | * **IV.** Après avoir apporté des modifications aux sous-titres, vous pouvez également cliquer sur `Continuer la Génération` pour régénérer la voix des sous-titres modifiés ou dont la synthèse a échoué.
68 |
69 | * **V.** Cliquez sur `Reconstituer l'audio` pour recomposer l'audio complet.
70 |
71 | ### C. Rééditer des projets historiques
72 | * Sélectionnez un projet de l'historique de synthèse dans le panneau supérieur. Ensuite, cliquez sur le bouton `Charger`.
73 | * Le reste est évident.
74 |
75 | ### D. Édition des sous-titres
76 | #### 1. Copier
77 | * Copier les sous-titres sélectionnés.
78 |
79 | #### 2. Supprimer
80 | * Supprimer les sous-titres sélectionnés.
81 |
82 | #### 3. Fusionner
83 | * Sélectionnez au moins 2 sous-titres comme points de départ/fin.
84 | * Les sous-titres du point de départ au point de fin seront fusionnés.
85 |
86 | ⚠️ Les modifications ne sont pas enregistrées automatiquement sur le disque immédiatement, vous pouvez donc recharger le projet pour annuler.
87 |
88 | #### 4. Modifier les horodatages
89 | * Éditez les heures de début/fin au format SRT.
90 | * Cliquez sur `Appliquer les horodatages` pour enregistrer les modifications.
91 |
92 | ⚠️ Les modifications non appliquées seront perdues lors de la navigation.
93 |
94 | ## 2. Dépannage
95 | * Lorsque vous trouvez un problème :
96 | Décrivez le problème en détail et répertoriez les étapes effectuées pour reproduire l'erreur.
97 | * Visitez [GitHub-issues](https://github.com/YYuX-1145/Srt-AI-Voice-Assistant/issues) pour rapporter un problème ou demander de l'aide (les modèles de Issue vous guideront pour signaler correctement).
--------------------------------------------------------------------------------
/docs/fr_FR/help_custom.md:
--------------------------------------------------------------------------------
1 | ## Security Warning: This feature will execute external code!
2 | ### Please inspect the code content before running it; executing untrusted code may put your computer at risk!
3 | ### The author bears no responsibility for any consequences!
4 |
5 | ### Place code files containing Python functions in the SAVAdata/presets directory, and they will be callable.
6 | * Here is an example code for Gradio API.
7 | ```
8 | def custom_api(text): #return: audio content
9 | from gradio_client import Client
10 | client = Client("http://127.0.0.1:7860/")
11 | result = client.predict(
12 | text, # str in '输入文本内容' Textbox component
13 | "神里绫华", # str (Option from: [('神里绫华', '神里绫华')]) in 'Speaker' Dropdown component
14 | 0.1, # int | float (numeric value between 0 and 1) in 'SDP Ratio' Slider component
15 | 0.5, # int | float (numeric value between 0.1 and 2) in 'Noise' Slider component
16 | 0.5, # int | float (numeric value between 0.1 and 2) in 'Noise_W' Slider component
17 | 1, # int | float (numeric value between 0.1 and 2) in 'Length' Slider component
18 | "auto", # str (Option from: [('ZH', 'ZH'), ('JP', 'JP'), ('EN', 'EN'), ('mix', 'mix'), ('auto', 'auto')]) in 'Language' Dropdown component
19 | "", # str (filepath on your computer (or URL) of file) in 'Audio prompt' Audio component
20 | "", # str in 'Text prompt' Textbox component
21 | "", # str in 'Prompt Mode' Radio component
22 | "", # str in '辅助文本' Textbox component
23 | 0, # int | float (numeric value between 0 and 1) in 'Weight' Slider component
24 | fn_index=0
25 | )
26 | with open(result[1],'rb') as file:
27 | data=file.read()
28 | return data
29 | ```
30 | **Please note: The input value `text` of the function must be the text to be synthesized, and the return value is the binary content of the audio file!**
--------------------------------------------------------------------------------
/docs/fr_FR/issues.md:
--------------------------------------------------------------------------------
1 | # Problèmes typiques
2 | ## 1. Erreur GPT-SoVITS : 404 NOT FOUND
3 | ```
4 | /tts 404 NOT FOUND
5 | ```
6 | * Cause typique de cette erreur : utilisation de code non conforme au standard officiel.
7 | * Veuillez vous assurer que vous utilisez le package intégré officiel ou le code le plus récent du dépôt officiel.
8 |
9 | ### Solution :
10 | * Téléchargez manuellement le code du dépôt officiel.
11 | * Téléchargez le package intégré fourni dans le README. (stable mais les mises à jour peuvent être lentes)
12 |
13 | ## 2. Impossible d'établir de connexion car l'ordinateur cible a expressément refusé celle-ci.
14 | ```
15 | Impossible d'établir de connexion car l'ordinateur cible a expressément refusé celle-ci.
16 | ```
17 | Vous devez vérifier :
18 | * Le service API est-il déjà démarré et en cours d'exécution ?
19 | * Veuillez attendre que l'API soit entièrement démarrée avant d'effectuer des opérations.
20 | * Ne fermez pas la console de l'API !
21 | * Le port est-il correctement renseigné ?
22 |
23 | ## 3. 400 Bad Request
24 | ```
25 | 400 Bad Request
26 | ```
27 | Vérifiez les journaux d'erreur en rouge dans la console de ce programme ; généralement, l'API renverra la cause de l'erreur.
28 | Si aucun message d'erreur n'est reçu, veuillez signaler ce problème.
29 | * Cause d'erreur typique : Audio de référence en dehors de la plage de 3 à 10 secondes ; le chemin du modèle n'existe pas.
30 |
31 | ## 4. Les sous-titres suivants sont retardés en raison de la longueur excessive de l'audio précédent.
32 | ```
33 | Les sous-titres suivants sont retardés en raison de la longueur excessive de l'audio précédent.
34 | ```
35 | * Vos intervalles de temps des sous-titres ne sont pas appropriés.
36 | * Envisagez d'augmenter la valeur du paramètre `Rapport maximal d'accélération audio` (en la fixant à
37 | une valeur supérieure à 1 pour activer la fonction) et activez `Supprimer l'inhalation et le silence`.
38 | * Il existe une option d'intervalle vocal minimum dans les paramètres (0,3 seconde par défaut) pour éviter que les voix ne se chevauchent dans de tels cas. Si cela n'est pas nécessaire, elle peut être réglée à 0.
39 |
40 | ## 5. Le fichier audio de sortie de GPT-SoVITS a une durée mais est silencieux.
41 | ```
42 | Le fichier audio de sortie de GPT-SoVITS a une durée mais est silencieux.
43 | ```
44 | * Votre carte graphique ne prend pas en charge le fp16.
45 | * Modifiez manuellement la valeur de `is_half` en `false` dans `GPT_SoVITS\configs\tts_infer.yaml`.
--------------------------------------------------------------------------------
/docs/fr_FR/title.md:
--------------------------------------------------------------------------------
1 | Version 4.4.2-2505, Compatible avec HiyoriUI, GPT-SoVITS, CosyVoice, F5-TTS (API au format GSV) et Microsoft TTS
2 | GitHub : [Vérifier manuellement les mises à jour](https://github.com/YYuX-1145/Srt-AI-Voice-Assistant/releases) | [Installer des extensions](https://github.com/YYuX-1145/Srt-AI-Voice-Assistant/tree/main/tools)
--------------------------------------------------------------------------------
/docs/ja_JP/README.md:
--------------------------------------------------------------------------------
1 | # Srt-AI-Voice-Assistant
2 | **This file was translated by AI and is for reference only.**
3 | ### このプロジェクトは、複数のAI音声合成(TTS)を使用して、字幕ファイルやテキストファイルにダビングすることができます。
また、音声/動画の文字起こしや字幕翻訳など、様々な便利な補助機能を提供します。
4 | 問題に遭遇した場合や新機能のリクエストがある場合は、[Issues](https://github.com/YYuX-1145/Srt-AI-Voice-Assistant/issues) をご利用ください。
5 |
6 | ## 機能
7 | - ✅ オープンソースで、使いやすいWebUIインターフェース。ローカルで実行でき、LAN経由でアクセス可能です。
8 | - ✅ 複数のTTSプロジェクトをサポート:BV2、GSV、CosyVoice2、AzureTTSなど、独自のAPIもカスタマイズできます!
9 | - ✅ パーソナル設定とプリセットを保存できます。
10 | - ✅ バッチモードが利用可能です。
11 | - ✅ 字幕編集機能があります。
12 | - ✅ 字幕翻訳機能を備えています。
13 | - ✅ 特定の行を再生成できます。
14 | - ✅ 複数話者によるダビングをサポートします。
15 | - ✅ 字幕を再エクスポートできます。
16 | - ✅ 拡張機能:音声/動画の字幕文字起こし
17 | - ✅ 国際化(I18n)対応
18 |
19 | ## [パッケージ版のみをダウンロード](https://github.com/YYuX-1145/Srt-AI-Voice-Assistant/releases)
20 | * 依存関係の競合やインストール問題がある場合のみ、このバージョンを使用してください。
21 |
22 | ## [GPT-SoVITS付きの統合パッケージをダウンロード(Hugging Faceから)](https://huggingface.co/YYuX/GPT-SoVITS-SAVA-windows-package/tree/main)
23 | * GPT-SoVITS統合パッケージにはパッケージ版が含まれており、組み込みモデルや事前学習モデルは削除されていません。コードは公式リポジトリと同じです。
24 | * 注意:GPT-SoVITS統合パッケージに含まれるパッケージ版は最新バージョンでない可能性があります。上書きして更新してください。
--------------------------------------------------------------------------------
/docs/ja_JP/changelog.md:
--------------------------------------------------------------------------------
1 | ## 変更履歴
2 | **This file was translated by AI and is for reference only.**
3 | ### V4-2503アップデート:
4 | #### バージョンをより明確にするため、リリース日付に加えてバージョン番号が付けられます。
5 | #### このアップデート後、前のバージョンの合成履歴と保存された話者は再作成する必要があります。そうしないと、エラーが発生する可能性があります!
6 | 1. 字幕編集
7 | 2. 字幕翻訳
8 | 3. 様々な詳細が改善され、バグが修正されました
9 | 4. CosyVoice2をサポート(GSVパネルを再利用)
10 | 5. (4.0.1) バッチモード
11 | 6. (4.1) サーバーモード
12 | 7. (4.2) 国際化(I18n)
13 |
14 | ### 250214アップデート:
15 | 1. 過去のプロジェクトの読み込みをサポート
16 | 2. 複数話者によるダビングをサポート
17 |
18 | ### 250123アップデート:
19 | 1. 合成後の実際の開始と終了タイムスタンプに一致するSRT字幕ファイルの再エクスポートをサポートします。また、合成のためのTXTテキストファイルの読み込みもサポートし、この場合、段落は文で分割されます。
20 | 2. 将来的な拡張性と簡素化のため、ダウンロードをより便利にしていた単一スクリプトファイルの設計を断念せざるを得ませんでした。このバージョンからコードを段階的にリファクタリングしていきます。
21 | 3. いくつかのドキュメントを追加しました。
22 |
23 | ### 240811アップデート:
24 | 1. エラーメッセージをユーザーに通知
25 | 2. TTSプロジェクトの環境を自動検出
26 | 3. api-v1との互換性を回復
27 | 4. メジャーな機能アップデート:特定の行に不満がある場合、それらを再生成する機能をサポート
--------------------------------------------------------------------------------
/docs/ja_JP/help.md:
--------------------------------------------------------------------------------
1 | # ユーザーガイド
2 | **This file was translated by AI and is for reference only.**
3 | ## 0. サービスの設定と使用方法
4 | #### このプロジェクトは、2つのローカルプロジェクト(Bert-VITS2、GPT-SoVITS)と1つのオンラインサービス(Microsoft TTS)を呼び出すことができます。
5 | * **ローカルのTTSプロジェクトについて**:
6 | * 設定ページでプロジェクトのルートパスと対応するPythonインタープリターのパスを入力して保存します。
7 | * **簡単な方法**:プログラムを統合パッケージのルートディレクトリに配置し、最初のページの対応するボタンをクリックしてAPIサービスを起動します!
8 |
9 | * **Microsoft TTSについて**:
10 | * チュートリアルに従ってアカウントを登録し、設定ページにAPIキーを入力します。
11 | * 月次の無料クォータに注意してください!
12 |
13 | ## 1. 使い始める
14 | ### このプロジェクトは、字幕または平文の吹き替えをサポートしています。
15 | * **字幕の場合**:
16 | * 実際に有効に使用されるのは開始時間のみです。字幕が長すぎる場合、後続の字幕はそれに応じて遅延します。また、設定で最小音声間隔を設定することができます。
17 |
18 | * **平文の場合**:
19 | * テキストは、終了句読点と改行に基づいて字幕エントリに分割されます。
20 |
21 | * 生成後、編集ページで実際の音声タイムスタンプ付きの字幕をエクスポートすることができます。
22 |
23 | ### A. 単一話者のシナリオ
24 | * **I.** `字幕吹き替え`ページの右パネルで字幕またはテキストファイルをアップロードします。
25 | * **II.** 中央のパネルでプロジェクトを選択し、パラメータを調整します。
26 | * **III.** 下部の`音声を生成`ボタンをクリックして待ちます。
27 | * **IV.** 音声をダウンロードします。
28 |
29 | ### B. 複数話者のシナリオ
30 | * **I.** `字幕吹き替え`の右パネルで字幕/テキストファイルをアップロードします。
31 | * **II.** ファイル表示の下にある`複数話者吹き替えプロジェクトを作成`をクリックします。
32 | * **III.** 話者を作成します:
33 | * **a.** 編集ページの下部にある「複数話者吹き替え」セクションを展開します。
34 | * **b.** ターゲットプロジェクトを選択します。
35 | * **c.** 「話者を選択/作成」ボックスに話者名を入力します。
36 | * **d.** パラメータ(ポート番号を含む)を調整し、💾をクリックして保存します。重複する名前は既存の話者を上書きします。
37 | * **IV.** ドロップダウンから話者を選択し、対応する字幕にチェックを入れてから、✅をクリックして適用します。話者情報が4列目に表示されます。
38 | * **V.** 最後に割り当てられた話者がデフォルトの話者になります(複数話者プロジェクトで割り当てられていない字幕に適用されます)。
39 | * **VI.** `複数話者吹き替えを生成`をクリックして生成を開始します。
40 | * ⚠️ 異なる言語でGSV話者を使用および作成している場合、GSV話者は正常に動作しません。
41 |
42 | ### 特定の行を再生成する
43 | * **I.** 編集ページのスライダーを使用してターゲット字幕を見つけます。
44 | * **II.** 必要に応じてテキストを変更します。再生成後、変更内容は自動的に保存されます。
45 | * **III.** 🔄をクリックして1行を再生成します:
46 | * 割り当てられていない場合は、プロジェクトのパラメータを使用します。
47 | * 割り当てられている場合は、話者固有のパラメータを使用します。
48 | * 複数話者プロジェクトでは、話者を割り当てる必要があります。
49 | * **IV.** `音声を再組み立て`をクリックして、完全な音声を再構成します。
50 |
51 | ### C. 過去のプロジェクトを再編集する
52 | * 上部パネルの合成履歴からプロジェクトを選択し、`読み込み`ボタンをクリックします。
53 | * 残りの手順は自明です。
54 |
55 | ### D. 字幕編集
56 | #### 1. コピー
57 | * 選択した字幕をコピーします。
58 |
59 | #### 2. 削除
60 | * 選択した字幕を削除します。
61 |
62 | #### 3. マージ
63 | * 少なくとも2つの字幕を開始/終了点として選択します。
64 | * 開始点から終了点までの字幕がマージされます。
65 |
66 | ⚠️ 変更はすぐにディスクに自動保存されないため、プロジェクトを再読み込みすることで元に戻すことができます。
67 |
68 | #### 4. タイムスタンプを変更する
69 | * SRT形式で開始/終了時間を編集します。
70 | * `タイムスタンプを適用`をクリックして変更を保存します。
71 |
72 | ⚠️ 適用されていない変更は、ナビゲーション中に失われます。
73 |
74 | ## 2. トラブルシューティング
75 | * 問題を報告する際は、問題を詳細に説明し、エラーが発生する前に行った手順を列挙してください。
76 | * [GitHub-issues](https://github.com/YYuX-1145/Srt-AI-Voice-Assistant/issues)にアクセスして、問題を報告またはヘルプを求めてください(Issueテンプレートが適切な報告方法をガイドします)。
--------------------------------------------------------------------------------
/docs/ja_JP/issues.md:
--------------------------------------------------------------------------------
1 | # 典型的な問題
2 | **This file was translated by AI and is for reference only.**
3 | ## 1. GPT-SoVITSエラー: 404 NOT FOUND
4 | ```
5 | /tts 404 NOT FOUND
6 | ```
7 | * このエラーの典型的な原因: 非公式標準のコードを使用している
8 | * 公式の統合パッケージまたは公式リポジトリの最新のコードを使用していることを確認してください。
9 |
10 | ### 解決策:
11 | * 公式リポジトリのコードを手動で取得してください。
12 | * READMEに記載されている統合パッケージをダウンロードしてください。(安定していますが、更新が遅い場合があります)
13 |
14 | ## 2. ターゲットマシンがアクティブに接続を拒否したため、接続を行うことができませんでした。
15 | ```
16 | ターゲットマシンがアクティブに接続を拒否したため、接続を行うことができませんでした。
17 | ```
18 | 以下を確認する必要があります。
19 | * APIサービスが既に起動して実行中ですか?
20 | * 操作を行う前に、APIが完全に起動するのを待ってください。
21 | * APIコンソールを閉じないでください!
22 | * ポートは正しく入力されていますか?
23 |
24 | ## 3. 400 Bad Request
25 | ```
26 | 400 Bad Request
27 | ```
28 | このプログラムのコンソールの赤色のエラーログを確認してください。通常、APIはエラーの原因を返します。
29 | エラーメッセージが受信されない場合は、この問題を報告してください。
30 | * 典型的なエラー原因: 参照音声が3-10秒の範囲外;モデルパスが存在しません。
31 |
32 | ## 4. 前の音声が長すぎるため、以下の字幕が遅延しています。
33 | ```
34 | 前の音声が長すぎるため、以下の字幕が遅延しています。
35 | ```
36 | * 字幕のタイミング間隔が適切ではありません。
37 | * 話す速度を上げるか、字幕のタイミング間隔を手動で増やすことを検討してください。
38 | * 設定には最小音声間隔オプションがあります(デフォルトは0.3秒)。このような場合に音声が重ならないようにするためのものです。必要なければ0に設定できます。
39 |
40 | ## 5. GPT-SoVITSの出力音声には長さがあるが、無音です。
41 | ```
42 | GPT-SoVITSの出力音声には長さがあるが、無音です。
43 | ```
44 | * あなたのGPUはfp16をサポートしていません。
45 | * `GPT_SoVITS\configs\tts_infer.yaml` の `is_half` の値を `false` に手動で変更してください。
--------------------------------------------------------------------------------
/docs/ja_JP/title.md:
--------------------------------------------------------------------------------
1 | バージョン4.4.2-2505、HiyoriUI、GPT-SoVITS、CosyVoice、F5-TTS(GSV形式のAPI)およびMicrosoft TTSと互換性があります。
2 | GitHub: [手動で更新を確認](https://github.com/YYuX-1145/Srt-AI-Voice-Assistant/releases) | [拡張機能をインストール](https://github.com/YYuX-1145/Srt-AI-Voice-Assistant/tree/main/tools)
--------------------------------------------------------------------------------
/docs/zh_CN/README.md:
--------------------------------------------------------------------------------
1 | # Srt-AI-Voice-Assistant
2 | ### 本项目可利用多个AI-TTS为你的字幕或文本文件配音。
并提供包括字幕识别、翻译在内的多种便捷的辅助功能。
3 |
4 | ### 在线体验网址(试运营中):[srt-ai-voice-assistant-onlinedemo.work](https://srt-ai-voice-assistant-onlinedemo.work/)
5 | 如遇到bug或者有什么建议,可以在 [Issues](https://github.com/YYuX-1145/Srt-AI-Voice-Assistant/issues) 上反馈
6 |
7 | ## 特性
8 | - ✅ 代码开源,界面友好,本地运行,可局域网访问
9 | - ✅ 支持多个TTS项目:BV2,GSV,CosyVoice2,AzureTTS,以及你可以自定义API!
10 | - ✅ 保存个性化设置和预设
11 | - ✅ 批量模式
12 | - ✅ 字幕编辑
13 | - ✅ 字幕批量翻译
14 | - ✅ 单句重新抽卡
15 | - ✅ 支持多角色配音
16 | - ✅ 字幕重新导出
17 | - ✅ 扩展功能:音视频字幕转录
18 | - ✅ I18n
19 |
20 | ## [仅下载本体(打包版)](https://github.com/YYuX-1145/Srt-AI-Voice-Assistant/releases)
21 | * 当依赖冲突或无法正常安装时使用此版本
22 |
23 |
24 | ## [下载配合GPT-SoVITS的整合包(Hugging Face)](https://huggingface.co/YYuX/GPT-SoVITS-SAVA-windows-package/tree/main)
25 | * 整合包内预装打包版本体,内置模型不删减,训练和推理代码和官方仓库一致
26 | * 注意:包内自带的程序可能不是最新版本,覆盖掉以完成更新
--------------------------------------------------------------------------------
/docs/zh_CN/changelog.md:
--------------------------------------------------------------------------------
1 | ## 重大的更新历史
2 |
3 | ### V4-2503更新:
4 | #### 为了让版本更具辨识度,除了标注发布日期外,还分配了版本号。
5 | #### 本次更新后,上一个版本的合成历史和保存的说话人需要重新创建,否则会报错!
6 | 1.字幕编辑
7 | 2.字幕批量翻译
8 | 3.各项细节提升和bug修复
9 | 4.支持CosyVoice2(复用GSV的面板)
10 | 5.(4.0.1)批量模式
11 | 6.(4.1)服务模式
12 | 7.(4.2)I18n
13 | 8.(4.3)新增自动语速和自动去静音功能;现在可从标记文件快速生成多说话人工程
14 | 9.(4.3.1)加入查找和替换;加入一键重新生成按钮
15 | 10.(4.4)可编辑GSV多音字,自动检测模型;ollama允许自定义提示词;可利用自定义模版导出带说话人名称的字幕
16 |
17 | ### 250214更新:
18 | 1.支持读取历史工程
19 | 2.支持多说话人配音
20 | 3.更完善的文档
21 |
22 | ### 250123更新:
23 | 1.支持在合成完毕后导出符合实际情况的srt字幕文件,同时也支持通过读取txt纯文本文件来进行合成,在这种情况下会按每句来切分段落。
24 |
25 | 2.为了未来的扩展性和简洁性,我不得不放弃了单脚本文件的设计,即使对于下载而言更加方便。代码从现版本逐步开始重构。
26 |
27 | 3.加入一些文档说明
28 |
29 | ### 240811更新:
30 | 1.增加错误提示
31 | 2.自动检测项目路径
32 | 3.再次兼容api-v1。
33 | 4.重大功能更新:支持重新抽卡合成
--------------------------------------------------------------------------------
/docs/zh_CN/help.md:
--------------------------------------------------------------------------------
1 | # 使用指南
2 |
3 | ## 0.配置和使用服务
4 | #### 本项目可调用2个本地项目:Bert-VITS2、GPT-SoVITS
5 | #### 和1个在线项目:微软TTS
6 | * 本地项目只需要在设置页中填写并保存项目和解释器路径,或者**以更简单的方式:将程序放于整合包根目录内即可**,然后点击第一页右下角对应按钮即可一键启动API服务!
7 | * 对于微软TTS,需要按教程注册账号并将密钥填写在设置页内。请注意每月的免费额度!
8 |
9 | ## 1.开始使用
10 | ### 本项目可以为字幕或者纯文本配音。
11 | * 当前一个字幕过长时,后一个字幕将在其后顺延。你可以在设置里设置最小语音间隔。
12 | * 对于纯文本,将按照结束标点符号和换行切割成每一条字幕。
13 | * 完成生成后,可以在编辑页面导出符合音频实际起止时间的字幕。
14 | ### A.单一说话人的情形
15 | * 1.在`字幕配音`上半页右侧上传字幕或者纯文本文件。
16 | * 2.在中间选择你的项目,调整参数。
17 | * 3.点击下方的`生成`,等待片刻。
18 | * 4.下载你的音频。
19 |
20 | ### B.多说话人的情况
21 | * 1.在`字幕配音`上半页右侧上传字幕或者纯文本文件。
22 | * 1.5. 标记模式:文件内容应如下:`说话人:内容` e.g.`淳平:对不起,让您久等了。`
23 | * 2.点击左侧文件展示下方的按钮`生成多角色项目`
24 | * 3.创建数个说话人:
25 | - a.展开位于编辑页最下方的`多角色配音`栏
26 | - b.选择目标项目
27 | - c.`在选择/创建说话人`框中,输入说话人的名字
28 | - d.调整上方对应参数。全部的参数,包括端口号将作为说话人的配置。然后点击`💾`创建说话人。同名的说话人会覆盖。
29 | * 4.在下拉列表里选中你的说话人,然后勾选对应的字幕,再点击下方的`✅`来应用说话人。你将在第4列文本看到说话人信息。
30 | * 5.上一次点击`✅`时选中的说话人会`自动`应用为`默认说话人`(仅多说话人项目生效,未指派说话人的情况下就使用默认说话人),即使你没有选择任何一条字幕。
31 | * 6.点击`生成多角色配音`,将会开始为所有指定说话人的字幕生成音频
32 | * ⚠️如果你正在使用和创建GSV的说话人时不同的语言,GSV的说话人会无法使用。
33 | * 注:gsv的预设创建同理。在切换预设时,会自动加载模型。
34 |
35 | ### 如果对某条语音不满意?
36 | * 1.在下半编辑页中通过滑条找到目标字幕
37 | * 2.可以修改文本内容。重新抽卡完成后,字幕内容会存档。
38 | * 3.点击`🔄️`重新生成单条语音。如果你通过单说话人创建工程,在未指派说话人时,参数以当前创建工程所使用的项目的面板为准。若指派了说话人,则按说话人的参数合成。
39 | * 4.通过多说话人创建的工程必须指派说话人。
40 | * 5.在对字幕进行更改后,也可以点击`继续生成`来重新生成经过更改或未成功合成的语音。
41 | * 6.点击`重新拼接内容`,重新合成音频。
42 |
43 | ### C.历史工程的再编辑
44 | * 编辑页上侧栏的合成历史中选择对应工程,然后点击加载。
45 | * 然后应该也不用多说了吧?
46 |
47 | ### D.字幕编辑
48 | #### 1.复制
49 | * 复制选中的字幕。
50 | #### 2.删除
51 | * 删除选中的字幕。
52 | #### 3.合并
53 | * 你需要至少选择2个字幕作为合并的起点和终点。
54 | * 只有选中字幕的id的最大和最小值作为实际有效输入。
55 | #### 以上更改不会立即存档,因此可以通过重新加载当前工程来撤销操作。
56 |
57 | #### 4.更改时间码
58 | * 按srt的时间格式修改字幕的起止时间。
59 | * 必须点击`应用时间`后,本页的时间码才会被保存,并存档。
60 | * 如果在未保存的情况下进行翻页等其他操作,更改将会丢失。
61 |
62 | ## 2.我遇到了无法解决的错误
63 | ### 您需要:
64 | * 详细地描述问题,并指出问题发生前,您做了哪些操作。
65 | * 推荐在评论区和[GitHub-issues](https://github.com/YYuX-1145/Srt-AI-Voice-Assistant/issues)反馈。Github-issue的模版会指引您更清晰地反馈问题。
--------------------------------------------------------------------------------
/docs/zh_CN/help_custom.md:
--------------------------------------------------------------------------------
1 | ## 安全警告:此功能会执行外部代码!
2 | ### 运行前请务必检查代码内容,运行不受信任的代码可能会导致电脑受到攻击!
3 | ### 作者不对此产生的后果负任何责任!!
4 |
5 | ### 将装有python函数的代码文件放在`SAVAdata/presets`下即可被调用
6 | ```
7 | def custom_api(text):#return: audio content
8 | from gradio_client import Client
9 | client = Client("http://127.0.0.1:7860/")
10 | result = client.predict(
11 | text, # str in '输入文本内容' Textbox component
12 | "神里绫华", # str (Option from: [('神里绫华', '神里绫华')]) in 'Speaker' Dropdown component
13 | 0.1, # int | float (numeric value between 0 and 1) in 'SDP Ratio' Slider component
14 | 0.5, # int | float (numeric value between 0.1 and 2) in 'Noise' Slider component
15 | 0.5, # int | float (numeric value between 0.1 and 2) in 'Noise_W' Slider component
16 | 1, # int | float (numeric value between 0.1 and 2) in 'Length' Slider component
17 | "auto", # str (Option from: [('ZH', 'ZH'), ('JP', 'JP'), ('EN', 'EN'), ('mix', 'mix'), ('auto', 'auto')]) in 'Language' Dropdown component
18 | "", # str (filepath on your computer (or URL) of file) in 'Audio prompt' Audio component
19 | "", # str in 'Text prompt' Textbox component
20 | "", # str in 'Prompt Mode' Radio component
21 | "", # str in '辅助文本' Textbox component
22 | 0, # int | float (numeric value between 0 and 1) in 'Weight' Slider component
23 | fn_index=0
24 | )
25 | with open(result[1],'rb') as file:
26 | data=file.read()
27 | return data
28 | ```
29 | 以上是接入Gradio的一个示例代码,请注意:函数的输入值必须是要合成的文本`text`,返回值是音频文件的二进制内容!
30 |
--------------------------------------------------------------------------------
/docs/zh_CN/issues.md:
--------------------------------------------------------------------------------
1 | # 常见的错误
2 |
3 | ## 1.GPT-SoVITS错误提示404 NOT FOUND
4 | ```
5 | /tts 404 NOT FOUND
6 | ```
7 | * 典型的错误原因:使用了非官方标准的代码
8 | (例如刘悦的整合包,其拓展功能多来自官方项目其他人贡献的Pull Requests,且对API代码进行了~~意义不明的~~更改)
9 | * 请您确保使用了花儿不哭官方整合包或者官方仓库的最新代码。
10 | ### 解决方法:
11 | * 1.~~手动拉取官方仓库代码~~ 无视,因为该类整合包受众不会拉代码
12 | * 2.下载本项目readme里提供的整合包(稳定但更新慢)
13 | * 3.前往下面的视频链接获取花儿不哭的官方整合包
14 | ### 【不要用了半天整合包,却不知道对应项目的创始人是谁!!!】官方视频/教程/整合包指路 :[耗时两个月自主研发的低成本AI音色克隆软件,免费送给大家!【GPT-SoVITS】](https://www.bilibili.com/video/BV12g4y1m7Uw/)
15 |
16 | ## 2.目标计算机积极拒绝连接
17 | ```
18 | 目标计算机积极拒绝连接
19 | ```
20 |
21 | 您需要检查:
22 | * API服务是否已经启动以及正在运行?
23 | * 请等待API启动完毕后再进行操作。
24 | * 不要关闭API控制台!
25 | * 端口是否正确填写?
26 |
27 | ## 3. 400 Bad Request
28 | ```
29 | 400 Bad Request
30 | ```
31 | 查看本程序控制台红色错误日志,通常api会返回报错原因。
32 | 如果没有接收到任何错误信息,可反馈问题。
33 | * 典型的错误原因:参考音频在3-10秒外;填写的模型路径不存在;
34 |
35 | ## 4.音频过长,语音延迟
36 | ```
37 | 序号合集为 ['xxx'] 的字幕由于之前的音频过长而被延迟
38 | ```
39 | * 你的字幕时间间隔不合理。
40 | * 考虑将设置项`音频最大加速倍率`调高(大于1视为启用)并打开`去除吸气声和静音`
41 | * 设置里有最小语音间隔,默认0.3秒。以防止这种情况下语音糊在一起,若不需要可调至0。
42 |
43 | ## 5.GPT-SoVITS输出音频有时长但没有声音
44 | ```
45 | GPT-SoVITS输出音频有时长但没有声音
46 | ```
47 | * 你的显卡不支持半精度
48 | * 手动修改`GPT_SoVITS\configs\tts_infer.yaml`中的`is_half`为`false`
--------------------------------------------------------------------------------
/docs/zh_CN/title.md:
--------------------------------------------------------------------------------
1 | 版本V4.4.2-2505,支持HiyoriUI,GPT-SoVITS,CosyVoice,F5-TTS(GSV格式API),微软在线TTS
2 | 仓库地址: [前往此处获取更新](https://github.com/YYuX-1145/Srt-AI-Voice-Assistant/releases) | [获取额外内容](https://github.com/YYuX-1145/Srt-AI-Voice-Assistant/tree/main/tools)
--------------------------------------------------------------------------------
/requirements.txt:
--------------------------------------------------------------------------------
1 | gradio>=4.44.1
2 | soundfile
3 | colorlog
4 | soxr
--------------------------------------------------------------------------------
/tools/__init__.py:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/YYuX-1145/Srt-AI-Voice-Assistant/66fb0108fa6b2e34a160a41d105182866fd704f2/tools/__init__.py
--------------------------------------------------------------------------------
/tools/put_extensions_here:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/YYuX-1145/Srt-AI-Voice-Assistant/66fb0108fa6b2e34a160a41d105182866fd704f2/tools/put_extensions_here
--------------------------------------------------------------------------------
/tools/slicer2.py:
--------------------------------------------------------------------------------
1 | import numpy as np
2 |
3 |
4 | # This function is obtained from librosa.
5 | def get_rms(
6 | y,
7 | frame_length=2048,
8 | hop_length=512,
9 | pad_mode="constant",
10 | ):
11 | padding = (int(frame_length // 2), int(frame_length // 2))
12 | y = np.pad(y, padding, mode=pad_mode)
13 |
14 | axis = -1
15 | # put our new within-frame axis at the end for now
16 | out_strides = y.strides + tuple([y.strides[axis]])
17 | # Reduce the shape on the framing axis
18 | x_shape_trimmed = list(y.shape)
19 | x_shape_trimmed[axis] -= frame_length - 1
20 | out_shape = tuple(x_shape_trimmed) + tuple([frame_length])
21 | xw = np.lib.stride_tricks.as_strided(y, shape=out_shape, strides=out_strides)
22 | if axis < 0:
23 | target_axis = axis - 1
24 | else:
25 | target_axis = axis + 1
26 | xw = np.moveaxis(xw, -1, target_axis)
27 | # Downsample along the target axis
28 | slices = [slice(None)] * xw.ndim
29 | slices[axis] = slice(0, None, hop_length)
30 | x = xw[tuple(slices)]
31 |
32 | # Calculate power
33 | power = np.mean(np.abs(x) ** 2, axis=-2, keepdims=True)
34 |
35 | return np.sqrt(power)
36 |
37 |
38 | class Slicer:
39 | def __init__(
40 | self,
41 | sr: int,
42 | threshold: float = -40.0,
43 | min_length: int = 5000,
44 | min_interval: int = 300,
45 | hop_size: int = 20,
46 | max_sil_kept: int = 5000,
47 | ):
48 | if not min_length >= min_interval >= hop_size:
49 | raise ValueError(
50 | "The following condition must be satisfied: min_length >= min_interval >= hop_size"
51 | )
52 | if not max_sil_kept >= hop_size:
53 | raise ValueError(
54 | "The following condition must be satisfied: max_sil_kept >= hop_size"
55 | )
56 |         min_interval = sr * min_interval / 1000  # ms -> samples
57 |         self.threshold = 10 ** (threshold / 20.0)  # dB -> linear amplitude
58 |         self.hop_size = round(sr * hop_size / 1000)  # ms -> samples
59 |         self.win_size = min(round(min_interval), 4 * self.hop_size)
60 |         self.min_length = round(sr * min_length / 1000 / self.hop_size)  # ms -> frames
61 |         self.min_interval = round(min_interval / self.hop_size)  # samples -> frames
62 |         self.max_sil_kept = round(sr * max_sil_kept / 1000 / self.hop_size)  # ms -> frames
63 |
64 | def _apply_slice(self, waveform, begin, end):
65 | if len(waveform.shape) > 1:
66 | return waveform[
67 | :, begin * self.hop_size : min(waveform.shape[1], end * self.hop_size)
68 | ]
69 | else:
70 | return waveform[
71 | begin * self.hop_size : min(waveform.shape[0], end * self.hop_size)
72 | ]
73 |
74 | # @timeit
75 | def slice(self, waveform):
76 | if len(waveform.shape) > 1:
77 | samples = waveform.mean(axis=0)
78 | else:
79 | samples = waveform
80 | if samples.shape[0] <= self.min_length:
81 |             return [[waveform, 0, samples.shape[0]]]  # too short to slice; keep the [audio, start, end] format used below
82 | rms_list = get_rms(
83 | y=samples, frame_length=self.win_size, hop_length=self.hop_size
84 | ).squeeze(0)
85 | sil_tags = []
86 | silence_start = None
87 | clip_start = 0
88 | for i, rms in enumerate(rms_list):
89 | # Keep looping while frame is silent.
90 | if rms < self.threshold:
91 | # Record start of silent frames.
92 | if silence_start is None:
93 | silence_start = i
94 | continue
95 | # Keep looping while frame is not silent and silence start has not been recorded.
96 | if silence_start is None:
97 | continue
98 | # Clear recorded silence start if interval is not enough or clip is too short
99 | is_leading_silence = silence_start == 0 and i > self.max_sil_kept
100 | need_slice_middle = (
101 | i - silence_start >= self.min_interval
102 | and i - clip_start >= self.min_length
103 | )
104 | if not is_leading_silence and not need_slice_middle:
105 | silence_start = None
106 | continue
107 | # Need slicing. Record the range of silent frames to be removed.
108 | if i - silence_start <= self.max_sil_kept:
109 | pos = rms_list[silence_start : i + 1].argmin() + silence_start
110 | if silence_start == 0:
111 | sil_tags.append((0, pos))
112 | else:
113 | sil_tags.append((pos, pos))
114 | clip_start = pos
115 | elif i - silence_start <= self.max_sil_kept * 2:
116 | pos = rms_list[
117 | i - self.max_sil_kept : silence_start + self.max_sil_kept + 1
118 | ].argmin()
119 | pos += i - self.max_sil_kept
120 | pos_l = (
121 | rms_list[
122 | silence_start : silence_start + self.max_sil_kept + 1
123 | ].argmin()
124 | + silence_start
125 | )
126 | pos_r = (
127 | rms_list[i - self.max_sil_kept : i + 1].argmin()
128 | + i
129 | - self.max_sil_kept
130 | )
131 | if silence_start == 0:
132 | sil_tags.append((0, pos_r))
133 | clip_start = pos_r
134 | else:
135 | sil_tags.append((min(pos_l, pos), max(pos_r, pos)))
136 | clip_start = max(pos_r, pos)
137 | else:
138 | pos_l = (
139 | rms_list[
140 | silence_start : silence_start + self.max_sil_kept + 1
141 | ].argmin()
142 | + silence_start
143 | )
144 | pos_r = (
145 | rms_list[i - self.max_sil_kept : i + 1].argmin()
146 | + i
147 | - self.max_sil_kept
148 | )
149 | if silence_start == 0:
150 | sil_tags.append((0, pos_r))
151 | else:
152 | sil_tags.append((pos_l, pos_r))
153 | clip_start = pos_r
154 | silence_start = None
155 | # Deal with trailing silence.
156 | total_frames = rms_list.shape[0]
157 | if (
158 | silence_start is not None
159 | and total_frames - silence_start >= self.min_interval
160 | ):
161 | silence_end = min(total_frames, silence_start + self.max_sil_kept)
162 | pos = rms_list[silence_start : silence_end + 1].argmin() + silence_start
163 | sil_tags.append((pos, total_frames + 1))
164 | # Apply and return slices.
165 |         # Each returned chunk: [audio, start sample, end sample]
166 | if len(sil_tags) == 0:
167 | return [[waveform,0,int(total_frames*self.hop_size)]]
168 | else:
169 | chunks = []
170 | if sil_tags[0][0] > 0:
171 | chunks.append([self._apply_slice(waveform, 0, sil_tags[0][0]),0,int(sil_tags[0][0]*self.hop_size)])
172 | for i in range(len(sil_tags) - 1):
173 | chunks.append(
174 | [self._apply_slice(waveform, sil_tags[i][1], sil_tags[i + 1][0]),int(sil_tags[i][1]*self.hop_size),int(sil_tags[i + 1][0]*self.hop_size)]
175 | )
176 | if sil_tags[-1][1] < total_frames:
177 | chunks.append(
178 | [self._apply_slice(waveform, sil_tags[-1][1], total_frames),int(sil_tags[-1][1]*self.hop_size),int(total_frames*self.hop_size)]
179 | )
180 | return chunks
181 |
182 |
183 | def main():
184 | import os.path
185 | from argparse import ArgumentParser
186 |
187 | import librosa
188 | import soundfile
189 |
190 | parser = ArgumentParser()
191 | parser.add_argument("audio", type=str, help="The audio to be sliced")
192 | parser.add_argument(
193 | "--out", type=str, help="Output directory of the sliced audio clips"
194 | )
195 | parser.add_argument(
196 | "--db_thresh",
197 | type=float,
198 | required=False,
199 | default=-40,
200 | help="The dB threshold for silence detection",
201 | )
202 | parser.add_argument(
203 | "--min_length",
204 | type=int,
205 | required=False,
206 | default=5000,
207 | help="The minimum milliseconds required for each sliced audio clip",
208 | )
209 | parser.add_argument(
210 | "--min_interval",
211 | type=int,
212 | required=False,
213 | default=300,
214 | help="The minimum milliseconds for a silence part to be sliced",
215 | )
216 | parser.add_argument(
217 | "--hop_size",
218 | type=int,
219 | required=False,
220 | default=10,
221 | help="Frame length in milliseconds",
222 | )
223 | parser.add_argument(
224 | "--max_sil_kept",
225 | type=int,
226 | required=False,
227 | default=500,
228 | help="The maximum silence length kept around the sliced clip, presented in milliseconds",
229 | )
230 | args = parser.parse_args()
231 | out = args.out
232 | if out is None:
233 | out = os.path.dirname(os.path.abspath(args.audio))
234 | audio, sr = librosa.load(args.audio, sr=None, mono=False)
235 | slicer = Slicer(
236 | sr=sr,
237 | threshold=args.db_thresh,
238 | min_length=args.min_length,
239 | min_interval=args.min_interval,
240 | hop_size=args.hop_size,
241 | max_sil_kept=args.max_sil_kept,
242 | )
243 | chunks = slicer.slice(audio)
244 | if not os.path.exists(out):
245 | os.makedirs(out)
246 |     for i, (chunk, start, end) in enumerate(chunks):  # slice() returns [audio, start, end] triples
247 | if len(chunk.shape) > 1:
248 | chunk = chunk.T
249 | soundfile.write(
250 | os.path.join(
251 | out,
252 |                 "%s_%d.wav"
253 | % (os.path.basename(args.audio).rsplit(".", maxsplit=1)[0], i),
254 | ),
255 | chunk,
256 | sr,
257 | )
258 |
259 |
260 | if __name__ == "__main__":
261 | main()
262 |
--------------------------------------------------------------------------------
/tools/wav2srt.py:
--------------------------------------------------------------------------------
1 | import faster_whisper
2 | import os
3 | import librosa
4 | import soundfile as sf
5 | import argparse
6 | from tools.slicer2 import Slicer
7 | from faster_whisper import WhisperModel
8 | from funasr import AutoModel
9 |
10 | current_directory = os.path.dirname(os.path.abspath(__file__))
11 | parser = argparse.ArgumentParser(add_help=False)
12 | parser.add_argument("-input_dir", default=None,type=str)
13 | parser.add_argument("-output_dir",default=None,type=str)
14 | parser.add_argument("-engine",default="whisper",type=str)
15 | parser.add_argument("--whisper_size", default="large-v3",type=str)
16 | parser.add_argument("--threshold",default=-40,type=float)
17 | parser.add_argument("--min_length",default=5000,type=int)
18 | parser.add_argument("--min_interval",default=300,type=int)
19 | parser.add_argument("--hop_size",default=20,type=int)
20 | parser.add_argument("--max_sil_kept", default=1000,type=int)
21 | args = parser.parse_args()
22 |
23 |
24 | if args.engine=="whisper":
25 | model_path = f'tools/asr/models/faster-whisper-{args.whisper_size}'
26 | os.makedirs(model_path,exist_ok=True)
27 |     if not os.listdir(model_path):
28 | print("downloading...")
29 | os.makedirs(model_path,exist_ok=True)
30 | faster_whisper.download_model(size_or_id=args.whisper_size,output_dir=model_path)
31 | try:
32 | print("loading faster whisper model:",model_path)
33 | model = WhisperModel(model_path, device='cuda')
34 | except Exception as e:
35 | print(e)
36 |         print("Download or loading failed. If the model cannot be downloaded automatically, please fetch the faster-whisper model manually from an HF mirror site.")
37 | if args.whisper_size=="large-v3":
38 | model.feature_extractor.mel_filters = model.feature_extractor.get_mel_filters(model.feature_extractor.sampling_rate, model.feature_extractor.n_fft, n_mels=128)
39 | else :
40 | path_asr = 'tools/asr/models/speech_paraformer-large_asr_nat-zh-cn-16k-common-vocab8404-pytorch'
41 | path_asr = path_asr if os.path.exists(path_asr) else "iic/speech_paraformer-large_asr_nat-zh-cn-16k-common-vocab8404-pytorch"
42 | model_revision="v2.0.4"
43 | path_vad = 'tools/asr/models/speech_fsmn_vad_zh-cn-16k-common-pytorch'
44 | path_punc = 'tools/asr/models/punc_ct-transformer_zh-cn-common-vocab272727-pytorch'
45 | path_vad = path_vad if os.path.exists(path_vad) else "iic/speech_fsmn_vad_zh-cn-16k-common-pytorch"
46 | path_punc = path_punc if os.path.exists(path_punc) else "iic/punc_ct-transformer_zh-cn-common-vocab272727-pytorch"
47 | vad_model_revision=punc_model_revision="v2.0.4"
48 | #sync with gsv
49 | model = AutoModel(
50 | model=path_asr,
51 | model_revision=model_revision,
52 | vad_model=path_vad,
53 | vad_model_revision=vad_model_revision,
54 | punc_model=path_punc,
55 | punc_model_revision=punc_model_revision,
56 | )
57 |
58 |
59 | def whisper_transcribe(audio,sr):
60 | audio=librosa.resample(audio,orig_sr=sr,target_sr=16000)
61 | lang=['zh','ja','en']
62 | try:
63 | segments, info = model.transcribe(
64 | audio = audio,
65 | beam_size = 5,
66 | vad_filter = False,
67 | language = None)
68 | text=""
69 | assert(info.language in lang)
70 | for seg in segments:
71 | text+=seg.text
72 | return text
73 | except Exception as e:
74 | print(e)
75 |
76 | def funasr_transcribe(audio,sr):
77 | sf.write("temp.wav",audio,sr)
78 | text = model.generate(input="temp.wav")[0]["text"]
79 | os.remove("temp.wav")
80 | return text
81 |
82 | def transcribe(audio_path):
83 | global model
84 | audio,sr=librosa.load(audio_path,sr=None)
85 | slicer=Slicer(
86 | sr=sr,
87 |         threshold=int(args.threshold), # volume (dB) below this value is treated as a candidate silent cut point
88 |         min_length=int(args.min_length), # minimum length of each slice; a too-short head keeps merging with following parts until exceeding this value
89 |         min_interval= int(args.min_interval), # minimum silence length to cut at
90 |         hop_size= int(args.hop_size), # resolution of the volume curve; smaller = higher precision and more computation (higher precision does not always mean better results)
91 |         max_sil_kept= int(args.max_sil_kept), # maximum silence kept around each slice after cutting
92 | )
93 | srt=[]
94 |     for chunk, start, end in slicer.slice(audio): # start and end are sample indices
95 | start=start/sr
96 | end=end/sr
97 | try:
98 | if args.engine=="whisper":
99 | text=whisper_transcribe(chunk,sr)
100 | else:
101 | text=funasr_transcribe(chunk,sr)
102 | except Exception as e:
103 | print(e)
104 | continue
105 | srt.append((start,end,text))
106 | srt_content=[]
107 | idx=0
108 | for i in srt:
109 | idx+=1
110 | start,end,text=i
111 | srt_content.append(str(idx)+"\n")
112 | srt_content.append(f"{to_time(start)} --> {to_time(end)}"+"\n")
113 | srt_content.append(text+"\n")
114 | srt_content.append("\n")
115 |
116 | if args.output_dir is None:
117 | savepath=os.path.join(current_directory,"output.srt")
118 | elif args.output_dir.endswith(".srt"):
119 | savepath=args.output_dir
120 | os.makedirs(os.path.dirname(args.output_dir),exist_ok=True)
121 | else:
122 |         savepath=f"{os.path.join(args.output_dir,os.path.basename(audio_path))}.srt"
123 | os.makedirs(args.output_dir,exist_ok=True)
124 |
125 | with open(savepath,"w",encoding="utf-8") as f:
126 | f.writelines(srt_content)
127 | #os.system(f'explorer /select, {savepath}')
128 |
129 | def to_time(time_raw:float):
130 | hours, r = divmod(time_raw,3600)
131 | minutes, r = divmod(r,60)
132 | seconds, milliseconds = divmod(r, 1)
133 | return f"{int(hours):02d}:{int(minutes):02d}:{int(seconds):02d},{int(milliseconds*1000):03d}"
134 |
135 | if __name__=="__main__":
136 | if args.input_dir is not None:
137 | wav_path=args.input_dir.strip('"')
138 | else:
139 | wav_path=input("enter input audio path: ").strip('"')
140 | print(wav_path)
141 | transcribe(wav_path)
142 |
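143 | # Example invocations (illustrative paths; arguments mirror the parser above,
144 | # assuming the script is run from the repository root):
145 | #   python -m tools.wav2srt -input_dir my_audio.wav -output_dir out.srt -engine whisper
146 | #   python -m tools.wav2srt -input_dir my_audio.wav -engine funasr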
--------------------------------------------------------------------------------
/启动Srt-AI-Voice-Assistant.bat:
--------------------------------------------------------------------------------
1 | chcp 65001
2 | echo "This launch script is for reference only; if you cannot install the dependencies, please use the packaged version."
3 | runtime\python.exe Srt-AI-Voice-Assistant.py -p 0
4 | :: -p specifies the port and overrides the in-app setting (0 = automatic); optional -server_mode locks most features that could conflict in a multi-user environment
5 | pause
--------------------------------------------------------------------------------