├── .github └── workflows │ └── ci.yml ├── .gitignore ├── LICENSE ├── README.md ├── build.py ├── extra-hooks └── hook-librosa.py ├── pdm.lock ├── pyproject.toml ├── rtvc ├── __init__.py ├── __main__.py ├── assets │ └── icon.png ├── audio.py ├── config.py ├── gui.py ├── i18n.py ├── locales │ ├── en_US.yaml │ └── zh_CN.yaml └── plugins │ ├── __init__.py │ ├── base.py │ ├── diffusion.py │ ├── hifisinger.py │ └── rvc.py └── tests └── test_sola.py /.github/workflows/ci.yml: -------------------------------------------------------------------------------- 1 | name: CI 2 | 3 | on: 4 | push: 5 | branches: [main] 6 | tags: 7 | - "v*.*.*" 8 | pull_request: 9 | branches: [main] 10 | 11 | jobs: 12 | lint: 13 | runs-on: ubuntu-latest 14 | steps: 15 | - uses: actions/checkout@v3 16 | - uses: actions/setup-python@v4 17 | with: 18 | python-version: "3.10" 19 | - uses: pdm-project/setup-pdm@v3 20 | - name: Install dependencies 21 | run: pdm sync 22 | - name: Lint with black and isort 23 | run: pdm run lint-check 24 | 25 | build: 26 | needs: lint 27 | runs-on: ${{ matrix.os }} 28 | strategy: 29 | fail-fast: false 30 | matrix: 31 | os: [ubuntu-latest, macos-latest, windows-latest] 32 | python-version: ["3.10"] 33 | package-type: ["onefile", "onedir"] 34 | steps: 35 | - uses: actions/checkout@v3 36 | - uses: actions/setup-python@v4 37 | with: 38 | python-version: ${{ matrix.python-version }} 39 | - uses: pdm-project/setup-pdm@v3 40 | - name: Install dependencies 41 | run: pdm sync 42 | - name: Build pyinstaller package 43 | env: 44 | PACKAGE_TYPE: ${{ matrix.package-type }} 45 | run: pdm run build.py 46 | - name: Upload artifact 47 | uses: actions/upload-artifact@v3 48 | with: 49 | name: rtvc-${{ matrix.os }}-${{ matrix.python-version }}-${{ matrix.package-type }} 50 | path: dist 51 | 52 | publish: 53 | needs: lint 54 | runs-on: ubuntu-latest 55 | if: startsWith(github.ref, 'refs/tags/v') 56 | steps: 57 | - uses: actions/checkout@v3 58 | - uses: actions/setup-python@v4 59 | with: 60 | 
python-version: "3.10" 61 | - uses: pdm-project/setup-pdm@v3 62 | - name: Install dependencies 63 | run: pdm sync 64 | - name: Publish 65 | run: pdm publish -u __token__ -P ${{ secrets.PYPI_TOKEN }} 66 | -------------------------------------------------------------------------------- /.gitignore: -------------------------------------------------------------------------------- 1 | # Byte-compiled / optimized / DLL files 2 | __pycache__/ 3 | *.py[cod] 4 | *$py.class 5 | 6 | # C extensions 7 | *.so 8 | 9 | # Distribution / packaging 10 | .Python 11 | build/ 12 | develop-eggs/ 13 | dist/ 14 | downloads/ 15 | eggs/ 16 | .eggs/ 17 | lib/ 18 | lib64/ 19 | parts/ 20 | sdist/ 21 | var/ 22 | wheels/ 23 | share/python-wheels/ 24 | *.egg-info/ 25 | .installed.cfg 26 | *.egg 27 | MANIFEST 28 | 29 | # PyInstaller 30 | # Usually these files are written by a python script from a template 31 | # before PyInstaller builds the exe, so as to inject date/other infos into it. 32 | *.manifest 33 | *.spec 34 | 35 | # Installer logs 36 | pip-log.txt 37 | pip-delete-this-directory.txt 38 | 39 | # Unit test / coverage reports 40 | htmlcov/ 41 | .tox/ 42 | .nox/ 43 | .coverage 44 | .coverage.* 45 | .cache 46 | nosetests.xml 47 | coverage.xml 48 | *.cover 49 | *.py,cover 50 | .hypothesis/ 51 | .pytest_cache/ 52 | cover/ 53 | 54 | # Translations 55 | *.mo 56 | *.pot 57 | 58 | # Django stuff: 59 | *.log 60 | local_settings.py 61 | db.sqlite3 62 | db.sqlite3-journal 63 | 64 | # Flask stuff: 65 | instance/ 66 | .webassets-cache 67 | 68 | # Scrapy stuff: 69 | .scrapy 70 | 71 | # Sphinx documentation 72 | docs/_build/ 73 | 74 | # PyBuilder 75 | .pybuilder/ 76 | target/ 77 | 78 | # Jupyter Notebook 79 | .ipynb_checkpoints 80 | 81 | # IPython 82 | profile_default/ 83 | ipython_config.py 84 | 85 | # pyenv 86 | # For a library or package, you might want to ignore these files since the code is 87 | # intended to run in multiple environments; otherwise, check them in: 88 | # .python-version 89 | 90 | # 
pipenv 91 | # According to pypa/pipenv#598, it is recommended to include Pipfile.lock in version control. 92 | # However, in case of collaboration, if having platform-specific dependencies or dependencies 93 | # having no cross-platform support, pipenv may install dependencies that don't work, or not 94 | # install all needed dependencies. 95 | #Pipfile.lock 96 | 97 | # poetry 98 | # Similar to Pipfile.lock, it is generally recommended to include poetry.lock in version control. 99 | # This is especially recommended for binary packages to ensure reproducibility, and is more 100 | # commonly ignored for libraries. 101 | # https://python-poetry.org/docs/basic-usage/#commit-your-poetrylock-file-to-version-control 102 | #poetry.lock 103 | 104 | # pdm 105 | # Similar to Pipfile.lock, it is generally recommended to include pdm.lock in version control. 106 | #pdm.lock 107 | # pdm stores project-wide configurations in .pdm.toml, but it is recommended to not include it 108 | # in version control. 109 | # https://pdm.fming.dev/#use-with-ide 110 | .pdm.toml 111 | 112 | # PEP 582; used by e.g. 
github.com/David-OConnor/pyflow and github.com/pdm-project/pdm 113 | __pypackages__/ 114 | 115 | # Celery stuff 116 | celerybeat-schedule 117 | celerybeat.pid 118 | 119 | # SageMath parsed files 120 | *.sage.py 121 | 122 | # Environments 123 | .env 124 | .venv 125 | env/ 126 | venv/ 127 | ENV/ 128 | env.bak/ 129 | venv.bak/ 130 | 131 | # Spyder project settings 132 | .spyderproject 133 | .spyproject 134 | 135 | # Rope project settings 136 | .ropeproject 137 | 138 | # mkdocs documentation 139 | /site 140 | 141 | # mypy 142 | .mypy_cache/ 143 | .dmypy.json 144 | dmypy.json 145 | 146 | # Pyre type checker 147 | .pyre/ 148 | 149 | # pytype static type analyzer 150 | .pytype/ 151 | 152 | # Cython debug symbols 153 | cython_debug/ 154 | 155 | # PyCharm 156 | # JetBrains specific template is maintained in a separate JetBrains.gitignore that can 157 | # be found at https://github.com/github/gitignore/blob/main/Global/JetBrains.gitignore 158 | # and can be added to the global gitignore or merged into this file. For a more nuclear 159 | # option (not recommended) you can uncomment the following to ignore the entire idea folder. 
160 | #.idea/ 161 | .pdm-python 162 | .DS_Store 163 | __main__.build 164 | *.build 165 | __main__ 166 | Info.plist 167 | -------------------------------------------------------------------------------- /LICENSE: -------------------------------------------------------------------------------- 1 | MIT License 2 | 3 | Copyright (c) 2023 Lengyue 4 | 5 | Permission is hereby granted, free of charge, to any person obtaining a copy 6 | of this software and associated documentation files (the "Software"), to deal 7 | in the Software without restriction, including without limitation the rights 8 | to use, copy, modify, merge, publish, distribute, sublicense, and/or sell 9 | copies of the Software, and to permit persons to whom the Software is 10 | furnished to do so, subject to the following conditions: 11 | 12 | The above copyright notice and this permission notice shall be included in all 13 | copies or substantial portions of the Software. 14 | 15 | THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR 16 | IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, 17 | FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE 18 | AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER 19 | LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, 20 | OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE 21 | SOFTWARE. 
22 | -------------------------------------------------------------------------------- /README.md: -------------------------------------------------------------------------------- 1 | # RTVC: Real-Time Voice Conversion GUI 2 | [![PyPI Version](https://img.shields.io/pypi/v/rtvc.svg?style=flat-square)](https://pypi.python.org/pypi/rtvc) 3 | [![Downloads](https://img.shields.io/pypi/dm/rtvc.svg?style=flat-square)](https://pypi.python.org/pypi/rtvc) 4 | [![CI Status](https://img.shields.io/github/actions/workflow/status/fishaudio/realtime-vc-gui/ci.yml?style=flat-square&logo=GitHub)](https://github.com/fishaudio/realtime-vc-gui/actions) 5 | [![License](https://img.shields.io/github/license/fishaudio/realtime-vc-gui?style=flat-square)](https://github.com/fishaudio/realtime-vc-gui/blob/main/LICENSE) 6 | 7 | 8 | 9 | ## Usage 10 | You can download the latest release from **[here](https://nightly.link/fishaudio/realtime-vc-gui/workflows/ci/main)** 11 | 12 | or install from PyPI. 13 | 14 | ```bash 15 | pip install rtvc 16 | rtvc 17 | ``` 18 | 19 | or use pipx 20 | 21 | ```bash 22 | # or 23 | pipx run rtvc 24 | ``` 25 | -------------------------------------------------------------------------------- /build.py: -------------------------------------------------------------------------------- 1 | import os 2 | import platform 3 | import subprocess as sp 4 | 5 | package_type = os.environ.get("PACKAGE_TYPE", "onedir") 6 | assert package_type in ("onedir", "onefile"), "PACKAGE_TYPE must be onedir or onefile" 7 | 8 | # upgrade dependencies manually 9 | if platform.system() == "Windows": 10 | sp.check_call(["pip", "install", "--upgrade", "pywin32", "cffi"]) 11 | 12 | sep = ";" if platform.system() == "Windows" else ":" 13 | 14 | args = [ 15 | "pyinstaller", 16 | "rtvc/__main__.py", 17 | f"--{package_type}", 18 | "-n", 19 | "rtvc", 20 | "--additional-hooks=extra-hooks", 21 | "--noconfirm", 22 | "--add-data", 23 | f"rtvc/assets{sep}assets", 24 | "--add-data", 25 | 
f"rtvc/locales{sep}locales", 26 | ] 27 | 28 | sp.check_call(args) 29 | -------------------------------------------------------------------------------- /extra-hooks/hook-librosa.py: -------------------------------------------------------------------------------- 1 | from PyInstaller.utils.hooks import collect_data_files 2 | 3 | datas = collect_data_files("librosa") 4 | -------------------------------------------------------------------------------- /pyproject.toml: -------------------------------------------------------------------------------- 1 | [project] 2 | name = "rtvc" 3 | version = "0.2.0" 4 | description = "Real-Time Voice Conversion GUI" 5 | readme = "README.md" 6 | requires-python = ">=3.10,<3.12" 7 | license = {text = "MIT"} 8 | keywords = ["voice-conversion", "svc"] 9 | authors = [ 10 | {name = "lengyue", email = "lengyue@lengyue.me"}, 11 | ] 12 | classifiers = [ 13 | "Development Status :: 3 - Alpha", 14 | "Programming Language :: Python :: 3.10", 15 | ] 16 | 17 | dependencies = [ 18 | "noisereduce>=2.0.1", 19 | "sounddevice>=0.4.6", 20 | "pyyaml>=6.0", 21 | "PyQt6>=6.5.0", 22 | "pyqtdarktheme==2.1.0", 23 | "requests>=2.31.0", 24 | "librosa==0.9.2", 25 | ] 26 | 27 | [project.urls] 28 | repository = "https://github.com/fishaudio/realtime-vc-gui" 29 | 30 | [project.gui-scripts] 31 | rtvc = "rtvc.__main__:main" 32 | 33 | [tool.pdm] 34 | [tool.pdm.build] 35 | includes = ["rtvc"] 36 | 37 | [tool.pdm.dev-dependencies] 38 | dev = [ 39 | "isort>=5.12.0", 40 | "black>=23.3.0", 41 | "pytest>=7.3.1", 42 | "torch>=2.0.1", 43 | "pyinstaller>=5.11.0", 44 | ] 45 | 46 | [build-system] 47 | requires = ["pdm-backend"] 48 | build-backend = "pdm.backend" 49 | 50 | [tool.pdm.scripts] 51 | lint = { shell = "black . && isort ." } 52 | lint-check = { shell = "black --check . && isort --check ." } 53 | test = { shell = "PYTHONPATH=. 
def get_devices(update: bool = True):
    """Return the selectable input and output audio devices.

    Args:
        update: When true, sounddevice is torn down and re-initialised through
            its private ``_terminate``/``_initialize`` hooks before querying.
            NOTE(review): presumably this refreshes the device list after
            hot-plugging -- confirm against the sounddevice documentation.

    Returns:
        A ``(input_devices, output_devices)`` tuple. Each element is a list of
        ``{"id": <index into sd.query_devices()>, "name": "<device> (<host API>)"}``
        dicts, limited to devices with at least one input (respectively
        output) channel.
    """
    if update:
        sd._terminate()
        sd._initialize()

    all_devices = sd.query_devices()

    # Tag every device with the name of the host API that lists it, so the
    # display name can tell apart the same hardware exposed by several APIs.
    for api in sd.query_hostapis():
        api_name = api["name"]
        for index in api["devices"]:
            all_devices[index]["hostapi_name"] = api_name

    def _with_channels(channel_key):
        # Keep only devices reporting a positive channel count for this key.
        found = []
        for index, info in enumerate(all_devices):
            if info[channel_key] > 0:
                found.append(
                    {"id": index, "name": f"{info['name']} ({info['hostapi_name']})"}
                )
        return found

    return _with_channels("max_input_channels"), _with_channels("max_output_channels")
13 | application_path = Path(sys._MEIPASS) 14 | else: 15 | application_path = Path(__file__).parent 16 | 17 | 18 | @dataclass 19 | class Config: 20 | theme: Literal["auto", "light", "dark"] = "auto" 21 | locale: str = locale.getdefaultlocale()[0] 22 | backend: str = "http://localhost:6844/voiceChangeModel" 23 | 24 | input_device: str | None = None 25 | output_device: str | None = None 26 | 27 | db_threshold: int = -30 28 | pitch_shift: int = 0 29 | sample_duration: int = 1000 30 | fade_duration: int = 80 31 | extra_duration: int = 50 32 | input_denoise: bool = False 33 | output_denoise: bool = False 34 | sample_rate: int = 44100 35 | sola_search_duration: int = 12 36 | buffer_num: int = 4 37 | 38 | # Plugins 39 | current_plugin: str | None = None 40 | plugins: dict[str, dict] = field(default_factory=dict) 41 | 42 | @property 43 | def sample_frames(self): 44 | return self.sample_duration * self.sample_rate // 1000 45 | 46 | @property 47 | def fade_frames(self): 48 | return self.fade_duration * self.sample_rate // 1000 49 | 50 | @property 51 | def extra_frames(self): 52 | return self.extra_duration * self.sample_rate // 1000 53 | 54 | @property 55 | def sola_search_frames(self): 56 | return self.sola_search_duration * self.sample_rate // 1000 57 | 58 | 59 | default_config_path = str((Path.home() / ".rtvc" / "config.yaml").absolute()) 60 | config = Config() 61 | 62 | 63 | def load_config(path: Path | str = default_config_path) -> Config: 64 | global config 65 | 66 | path = Path(path) 67 | 68 | if path.exists(): 69 | try: 70 | with open(path, "r", encoding="utf-8") as f: 71 | config = Config(**yaml.safe_load(f.read())) 72 | except Exception: 73 | config = Config() 74 | print("Failed to load config file, use default config instead.") 75 | 76 | return config 77 | 78 | 79 | def save_config(path: Path | str = default_config_path) -> None: 80 | path = Path(path) 81 | 82 | if not path.parent.exists(): 83 | path.parent.mkdir(parents=True) 84 | 85 | with open(path, "w", 
def __init__(self):
    """Build the main window: icon, title, all settings sections, VC state."""
    super().__init__()

    icon_file = application_path / "assets" / "icon.png"
    self.setWindowIcon(QIcon(str(icon_file)))

    # Show only the part of the installed version before any "+" suffix
    # (e.g. "+editable").
    installed_version = pkg_resources.get_distribution("rtvc").version
    display_version = installed_version.partition("+")[0]
    self.setWindowTitle(_t("title").format(version=display_version))

    # Sections are stacked vertically and pinned to the top of the window.
    self.main_layout = QVBoxLayout()
    self.main_layout.setAlignment(Qt.AlignmentFlag.AlignTop)

    # The call order below is the top-to-bottom order of the sections.
    self.setup_ui_settings()
    self.setup_backend_settings()
    self.setup_device_settings()
    self.setup_audio_settings()

    # The plugin group box must exist and be in the layout before
    # setup_plugin_settings() fills, shows or hides it.
    self.plugin_layout = QGroupBox()
    self.main_layout.addWidget(self.plugin_layout)
    self.setup_plugin_settings()

    self.setup_action_buttons()
    self.setLayout(self.main_layout)

    # Use size hint to set a reasonable size
    self.setMinimumWidth(900)

    # Voice conversion state: the event is set while conversion is running.
    self.thread = None
    self.vc_status = threading.Event()
def setup_device_settings(self):
    """Build the audio-device group box with input and output selectors.

    Both combo boxes are filled from ``get_devices()`` (item text is the
    display name, item data is the device id). The device stored in
    ``config`` is re-selected when it is still present; otherwise the first
    entry is selected and written back to ``config``.
    """
    # second row: a group box for audio device settings
    row = QGroupBox(_t("audio_device.name"))
    row_layout = QGridLayout()
    row_layout.setAlignment(Qt.AlignmentFlag.AlignLeft)

    # fetch devices
    input_devices, output_devices = get_devices()

    def restore_selection(combo, saved_id):
        """Select ``saved_id`` in ``combo``; return the id config should keep."""
        if saved_id is None:
            # Nothing saved yet: leave the combo and the config untouched.
            return None

        for i in range(combo.count()):
            if combo.itemData(i) == saved_id:
                combo.setCurrentIndex(i)
                return saved_id

        # not found, use default
        combo.setCurrentIndex(0)
        return combo.itemData(0)

    # input device
    row_layout.addWidget(QLabel(_t("audio_device.input")), 0, 0)
    self.input_device_combo = QComboBox()
    for device in input_devices:
        self.input_device_combo.addItem(device["name"], device["id"])

    config.input_device = restore_selection(
        self.input_device_combo, config.input_device
    )
    self.input_device_combo.setFixedWidth(300)
    row_layout.addWidget(self.input_device_combo, 0, 1)

    # output device
    row_layout.addWidget(QLabel(_t("audio_device.output")), 1, 0)
    self.output_device_combo = QComboBox()
    for device in output_devices:
        self.output_device_combo.addItem(device["name"], device["id"])

    config.output_device = restore_selection(
        self.output_device_combo, config.output_device
    )
    # Fixed: the width was previously applied to the input combo a second
    # time, leaving the output combo unsized.
    self.output_device_combo.setFixedWidth(300)
    row_layout.addWidget(self.output_device_combo, 1, 1)

    row.setLayout(row_layout)

    self.main_layout.addWidget(row)
self.pitch_shift_label.setFixedWidth(50) 216 | row_layout.addWidget(self.pitch_shift_label, 0, 5) 217 | self.pitch_shift_slider.valueChanged.connect( 218 | lambda v: self.pitch_shift_label.setText(f"{v}") 219 | ) 220 | 221 | # performance related 222 | # sample_duration, fade_duration 223 | row_layout.addWidget(QLabel(_t("audio.sample_duration")), 1, 0) 224 | self.sample_duration_slider = QSlider(Qt.Orientation.Horizontal) 225 | self.sample_duration_slider.setMinimum(100) 226 | self.sample_duration_slider.setMaximum(3000) 227 | self.sample_duration_slider.setSingleStep(100) 228 | self.sample_duration_slider.setTickInterval(100) 229 | self.sample_duration_slider.setValue(config.sample_duration) 230 | row_layout.addWidget(self.sample_duration_slider, 1, 1) 231 | self.sample_duration_label = QLabel(f"{config.sample_duration / 1000:.1f} s") 232 | self.sample_duration_label.setFixedWidth(50) 233 | row_layout.addWidget(self.sample_duration_label, 1, 2) 234 | self.sample_duration_slider.valueChanged.connect( 235 | lambda v: self.sample_duration_label.setText(f"{v / 1000:.1f} s") 236 | ) 237 | 238 | row_layout.addWidget(QLabel(_t("audio.fade_duration")), 1, 3) 239 | self.fade_duration_slider = QSlider(Qt.Orientation.Horizontal) 240 | self.fade_duration_slider.setMinimum(10) 241 | self.fade_duration_slider.setMaximum(150) 242 | self.fade_duration_slider.setSingleStep(10) 243 | self.fade_duration_slider.setTickInterval(10) 244 | self.fade_duration_slider.setValue(config.fade_duration) 245 | row_layout.addWidget(self.fade_duration_slider, 1, 4) 246 | self.fade_duration_label = QLabel(f"{config.fade_duration / 1000:.2f} s") 247 | self.fade_duration_label.setFixedWidth(50) 248 | row_layout.addWidget(self.fade_duration_label, 1, 5) 249 | self.fade_duration_slider.valueChanged.connect( 250 | lambda v: self.fade_duration_label.setText(f"{v / 1000:.2f} s") 251 | ) 252 | 253 | # Extra duration, input denoise, output denoise in next row 254 | 
row_layout.addWidget(QLabel(_t("audio.extra_duration")), 2, 0) 255 | self.extra_duration_slider = QSlider(Qt.Orientation.Horizontal) 256 | self.extra_duration_slider.setMinimum(50) 257 | self.extra_duration_slider.setMaximum(1000) 258 | self.extra_duration_slider.setSingleStep(10) 259 | self.extra_duration_slider.setTickInterval(10) 260 | self.extra_duration_slider.setValue(config.extra_duration) 261 | row_layout.addWidget(self.extra_duration_slider, 2, 1) 262 | self.extra_duration_label = QLabel(f"{config.extra_duration / 1000:.2f} s") 263 | self.extra_duration_label.setFixedWidth(50) 264 | row_layout.addWidget(self.extra_duration_label, 2, 2) 265 | self.extra_duration_slider.valueChanged.connect( 266 | lambda v: self.extra_duration_label.setText(f"{v / 1000:.2f} s") 267 | ) 268 | 269 | self.input_denoise_checkbox = QCheckBox() 270 | self.input_denoise_checkbox.setText(_t("audio.input_denoise")) 271 | self.input_denoise_checkbox.setChecked(config.input_denoise) 272 | row_layout.addWidget(self.input_denoise_checkbox, 2, 3) 273 | 274 | self.output_denoise_checkbox = QCheckBox() 275 | self.output_denoise_checkbox.setText(_t("audio.output_denoise")) 276 | self.output_denoise_checkbox.setChecked(config.output_denoise) 277 | row_layout.addWidget(self.output_denoise_checkbox, 2, 4) 278 | 279 | row.setLayout(row_layout) 280 | self.main_layout.addWidget(row) 281 | 282 | def setup_backend_settings(self): 283 | widget = QGroupBox() 284 | widget.setTitle(_t("backend.title")) 285 | row = QHBoxLayout() 286 | 287 | # protocol 288 | row.addWidget(QLabel(_t("backend.protocol_label"))) 289 | self.backend_protocol = QComboBox() 290 | self.backend_protocol.setMinimumWidth(75) 291 | self.backend_protocol.addItems(["v1"]) 292 | self.backend_protocol.setCurrentText("v1") 293 | row.addWidget(self.backend_protocol) 294 | 295 | # set up backend (url) input, and a test button 296 | row.addWidget(QLabel(_t("backend.name"))) 297 | self.backend_input = QLineEdit() 298 | 
def setup_plugin_settings(self):
    """Render the settings of ``config.current_plugin`` into the plugin box.

    Sets ``self.get_plugin_config`` (callable returning the plugin's current
    values) and ``self.plugin_key_mapping`` on every call. When no plugin is
    selected, or the configured id matches no class in ``ALL_PLUGINS`` (for
    example a stale config file), the box is hidden and both attributes are
    reset to empty defaults, so later readers never hit a missing attribute.
    """
    plugin_id = config.current_plugin

    # Find the plugin class from the config (first match wins).
    plugin_cls = None
    if plugin_id is not None:
        plugin_cls = next(
            (candidate for candidate in ALL_PLUGINS if candidate.id == plugin_id),
            None,
        )

    if plugin_cls is None:
        self.get_plugin_config = lambda: dict()
        self.plugin_key_mapping = dict()
        self.plugin_layout.hide()
        return

    self.plugin_layout.show()
    self.plugin_layout.setTitle(_t(f"plugins.{plugin_id}.name"))

    if self.plugin_layout.layout():
        # remove the old layout: handing it to a throwaway widget detaches
        # it from the group box so a new layout can be installed
        QWidget().setLayout(self.plugin_layout.layout())

    layout, get_value_func, key_mapping = render_plugin(plugin_cls)
    self.get_plugin_config = get_value_func
    self.plugin_key_mapping = key_mapping
    self.plugin_layout.setLayout(layout)

    # resize the window to fit the new layout
    self.resize(self.sizeHint())
def test_backend(self):
    """Probe the backend URL from the text field and report the result.

    Sends an HTTP ``OPTIONS`` request with a 5 second timeout. A 200
    response counts as success: the URL is stored in ``config.backend`` and
    the config is saved. Any other status, or any error while sending the
    request, is reported as a failure. The outcome is shown in a modal
    message box either way.
    """
    backend = self.backend_input.text()

    try:
        response = requests.options(backend, timeout=5)
    except Exception:
        # Best effort: whatever went wrong (bad URL, refused connection,
        # timeout) is a failed test. Unlike a bare ``except:`` this lets
        # KeyboardInterrupt and SystemExit propagate.
        response = None

    message_box = QMessageBox()

    if response is not None and response.status_code == 200:
        message_box.setIcon(QMessageBox.Icon.Information)
        message_box.setText(_t("backend.test_succeed"))
        config.backend = backend
        save_config()
    else:
        message_box.setIcon(QMessageBox.Icon.Question)
        message_box.setText(_t("backend.test_failed"))

    message_box.exec()
def load_config(self):
    """Import a YAML configuration picked by the user, then restart.

    Nothing happens when the file dialog is dismissed without a selection.
    Otherwise the chosen file is loaded through the module-level
    ``load_config``, persisted with ``save_config``, the user is informed,
    and the process is replaced via ``os.execv``.
    """
    # ask the user which config file to import
    chosen_path, _selected_filter = QFileDialog.getOpenFileName(
        self, _t("config.load_title"), "", "YAML (*.yaml)"
    )

    if chosen_path:
        load_config(chosen_path)
        save_config()

        # tell the user the app is about to restart
        notice = QMessageBox()
        notice.setIcon(QMessageBox.Icon.Information)
        notice.setText(_t("config.load_msg"))
        notice.setStandardButtons(QMessageBox.StandardButton.Ok)
        notice.exec()

        os.execv(sys.argv[0], sys.argv)
def audio_callback(self, indata, outdata, frames, times, status):
    """Stream callback: queue the captured block and emit converted audio.

    A copy of ``indata`` is pushed onto ``self.in_queue`` together with the
    number of output channels (``outdata.shape[1]``) and the current
    timestamp. ``outdata`` is then filled from ``self.out_queue`` without
    blocking; when nothing is ready yet it is filled with zeros.
    """
    # hand the captured block over to the background worker
    captured = indata.copy()
    channel_count = outdata.shape[1]
    self.in_queue.put((captured, channel_count, time.time()))

    # play whatever has been converted so far, or silence
    try:
        converted = self.out_queue.get_nowait()
    except queue.Empty:
        converted = 0

    outdata[:] = converted
def worker_step(self, indata):
    """Convert one captured audio block through the remote backend.

    The block is mixed down to mono, optionally denoised and gated, pushed
    into the rolling buffer ``self.input_wav``, and the whole buffer is
    posted to ``config.backend`` as a WAV file. The reply is trimmed,
    aligned against ``self.sola_buffer`` (tail of the previous output) and
    cross-faded with it.

    Args:
        indata: Captured block from the audio callback. It is transposed
            before ``librosa.to_mono``, so presumably shaped
            ``(frames, channels)`` -- TODO confirm.

    Returns:
        The converted mono samples for this block (at most
        ``config.sample_frames`` of them).

    Raises:
        RuntimeError: If the backend does not answer with HTTP status 200.
    """
    indata = librosa.to_mono(indata.T)

    if config.input_denoise:
        indata = nr.reduce_noise(y=indata, sr=config.sample_rate)

    # db threshold (the gate is skipped when the threshold is exactly -60)
    if config.db_threshold != -60:
        frame_length = 2048
        hop_length = 1024

        rms = librosa.feature.rms(
            y=indata, frame_length=frame_length, hop_length=hop_length
        )
        below_threshold = (
            librosa.amplitude_to_db(rms, ref=1.0)[0] < config.db_threshold
        )

        for i, is_quiet in enumerate(below_threshold):
            if is_quiet:
                indata[i * hop_length : (i + 1) * hop_length] = 0

    # Rolling buffer: drop the oldest block, append the new one
    self.input_wav[:] = np.concatenate(
        [
            self.input_wav[config.sample_frames :],
            indata,
        ]
    )

    safe_pad_length = (
        config.extra_frames - config.fade_frames
    ) / config.sample_rate - 0.03
    safe_pad_length = max(0, safe_pad_length)

    data = {
        "fSafePrefixPadLength": str(safe_pad_length),
        "fPitchChange": str(config.pitch_shift),
        "sampleRate": str(config.sample_rate),
    }

    # Override plugin settings, and apply key mapping
    if (
        config.current_plugin is not None
        and config.current_plugin in config.plugins
    ):
        for k, v in config.plugins[config.current_plugin].items():
            data[self.plugin_key_mapping.get(k, k)] = str(v)

    # The context manager releases the upload buffer even if the request
    # raises.
    with BytesIO() as buffer:
        sf.write(buffer, self.input_wav, config.sample_rate, format="wav")
        buffer.seek(0)

        response = requests.post(
            config.backend,
            files={
                "sample": ("audio.wav", buffer, "audio/wav"),
            },
            data=data,
        )

    # Explicit check instead of ``assert`` so it survives ``python -O``.
    if response.status_code != 200:
        raise RuntimeError(
            f"Backend request failed with HTTP status {response.status_code}"
        )

    with BytesIO(response.content) as buffer:
        infer_wav, _ = librosa.load(buffer, sr=config.sample_rate, mono=True)

    # Keep the window that ends ``extra_frames`` before the end of the
    # reply. Absolute indices are used because a ``-extra_frames`` stop
    # index would yield an empty slice when ``extra_frames`` is 0.
    window = config.sample_frames + config.fade_frames + config.sola_search_frames
    stop = max(0, len(infer_wav) - config.extra_frames)
    infer_wav = infer_wav[max(0, stop - window) : stop]

    # Sola alignment
    sola_target = infer_wav[None, : config.sola_search_frames + config.fade_frames]
    sola_kernel = np.flip(self.sola_buffer[None])

    cor_nom = convolve(
        sola_target,
        sola_kernel,
        mode="valid",
    )
    cor_den = np.sqrt(
        convolve(
            np.square(sola_target),
            np.ones((1, config.fade_frames)),
            mode="valid",
        )
        + 1e-8
    )
    sola_offset = np.argmax(cor_nom[0] / cor_den[0])

    # Cross-fade the head of this block with the tail kept from the
    # previous one.
    output_wav = infer_wav[sola_offset : sola_offset + config.sample_frames]
    output_wav[: config.fade_frames] *= self.fade_in_window
    output_wav[: config.fade_frames] += self.sola_buffer * self.fade_out_window

    # Remember the samples right after this block for the next cross-fade
    if sola_offset < config.sola_search_frames:
        self.sola_buffer = infer_wav[
            sola_offset
            + config.sample_frames : sola_offset
            + config.sample_frames
            + config.fade_frames
        ]
    else:
        self.sola_buffer = infer_wav[-config.fade_frames :]

    # Denoise
    if config.output_denoise:
        output_wav = nr.reduce_noise(y=output_wav, sr=config.sample_rate)

    return output_wav
i18n_map[i18n_file.stem] = yaml.safe_load(f.read()) 15 | 16 | 17 | def _t(key: str | list[str], locale: str | None = None, fallback: str = "en_US") -> str: 18 | if locale is None: 19 | locale = config.locale 20 | 21 | if isinstance(key, str): 22 | key = key.split(".") 23 | 24 | try: 25 | node = i18n_map[locale] 26 | for k in key: 27 | node = node[k] 28 | except KeyError: 29 | if locale != fallback: 30 | return _t(key, locale=fallback) 31 | 32 | return ".".join(key) 33 | 34 | return node 35 | 36 | 37 | language_map = {k: v["name"] for k, v in i18n_map.items()} 38 | 39 | __all__ = ["_t", "language_map"] 40 | -------------------------------------------------------------------------------- /rtvc/locales/en_US.yaml: -------------------------------------------------------------------------------- 1 | name: "English (US)" 2 | title: "Real-time Voice Conversion (FishAudio) (Version: {version})" 3 | 4 | 5 | theme: 6 | name: "Theme" 7 | auto: "System" 8 | light: "Light" 9 | dark: "Dark" 10 | 11 | i18n: 12 | language: "Language" 13 | restart_msg: "Configuration changed, do you want to restart the app to take effect?" 14 | 15 | backend: 16 | title: "Backend Settings" 17 | protocol_label: "Protocol" 18 | name: "Backend" 19 | test: "Test" 20 | test_succeed: "Successfully connected to backend." 21 | test_failed: "Failed to connect to backend in 5s." 
22 | 23 | audio_device: 24 | name: "Audio Device (Please use same kind of device for input and output)" 25 | input: "Input (Recording)" 26 | output: "Output (Playback)" 27 | 28 | audio: 29 | name: "Audio Settings" 30 | db_threshold: "DB Threshold" 31 | pitch_shift: "Pitch Shift" 32 | sample_duration: "Sample Duration" 33 | fade_duration: "Fade in and out duration" 34 | extra_duration: "Extra Duration" 35 | input_denoise: "Input Denoise" 36 | output_denoise: "Output Denoise" 37 | 38 | action: 39 | start: "Start Voice Conversion" 40 | stop: "Stop Voice Conversion" 41 | latency: "Latency: {latency:.2f} ms" 42 | error: "An error occurred, please restart the conversion" 43 | 44 | config: 45 | save: "Save Config" 46 | load: "Load Config" 47 | save_msg: "Configuration saved, do you want to export to a file?" 48 | save_title: "Export Configuration to YAML File" 49 | load_msg: "Import completed, the program will restart to take effect." 50 | load_title: "Import Configuration from YAML File" 51 | error: "Unable to parse configuration file, reset" 52 | 53 | plugins: 54 | name: "Plugin" 55 | none: 56 | name: "None" 57 | diffusion: 58 | name: "Diffusion Model" 59 | speaker: 60 | label: "Speaker" 61 | tooltip: "Speaker ID, e.g., 0, 1, 2" 62 | sample_method: 63 | label: "Sampling Algorithm" 64 | tooltip: "none for no sampling, PLMS for fast sampling" 65 | sample_interval: 66 | label: "Sampling Interval" 67 | tooltip: "How many steps to take a sample, the larger the number, the faster the speed, but the lower the quality" 68 | skip_steps: 69 | label: "Skip Steps" 70 | tooltip: "Shallow diffusion related configuration, skip steps like 970 to eliminate electronic sound only, without affecting the effect" 71 | hifisinger: 72 | name: "HiFiSinger" 73 | speaker: 74 | label: "Speaker" 75 | tooltip: "Speaker ID, e.g., 0, 1, 2" 76 | rvc: 77 | name: "RVC" 78 | speaker: 79 | label: "Speaker" 80 | tooltip: "Speaker ID, e.g., 0, 1, 2" 81 | index_ratio: 82 | label: "Index Ratio" 83 | tooltip: 
"TODO: Supplement Information" 84 | -------------------------------------------------------------------------------- /rtvc/locales/zh_CN.yaml: -------------------------------------------------------------------------------- 1 | name: "中文 (简体)" 2 | title: "实时语音转换 (FishAudio) (版本: {version})" 3 | 4 | theme: 5 | name: "主题" 6 | auto: "系统" 7 | light: "亮色" 8 | dark: "暗色" 9 | 10 | i18n: 11 | language: "语言" 12 | restart_msg: "配置已更改,是否要重新启动应用程序以使其生效?" 13 | 14 | backend: 15 | title: "后端设置" 16 | protocol_label: "协议" 17 | name: "后端" 18 | test: "测试" 19 | test_succeed: "成功连接到后端。" 20 | test_failed: "5 秒内无法连接到后端。" 21 | 22 | audio_device: 23 | name: "音频设备(请对输入和输出使用同类设备)" 24 | input: "输入(录音)" 25 | output: "输出(播放)" 26 | 27 | audio: 28 | name: "音频设置" 29 | db_threshold: "分贝阈值" 30 | pitch_shift: "音调偏移" 31 | sample_duration: "样本时长" 32 | fade_duration: "淡入淡出时长" 33 | extra_duration: "额外时长" 34 | input_denoise: "输入降噪" 35 | output_denoise: "输出降噪" 36 | 37 | action: 38 | start: "开始语音转换" 39 | stop: "停止语音转换" 40 | latency: "延迟: {latency:.2f} ms" 41 | error: "发生错误, 请重新启动转换" 42 | 43 | config: 44 | save: "保存配置" 45 | load: "加载配置" 46 | save_msg: "配置已保存, 你想导出到文件吗?" 47 | save_title: "导出配置到 YAML 文件" 48 | load_msg: "导入完成, 程序将重启以使其生效." 
49 | load_title: "从 YAML 文件导入配置" 50 | error: "无法解析配置文件, 已重置" 51 | 52 | plugins: 53 | name: "插件" 54 | none: 55 | name: "无" 56 | diffusion: 57 | name: "扩散模型" 58 | speaker: 59 | label: "说话人" 60 | tooltip: "说话人 ID, 如 0, 1, 2" 61 | sample_method: 62 | label: "采样算法" 63 | tooltip: "none 为不采样, PLMS 为快速采样" 64 | sample_interval: 65 | label: "采样间隔" 66 | tooltip: "每隔多少步进行采样, 数字越大速度越快, 但是质量越低" 67 | skip_steps: 68 | label: "跳过步骤" 69 | tooltip: "浅扩散相关配置, 跳过如 970 步来实现只消除电音, 不影响效果" 70 | hifisinger: 71 | name: "HiFiSinger" 72 | speaker: 73 | label: "说话人" 74 | tooltip: "说话人 ID, 如 0, 1, 2" 75 | rvc: 76 | name: "RVC" 77 | speaker: 78 | label: "说话人" 79 | tooltip: "说话人 ID, 如 0, 1, 2" 80 | index_ratio: 81 | label: "索引比例" 82 | tooltip: "TODO: 补充信息" 83 | -------------------------------------------------------------------------------- /rtvc/plugins/__init__.py: -------------------------------------------------------------------------------- 1 | from rtvc.plugins.diffusion import DiffusionPlugin 2 | from rtvc.plugins.hifisinger import HiFiSingerPlugin 3 | from rtvc.plugins.rvc import RVCPlugin 4 | 5 | ALL_PLUGINS = [ 6 | DiffusionPlugin, 7 | RVCPlugin, 8 | HiFiSingerPlugin, 9 | ] 10 | -------------------------------------------------------------------------------- /rtvc/plugins/base.py: -------------------------------------------------------------------------------- 1 | from dataclasses import dataclass 2 | 3 | from PyQt6.QtCore import Qt 4 | from PyQt6.QtWidgets import ( 5 | QCheckBox, 6 | QComboBox, 7 | QGridLayout, 8 | QGroupBox, 9 | QHBoxLayout, 10 | QLabel, 11 | QLineEdit, 12 | QMessageBox, 13 | QSlider, 14 | QVBoxLayout, 15 | ) 16 | 17 | from rtvc.config import config 18 | from rtvc.i18n import _t 19 | 20 | 21 | def slider( 22 | minimum: int, maximum: int, step: int = 1, map_key: str | None = None 23 | ) -> int: 24 | namespace = dict(min=minimum, max=maximum, step=step, map_key=map_key) 25 | return type("Slider", (int,), namespace) 26 | 27 | 28 | def input(map_key: str | None = None) -> 
def render_plugin(plugin_cls: type) -> tuple:
    """Build the settings widgets for a plugin dataclass.

    Each dataclass field whose annotation is one of the marker types named
    ``Slider``, ``Input``, ``Checkbox`` or ``Dropdown`` becomes one row
    (label + widget). Initial values come from ``config.plugins``; if the
    saved values cannot be passed to the dataclass constructor, an error
    dialog is shown and the defaults are used instead.

    Args:
        plugin_cls: Plugin dataclass exposing an ``id`` attribute.

    Returns:
        A tuple ``(layout, get_values, key_mapping)``: the ``QVBoxLayout``
        holding the rows, a zero-argument callable returning
        ``{field name: current widget value}``, and a dict mapping field
        names to their ``map_key`` for fields that declare one.
    """
    layout = QVBoxLayout()

    class_id = plugin_cls.id
    _t_key = f"plugins.{class_id}"

    try:
        plugin_config = plugin_cls(**config.plugins.get(class_id, {}))
    except TypeError as e:
        # Popup a message
        msg = QMessageBox()
        msg.setIcon(QMessageBox.Icon.Critical)
        msg.setText(_t("config.error"))
        msg.setInformativeText(str(e))
        msg.exec()

        plugin_config = plugin_cls()

    get_value_funcs = {}
    key_mapping = {}

    # Inspect all the fields of the plugin class
    for key, field in plugin_cls.__dataclass_fields__.items():
        field_type = field.type
        kind = getattr(field_type, "__name__", None)
        if kind not in ("Slider", "Input", "Checkbox", "Dropdown"):
            continue

        map_key = getattr(field_type, "map_key", None)
        if map_key is not None:
            key_mapping[key] = map_key

        current = getattr(plugin_config, key)
        tooltip = _t(f"{_t_key}.{key}.tooltip")

        row = QHBoxLayout()
        row.setAlignment(Qt.AlignmentFlag.AlignLeft)
        row.addWidget(QLabel(_t(f"{_t_key}.{key}.label")))

        if kind == "Slider":
            slider_widget = QSlider()
            slider_widget.setOrientation(Qt.Orientation.Horizontal)
            slider_widget.setMinimum(field_type.min)
            slider_widget.setMaximum(field_type.max)
            slider_widget.setSingleStep(field_type.step)
            slider_widget.setTickInterval(field_type.step)
            slider_widget.setValue(current)
            value_label = QLabel(f"{slider_widget.value()}")
            # Bind the label as a default argument so each row updates its
            # own label rather than the last one created by the loop.
            slider_widget.valueChanged.connect(
                lambda new_value, value_label=value_label: value_label.setText(
                    str(new_value)
                )
            )
            get_value_funcs[key] = lambda widget=slider_widget: widget.value()
            slider_widget.setToolTip(tooltip)
            row.addWidget(slider_widget)
            row.addWidget(value_label)

        elif kind == "Input":
            line_edit = QLineEdit()
            # str() guards against a hand-edited config holding e.g. an int
            line_edit.setText(str(current))
            get_value_funcs[key] = lambda widget=line_edit: widget.text()
            line_edit.setToolTip(tooltip)
            row.addWidget(line_edit)

        elif kind == "Checkbox":
            check_box = QCheckBox()
            check_box.setChecked(current)
            get_value_funcs[key] = lambda widget=check_box: widget.isChecked()
            check_box.setToolTip(tooltip)
            row.addWidget(check_box)

        elif kind == "Dropdown":
            labels = [option[0] for option in field_type.options]
            option_values = [option[1] for option in field_type.options]

            combo = QComboBox()
            combo.setMinimumWidth(200)
            combo.addItems(labels)
            # The getter below returns the display label, so a saved config
            # holds labels while dataclass defaults hold option values.
            # Accept either when restoring, otherwise a saved selection
            # silently falls back to the first entry.
            # NOTE(review): confirm whether the backend expects the label
            # or the option value; the emitted value is left unchanged.
            if current in option_values:
                combo.setCurrentIndex(option_values.index(current))
            elif current in labels:
                combo.setCurrentIndex(labels.index(current))
            get_value_funcs[key] = lambda widget=combo: widget.currentText()
            combo.setToolTip(tooltip)
            row.addWidget(combo)

        layout.addLayout(row)

    return (
        layout,
        lambda: {key: func() for key, func in get_value_funcs.items()},
        key_mapping,
    )
def test_numpy_sola():
    """Check the NumPy/SciPy SOLA correlation against a torch reference.

    A known buffer is planted at a fixed offset inside noise; both the
    ``torch.nn.functional.conv1d`` and the ``scipy.signal.convolve``
    formulation must agree numerically and locate that offset. The random
    data comes from a seeded generator so the test is reproducible.
    """
    rng = np.random.default_rng(0)

    fade_frames = 90 * 44100 // 1000
    sola_search_frames = 12 * 44100 // 1000
    expected_offset = 114

    sola_buffer = rng.standard_normal(fade_frames).astype(np.float32)
    infer_wav = rng.standard_normal(fade_frames * 5).astype(np.float32)
    infer_wav[expected_offset : expected_offset + fade_frames] = sola_buffer

    conv_input = infer_wav[None, : fade_frames + sola_search_frames]

    # Numerator: cross-correlation of the search window with the buffer.
    # conv1d correlates, so the SciPy convolution needs a flipped kernel.
    cor_nom = F.conv1d(
        torch.from_numpy(conv_input).float(),
        torch.from_numpy(sola_buffer[None, None, :]).float(),
    )
    cor_nom1 = convolve(conv_input, np.flip(sola_buffer[None, :]), mode="valid")
    assert np.allclose(cor_nom.numpy(), cor_nom1, atol=1e-2)

    # Denominator: sliding energy of the search window
    cor_den = torch.sqrt(
        F.conv1d(
            torch.from_numpy(conv_input).float() ** 2,
            torch.ones(1, 1, fade_frames),
        )
        + 1e-8
    )
    cor_den1 = np.sqrt(
        convolve(conv_input**2, np.ones((1, fade_frames)), mode="valid") + 1e-8
    )
    assert np.allclose(cor_den.numpy(), cor_den1, atol=1e-2)

    sola_offset = torch.argmax(cor_nom[0] / cor_den[0])
    sola_offset1 = np.argmax(cor_nom1[0] / cor_den1[0])

    assert sola_offset == sola_offset1 == expected_offset