├── .github
    └── workflows
    │   └── ci.yml
├── .gitignore
├── LICENSE
├── README.md
├── build.py
├── extra-hooks
    └── hook-librosa.py
├── pdm.lock
├── pyproject.toml
├── rtvc
    ├── __init__.py
    ├── __main__.py
    ├── assets
    │   └── icon.png
    ├── audio.py
    ├── config.py
    ├── gui.py
    ├── i18n.py
    ├── locales
    │   ├── en_US.yaml
    │   └── zh_CN.yaml
    └── plugins
    │   ├── __init__.py
    │   ├── base.py
    │   ├── diffusion.py
    │   ├── hifisinger.py
    │   └── rvc.py
└── tests
    └── test_sola.py


/.github/workflows/ci.yml:
--------------------------------------------------------------------------------
 1 | name: CI
 2 | 
 3 | on:
 4 |   push:
 5 |     branches: [main]
 6 |     tags:
 7 |       - "v*.*.*"
 8 |   pull_request:
 9 |     branches: [main]
10 | 
11 | jobs:
12 |   # Formatting gate: runs the `lint-check` PDM script (black --check and
13 |   # isort --check). The build and publish jobs both declare `needs: lint`.
14 |   lint:
15 |     runs-on: ubuntu-latest
16 |     steps:
17 |       - uses: actions/checkout@v3
18 |       - uses: actions/setup-python@v4
19 |         with:
20 |           python-version: "3.10"
21 |       - uses: pdm-project/setup-pdm@v3
22 |       - name: Install dependencies
23 |         run: pdm sync
24 |       - name: Lint with black and isort
25 |         run: pdm run lint-check
24 | 
25 |   # Builds the PyInstaller bundle for every OS / package-type combination
26 |   # in the matrix and uploads each result as its own artifact.
27 |   build:
28 |     needs: lint
29 |     runs-on: ${{ matrix.os }}
30 |     strategy:
31 |       # Let the remaining matrix entries finish when one of them fails.
32 |       fail-fast: false
33 |       matrix:
34 |         os: [ubuntu-latest, macos-latest, windows-latest]
35 |         python-version: ["3.10"]
36 |         package-type: ["onefile", "onedir"]
37 |     steps:
38 |       - uses: actions/checkout@v3
39 |       - uses: actions/setup-python@v4
40 |         with:
41 |           python-version: ${{ matrix.python-version }}
42 |       - uses: pdm-project/setup-pdm@v3
43 |       - name: Install dependencies
44 |         run: pdm sync
45 |       - name: Build pyinstaller package
46 |         env:
47 |           # build.py reads PACKAGE_TYPE to choose --onefile or --onedir.
48 |           PACKAGE_TYPE: ${{ matrix.package-type }}
49 |         run: pdm run build.py
50 |       - name: Upload artifact
51 |         uses: actions/upload-artifact@v3
52 |         with:
53 |           name: rtvc-${{ matrix.os }}-${{ matrix.python-version }}-${{ matrix.package-type }}
54 |           path: dist
51 | 
52 |   # Publishes the package to PyPI; runs only for refs that start with
53 |   # "refs/tags/v" and only after the lint job succeeded.
54 |   publish:
55 |     needs: lint
56 |     runs-on: ubuntu-latest
57 |     if: startsWith(github.ref, 'refs/tags/v')
58 |     steps:
59 |       - uses: actions/checkout@v3
60 |       - uses: actions/setup-python@v4
61 |         with:
62 |           python-version: "3.10"
63 |       - uses: pdm-project/setup-pdm@v3
64 |       - name: Install dependencies
65 |         run: pdm sync
66 |       - name: Publish
67 |         # Hand the credentials to `pdm publish` through the environment
68 |         # variables it reads instead of interpolating the secret into the
69 |         # shell command line.
70 |         env:
71 |           PDM_PUBLISH_USERNAME: __token__
72 |           PDM_PUBLISH_PASSWORD: ${{ secrets.PYPI_TOKEN }}
73 |         run: pdm publish
66 | 


--------------------------------------------------------------------------------
/.gitignore:
--------------------------------------------------------------------------------
  1 | # Byte-compiled / optimized / DLL files
  2 | __pycache__/
  3 | *.py[cod]
  4 | *$py.class
  5 | 
  6 | # C extensions
  7 | *.so
  8 | 
  9 | # Distribution / packaging
 10 | .Python
 11 | build/
 12 | develop-eggs/
 13 | dist/
 14 | downloads/
 15 | eggs/
 16 | .eggs/
 17 | lib/
 18 | lib64/
 19 | parts/
 20 | sdist/
 21 | var/
 22 | wheels/
 23 | share/python-wheels/
 24 | *.egg-info/
 25 | .installed.cfg
 26 | *.egg
 27 | MANIFEST
 28 | 
 29 | # PyInstaller
 30 | #  Usually these files are written by a python script from a template
 31 | #  before PyInstaller builds the exe, so as to inject date/other infos into it.
 32 | *.manifest
 33 | *.spec
 34 | 
 35 | # Installer logs
 36 | pip-log.txt
 37 | pip-delete-this-directory.txt
 38 | 
 39 | # Unit test / coverage reports
 40 | htmlcov/
 41 | .tox/
 42 | .nox/
 43 | .coverage
 44 | .coverage.*
 45 | .cache
 46 | nosetests.xml
 47 | coverage.xml
 48 | *.cover
 49 | *.py,cover
 50 | .hypothesis/
 51 | .pytest_cache/
 52 | cover/
 53 | 
 54 | # Translations
 55 | *.mo
 56 | *.pot
 57 | 
 58 | # Django stuff:
 59 | *.log
 60 | local_settings.py
 61 | db.sqlite3
 62 | db.sqlite3-journal
 63 | 
 64 | # Flask stuff:
 65 | instance/
 66 | .webassets-cache
 67 | 
 68 | # Scrapy stuff:
 69 | .scrapy
 70 | 
 71 | # Sphinx documentation
 72 | docs/_build/
 73 | 
 74 | # PyBuilder
 75 | .pybuilder/
 76 | target/
 77 | 
 78 | # Jupyter Notebook
 79 | .ipynb_checkpoints
 80 | 
 81 | # IPython
 82 | profile_default/
 83 | ipython_config.py
 84 | 
 85 | # pyenv
 86 | #   For a library or package, you might want to ignore these files since the code is
 87 | #   intended to run in multiple environments; otherwise, check them in:
 88 | # .python-version
 89 | 
 90 | # pipenv
 91 | #   According to pypa/pipenv#598, it is recommended to include Pipfile.lock in version control.
 92 | #   However, in case of collaboration, if having platform-specific dependencies or dependencies
 93 | #   having no cross-platform support, pipenv may install dependencies that don't work, or not
 94 | #   install all needed dependencies.
 95 | #Pipfile.lock
 96 | 
 97 | # poetry
 98 | #   Similar to Pipfile.lock, it is generally recommended to include poetry.lock in version control.
 99 | #   This is especially recommended for binary packages to ensure reproducibility, and is more
100 | #   commonly ignored for libraries.
101 | #   https://python-poetry.org/docs/basic-usage/#commit-your-poetrylock-file-to-version-control
102 | #poetry.lock
103 | 
104 | # pdm
105 | #   Similar to Pipfile.lock, it is generally recommended to include pdm.lock in version control.
106 | #pdm.lock
107 | #   pdm stores project-wide configurations in .pdm.toml, but it is recommended to not include it
108 | #   in version control.
109 | #   https://pdm.fming.dev/#use-with-ide
110 | .pdm.toml
111 | 
112 | # PEP 582; used by e.g. github.com/David-OConnor/pyflow and github.com/pdm-project/pdm
113 | __pypackages__/
114 | 
115 | # Celery stuff
116 | celerybeat-schedule
117 | celerybeat.pid
118 | 
119 | # SageMath parsed files
120 | *.sage.py
121 | 
122 | # Environments
123 | .env
124 | .venv
125 | env/
126 | venv/
127 | ENV/
128 | env.bak/
129 | venv.bak/
130 | 
131 | # Spyder project settings
132 | .spyderproject
133 | .spyproject
134 | 
135 | # Rope project settings
136 | .ropeproject
137 | 
138 | # mkdocs documentation
139 | /site
140 | 
141 | # mypy
142 | .mypy_cache/
143 | .dmypy.json
144 | dmypy.json
145 | 
146 | # Pyre type checker
147 | .pyre/
148 | 
149 | # pytype static type analyzer
150 | .pytype/
151 | 
152 | # Cython debug symbols
153 | cython_debug/
154 | 
155 | # PyCharm
156 | #  JetBrains specific template is maintained in a separate JetBrains.gitignore that can
157 | #  be found at https://github.com/github/gitignore/blob/main/Global/JetBrains.gitignore
158 | #  and can be added to the global gitignore or merged into this file.  For a more nuclear
159 | #  option (not recommended) you can uncomment the following to ignore the entire idea folder.
160 | #.idea/
161 | .pdm-python
162 | .DS_Store
163 | __main__.build
164 | *.build
165 | __main__
166 | Info.plist
167 | 


--------------------------------------------------------------------------------
/LICENSE:
--------------------------------------------------------------------------------
 1 | MIT License
 2 | 
 3 | Copyright (c) 2023 Lengyue
 4 | 
 5 | Permission is hereby granted, free of charge, to any person obtaining a copy
 6 | of this software and associated documentation files (the "Software"), to deal
 7 | in the Software without restriction, including without limitation the rights
 8 | to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
 9 | copies of the Software, and to permit persons to whom the Software is
10 | furnished to do so, subject to the following conditions:
11 | 
12 | The above copyright notice and this permission notice shall be included in all
13 | copies or substantial portions of the Software.
14 | 
15 | THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
16 | IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
17 | FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
18 | AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
19 | LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
20 | OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
21 | SOFTWARE.
22 | 


--------------------------------------------------------------------------------
/README.md:
--------------------------------------------------------------------------------
 1 | # RTVC: Real-Time Voice Conversion GUI
 2 | [![PyPI Version](https://img.shields.io/pypi/v/rtvc.svg?style=flat-square)](https://pypi.python.org/pypi/rtvc)
 3 | [![Downloads](https://img.shields.io/pypi/dm/rtvc.svg?style=flat-square)](https://pypi.python.org/pypi/rtvc)
 4 | [![CI Status](https://img.shields.io/github/actions/workflow/status/fishaudio/realtime-vc-gui/ci.yml?style=flat-square&logo=GitHub)](https://github.com/fishaudio/realtime-vc-gui/actions)
 5 | [![License](https://img.shields.io/github/license/fishaudio/realtime-vc-gui?style=flat-square)](https://github.com/fishaudio/realtime-vc-gui/blob/main/LICENSE)
 6 | 
 7 | <img src="https://s2.loli.net/2023/05/30/h1QazX7BS4jMDTd.png" width="800" />
 8 | 
 9 | ## Usage
10 | You can download the latest build from **[here](https://nightly.link/fishaudio/realtime-vc-gui/workflows/ci/main)**,
11 | 
12 | or install it from PyPI:
13 | 
14 | ```bash
15 | pip install rtvc
16 | rtvc
17 | ```
18 | 
19 | or run it with pipx, without installing it first:
20 | 
21 | ```bash
22 | # runs rtvc in a temporary environment
23 | pipx run rtvc
24 | ```
25 | 


--------------------------------------------------------------------------------
/build.py:
--------------------------------------------------------------------------------
 1 | import os
 2 | import platform
 3 | import subprocess as sp
 4 | 
 5 | package_type = os.environ.get("PACKAGE_TYPE", "onedir")
 6 | assert package_type in ("onedir", "onefile"), "PACKAGE_TYPE must be onedir or onefile"
 7 | 
 8 | # upgrade dependencies manually
 9 | if platform.system() == "Windows":
10 |     sp.check_call(["pip", "install", "--upgrade", "pywin32", "cffi"])
11 | 
12 | sep = ";" if platform.system() == "Windows" else ":"
13 | 
14 | args = [
15 |     "pyinstaller",
16 |     "rtvc/__main__.py",
17 |     f"--{package_type}",
18 |     "-n",
19 |     "rtvc",
20 |     "--additional-hooks=extra-hooks",
21 |     "--noconfirm",
22 |     "--add-data",
23 |     f"rtvc/assets{sep}assets",
24 |     "--add-data",
25 |     f"rtvc/locales{sep}locales",
26 | ]
27 | 
28 | sp.check_call(args)
29 | 


--------------------------------------------------------------------------------
/extra-hooks/hook-librosa.py:
--------------------------------------------------------------------------------
1 | # PyInstaller hook for librosa: exposes the package's data files through the
2 | # module-level ``datas`` variable.
3 | from PyInstaller.utils.hooks import collect_data_files
4 | 
5 | datas = collect_data_files("librosa")
4 | 


--------------------------------------------------------------------------------
/pyproject.toml:
--------------------------------------------------------------------------------
 1 | [project]
 2 | name = "rtvc"
 3 | version = "0.2.0"
 4 | description = "Real-Time Voice Conversion GUI"
 5 | readme = "README.md"
 6 | requires-python = ">=3.10,<3.12"
 7 | license = {text = "MIT"}
 8 | keywords = ["voice-conversion", "svc"]
 9 | authors = [
10 |     {name = "lengyue", email = "lengyue@lengyue.me"},
11 | ]
12 | classifiers = [
13 |     "Development Status :: 3 - Alpha",
14 |     "Programming Language :: Python :: 3.10",
15 | ]
16 | 
17 | dependencies = [
18 |     "noisereduce>=2.0.1",
19 |     "sounddevice>=0.4.6",
20 |     "pyyaml>=6.0",
21 |     "PyQt6>=6.5.0",
22 |     "pyqtdarktheme==2.1.0",
23 |     "requests>=2.31.0",
24 |     "librosa==0.9.2",
25 | ]
26 | 
27 | [project.urls]
28 | repository = "https://github.com/fishaudio/realtime-vc-gui"
29 | 
30 | [project.gui-scripts]
31 | rtvc = "rtvc.__main__:main"
32 | 
33 | [tool.pdm]
34 | [tool.pdm.build]
35 | includes = ["rtvc"]
36 | 
37 | [tool.pdm.dev-dependencies]
38 | dev = [
39 |     "isort>=5.12.0",
40 |     "black>=23.3.0",
41 |     "pytest>=7.3.1",
42 |     "torch>=2.0.1",
43 |     "pyinstaller>=5.11.0",
44 | ]
45 | 
46 | [build-system]
47 | requires = ["pdm-backend"]
48 | build-backend = "pdm.backend"
49 | 
50 | [tool.pdm.scripts]
51 | lint = { shell = "black . && isort ." }
52 | lint-check = { shell = "black --check . && isort --check ." }
53 | test = { shell = "PYTHONPATH=. pytest -n=auto -q tests" }
54 | docs = { shell = "sphinx-autobuild docs docs/_build/html" }
55 | 
56 | [[tool.pdm.source]]
57 | type = "find_links"
58 | name = "torch-cpu"
59 | url = "https://download.pytorch.org/whl/cpu"
60 | verify_ssl = true
61 | 
62 | [tool.isort]
63 | profile = "black"
64 | extend_skip = ["dataset", "logs"]
65 | 


--------------------------------------------------------------------------------
/rtvc/__init__.py:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/fishaudio/realtime-vc-gui/85cbdb7f3a4cd0c7900e215202ef1c0c41a6ff36/rtvc/__init__.py


--------------------------------------------------------------------------------
/rtvc/__main__.py:
--------------------------------------------------------------------------------
 1 | import signal
 2 | import sys
 3 | 
 4 | import qdarktheme
 5 | from PyQt6 import QtWidgets
 6 | 
 7 | from rtvc.config import config
 8 | from rtvc.gui import MainWindow
 9 | 
10 | 
11 | def main():
12 |     qdarktheme.enable_hi_dpi()
13 |     app = QtWidgets.QApplication(sys.argv)
14 |     window = MainWindow()
15 |     qdarktheme.setup_theme(config.theme)
16 | 
17 |     # run
18 |     window.show()
19 |     app.exec()
20 | 
21 | 
22 | # handle Ctrl+C
23 | signal.signal(signal.SIGINT, signal.SIG_DFL)
24 | 
25 | if __name__ == "__main__":
26 |     main()
27 | 


--------------------------------------------------------------------------------
/rtvc/assets/icon.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/fishaudio/realtime-vc-gui/85cbdb7f3a4cd0c7900e215202ef1c0c41a6ff36/rtvc/assets/icon.png


--------------------------------------------------------------------------------
/rtvc/audio.py:
--------------------------------------------------------------------------------
 1 | import sounddevice as sd
 2 | 
 3 | 
 4 | def get_devices(update: bool = True):
 5 |     if update:
 6 |         sd._terminate()
 7 |         sd._initialize()
 8 | 
 9 |     devices = sd.query_devices()
10 |     hostapis = sd.query_hostapis()
11 | 
12 |     for hostapi in hostapis:
13 |         for device_idx in hostapi["devices"]:
14 |             devices[device_idx]["hostapi_name"] = hostapi["name"]
15 | 
16 |     input_devices = [
17 |         {"id": idx, "name": f"{d['name']} ({d['hostapi_name']})"}
18 |         for idx, d in enumerate(devices)
19 |         if d["max_input_channels"] > 0
20 |     ]
21 | 
22 |     output_devices = [
23 |         {"id": idx, "name": f"{d['name']} ({d['hostapi_name']})"}
24 |         for idx, d in enumerate(devices)
25 |         if d["max_output_channels"] > 0
26 |     ]
27 | 
28 |     return input_devices, output_devices
29 | 


--------------------------------------------------------------------------------
/rtvc/config.py:
--------------------------------------------------------------------------------
 1 | import locale
 2 | import sys
 3 | from dataclasses import dataclass, field
 4 | from pathlib import Path
 5 | from typing import Literal
 6 | 
 7 | import yaml
 8 | 
 9 | if getattr(sys, "frozen", False):
10 |     # If the application is run as a bundle, the PyInstaller bootloader
11 |     # extends the sys module by a flag frozen=True and sets the app
12 |     # path into variable _MEIPASS'.
13 |     application_path = Path(sys._MEIPASS)
14 | else:
15 |     application_path = Path(__file__).parent
16 | 
17 | 
18 | @dataclass
19 | class Config:
20 |     theme: Literal["auto", "light", "dark"] = "auto"
21 |     locale: str = locale.getdefaultlocale()[0]
22 |     backend: str = "http://localhost:6844/voiceChangeModel"
23 | 
24 |     input_device: str | None = None
25 |     output_device: str | None = None
26 | 
27 |     db_threshold: int = -30
28 |     pitch_shift: int = 0
29 |     sample_duration: int = 1000
30 |     fade_duration: int = 80
31 |     extra_duration: int = 50
32 |     input_denoise: bool = False
33 |     output_denoise: bool = False
34 |     sample_rate: int = 44100
35 |     sola_search_duration: int = 12
36 |     buffer_num: int = 4
37 | 
38 |     # Plugins
39 |     current_plugin: str | None = None
40 |     plugins: dict[str, dict] = field(default_factory=dict)
41 | 
42 |     @property
43 |     def sample_frames(self):
44 |         return self.sample_duration * self.sample_rate // 1000
45 | 
46 |     @property
47 |     def fade_frames(self):
48 |         return self.fade_duration * self.sample_rate // 1000
49 | 
50 |     @property
51 |     def extra_frames(self):
52 |         return self.extra_duration * self.sample_rate // 1000
53 | 
54 |     @property
55 |     def sola_search_frames(self):
56 |         return self.sola_search_duration * self.sample_rate // 1000
57 | 
58 | 
59 | default_config_path = str((Path.home() / ".rtvc" / "config.yaml").absolute())
60 | config = Config()
61 | 
62 | 
63 | def load_config(path: Path | str = default_config_path) -> Config:
64 |     global config
65 | 
66 |     path = Path(path)
67 | 
68 |     if path.exists():
69 |         try:
70 |             with open(path, "r", encoding="utf-8") as f:
71 |                 config = Config(**yaml.safe_load(f.read()))
72 |         except Exception:
73 |             config = Config()
74 |             print("Failed to load config file, use default config instead.")
75 | 
76 |     return config
77 | 
78 | 
79 | def save_config(path: Path | str = default_config_path) -> None:
80 |     path = Path(path)
81 | 
82 |     if not path.parent.exists():
83 |         path.parent.mkdir(parents=True)
84 | 
85 |     with open(path, "w", encoding="utf-8") as f:
86 |         yaml.safe_dump(config.__dict__, f)
87 | 
88 | 
89 | # Auto load config
90 | load_config()
91 | save_config()
92 | 


--------------------------------------------------------------------------------
/rtvc/gui.py:
--------------------------------------------------------------------------------
  1 | import os
  2 | import queue
  3 | import sys
  4 | import threading
  5 | import time
  6 | from io import BytesIO
  7 | 
  8 | import librosa
  9 | import noisereduce as nr
 10 | import numpy as np
 11 | import pkg_resources
 12 | import qdarktheme
 13 | import requests
 14 | import sounddevice as sd
 15 | import soundfile as sf
 16 | from PyQt6.QtCore import Qt
 17 | from PyQt6.QtGui import QIcon
 18 | from PyQt6.QtWidgets import (
 19 |     QCheckBox,
 20 |     QComboBox,
 21 |     QFileDialog,
 22 |     QGridLayout,
 23 |     QGroupBox,
 24 |     QHBoxLayout,
 25 |     QLabel,
 26 |     QLineEdit,
 27 |     QMessageBox,
 28 |     QPushButton,
 29 |     QSlider,
 30 |     QVBoxLayout,
 31 |     QWidget,
 32 | )
 33 | from scipy.signal import convolve
 34 | 
 35 | from rtvc.audio import get_devices
 36 | from rtvc.config import application_path, config, load_config, save_config
 37 | from rtvc.i18n import _t, language_map
 38 | from rtvc.plugins import ALL_PLUGINS
 39 | from rtvc.plugins.base import render_plugin
 40 | 
 41 | 
 42 | class MainWindow(QWidget):
 43 |     def __init__(self):
 44 |         """Create the main window and assemble every settings section."""
 45 |         super().__init__()
 46 | 
 47 |         self.setWindowIcon(QIcon(str(application_path / "assets" / "icon.png")))
 48 | 
 49 |         # The installed distribution's version is shown in the window title.
 50 |         version = pkg_resources.get_distribution("rtvc").version
 51 |         # remove +editable if it exists
 52 |         version = version.split("+")[0]
 53 |         self.setWindowTitle(_t("title").format(version=version))
 54 | 
 55 |         self.main_layout = QVBoxLayout()
 56 |         # Stick to the top
 57 |         self.main_layout.setAlignment(Qt.AlignmentFlag.AlignTop)
 58 | 
 59 |         # Sections are added to the layout in call order, top to bottom.
 60 |         self.setup_ui_settings()
 61 |         self.setup_backend_settings()
 62 |         self.setup_device_settings()
 63 |         self.setup_audio_settings()
 64 |         # The plugin group box must exist before setup_plugin_settings() runs,
 65 |         # because that method fills, shows or hides it.
 66 |         self.plugin_layout = QGroupBox()
 67 |         self.main_layout.addWidget(self.plugin_layout)
 68 |         self.setup_plugin_settings()
 69 |         self.setup_action_buttons()
 70 |         self.setLayout(self.main_layout)
 71 | 
 72 |         # Enforce a minimum window width
 73 |         self.setMinimumWidth(900)
 74 | 
 75 |         # Voice Conversion Thread
 76 |         # NOTE(review): this attribute shadows QObject.thread() on the instance.
 77 |         self.thread = None
 78 |         self.vc_status = threading.Event()
 73 | 
 74 |     def setup_ui_settings(self):
 75 |         # we have language and backend settings in the first row
 76 |         row = QHBoxLayout()
 77 |         row.setAlignment(Qt.AlignmentFlag.AlignLeft)
 78 | 
 79 |         # set up a theme combo box
 80 |         row.addWidget(QLabel(_t("theme.name")))
 81 |         self.theme_combo = QComboBox()
 82 |         self.theme_combo.addItem(_t("theme.auto"), "auto")
 83 |         self.theme_combo.addItem(_t("theme.light"), "light")
 84 |         self.theme_combo.addItem(_t("theme.dark"), "dark")
 85 |         self.theme_combo.setCurrentText(_t(f"theme.{config.theme}"))
 86 |         self.theme_combo.currentIndexChanged.connect(self.change_theme)
 87 |         self.theme_combo.setMinimumWidth(100)
 88 |         row.addWidget(self.theme_combo)
 89 | 
 90 |         # set up language combo box
 91 |         row.addWidget(QLabel(_t("i18n.language")))
 92 |         self.language_combo = QComboBox()
 93 | 
 94 |         for k, v in language_map.items():
 95 |             self.language_combo.addItem(v, k)
 96 | 
 97 |         self.language_combo.setCurrentText(language_map.get(config.locale, 'en_US'))
 98 |         self.language_combo.currentIndexChanged.connect(self.change_language)
 99 |         self.language_combo.setMinimumWidth(150)
100 |         row.addWidget(self.language_combo)
101 | 
102 |         # setup plugin combo box
103 |         row.addWidget(QLabel(_t("plugins.name")))
104 |         self.plugin_combo = QComboBox()
105 |         self.plugin_combo.addItem(_t("plugins.none.name"), "none")
106 |         for plugin in ALL_PLUGINS:
107 |             self.plugin_combo.addItem(_t(f"plugins.{plugin.id}.name"), plugin.id)
108 | 
109 |         if config.current_plugin is not None:
110 |             self.plugin_combo.setCurrentText(
111 |                 _t(f"plugins.{config.current_plugin}.name")
112 |             )
113 |         else:
114 |             self.plugin_combo.setCurrentText(_t("plugins.none.name"))
115 | 
116 |         self.plugin_combo.currentIndexChanged.connect(self.change_plugin)
117 |         self.plugin_combo.setMinimumWidth(150)
118 |         row.addWidget(self.plugin_combo)
119 | 
120 |         # save button
121 |         self.save_button = QPushButton(_t("config.save"))
122 |         self.save_button.clicked.connect(self.save_config)
123 |         row.addWidget(self.save_button)
124 | 
125 |         # load button
126 |         self.load_button = QPushButton(_t("config.load"))
127 |         self.load_button.clicked.connect(self.load_config)
128 |         row.addWidget(self.load_button)
129 | 
130 |         self.main_layout.addLayout(row)
131 | 
132 |     def setup_device_settings(self):
133 |         # second row: a group box for audio device settings
134 |         row = QGroupBox(_t("audio_device.name"))
135 |         row_layout = QGridLayout()
136 |         row_layout.setAlignment(Qt.AlignmentFlag.AlignLeft)
137 | 
138 |         # fetch devices
139 |         input_devices, output_devices = get_devices()
140 | 
141 |         # input device
142 |         row_layout.addWidget(QLabel(_t("audio_device.input")), 0, 0)
143 |         self.input_device_combo = QComboBox()
144 |         for device in input_devices:
145 |             self.input_device_combo.addItem(device["name"], device["id"])
146 | 
147 |         # find the current device from config
148 |         if config.input_device is not None:
149 |             for i in range(self.input_device_combo.count()):
150 |                 if self.input_device_combo.itemData(i) == config.input_device:
151 |                     self.input_device_combo.setCurrentIndex(i)
152 |                     break
153 |             else:
154 |                 # not found, use default
155 |                 self.input_device_combo.setCurrentIndex(0)
156 |                 config.input_device = self.input_device_combo.itemData(0)
157 | 
158 |         self.input_device_combo.setFixedWidth(300)
159 |         row_layout.addWidget(self.input_device_combo, 0, 1)
160 | 
161 |         # output device
162 |         row_layout.addWidget(QLabel(_t("audio_device.output")), 1, 0)
163 |         self.output_device_combo = QComboBox()
164 |         for device in output_devices:
165 |             self.output_device_combo.addItem(device["name"], device["id"])
166 | 
167 |         # find the current device from config
168 |         if config.output_device is not None:
169 |             for i in range(self.output_device_combo.count()):
170 |                 if self.output_device_combo.itemData(i) == config.output_device:
171 |                     self.output_device_combo.setCurrentIndex(i)
172 |                     break
173 |             else:
174 |                 # not found, use default
175 |                 self.output_device_combo.setCurrentIndex(0)
176 |                 config.output_device = self.output_device_combo.itemData(0)
177 | 
178 |         self.input_device_combo.setFixedWidth(300)
179 |         row_layout.addWidget(self.output_device_combo, 1, 1)
180 | 
181 |         row.setLayout(row_layout)
182 | 
183 |         self.main_layout.addWidget(row)
184 | 
185 |     def setup_audio_settings(self):
186 |         """Build the audio settings group box and add it to the main layout.
187 | 
188 |         Grid row 0 holds the dB threshold and pitch shift sliders, row 1 the
189 |         sample and fade duration sliders, and row 2 the extra duration slider
190 |         plus the two denoise checkboxes. Every slider has a label that mirrors
191 |         its value; duration sliders hold milliseconds and their labels show
192 |         seconds.
193 |         """
194 |         # a group box for audio settings
195 |         row = QGroupBox(_t("audio.name"))
196 |         row_layout = QGridLayout()
197 | 
198 |         # db_threshold, pitch_shift
199 |         row_layout.addWidget(QLabel(_t("audio.db_threshold")), 0, 0)
200 |         self.db_threshold_slider = QSlider(Qt.Orientation.Horizontal)
201 |         self.db_threshold_slider.setMinimum(-60)
202 |         self.db_threshold_slider.setMaximum(0)
203 |         self.db_threshold_slider.setSingleStep(1)
204 |         self.db_threshold_slider.setTickInterval(1)
205 |         self.db_threshold_slider.setValue(config.db_threshold)
206 |         row_layout.addWidget(self.db_threshold_slider, 0, 1)
207 |         self.db_threshold_label = QLabel(f"{config.db_threshold} dB")
208 |         self.db_threshold_label.setFixedWidth(50)
209 |         row_layout.addWidget(self.db_threshold_label, 0, 2)
210 |         # keep the label in sync with the slider
211 |         self.db_threshold_slider.valueChanged.connect(
212 |             lambda v: self.db_threshold_label.setText(f"{v} dB")
213 |         )
214 | 
215 |         row_layout.addWidget(QLabel(_t("audio.pitch_shift")), 0, 3)
216 |         self.pitch_shift_slider = QSlider(Qt.Orientation.Horizontal)
217 |         self.pitch_shift_slider.setMinimum(-24)
218 |         self.pitch_shift_slider.setMaximum(24)
219 |         self.pitch_shift_slider.setSingleStep(1)
220 |         self.pitch_shift_slider.setTickInterval(1)
221 |         self.pitch_shift_slider.setValue(config.pitch_shift)
222 |         row_layout.addWidget(self.pitch_shift_slider, 0, 4)
223 |         self.pitch_shift_label = QLabel(f"{config.pitch_shift}")
224 |         self.pitch_shift_label.setFixedWidth(50)
225 |         row_layout.addWidget(self.pitch_shift_label, 0, 5)
226 |         self.pitch_shift_slider.valueChanged.connect(
227 |             lambda v: self.pitch_shift_label.setText(f"{v}")
228 |         )
229 | 
230 |         # performance related
231 |         # sample_duration, fade_duration
232 |         row_layout.addWidget(QLabel(_t("audio.sample_duration")), 1, 0)
233 |         self.sample_duration_slider = QSlider(Qt.Orientation.Horizontal)
234 |         self.sample_duration_slider.setMinimum(100)
235 |         self.sample_duration_slider.setMaximum(3000)
236 |         self.sample_duration_slider.setSingleStep(100)
237 |         self.sample_duration_slider.setTickInterval(100)
238 |         self.sample_duration_slider.setValue(config.sample_duration)
239 |         row_layout.addWidget(self.sample_duration_slider, 1, 1)
240 |         self.sample_duration_label = QLabel(f"{config.sample_duration / 1000:.1f} s")
241 |         self.sample_duration_label.setFixedWidth(50)
242 |         row_layout.addWidget(self.sample_duration_label, 1, 2)
243 |         self.sample_duration_slider.valueChanged.connect(
244 |             lambda v: self.sample_duration_label.setText(f"{v / 1000:.1f} s")
245 |         )
246 | 
247 |         row_layout.addWidget(QLabel(_t("audio.fade_duration")), 1, 3)
248 |         self.fade_duration_slider = QSlider(Qt.Orientation.Horizontal)
249 |         self.fade_duration_slider.setMinimum(10)
250 |         self.fade_duration_slider.setMaximum(150)
251 |         self.fade_duration_slider.setSingleStep(10)
252 |         self.fade_duration_slider.setTickInterval(10)
253 |         self.fade_duration_slider.setValue(config.fade_duration)
254 |         row_layout.addWidget(self.fade_duration_slider, 1, 4)
255 |         self.fade_duration_label = QLabel(f"{config.fade_duration / 1000:.2f} s")
256 |         self.fade_duration_label.setFixedWidth(50)
257 |         row_layout.addWidget(self.fade_duration_label, 1, 5)
258 |         self.fade_duration_slider.valueChanged.connect(
259 |             lambda v: self.fade_duration_label.setText(f"{v / 1000:.2f} s")
260 |         )
261 | 
262 |         # Extra duration, input denoise, output denoise in next row
263 |         row_layout.addWidget(QLabel(_t("audio.extra_duration")), 2, 0)
264 |         self.extra_duration_slider = QSlider(Qt.Orientation.Horizontal)
265 |         self.extra_duration_slider.setMinimum(50)
266 |         self.extra_duration_slider.setMaximum(1000)
267 |         self.extra_duration_slider.setSingleStep(10)
268 |         self.extra_duration_slider.setTickInterval(10)
269 |         self.extra_duration_slider.setValue(config.extra_duration)
270 |         row_layout.addWidget(self.extra_duration_slider, 2, 1)
271 |         self.extra_duration_label = QLabel(f"{config.extra_duration / 1000:.2f} s")
272 |         self.extra_duration_label.setFixedWidth(50)
273 |         row_layout.addWidget(self.extra_duration_label, 2, 2)
274 |         self.extra_duration_slider.valueChanged.connect(
275 |             lambda v: self.extra_duration_label.setText(f"{v / 1000:.2f} s")
276 |         )
277 | 
278 |         self.input_denoise_checkbox = QCheckBox()
279 |         self.input_denoise_checkbox.setText(_t("audio.input_denoise"))
280 |         self.input_denoise_checkbox.setChecked(config.input_denoise)
281 |         row_layout.addWidget(self.input_denoise_checkbox, 2, 3)
282 | 
283 |         self.output_denoise_checkbox = QCheckBox()
284 |         self.output_denoise_checkbox.setText(_t("audio.output_denoise"))
285 |         self.output_denoise_checkbox.setChecked(config.output_denoise)
286 |         row_layout.addWidget(self.output_denoise_checkbox, 2, 4)
287 | 
288 |         row.setLayout(row_layout)
289 |         self.main_layout.addWidget(row)
281 | 
282 |     def setup_backend_settings(self):
283 |         widget = QGroupBox()
284 |         widget.setTitle(_t("backend.title"))
285 |         row = QHBoxLayout()
286 | 
287 |         # protocol
288 |         row.addWidget(QLabel(_t("backend.protocol_label")))
289 |         self.backend_protocol = QComboBox()
290 |         self.backend_protocol.setMinimumWidth(75)
291 |         self.backend_protocol.addItems(["v1"])
292 |         self.backend_protocol.setCurrentText("v1")
293 |         row.addWidget(self.backend_protocol)
294 | 
295 |         # set up backend (url) input, and a test button
296 |         row.addWidget(QLabel(_t("backend.name")))
297 |         self.backend_input = QLineEdit()
298 |         self.backend_input.setText(config.backend)
299 |         row.addWidget(self.backend_input)
300 | 
301 |         self.test_button = QPushButton(_t("backend.test"))
302 |         self.test_button.clicked.connect(self.test_backend)
303 |         row.addWidget(self.test_button)
304 | 
305 |         widget.setLayout(row)
306 |         self.main_layout.addWidget(widget)
307 | 
308 |     def setup_plugin_settings(self):
309 |         plugin_id = config.current_plugin
310 | 
311 |         if plugin_id is None:
312 |             self.get_plugin_config = lambda: dict()
313 |             self.plugin_key_mapping = dict()
314 |             self.plugin_layout.hide()
315 |             return
316 | 
317 |         self.plugin_layout.show()
318 |         self.plugin_layout.setTitle(_t(f"plugins.{plugin_id}.name"))
319 | 
320 |         if self.plugin_layout.layout():
321 |             # remove the old layout
322 |             QWidget().setLayout(self.plugin_layout.layout())
323 | 
324 |         # Find the plugin class from the config
325 |         for plugin_cls in ALL_PLUGINS:
326 |             if plugin_cls.id != plugin_id:
327 |                 continue
328 | 
329 |             layout, get_value_func, key_mappping = render_plugin(plugin_cls)
330 |             self.get_plugin_config = get_value_func
331 |             self.plugin_key_mapping = key_mappping
332 |             self.plugin_layout.setLayout(layout)
333 | 
334 |         # resize the window to fit the new layout
335 |         self.resize(self.sizeHint())
336 | 
337 |     def setup_action_buttons(self):
338 |         row = QWidget()
339 |         row_layout = QHBoxLayout()
340 |         row_layout.addStretch(1)
341 | 
342 |         self.start_button = QPushButton(_t("action.start"))
343 |         self.start_button.clicked.connect(self.start_conversion)
344 |         row_layout.addWidget(self.start_button)
345 | 
346 |         self.stop_button = QPushButton(_t("action.stop"))
347 |         self.stop_button.setEnabled(False)
348 |         self.stop_button.clicked.connect(self.stop_conversion)
349 |         row_layout.addWidget(self.stop_button)
350 | 
351 |         self.latency_label = QLabel(_t("action.latency").format(latency=0))
352 |         row_layout.addWidget(self.latency_label)
353 | 
354 |         row.setLayout(row_layout)
355 |         self.main_layout.addWidget(row)
356 | 
357 |     def change_theme(self, index):
358 |         config.theme = self.theme_combo.itemData(index)
359 | 
360 |         save_config()
361 |         qdarktheme.setup_theme(config.theme)
362 | 
363 |     def change_language(self, index):
364 |         config.locale = self.language_combo.itemData(index)
365 |         save_config()
366 | 
367 |         # pop up a message box to tell user app will restart
368 |         msg_box = QMessageBox()
369 |         msg_box.setIcon(QMessageBox.Icon.Warning)
370 |         msg_box.setText(_t("i18n.restart_msg"))
371 |         msg_box.setStandardButtons(
372 |             QMessageBox.StandardButton.Yes | QMessageBox.StandardButton.No
373 |         )
374 |         msg_box.setDefaultButton(QMessageBox.StandardButton.No)
375 |         ret = msg_box.exec()
376 | 
377 |         if ret == QMessageBox.StandardButton.Yes:
378 |             os.execv(sys.argv[0], sys.argv)
379 | 
380 |     def change_plugin(self, index):
381 |         config.current_plugin = self.plugin_combo.itemData(index)
382 |         if config.current_plugin == "none":
383 |             config.current_plugin = None
384 | 
385 |         self.setup_plugin_settings()
386 | 
387 |     def test_backend(self):
388 |         backend = self.backend_input.text()
389 | 
390 |         try:
391 |             response = requests.options(backend, timeout=5)
392 |         except:
393 |             response = None
394 | 
395 |         message_box = QMessageBox()
396 | 
397 |         if response is not None and response.status_code == 200:
398 |             message_box.setIcon(QMessageBox.Icon.Information)
399 |             message_box.setText(_t("backend.test_succeed"))
400 |             config.backend = backend
401 |             save_config()
402 |         else:
403 |             message_box.setIcon(QMessageBox.Icon.Question)
404 |             message_box.setText(_t("backend.test_failed"))
405 | 
406 |         message_box.exec()
407 | 
408 |     def save_config(self, save_to_file=True):
409 |         config.backend = self.backend_input.text()
410 |         config.input_device = self.input_device_combo.currentData()
411 |         config.output_device = self.output_device_combo.currentData()
412 |         config.db_threshold = self.db_threshold_slider.value()
413 |         config.pitch_shift = self.pitch_shift_slider.value()
414 |         config.sample_duration = self.sample_duration_slider.value()
415 |         config.fade_duration = self.fade_duration_slider.value()
416 |         config.extra_duration = self.extra_duration_slider.value()
417 |         config.input_denoise = self.input_denoise_checkbox.isChecked()
418 |         config.output_denoise = self.output_denoise_checkbox.isChecked()
419 |         config.plugins[config.current_plugin] = self.get_plugin_config()
420 | 
421 |         save_config()
422 | 
423 |         # pop up a message box to tell user if they want to save the config to a file
424 |         if not save_to_file:
425 |             return
426 | 
427 |         msg_box = QMessageBox()
428 |         msg_box.setIcon(QMessageBox.Icon.Question)
429 |         msg_box.setText(_t("config.save_msg"))
430 |         msg_box.setStandardButtons(
431 |             QMessageBox.StandardButton.Yes | QMessageBox.StandardButton.No
432 |         )
433 |         msg_box.setDefaultButton(QMessageBox.StandardButton.No)
434 | 
435 |         ret = msg_box.exec()
436 |         if ret == QMessageBox.StandardButton.No:
437 |             return
438 | 
439 |         file_name, _ = QFileDialog.getSaveFileName(
440 |             self, _t("config.save_title"), "", "YAML (*.yaml)"
441 |         )
442 | 
443 |         if not file_name:
444 |             return
445 | 
446 |         save_config(file_name)
447 | 
448 |     def load_config(self):
449 |         # pop up a message box to select a config file
450 |         file_name, _ = QFileDialog.getOpenFileName(
451 |             self, _t("config.load_title"), "", "YAML (*.yaml)"
452 |         )
453 | 
454 |         if not file_name:
455 |             return
456 | 
457 |         load_config(file_name)
458 |         save_config()
459 | 
460 |         # pop up a message box to tell user app will restart
461 |         msg_box = QMessageBox()
462 |         msg_box.setIcon(QMessageBox.Icon.Information)
463 |         msg_box.setText(_t("config.load_msg"))
464 |         msg_box.setStandardButtons(QMessageBox.StandardButton.Ok)
465 |         msg_box.exec()
466 | 
467 |         os.execv(sys.argv[0], sys.argv)
468 | 
469 |     def start_conversion(self):
470 |         self.save_config(save_to_file=False)
471 | 
472 |         self.start_button.setEnabled(False)
473 |         self.stop_button.setEnabled(True)
474 | 
475 |         # Create windows and buffers
476 |         self.input_wav = np.zeros(
477 |             (
478 |                 config.sample_frames
479 |                 + config.fade_frames
480 |                 + config.sola_search_frames
481 |                 + 2 * config.extra_frames,
482 |             ),
483 |             dtype=np.float32,
484 |         )
485 |         self.sola_buffer = np.zeros(config.fade_frames)
486 |         self.fade_in_window = (
487 |             np.sin(np.pi * np.linspace(0, 0.5, config.fade_frames)) ** 2
488 |         )
489 |         self.fade_out_window = (
490 |             np.sin(np.pi * np.linspace(0.5, 1, config.fade_frames)) ** 2
491 |         )
492 | 
493 |         self.vc_status.set()
494 |         self.in_queue = queue.Queue()
495 |         self.out_queue = queue.Queue()
496 |         self.vc_thread = threading.Thread(target=self.vc_worker)
497 |         self.bg_thread = threading.Thread(target=self.bg_worker)
498 |         self.vc_thread.start()
499 |         self.bg_thread.start()
500 | 
501 |     def stop_conversion(self):
502 |         self.vc_status.clear()
503 |         self.vc_thread.join()
504 |         self.bg_thread.join()
505 | 
506 |         self.start_button.setEnabled(True)
507 |         self.stop_button.setEnabled(False)
508 | 
509 |     def vc_worker(self):
510 |         with sd.Stream(
511 |             callback=self.audio_callback,
512 |             blocksize=config.sample_frames,
513 |             samplerate=config.sample_rate,
514 |             dtype="float32",
515 |             device=(config.input_device, config.output_device),
516 |         ):
517 |             while self.vc_status.is_set():
518 |                 sd.sleep(config.sample_duration)
519 | 
520 |     def audio_callback(self, indata, outdata, frames, times, status):
521 |         # push to queue
522 |         self.in_queue.put((indata.copy(), outdata.shape[1], time.time()))
523 | 
524 |         try:
525 |             outdata[:] = self.out_queue.get_nowait()
526 |         except queue.Empty:
527 |             outdata[:] = 0
528 | 
529 |     def bg_worker(self):
530 |         while self.vc_status.is_set():
531 |             indata, channels, in_time = self.in_queue.get()
532 | 
533 |             try:
534 |                 outdata = self.worker_step(indata)
535 |                 self.latency_label.setText(
536 |                     _t("action.latency").format(latency=(time.time() - in_time) * 1000)
537 |                 )
538 |             except:
539 |                 import traceback
540 | 
541 |                 traceback.print_exc()
542 | 
543 |                 self.vc_status.clear()
544 |                 self.latency_label.setText(_t("action.error"))
545 |                 outdata = np.zeros((config.sample_frames,), dtype=np.float32)
546 | 
547 |             self.out_queue.put(outdata.repeat(channels).reshape((-1, channels)))
548 | 
549 |     def worker_step(self, indata):
550 |         indata = librosa.to_mono(indata.T)
551 | 
552 |         if config.input_denoise:
553 |             indata = nr.reduce_noise(y=indata, sr=config.sample_rate)
554 | 
555 |         # db threshold
556 |         if config.db_threshold != -60:
557 |             frame_length = 2048
558 |             hop_length = 1024
559 | 
560 |             rms = librosa.feature.rms(
561 |                 y=indata, frame_length=frame_length, hop_length=hop_length
562 |             )
563 |             rms_db = librosa.amplitude_to_db(rms, ref=1.0)[0] < config.db_threshold
564 | 
565 |             for i in range(len(rms_db)):
566 |                 if rms_db[i]:
567 |                     indata[i * hop_length : (i + 1) * hop_length] = 0
568 | 
569 |         # Rolling buffer
570 |         self.input_wav[:] = np.concatenate(
571 |             [
572 |                 self.input_wav[config.sample_frames :],
573 |                 indata,
574 |             ]
575 |         )
576 | 
577 |         buffer = BytesIO()
578 |         sf.write(buffer, self.input_wav, config.sample_rate, format="wav")
579 |         buffer.seek(0)
580 | 
581 |         safe_pad_length = (
582 |             config.extra_frames - config.fade_frames
583 |         ) / config.sample_rate - 0.03
584 |         safe_pad_length = max(0, safe_pad_length)
585 | 
586 |         data = {
587 |             "fSafePrefixPadLength": str(safe_pad_length),
588 |             "fPitchChange": str(config.pitch_shift),
589 |             "sampleRate": str(config.sample_rate),
590 |         }
591 | 
592 |         # Override plugin settings, and apply key mapping
593 |         if (
594 |             config.current_plugin is not None
595 |             and config.current_plugin in config.plugins
596 |         ):
597 |             for k, v in config.plugins[config.current_plugin].items():
598 |                 if k in self.plugin_key_mapping:
599 |                     k = self.plugin_key_mapping[k]
600 | 
601 |                 data[k] = str(v)
602 | 
603 |         response = requests.post(
604 |             config.backend,
605 |             files={
606 |                 "sample": ("audio.wav", buffer, "audio/wav"),
607 |             },
608 |             data=data,
609 |         )
610 | 
611 |         assert response.status_code == 200, f"Failed to request"
612 | 
613 |         buffer.close()
614 | 
615 |         with BytesIO(response.content) as buffer:
616 |             buffer.seek(0)
617 |             infer_wav, _ = librosa.load(buffer, sr=config.sample_rate, mono=True)
618 | 
619 |         infer_wav = infer_wav[
620 |             -config.sample_frames
621 |             - config.fade_frames
622 |             - config.sola_search_frames
623 |             - config.extra_frames : -config.extra_frames
624 |         ]
625 | 
626 |         # Sola alignment
627 |         sola_target = infer_wav[None, : config.sola_search_frames + config.fade_frames]
628 |         sola_kernel = np.flip(self.sola_buffer[None])
629 | 
630 |         cor_nom = convolve(
631 |             sola_target,
632 |             sola_kernel,
633 |             mode="valid",
634 |         )
635 |         cor_den = np.sqrt(
636 |             convolve(
637 |                 np.square(sola_target),
638 |                 np.ones((1, config.fade_frames)),
639 |                 mode="valid",
640 |             )
641 |             + 1e-8
642 |         )
643 |         sola_offset = np.argmax(cor_nom[0] / cor_den[0])
644 | 
645 |         output_wav = infer_wav[sola_offset : sola_offset + config.sample_frames]
646 |         output_wav[: config.fade_frames] *= self.fade_in_window
647 |         output_wav[: config.fade_frames] += self.sola_buffer * self.fade_out_window
648 | 
649 |         if sola_offset < config.sola_search_frames:
650 |             self.sola_buffer = infer_wav[
651 |                 sola_offset
652 |                 + config.sample_frames : sola_offset
653 |                 + config.sample_frames
654 |                 + config.fade_frames
655 |             ]
656 |         else:
657 |             self.sola_buffer = infer_wav[-config.fade_frames :]
658 | 
659 |         # Denoise
660 |         if config.output_denoise:
661 |             output_wav = nr.reduce_noise(y=output_wav, sr=config.sample_rate)
662 | 
663 |         return output_wav
664 | 


--------------------------------------------------------------------------------
/rtvc/i18n.py:
--------------------------------------------------------------------------------
 1 | from pathlib import Path
 2 | 
 3 | import yaml
 4 | 
 5 | from rtvc.config import application_path, config
 6 | 
 7 | # Load i18n files from locales/ directory
 8 | i18n_path = application_path / "locales"
 9 | i18n_files = list(i18n_path.glob("*.yaml"))
10 | 
11 | i18n_map = {}
12 | for i18n_file in i18n_files:
13 |     with open(i18n_file, "r", encoding="utf-8") as f:
14 |         i18n_map[i18n_file.stem] = yaml.safe_load(f.read())
15 | 
16 | 
17 | def _t(key: str | list[str], locale: str | None = None, fallback: str = "en_US") -> str:
18 |     if locale is None:
19 |         locale = config.locale
20 | 
21 |     if isinstance(key, str):
22 |         key = key.split(".")
23 | 
24 |     try:
25 |         node = i18n_map[locale]
26 |         for k in key:
27 |             node = node[k]
28 |     except KeyError:
29 |         if locale != fallback:
30 |             return _t(key, locale=fallback)
31 | 
32 |         return ".".join(key)
33 | 
34 |     return node
35 | 
36 | 
# Locale name -> human-readable language name (each file's top-level "name").
language_map = {
    locale_name: translations["name"]
    for locale_name, translations in i18n_map.items()
}

__all__ = ["_t", "language_map"]
40 | 


--------------------------------------------------------------------------------
/rtvc/locales/en_US.yaml:
--------------------------------------------------------------------------------
 1 | name: "English (US)"
 2 | title: "Real-time Voice Conversion (FishAudio) (Version: {version})"
 3 | 
 4 | 
 5 | theme:
 6 |   name: "Theme"
 7 |   auto: "System"
 8 |   light: "Light"
 9 |   dark: "Dark"
10 | 
11 | i18n:
12 |   language: "Language"
13 |   restart_msg: "Configuration changed, do you want to restart the app to take effect?"
14 | 
15 | backend:
16 |   title: "Backend Settings"
17 |   protocol_label: "Protocol"
18 |   name: "Backend"
19 |   test: "Test"
20 |   test_succeed: "Successfully connected to backend."
21 |   test_failed: "Failed to connect to backend in 5s."
22 | 
23 | audio_device:
24 |   name: "Audio Device (Please use same kind of device for input and output)"
25 |   input: "Input (Recording)"
26 |   output: "Output (Playback)"
27 | 
28 | audio:
29 |   name: "Audio Settings"
30 |   db_threshold: "DB Threshold"
31 |   pitch_shift: "Pitch Shift"
32 |   sample_duration: "Sample Duration"
33 |   fade_duration: "Fade in and out duration"
34 |   extra_duration: "Extra Duration"
35 |   input_denoise: "Input Denoise"
36 |   output_denoise: "Output Denoise"
37 | 
38 | action:
39 |   start: "Start Voice Conversion"
40 |   stop: "Stop Voice Conversion"
41 |   latency: "Latency: {latency:.2f} ms"
42 |   error: "An error occurred, please restart the conversion"
43 | 
44 | config:
45 |   save: "Save Config"
46 |   load: "Load Config"
47 |   save_msg: "Configuration saved, do you want to export to a file?"
48 |   save_title: "Export Configuration to YAML File"
49 |   load_msg: "Import completed, the program will restart to take effect."
50 |   load_title: "Import Configuration from YAML File"
51 |   error: "Unable to parse configuration file, reset"
52 | 
53 | plugins:
54 |   name: "Plugin"
55 |   none:
56 |     name: "None"
57 |   diffusion:
58 |     name: "Diffusion Model"
59 |     speaker:
60 |       label: "Speaker"
61 |       tooltip: "Speaker ID, e.g., 0, 1, 2"
62 |     sample_method: 
63 |       label: "Sampling Algorithm"
64 |       tooltip: "none for no sampling, PLMS for fast sampling"
65 |     sample_interval: 
66 |       label: "Sampling Interval"
67 |       tooltip: "How many steps to take a sample, the larger the number, the faster the speed, but the lower the quality"
68 |     skip_steps: 
69 |       label: "Skip Steps"
70 |       tooltip: "Shallow diffusion related configuration, skip steps like 970 to eliminate electronic sound only, without affecting the effect"
71 |   hifisinger:
72 |     name: "HiFiSinger"
73 |     speaker:
74 |       label: "Speaker"
75 |       tooltip: "Speaker ID, e.g., 0, 1, 2"
76 |   rvc:
77 |     name: "RVC"
78 |     speaker:
79 |       label: "Speaker"
80 |       tooltip: "Speaker ID, e.g., 0, 1, 2"
81 |     index_ratio:
82 |       label: "Index Ratio"
83 |       tooltip: "TODO: Supplement Information"
84 | 


--------------------------------------------------------------------------------
/rtvc/locales/zh_CN.yaml:
--------------------------------------------------------------------------------
 1 | name: "中文 (简体)"
 2 | title: "实时语音转换 (FishAudio) (版本: {version})"
 3 | 
 4 | theme:
 5 |   name: "主题"
 6 |   auto: "系统"
 7 |   light: "亮色"
 8 |   dark: "暗色"
 9 | 
10 | i18n:
11 |   language: "语言"
12 |   restart_msg: "配置已更改，是否要重新启动应用程序以使其生效?"
13 | 
14 | backend:
15 |   title: "后端设置"
16 |   protocol_label: "协议"
17 |   name: "后端"
18 |   test: "测试"
19 |   test_succeed: "成功连接到后端。"
20 |   test_failed: "5 秒内无法连接到后端。"
21 | 
22 | audio_device:
23 |   name: "音频设备（请对输入和输出使用同类设备）"
24 |   input: "输入（录音）"
25 |   output: "输出（播放）"
26 | 
27 | audio:
28 |   name: "音频设置"
29 |   db_threshold: "分贝阈值"
30 |   pitch_shift: "音调偏移"
31 |   sample_duration: "样本时长"
32 |   fade_duration: "淡入淡出时长"
33 |   extra_duration: "额外时长"
34 |   input_denoise: "输入降噪"
35 |   output_denoise: "输出降噪"
36 | 
37 | action:
38 |   start: "开始语音转换"
39 |   stop: "停止语音转换"
40 |   latency: "延迟: {latency:.2f} ms"
41 |   error: "发生错误, 请重新启动转换"
42 | 
43 | config:
44 |   save: "保存配置"
45 |   load: "加载配置"
46 |   save_msg: "配置已保存, 你想导出到文件吗?"
47 |   save_title: "导出配置到 YAML 文件"
48 |   load_msg: "导入完成, 程序将重启以使其生效."
49 |   load_title: "从 YAML 文件导入配置"
50 |   error: "无法解析配置文件, 已重置"
51 | 
52 | plugins:
53 |   name: "插件"
54 |   none:
55 |     name: "无"
56 |   diffusion:
57 |     name: "扩散模型"
58 |     speaker:
59 |       label: "说话人"
60 |       tooltip: "说话人 ID, 如 0, 1, 2"
61 |     sample_method: 
62 |       label: "采样算法"
63 |       tooltip: "none 为不采样, PLMS 为快速采样"
64 |     sample_interval: 
65 |       label: "采样间隔"
66 |       tooltip: "每隔多少步进行采样, 数字越大速度越快, 但是质量越低"
67 |     skip_steps: 
68 |       label: "跳过步骤"
69 |       tooltip: "浅扩散相关配置, 跳过如 970 步来实现只消除电音, 不影响效果"
70 |   hifisinger:
71 |     name: "HiFiSinger"
72 |     speaker:
73 |       label: "说话人"
74 |       tooltip: "说话人 ID, 如 0, 1, 2"
75 |   rvc:
76 |     name: "RVC"
77 |     speaker:
78 |       label: "说话人"
79 |       tooltip: "说话人 ID, 如 0, 1, 2"
80 |     index_ratio:
81 |       label: "索引比例"
82 |       tooltip: "TODO: 补充信息"
83 | 


--------------------------------------------------------------------------------
/rtvc/plugins/__init__.py:
--------------------------------------------------------------------------------
 1 | from rtvc.plugins.diffusion import DiffusionPlugin
 2 | from rtvc.plugins.hifisinger import HiFiSingerPlugin
 3 | from rtvc.plugins.rvc import RVCPlugin
 4 | 
 5 | ALL_PLUGINS = [
 6 |     DiffusionPlugin,
 7 |     RVCPlugin,
 8 |     HiFiSingerPlugin,
 9 | ]
10 | 


--------------------------------------------------------------------------------
/rtvc/plugins/base.py:
--------------------------------------------------------------------------------
  1 | from dataclasses import dataclass
  2 | 
  3 | from PyQt6.QtCore import Qt
  4 | from PyQt6.QtWidgets import (
  5 |     QCheckBox,
  6 |     QComboBox,
  7 |     QGridLayout,
  8 |     QGroupBox,
  9 |     QHBoxLayout,
 10 |     QLabel,
 11 |     QLineEdit,
 12 |     QMessageBox,
 13 |     QSlider,
 14 |     QVBoxLayout,
 15 | )
 16 | 
 17 | from rtvc.config import config
 18 | from rtvc.i18n import _t
 19 | 
 20 | 
 21 | def slider(
 22 |     minimum: int, maximum: int, step: int = 1, map_key: str | None = None
 23 | ) -> int:
 24 |     namespace = dict(min=minimum, max=maximum, step=step, map_key=map_key)
 25 |     return type("Slider", (int,), namespace)
 26 | 
 27 | 
 28 | def input(map_key: str | None = None) -> str:
 29 |     namespace = dict(map_key=map_key)
 30 |     return type("Input", (str,), namespace)
 31 | 
 32 | 
 33 | def checkbox(map_key: str | None = None) -> bool:
 34 |     namespace = dict(map_key=map_key)
 35 |     return type("Checkbox", (bool,), namespace)
 36 | 
 37 | 
 38 | def dropdown(options: list[tuple[str, str]], map_key: str | None = None) -> str:
 39 |     namespace = dict(options=options, map_key=map_key)
 40 |     return type("Dropdown", (str,), namespace)
 41 | 
 42 | 
@dataclass
class WithSpeaker:
    """Base dataclass that contributes a ``speaker`` text field to a plugin."""

    # Backward compatibility
    # The field type carries map_key="sSpeakId"; render_plugin collects such
    # map_key values into the key mapping it returns.
    speaker: input(map_key="sSpeakId") = "0"
 47 | 
 48 | 
 49 | def render_plugin(plugin_cls: dataclass) -> QGroupBox:
 50 |     layout = QVBoxLayout()
 51 | 
 52 |     # Inspect all the fields of the plugin class
 53 |     fields = plugin_cls.__dataclass_fields__
 54 |     class_id = plugin_cls.id
 55 |     _t_key = f"plugins.{class_id}"
 56 | 
 57 |     try:
 58 |         plugin_config = plugin_cls(**config.plugins.get(class_id, {}))
 59 |     except TypeError as e:
 60 |         # Popup a message
 61 |         msg = QMessageBox()
 62 |         msg.setIcon(QMessageBox.Icon.Critical)
 63 |         msg.setText(_t(f"config.error"))
 64 |         msg.setInformativeText(str(e))
 65 |         msg.exec()
 66 | 
 67 |         plugin_config = plugin_cls()
 68 | 
 69 |     get_value_funcs = {}
 70 |     key_mappping = {}
 71 | 
 72 |     for key, value in fields.items():
 73 |         type = value.type.__name__
 74 |         if type not in ["Slider", "Input", "Checkbox", "Dropdown"]:
 75 |             continue
 76 | 
 77 |         if hasattr(value.type, "map_key") and value.type.map_key is not None:
 78 |             key_mappping[key] = value.type.map_key
 79 | 
 80 |         row = QHBoxLayout()
 81 |         row.setAlignment(Qt.AlignmentFlag.AlignLeft)
 82 | 
 83 |         if type == "Slider":
 84 |             slider = QSlider()
 85 |             slider.setOrientation(Qt.Orientation.Horizontal)
 86 |             slider.setMinimum(value.type.min)
 87 |             slider.setMaximum(value.type.max)
 88 |             slider.setSingleStep(value.type.step)
 89 |             slider.setTickInterval(value.type.step)
 90 |             slider.setValue(getattr(plugin_config, key))
 91 |             value_label = QLabel(f"{slider.value()}")
 92 |             slider.valueChanged.connect(
 93 |                 lambda value, value_label=value_label: value_label.setText(str(value))
 94 |             )
 95 |             get_value_funcs[key] = lambda slider=slider: slider.value()
 96 |             row.addWidget(QLabel(_t(f"{_t_key}.{key}.label")))
 97 |             row.addWidget(slider)
 98 |             slider.setToolTip(_t(f"{_t_key}.{key}.tooltip"))
 99 |             row.addWidget(value_label)
100 | 
101 |         elif type == "Input":
102 |             line_edit = QLineEdit()
103 |             line_edit.setText(getattr(plugin_config, key))
104 |             get_value_funcs[key] = lambda line_edit=line_edit: line_edit.text()
105 |             row.addWidget(QLabel(_t(f"{_t_key}.{key}.label")))
106 |             line_edit.setToolTip(_t(f"{_t_key}.{key}.tooltip"))
107 |             row.addWidget(line_edit)
108 | 
109 |         elif type == "Checkbox":
110 |             checkbox = QCheckBox()
111 |             checkbox.setChecked(getattr(plugin_config, key))
112 |             get_value_funcs[key] = lambda checkbox=checkbox: checkbox.isChecked()
113 |             row.addWidget(QLabel(_t(f"{_t_key}.{key}.label")))
114 |             checkbox.setToolTip(_t(f"{_t_key}.{key}.tooltip"))
115 |             row.addWidget(checkbox)
116 | 
117 |         elif type == "Dropdown":
118 |             dropdown = QComboBox()
119 |             dropdown.setMinimumWidth(200)
120 |             dropdown.addItems([item[0] for item in value.type.options])
121 |             for i, item in enumerate(value.type.options):
122 |                 if item[1] == getattr(plugin_config, key):
123 |                     dropdown.setCurrentIndex(i)
124 |             get_value_funcs[key] = lambda dropdown=dropdown: dropdown.currentText()
125 |             row.addWidget(QLabel(_t(f"{_t_key}.{key}.label")))
126 |             dropdown.setToolTip(_t(f"{_t_key}.{key}.tooltip"))
127 |             row.addWidget(dropdown)
128 | 
129 |         layout.addLayout(row)
130 | 
131 |     return (
132 |         layout,
133 |         lambda: {key: func() for key, func in get_value_funcs.items()},
134 |         key_mappping,
135 |     )
136 | 


--------------------------------------------------------------------------------
/rtvc/plugins/diffusion.py:
--------------------------------------------------------------------------------
 1 | from dataclasses import dataclass
 2 | from typing import ClassVar
 3 | 
 4 | from rtvc.plugins.base import WithSpeaker, dropdown, slider
 5 | 
 6 | 
@dataclass
class DiffusionPlugin(WithSpeaker):
    """Settings of the "diffusion" plugin: the inherited speaker plus sampling options."""

    # Plugin identifier; a ClassVar, so it is not a dataclass field.
    id: ClassVar[str] = "diffusion"

    # Each option is a (display label, value) pair; the default is the value "plms".
    sample_method: dropdown(
        [
            ("None", "none"),
            ("PLMS", "plms"),
        ]
    ) = "plms"
    # slider(minimum, maximum, step) = default
    sample_interval: slider(1, 100, 5) = 20
    skip_steps: slider(0, 1000, 10) = 0
19 | 


--------------------------------------------------------------------------------
/rtvc/plugins/hifisinger.py:
--------------------------------------------------------------------------------
 1 | from dataclasses import dataclass
 2 | from typing import ClassVar
 3 | 
 4 | from rtvc.plugins.base import WithSpeaker, slider
 5 | 
 6 | 
@dataclass
class HiFiSingerPlugin(WithSpeaker):
    """Settings of the "hifisinger" plugin; only the inherited speaker field."""

    # Plugin identifier; a ClassVar, so it is not a dataclass field.
    id: ClassVar[str] = "hifisinger"
10 | 


--------------------------------------------------------------------------------
/rtvc/plugins/rvc.py:
--------------------------------------------------------------------------------
 1 | from dataclasses import dataclass
 2 | from typing import ClassVar
 3 | 
 4 | from rtvc.plugins.base import WithSpeaker, dropdown, slider
 5 | 
 6 | 
@dataclass
class RVCPlugin(WithSpeaker):
    """Settings of the "rvc" plugin: the inherited speaker plus an index ratio."""

    # Plugin identifier; a ClassVar, so it is not a dataclass field.
    id: ClassVar[str] = "rvc"

    # slider(minimum, maximum, step) = default
    index_ratio: slider(0, 100, 1) = 20
12 | 


--------------------------------------------------------------------------------
/tests/test_sola.py:
--------------------------------------------------------------------------------
 1 | import numpy as np
 2 | import torch
 3 | import torch.nn.functional as F
 4 | from scipy.signal import convolve
 5 | 
 6 | 
 7 | def test_numpy_sola():
 8 |     fade_frames = 90 * 44100 // 1000
 9 |     sola_search_frames = 12 * 44100 // 1000
10 | 
11 |     sola_buffer = np.random.randn(fade_frames).astype(np.float32)
12 |     infer_wav = np.random.randn(fade_frames * 5).astype(np.float32)
13 |     infer_wav[114 : 114 + fade_frames] = sola_buffer
14 | 
15 |     conv_input = infer_wav[None, : fade_frames + sola_search_frames]
16 |     cor_nom = F.conv1d(
17 |         torch.from_numpy(conv_input).float(),
18 |         torch.from_numpy(sola_buffer[None, None, :]).float(),
19 |     )
20 | 
21 |     cor_nom1 = convolve(conv_input, np.flip(sola_buffer[None, :]), mode="valid")
22 |     assert np.allclose(cor_nom.numpy(), cor_nom1, atol=1e-2)
23 | 
24 |     cor_den = torch.sqrt(
25 |         F.conv1d(
26 |             torch.from_numpy(conv_input).float() ** 2,
27 |             torch.ones(1, 1, fade_frames),
28 |         )
29 |         + 1e-8
30 |     )
31 | 
32 |     cor_den1 = np.sqrt(
33 |         convolve(conv_input**2, np.ones((1, fade_frames)), mode="valid") + 1e-8
34 |     )
35 |     assert np.allclose(cor_den.numpy(), cor_den1, atol=1e-2)
36 | 
37 |     sola_offset = torch.argmax(cor_nom[0] / cor_den[0])
38 |     sola_offset1 = np.argmax(cor_nom1[0] / cor_den1[0])
39 | 
40 |     assert sola_offset == sola_offset1 == 114
41 | 


--------------------------------------------------------------------------------