├── silero_vad.onnx ├── icons ├── bw_icon.png ├── splash.png ├── MacOS_icon.png └── Windows-icon-open.ico ├── requirements.txt ├── lexisynth_types.py ├── .github └── workflows │ ├── check-format.yaml │ ├── release.yaml │ └── build.yaml ├── lexisynth.iss ├── entitlements.plist ├── audio_player.py ├── LICENSE ├── file_poller.py ├── README.md ├── ls_logging.py ├── models_info.py ├── storage.py ├── log_view.py ├── model_download_dialog.ui ├── log_view.ui ├── .gitignore ├── model_download_dialog.py ├── lexisynth.spec ├── text_to_speech.py ├── obs_websocket.py ├── audio_capture.py ├── transcription.py ├── settings_dialog.py ├── about.ui ├── translation.py ├── settings_dialog.ui ├── language_codes.py ├── main.py └── mainwindow.ui /silero_vad.onnx: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/royshil/lexisynth/main/silero_vad.onnx -------------------------------------------------------------------------------- /icons/bw_icon.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/royshil/lexisynth/main/icons/bw_icon.png -------------------------------------------------------------------------------- /icons/splash.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/royshil/lexisynth/main/icons/splash.png -------------------------------------------------------------------------------- /icons/MacOS_icon.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/royshil/lexisynth/main/icons/MacOS_icon.png -------------------------------------------------------------------------------- /icons/Windows-icon-open.ico: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/royshil/lexisynth/main/icons/Windows-icon-open.ico 
-------------------------------------------------------------------------------- /requirements.txt: -------------------------------------------------------------------------------- 1 | ctranslate2 2 | faster-whisper 3 | obsws_python 4 | pillow 5 | platformdirs 6 | pyaudio 7 | pydub 8 | pyinstaller 9 | pyqt6 10 | python-dotenv 11 | sentencepiece 12 | sounddevice 13 | soundfile 14 | -------------------------------------------------------------------------------- /lexisynth_types.py: -------------------------------------------------------------------------------- 1 | class AudioSource: 2 | class SourceType: 3 | FILE = 0 4 | DEVICE = 1 5 | 6 | def __init__(self, sourceType, sourceName): 7 | self.sourceType = sourceType 8 | self.sourceName = sourceName 9 | -------------------------------------------------------------------------------- /.github/workflows/check-format.yaml: -------------------------------------------------------------------------------- 1 | name: Check Python Formatting 2 | 3 | on: 4 | workflow_call: 5 | 6 | jobs: 7 | check-format: 8 | runs-on: ubuntu-latest 9 | 10 | steps: 11 | - name: Checkout code 12 | uses: actions/checkout@v4 13 | 14 | - name: Set up Python 15 | uses: actions/setup-python@v5 16 | with: 17 | python-version: 3.x 18 | 19 | - name: Install dependencies 20 | run: pip install black 21 | 22 | - name: Check formatting 23 | run: black --check . 
24 | -------------------------------------------------------------------------------- /lexisynth.iss: -------------------------------------------------------------------------------- 1 | [Setup] 2 | AppName=LexiSynth 3 | AppVersion=0.0.1-beta1 4 | DefaultDirName={pf}\LexiSynth 5 | DefaultGroupName=LexiSynth 6 | OutputDir=.\dist 7 | OutputBaseFilename=lexisynth-setup 8 | Compression=lzma 9 | SolidCompression=yes 10 | ArchitecturesInstallIn64BitMode=x64 11 | 12 | [Files] 13 | Source: "dist\lexisynth\*"; DestDir: "{app}"; Flags: ignoreversion recursesubdirs createallsubdirs 14 | 15 | [Icons] 16 | Name: "{group}\LexiSynth"; Filename: "{app}\lexisynth.exe" 17 | 18 | [Run] 19 | Filename: "{app}\lexisynth.exe"; Description: "Launch LexiSynth"; Flags: nowait postinstall skipifsilent 20 | -------------------------------------------------------------------------------- /entitlements.plist: -------------------------------------------------------------------------------- 1 | 2 | 3 | 4 | 5 | 6 | com.apple.security.cs.allow-jit 7 | 8 | com.apple.security.cs.allow-unsigned-executable-memory 9 | 10 | com.apple.security.cs.disable-library-validation 11 | 12 | 13 | com.apple.security.device.microphone 14 | 15 | 16 | 17 | -------------------------------------------------------------------------------- /audio_player.py: -------------------------------------------------------------------------------- 1 | import queue 2 | import time 3 | from pydub import AudioSegment 4 | from pydub.playback import play 5 | import io 6 | from PyQt6.QtCore import QThread 7 | 8 | 9 | class AudioBuffer: 10 | class AudioBufferType: 11 | RAW = 0 12 | MP3 = 1 13 | 14 | def __init__(self, type, bytes): 15 | self.buffer = queue.Queue() 16 | self.type = type 17 | self.bytes = bytes 18 | 19 | 20 | class AudioPlayer(QThread): 21 | def __init__(self): 22 | super().__init__() 23 | self.queue = queue.Queue() 24 | self.isRunning = False 25 | 26 | def add_to_queue(self, audio: AudioBuffer): 27 | self.queue.put(audio) 
28 | 29 | def stop(self): 30 | self.isRunning = False 31 | 32 | def run(self): 33 | while self.isRunning: 34 | if self.queue.empty(): 35 | time.sleep(0.1) 36 | continue 37 | audio = self.queue.get() 38 | if audio.type == AudioBuffer.AudioBufferType.MP3: 39 | audio = AudioSegment.from_mp3(io.BytesIO(audio.bytes)) 40 | play(audio) 41 | -------------------------------------------------------------------------------- /LICENSE: -------------------------------------------------------------------------------- 1 | MIT License 2 | 3 | Copyright (c) 2024 OCC AI: Open tools for Content Creators and Streamers 4 | 5 | Permission is hereby granted, free of charge, to any person obtaining a copy 6 | of this software and associated documentation files (the "Software"), to deal 7 | in the Software without restriction, including without limitation the rights 8 | to use, copy, modify, merge, publish, distribute, sublicense, and/or sell 9 | copies of the Software, and to permit persons to whom the Software is 10 | furnished to do so, subject to the following conditions: 11 | 12 | The above copyright notice and this permission notice shall be included in all 13 | copies or substantial portions of the Software. 14 | 15 | THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR 16 | IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, 17 | FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE 18 | AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER 19 | LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, 20 | OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE 21 | SOFTWARE. 
22 | -------------------------------------------------------------------------------- /file_poller.py: -------------------------------------------------------------------------------- 1 | import os 2 | import time 3 | from PyQt6.QtCore import QThread 4 | from queue import Queue 5 | from ls_logging import logger 6 | 7 | 8 | class FilePoller(QThread): 9 | def __init__(self, filename: str, cadence_ms: int, queue: Queue): 10 | super().__init__() 11 | self.filename = filename 12 | self.cadence_seconds = cadence_ms / 1000.0 # Convert ms to seconds 13 | self.queue = queue 14 | self.stop_flag = False 15 | self.last_content = None 16 | 17 | def run(self): 18 | # check if file exists 19 | if not os.path.exists(self.filename): 20 | logger.error(f"File {self.filename} does not exist") 21 | return 22 | while not self.stop_flag: 23 | if os.path.exists(self.filename): 24 | with open(self.filename, "r") as file: 25 | content = file.read() 26 | if content and content != self.last_content: 27 | self.queue.put_nowait(content) 28 | self.last_content = content 29 | time.sleep(self.cadence_seconds) 30 | 31 | def stop(self): 32 | self.stop_flag = True 33 | -------------------------------------------------------------------------------- /README.md: -------------------------------------------------------------------------------- 1 | # LexiSynth 2 | 3 | LexiSynth is an AI speech analysis and synthesis tool built with Python. It leverages the power of PyInstaller, CTranslate2, and Faster-Whisper to provide a robust and efficient solution for speech processing tasks. 4 | 5 | ## Features 6 | 7 | - Speech Analysis: Analyze speech patterns and extract meaningful insights. 8 | - Speech Synthesis: Convert text into natural-sounding speech. 9 | - Built with Python: Leverage the power and simplicity of Python for customization and rapid development. 10 | - CTranslate2 and Faster-Whisper: Utilize these powerful libraries for efficient and high-quality speech processing. 
11 | 12 | ## Build Instructions 13 | 14 | To build LexiSynth using PyInstaller, follow the steps below: 15 | 16 | 1. Ensure you have Python 3.11. You can check your Python version by running `python --version` in your terminal. 17 | 18 | 2. Install the required Python packages. In the root directory of the project, run: 19 | 20 | ```bash 21 | pip install -r requirements.txt 22 | ``` 23 | 24 | 3. Build the executable using PyInstaller. In the root directory of the project, run: 25 | 26 | MacOSX: 27 | ```bash 28 | pyinstaller --clean --noconfirm lexisynth.spec -- --mac_osx 29 | ``` 30 | 31 | Windows: 32 | ```bash 33 | pyinstaller --clean --noconfirm lexisynth.spec -- --win 34 | ``` 35 | 36 | This will create a `dist` directory containing the executable file for LexiSynth. 37 | 38 | ## Usage 39 | 40 | To use LexiSynth, simply run the executable file created in the `dist` directory. 41 | 42 | ## License 43 | 44 | This project is released under the MIT license. See [LICENSE](LICENSE) for details. 
"""Logging setup for LexiSynth (ls_logging.py).

Importing this module has side effects: it creates the per-user log
directory, prunes old log files, loads the ``.env`` file that sits next to
this module and configures the shared ``logger``.

Module-level names used by the rest of the application:

* ``logger`` -- the configured ``logging.Logger``.
* ``log_file_path`` -- absolute path of the log file for the current run.
"""

import logging
import os
from platformdirs import user_log_dir
from datetime import datetime
from dotenv import load_dotenv

# Per-user log directory. ``exist_ok=True`` replaces the previous
# "check, then create" sequence, which could raise FileExistsError if the
# directory appeared between the two calls.
data_dir = user_log_dir("lexisynth")
os.makedirs(data_dir, exist_ok=True)

# Each run writes to its own timestamped file inside the log directory.
current_time = datetime.now().strftime("%Y-%m-%d_%H-%M-%S")
log_file_path = os.path.join(data_dir, f"lexisynth_{current_time}.log")

# Load the environment variables from the .env file
load_dotenv(os.path.abspath(os.path.join(os.path.dirname(__file__), ".env")))

# Create a logger
logger = logging.getLogger(__name__)
logger.setLevel(logging.DEBUG)

# Keep only the 10 most recent existing log files. The timestamp embedded in
# the file name sorts lexicographically in chronological order, so a plain
# sort puts the oldest files first.
log_files = sorted(
    f for f in os.listdir(data_dir) if f.startswith("lexisynth_") and f.endswith(".log")
)
# Pruning is best-effort: a file that is locked or already gone must not
# abort the import (and with it the whole application). Failures are
# remembered and reported once the handlers below are attached.
_failed_removals = []
for _old_log in log_files[:-10]:
    try:
        os.remove(os.path.join(data_dir, _old_log))
    except OSError as _err:
        _failed_removals.append((_old_log, _err))

# Create a file handler
file_handler = logging.FileHandler(log_file_path)
file_handler.setLevel(logging.DEBUG)

# Create a formatter
formatter = logging.Formatter("%(asctime)s - %(levelname)s - %(module)s - %(message)s")
file_handler.setFormatter(formatter)

# Add the file handler to the logger
logger.addHandler(file_handler)

# if the .env file has a debug flag, set the logger to output to console
if os.getenv("LEXISYNTH_DEBUG"):
    console_handler = logging.StreamHandler()
    console_handler.setLevel(logging.DEBUG)
    console_handler.setFormatter(formatter)
    logger.addHandler(console_handler)
    logger.debug("Debug mode enabled")

# Report pruning failures now that the logger can actually write somewhere.
for _old_log, _err in _failed_removals:
    logger.warning("Could not remove old log file %s: %s", _old_log, _err)
"""Download metadata and on-disk locations of the LexiSynth models (models_info.py)."""

from os import path
from platformdirs import user_data_dir


class ModelDownloadInfo:
    """Static download descriptors, one dict per supported model.

    Each descriptor has the keys ``url``, ``file_name``, ``model_folder_name``
    and ``model_name``.
    """

    # URLs for downloading the models
    M2M_100 = {
        "url": "https://lexistream-downloads.s3.amazonaws.com/m2m_100_418M-ct2-int8.zip",
        "file_name": "m2m_100_418M-ct2-int8.zip",
        "model_folder_name": "M2M-100",
        "model_name": "M2M-100",
    }
    FASTER_WHISPER_TINY_CT2 = {
        "url": "https://lexistream-downloads.s3.amazonaws.com/faster-whisper-tiny-ct2-int8.zip",
        "file_name": "faster-whisper-tiny-ct2-int8.zip",
        "model_folder_name": "Faster-Whisper-Tiny-CT2",
        "model_name": "Faster-Whisper Tiny",
    }
    FASTER_WHISPER_BASE_CT2 = {
        "url": "https://lexistream-downloads.s3.amazonaws.com/faster-whisper-base-ct2-int8.zip",
        "file_name": "faster-whisper-base-ct2-int8.zip",
        "model_folder_name": "Faster-Whisper-Base-CT2",
        "model_name": "Faster-Whisper Base",
    }
    FASTER_WHISPER_SMALL_CT2 = {
        "url": "https://lexistream-downloads.s3.amazonaws.com/faster-whisper-small-ct2-int8.zip",
        "file_name": "faster-whisper-small-ct2-int8.zip",
        "model_folder_name": "Faster-Whisper-Small-CT2",
        "model_name": "Faster-Whisper Small",
    }


def checkForModelDownload(modelInfo):
    """Return True when the model's folder exists in the user data directory.

    ``modelInfo`` is a descriptor dict such as the ones on
    ``ModelDownloadInfo``; only its ``"model_folder_name"`` key is read, and
    only after the data directory itself has been found to exist.
    """
    storage_root = user_data_dir("lexisynth")
    # ``and`` short-circuits, so the descriptor is not touched when the data
    # directory is missing.
    return path.exists(storage_root) and path.exists(
        path.join(storage_root, modelInfo["model_folder_name"])
    )


def getAbsoluteModelPath(modelInfo):
    """Return the path of the model's folder inside the user data directory.

    The path is built from ``modelInfo["model_folder_name"]``; it is returned
    whether or not the folder exists on disk.
    """
    folder_name = modelInfo["model_folder_name"]
    return path.join(user_data_dir("lexisynth"), folder_name)
-------------------------------------------------------------------------------- /storage.py: -------------------------------------------------------------------------------- 1 | import json 2 | import os 3 | from platformdirs import user_data_dir 4 | 5 | 6 | def store_data(file_path, document_name, data): 7 | # Store data into a JSON file 8 | # get the user data directory 9 | data_dir = user_data_dir("lexisynth") 10 | if not os.path.exists(data_dir): 11 | os.makedirs(data_dir) 12 | 13 | # prepend the user data directory 14 | file_path = os.path.join(data_dir, file_path) 15 | 16 | if os.path.exists(file_path): 17 | with open(file_path, "r") as f: 18 | try: 19 | documents = json.load(f) 20 | except json.JSONDecodeError: 21 | documents = {} 22 | else: 23 | documents = {} 24 | 25 | if document_name in documents and isinstance(documents[document_name], dict): 26 | documents[document_name].update(data) 27 | else: 28 | documents[document_name] = data 29 | 30 | with open(file_path, "w") as f: 31 | json.dump(documents, f, indent=2) 32 | 33 | 34 | def remove_data(file_path, document_name): 35 | # Remove data from a JSON file 36 | # prepend the user data directory 37 | file_path = os.path.join(user_data_dir("lexisynth"), file_path) 38 | 39 | if not os.path.exists(file_path): 40 | return 41 | 42 | with open(file_path, "r") as f: 43 | documents = json.load(f) 44 | 45 | if document_name in documents: 46 | del documents[document_name] 47 | 48 | with open(file_path, "w") as f: 49 | json.dump(documents, f, indent=2) 50 | 51 | 52 | def fetch_data(file_path, document_name, default=None): 53 | # Fetch data from a JSON file 54 | # prepend the user data directory 55 | file_path = os.path.join(user_data_dir("lexisynth"), file_path) 56 | 57 | if not os.path.exists(file_path): 58 | return default 59 | 60 | with open(file_path, "r") as f: 61 | try: 62 | documents = json.load(f) 63 | except json.JSONDecodeError: 64 | return default 65 | 66 | if document_name in documents: 67 | return 
documents[document_name] 68 | else: 69 | return default 70 | -------------------------------------------------------------------------------- /log_view.py: -------------------------------------------------------------------------------- 1 | from os import path 2 | import platform 3 | from PyQt6.QtWidgets import QDialog 4 | from PyQt6.QtCore import QTimer 5 | from PyQt6.uic import loadUi 6 | from ls_logging import log_file_path 7 | 8 | 9 | class LogViewerDialog(QDialog): 10 | def __init__(self): 11 | super().__init__() 12 | loadUi(path.abspath(path.join(path.dirname(__file__), "log_view.ui")), self) 13 | self.timer = QTimer() 14 | self.timer.timeout.connect(self.update_ui) 15 | self.timer.start(1000) # Update UI every 1 second 16 | self.current_log_data = "" 17 | self.pushButton_openlogfolder.clicked.connect(self.open_log_folder) 18 | 19 | def open_log_folder(self): 20 | # Open the folder containing the log file 21 | # check if this is windows, mac or linux 22 | if path.exists(log_file_path): 23 | os_name = platform.system() 24 | 25 | if os_name == "Windows": 26 | from os import startfile 27 | 28 | startfile(path.dirname(log_file_path)) 29 | elif os_name == "Linux": 30 | import subprocess 31 | 32 | subprocess.Popen(["xdg-open", path.dirname(log_file_path)]) 33 | elif os_name == "Darwin": 34 | import subprocess 35 | 36 | subprocess.Popen(["open", path.dirname(log_file_path)]) 37 | 38 | def update_ui(self): 39 | with open(log_file_path, "r") as log_file: 40 | lines = log_file.readlines() 41 | last_1000_lines = lines[-1000:] 42 | log_data = "".join(last_1000_lines) 43 | if log_data == self.current_log_data: 44 | return 45 | self.current_log_data = log_data 46 | # Update the UI with the log data 47 | self.textEdit_log.setPlainText(log_data) 48 | if self.checkBox_autoScroll.isChecked(): 49 | # scroll to the bottom 50 | self.textEdit_log.verticalScrollBar().setValue( 51 | self.textEdit_log.verticalScrollBar().maximum() 52 | ) 53 | 
self.scrollArea.ensureWidgetVisible(self.textEdit_log) 54 | -------------------------------------------------------------------------------- /model_download_dialog.ui: -------------------------------------------------------------------------------- 1 | 2 | 3 | Dialog 4 | 5 | 6 | 7 | 0 8 | 0 9 | 220 10 | 128 11 | 12 | 13 | 14 | Model Download 15 | 16 | 17 | 18 | 19 | 20 | Downloading model. Please wait. 21 | 22 | 23 | 24 | 25 | 26 | 27 | Progress 28 | 29 | 30 | Qt::AlignCenter 31 | 32 | 33 | 34 | 35 | 36 | 37 | 0 38 | 39 | 40 | 41 | 42 | 43 | 44 | true 45 | 46 | 47 | Qt::Horizontal 48 | 49 | 50 | QDialogButtonBox::Cancel 51 | 52 | 53 | 54 | 55 | 56 | 57 | 58 | 59 | buttonBox 60 | accepted() 61 | Dialog 62 | accept() 63 | 64 | 65 | 248 66 | 254 67 | 68 | 69 | 157 70 | 274 71 | 72 | 73 | 74 | 75 | buttonBox 76 | rejected() 77 | Dialog 78 | reject() 79 | 80 | 81 | 316 82 | 260 83 | 84 | 85 | 286 86 | 274 87 | 88 | 89 | 90 | 91 | 92 | -------------------------------------------------------------------------------- /log_view.ui: -------------------------------------------------------------------------------- 1 | 2 | 3 | Dialog 4 | 5 | 6 | 7 | 0 8 | 0 9 | 553 10 | 300 11 | 12 | 13 | 14 | Dialog 15 | 16 | 17 | 18 | 19 | 20 | 0 21 | 22 | 23 | 0 24 | 25 | 26 | 27 | 28 | Auto Scroll 29 | 30 | 31 | true 32 | 33 | 34 | 35 | 36 | 37 | 38 | Open Log Folder 39 | 40 | 41 | 42 | 43 | 44 | 45 | Qt::Horizontal 46 | 47 | 48 | QDialogButtonBox::Close 49 | 50 | 51 | 52 | 53 | 54 | 55 | 56 | 57 | true 58 | 59 | 60 | 61 | 62 | 0 63 | 0 64 | 533 65 | 250 66 | 67 | 68 | 69 | 70 | 0 71 | 72 | 73 | 0 74 | 75 | 76 | 0 77 | 78 | 79 | 0 80 | 81 | 82 | 83 | 84 | 85 | 86 | 87 | 88 | 89 | 90 | 91 | 92 | 93 | buttonBox 94 | accepted() 95 | Dialog 96 | accept() 97 | 98 | 99 | 248 100 | 254 101 | 102 | 103 | 157 104 | 274 105 | 106 | 107 | 108 | 109 | buttonBox 110 | rejected() 111 | Dialog 112 | reject() 113 | 114 | 115 | 316 116 | 260 117 | 118 | 119 | 286 120 | 274 121 | 122 | 123 | 124 | 125 | 126 
| -------------------------------------------------------------------------------- /.gitignore: -------------------------------------------------------------------------------- 1 | # Byte-compiled / optimized / DLL files 2 | __pycache__/ 3 | *.py[cod] 4 | *$py.class 5 | 6 | # C extensions 7 | *.so 8 | 9 | # Distribution / packaging 10 | .Python 11 | build/ 12 | develop-eggs/ 13 | dist/ 14 | downloads/ 15 | eggs/ 16 | .eggs/ 17 | lib/ 18 | lib64/ 19 | parts/ 20 | sdist/ 21 | var/ 22 | wheels/ 23 | share/python-wheels/ 24 | *.egg-info/ 25 | .installed.cfg 26 | *.egg 27 | MANIFEST 28 | output/ 29 | 30 | # PyInstaller 31 | # Usually these files are written by a python script from a template 32 | # before PyInstaller builds the exe, so as to inject date/other infos into it. 33 | *.manifest 34 | *.spec 35 | 36 | # Installer logs 37 | pip-log.txt 38 | pip-delete-this-directory.txt 39 | 40 | # Unit test / coverage reports 41 | htmlcov/ 42 | .tox/ 43 | .nox/ 44 | .coverage 45 | .coverage.* 46 | .cache 47 | nosetests.xml 48 | coverage.xml 49 | *.cover 50 | *.py,cover 51 | .hypothesis/ 52 | .pytest_cache/ 53 | cover/ 54 | 55 | # Translations 56 | *.mo 57 | *.pot 58 | 59 | # Django stuff: 60 | *.log 61 | local_settings.py 62 | db.sqlite3 63 | db.sqlite3-journal 64 | 65 | # Flask stuff: 66 | instance/ 67 | .webassets-cache 68 | 69 | # Scrapy stuff: 70 | .scrapy 71 | 72 | # Sphinx documentation 73 | docs/_build/ 74 | 75 | # PyBuilder 76 | .pybuilder/ 77 | target/ 78 | 79 | # Jupyter Notebook 80 | .ipynb_checkpoints 81 | 82 | # IPython 83 | profile_default/ 84 | ipython_config.py 85 | 86 | # pyenv 87 | # For a library or package, you might want to ignore these files since the code is 88 | # intended to run in multiple environments; otherwise, check them in: 89 | # .python-version 90 | 91 | # pipenv 92 | # According to pypa/pipenv#598, it is recommended to include Pipfile.lock in version control. 
93 | # However, in case of collaboration, if having platform-specific dependencies or dependencies 94 | # having no cross-platform support, pipenv may install dependencies that don't work, or not 95 | # install all needed dependencies. 96 | #Pipfile.lock 97 | 98 | # poetry 99 | # Similar to Pipfile.lock, it is generally recommended to include poetry.lock in version control. 100 | # This is especially recommended for binary packages to ensure reproducibility, and is more 101 | # commonly ignored for libraries. 102 | # https://python-poetry.org/docs/basic-usage/#commit-your-poetrylock-file-to-version-control 103 | #poetry.lock 104 | 105 | # pdm 106 | # Similar to Pipfile.lock, it is generally recommended to include pdm.lock in version control. 107 | #pdm.lock 108 | # pdm stores project-wide configurations in .pdm.toml, but it is recommended to not include it 109 | # in version control. 110 | # https://pdm.fming.dev/#use-with-ide 111 | .pdm.toml 112 | 113 | # PEP 582; used by e.g. github.com/David-OConnor/pyflow and github.com/pdm-project/pdm 114 | __pypackages__/ 115 | 116 | # Celery stuff 117 | celerybeat-schedule 118 | celerybeat.pid 119 | 120 | # SageMath parsed files 121 | *.sage.py 122 | 123 | # Environments 124 | .env 125 | .venv 126 | env/ 127 | venv/ 128 | ENV/ 129 | env.bak/ 130 | venv.bak/ 131 | 132 | # Spyder project settings 133 | .spyderproject 134 | .spyproject 135 | 136 | # Rope project settings 137 | .ropeproject 138 | 139 | # mkdocs documentation 140 | /site 141 | 142 | # mypy 143 | .mypy_cache/ 144 | .dmypy.json 145 | dmypy.json 146 | 147 | # Pyre type checker 148 | .pyre/ 149 | 150 | # pytype static type analyzer 151 | .pytype/ 152 | 153 | # Cython debug symbols 154 | cython_debug/ 155 | 156 | # PyCharm 157 | # JetBrains specific template is maintained in a separate JetBrains.gitignore that can 158 | # be found at https://github.com/github/gitignore/blob/main/Global/JetBrains.gitignore 159 | # and can be added to the global gitignore or merged into 
this file. For a more nuclear 160 | # option (not recommended) you can uncomment the following to ignore the entire idea folder. 161 | #.idea/ 162 | 163 | # macOS 164 | *.DS_Store 165 | -------------------------------------------------------------------------------- /model_download_dialog.py: -------------------------------------------------------------------------------- 1 | from PyQt6 import uic 2 | from PyQt6.QtWidgets import QDialog 3 | import requests 4 | import os 5 | from platformdirs import user_data_dir 6 | import zipfile 7 | from ls_logging import logger 8 | from models_info import checkForModelDownload 9 | from PyQt6.QtCore import pyqtSignal 10 | from PyQt6.QtCore import QThread 11 | from os import path 12 | 13 | 14 | class ModelDownloadDialog(QDialog): 15 | def __init__(self, modelInfo, parent=None): 16 | super(ModelDownloadDialog, self).__init__(parent) 17 | uic.loadUi( 18 | path.abspath(path.join(path.dirname(__file__), "model_download_dialog.ui")), 19 | self, 20 | ) 21 | # start the download process 22 | self.modelInfo = modelInfo 23 | self.downloadThread = None 24 | self.startDownload() 25 | 26 | def startDownload(self): 27 | # start the download process 28 | self.label_modelDownloading.setText( 29 | f"Downloading {self.modelInfo['model_name']}" 30 | ) 31 | # start the download on a separate QThread 32 | self.downloadThread = ModelDownloadThread(self.modelInfo) 33 | self.downloadThread.finished.connect(self.finished) 34 | self.downloadThread.progressSignal.connect(self.progress) 35 | self.downloadThread.start() 36 | 37 | def finished(self): 38 | self.downloadThread = None 39 | # close the dialog 40 | self.accept() 41 | 42 | def progress(self, progress: int, message: str): 43 | # update the progress bar 44 | self.progressBar.setValue(progress) 45 | self.label_progress.setText(message) 46 | 47 | def closeEvent(self, event): 48 | # stop the download thread if it is running 49 | if self.downloadThread is not None: 50 | self.downloadThread.running = 
False 51 | self.downloadThread.wait() 52 | self.downloadThread = None 53 | super(ModelDownloadDialog, self).closeEvent(event) 54 | 55 | 56 | class ModelDownloadThread(QThread): 57 | # progress and message signal 58 | progressSignal = pyqtSignal(int, str) 59 | 60 | def __init__(self, modelInfo): 61 | super(ModelDownloadThread, self).__init__() 62 | self.modelInfo = modelInfo 63 | self.running = False 64 | 65 | def run(self): 66 | # download the model 67 | 68 | # get the file name 69 | url = self.modelInfo["url"] 70 | file_name = url.split("/")[-1] 71 | # put file in user data folder for lexisynth 72 | data_dir = user_data_dir("lexisynth") 73 | if not os.path.exists(data_dir): 74 | os.makedirs(data_dir) 75 | file_name = os.path.join(data_dir, file_name) 76 | logger.debug(f"Downloading model to {file_name}") 77 | 78 | # check if the file already exists 79 | if checkForModelDownload(self.modelInfo): 80 | # file already exists, no need to download 81 | self.progressSignal.emit((100, "Model already downloaded")) 82 | return 83 | # check if .zip leftover found from previous download 84 | if os.path.exists(file_name): 85 | os.remove(file_name) 86 | 87 | # download the file 88 | r = requests.get(url, stream=True) 89 | r.raise_for_status() 90 | total_size = int(r.headers.get("content-length", 0)) 91 | 92 | self.running = True 93 | with open(file_name, "wb") as f: 94 | for chunk in r.iter_content(chunk_size=8192): 95 | if not self.running: 96 | return 97 | if chunk: 98 | f.write(chunk) 99 | # update progress bar according to the download 100 | self.progressSignal.emit( 101 | int(100 * f.tell() / total_size), 102 | "Progress {0:.2f}%".format(100 * f.tell() / total_size), 103 | ) 104 | 105 | self.progressSignal.emit(100, "Model downloaded successfully. 
Unzipping...") 106 | # unzip the file 107 | with zipfile.ZipFile(file_name, "r") as zip_ref: 108 | zip_ref.extractall( 109 | os.path.join(data_dir, self.modelInfo["model_folder_name"]) 110 | ) 111 | # remove the zip file 112 | os.remove(file_name) 113 | 114 | self.progressSignal.emit(100, "Model unzipped successfully") 115 | -------------------------------------------------------------------------------- /.github/workflows/release.yaml: -------------------------------------------------------------------------------- 1 | # only run this workflow on the main branch and when a tag is pushed 2 | # this workflow will create a release draft and upload the build artifacts 3 | # to the release draft 4 | name: Release 5 | run-name: ${{ github.ref_name }} release run 🚀 6 | on: 7 | push: 8 | branches: 9 | - main 10 | tags: 11 | - '*' 12 | permissions: 13 | contents: write 14 | concurrency: 15 | group: '${{ github.workflow }} @ ${{ github.ref }}' 16 | cancel-in-progress: ${{ github.ref_type == 'tag' }} 17 | jobs: 18 | build-project: 19 | name: Build Project 🧱 20 | uses: ./.github/workflows/build.yaml 21 | secrets: inherit 22 | permissions: 23 | contents: read 24 | 25 | create-release: 26 | name: Create Release 🛫 27 | if: github.ref_type == 'tag' 28 | runs-on: ubuntu-22.04 29 | needs: build-project 30 | defaults: 31 | run: 32 | shell: bash 33 | steps: 34 | - name: Check Release Tag ☑️ 35 | id: check 36 | run: | 37 | : Check Release Tag ☑️ 38 | if [[ "${RUNNER_DEBUG}" ]]; then set -x; fi 39 | shopt -s extglob 40 | 41 | case "${GITHUB_REF_NAME}" in 42 | +([0-9]).+([0-9]).+([0-9]) ) 43 | echo 'validTag=true' >> $GITHUB_OUTPUT 44 | echo 'prerelease=false' >> $GITHUB_OUTPUT 45 | echo "version=${GITHUB_REF_NAME}" >> $GITHUB_OUTPUT 46 | ;; 47 | +([0-9]).+([0-9]).+([0-9])-@(beta|rc)*([0-9]) ) 48 | echo 'validTag=true' >> $GITHUB_OUTPUT 49 | echo 'prerelease=true' >> $GITHUB_OUTPUT 50 | echo "version=${GITHUB_REF_NAME}" >> $GITHUB_OUTPUT 51 | ;; 52 | *) echo 'validTag=false' >> 
$GITHUB_OUTPUT ;; 53 | esac 54 | 55 | - name: Download Build Artifacts 📥 56 | uses: actions/download-artifact@v4 57 | if: fromJSON(steps.check.outputs.validTag) 58 | id: download 59 | 60 | - name: Print downloaded artifacts 📥 61 | if: fromJSON(steps.check.outputs.validTag) 62 | run: | 63 | : Print downloaded artifacts 📥 64 | if [[ "${RUNNER_DEBUG}" ]]; then set -x; fi 65 | shopt -s extglob 66 | 67 | ls -laR ${{ steps.download.outputs.artifacts }} 68 | 69 | - name: Rename Files 🏷️ 70 | if: fromJSON(steps.check.outputs.validTag) 71 | run: | 72 | : Rename Files 🏷️ 73 | if [[ "${RUNNER_DEBUG}" ]]; then set -x; fi 74 | shopt -s extglob 75 | shopt -s nullglob 76 | 77 | root_dir="$(pwd)" 78 | commit_hash="${GITHUB_SHA:0:9}" 79 | 80 | variants=( 81 | 'linux' 82 | 'macos-x86' 83 | 'windows' 84 | ) 85 | 86 | mkdir -p "${root_dir}/uploads" 87 | 88 | for variant in "${variants[@]}"; do 89 | 90 | candidates=(*-${variant}/@(*)) 91 | 92 | for candidate in "${candidates[@]}"; do 93 | cp "${candidate}" "${root_dir}/uploads/lexisynth-${variant}-${GITHUB_REF_NAME}-${commit_hash}.${candidate##*.}" 94 | done 95 | done 96 | 97 | - name: Create Latest Release Info File 98 | if: fromJSON(steps.check.outputs.validTag) 99 | run: | 100 | echo "LATEST_RELEASE_TAG=${GITHUB_REF_NAME}" > release_info.env 101 | echo "LATEST_COMMIT_HASH=${GITHUB_SHA}" >> release_info.env 102 | echo "LATEST_RELEASE_DATE=$(date -u +"%Y-%m-%dT%H:%M:%SZ")" >> release_info.env 103 | cp release_info.env "$(pwd)/uploads/lexisynth_release_info.env" 104 | 105 | - name: Generate Checksums 🪪 106 | if: fromJSON(steps.check.outputs.validTag) 107 | run: | 108 | : Generate Checksums 🪪 109 | if [[ "${RUNNER_DEBUG}" ]]; then set -x; fi 110 | shopt -s extglob 111 | 112 | echo "### Checksums" > ${{ github.workspace }}/CHECKSUMS.txt 113 | # find the files from the above step and generate checksums 114 | for file in ${{ github.workspace }}/uploads/lexisynth-*; do 115 | echo " ${file##*/}: $(sha256sum "${file}" | cut -d " " -f 1)" >> 
# -*- mode: python ; coding: utf-8 -*-
# PyInstaller build specification for LexiSynth.
#
# Arguments placed after `--` on the pyinstaller command line are parsed here:
#   pyinstaller --clean --noconfirm lexisynth.spec -- --win       (Windows)
#   pyinstaller --clean --noconfirm lexisynth.spec -- --mac_osx   (macOS)
#   pyinstaller --clean --noconfirm lexisynth.spec                (Linux)
#
# `Analysis`, `PYZ`, `EXE`, `COLLECT`, `BUNDLE` and `Splash` are injected into
# the namespace by PyInstaller when it executes this file.
import os

# parse command line arguments
import argparse

parser = argparse.ArgumentParser()
parser.add_argument('--mac_osx', action='store_true')
parser.add_argument('--win', action='store_true')

args = parser.parse_args()

# Packages whose modules AND binaries are kept out of the bundle.
# Defined once so the module exclusion list and the binary filter below
# cannot drift apart.
exclude = ["IPython", "tensorflow", "matplotlib", "pandas", "sklearn", "skimage", "scipy", "torch", "torchvision", "torchaudio", "nltk", "cv2"]

a = Analysis(
    [
        'audio_capture.py',
        'audio_player.py',
        'file_poller.py',
        'language_codes.py',
        'lexisynth_types.py',
        'log_view.py',
        'ls_logging.py',
        'main.py',
        'model_download_dialog.py',
        'models_info.py',
        'obs_websocket.py',
        'settings_dialog.py',
        'storage.py',
        'transcription.py',
        'translation.py',
    ],
    pathex=[],
    binaries=[],
    datas=[
        ('about.ui', '.'),
        ('log_view.ui', '.'),
        ('mainwindow.ui', '.'),
        ('model_download_dialog.ui', '.'),
        ('settings_dialog.ui', '.'),
        ('.env', '.'),
        ('icons/splash.png', './icons'),
        ('icons/MacOS_icon.png', './icons'),
        # FIX: destination was '.icons' (missing slash), which placed the icon
        # in a hidden ".icons" folder instead of next to the other icons.
        ('icons/Windows-icon-open.ico', './icons'),
        ('silero_vad.onnx', './faster_whisper/assets'),
    ],
    hiddenimports=[],
    hookspath=[],
    hooksconfig={},
    runtime_hooks=[],
    # Module-level excludes: the shared list plus two import-only packages.
    excludes=["botocore", "transformers", *exclude],
    noarchive=False,
)

# Drop any collected binary whose name starts with an excluded package name.
a.binaries = [x for x in a.binaries if not x[0].startswith(tuple(exclude))]

pyz = PYZ(a.pure)

if args.win:
    # Windows: one-folder build with a splash screen.
    splash = Splash('icons/splash.png',
                    binaries=a.binaries,
                    datas=a.datas,
                    text_pos=(10, 20),
                    text_size=10,
                    text_color='black')
    exe = EXE(
        pyz,
        a.scripts,
        splash,
        name='lexisynth',
        icon='icons/Windows-icon-open.ico',
        debug=False,
        exclude_binaries=True,
        bootloader_ignore_signals=False,
        strip=False,
        upx=True,
        upx_exclude=[],
        console=False,
        disable_windowed_traceback=False,
        argv_emulation=False,
        target_arch=None,
    )
    coll = COLLECT(
        exe,
        a.binaries,
        a.zipfiles,
        a.datas,
        splash.binaries,
        strip=False,
        upx=True,
        upx_exclude=[],
        name='lexisynth'
    )
elif args.mac_osx:
    # macOS: signed one-folder build wrapped in an .app bundle (no splash).
    exe = EXE(pyz,
              a.scripts,
              [],
              exclude_binaries=True,
              name='lexisynth',
              debug=False,
              bootloader_ignore_signals=False,
              strip=False,
              upx=True,
              upx_exclude=[],
              runtime_tmpdir=None,
              console=False,
              disable_windowed_traceback=False,
              argv_emulation=False,
              target_arch=None,
              # signing identity comes from the environment; empty disables it
              codesign_identity=os.environ.get('APPLE_APP_DEVELOPER_ID', ''),
              entitlements_file='./entitlements.plist',
              )
    coll = COLLECT(exe,
                   a.binaries,
                   a.zipfiles,
                   a.datas,
                   strip=False,
                   upx=True,
                   upx_exclude=[],
                   name='lexisynth')
    app = BUNDLE(
        exe,
        coll,
        name='lexisynth.app',
        icon='icons/MacOS_icon.png',
        bundle_identifier='com.royshilkrot.lexisynth',
        version='0.0.1',
        info_plist={
            'NSPrincipalClass': 'NSApplication',
            'NSAppleScriptEnabled': False,
            'NSMicrophoneUsageDescription': 'Getting audio from the microphone to perform speech-to-text'
        }
    )
else:
    # Linux: single-file executable with a splash screen.
    splash = Splash('icons/splash.png',
                    binaries=a.binaries,
                    datas=a.datas,
                    text_pos=(10, 20),
                    text_size=10,
                    text_color='black')
    exe = EXE(
        pyz,
        a.binaries,
        a.datas,
        a.scripts,
        splash,
        splash.binaries,
        name='lexisynth',
        icon='icons/Windows-icon-open.ico',
        debug=False,
        bootloader_ignore_signals=False,
        strip=False,
        upx=True,
        console=False,
        disable_windowed_traceback=False,
        argv_emulation=False,
        target_arch=None,
    )
class TextToSpeechThread(QThread):
    """Worker thread that converts queued text into speech audio.

    Text is queued with :meth:`add_text`; the thread sends it to the
    configured speech engine and emits the returned audio bytes through
    ``speech_available``. While a request is in flight a timer emits an
    estimated completion percentage through ``progress_available``, based on
    a moving average of previous request durations.
    """

    speech_available = pyqtSignal(object)
    progress_available = pyqtSignal(int)
    start_progress = pyqtSignal()
    stop_progress = pyqtSignal()

    # Upper bound (seconds) for one HTTP request so that a stalled connection
    # cannot block the worker thread forever.
    REQUEST_TIMEOUT_S = 60

    def __init__(self, parent=None):
        super().__init__(parent)
        self.input_queue = queue.Queue()
        self.running = False
        # API keys are loaded lazily from settings on first use
        self.openai_api_key = None
        self.elevenlabs_api_key = None
        self.last_run_time_ms = 1000
        # moving average (ms) of request duration, used to estimate progress
        self.run_time_avg_moving_window = 500
        self.current_run_time_start = time.time()
        self.progressTimer = QTimer()
        self.progressTimer.timeout.connect(self.progressCallback)
        # the timer is driven through signals so that start/stop requests
        # issued from run() are delivered via Qt's signal/slot mechanism
        self.start_progress.connect(self.progressTimer.start)
        self.stop_progress.connect(self.progressTimer.stop)
        self.speech_engine = "OpenAI"

    def add_text(self, text):
        """Queue ``text`` for synthesis."""
        self.input_queue.put(text)

    def stop(self):
        """Ask the worker loop to exit after the current item."""
        self.running = False

    def run(self):
        """Process queued text until :meth:`stop` is called.

        The loop is gated on ``self.running`` (it used to be ``while True``,
        so ``stop()`` had no effect), matching the other worker threads in
        this project.
        """
        self.running = True
        while self.running:
            # Wait briefly for the next text; the timeout keeps the loop
            # responsive to stop() without busy waiting.
            try:
                text = self.input_queue.get(timeout=0.1)
            except queue.Empty:
                continue

            if not text:
                # nothing to synthesize (None or empty string)
                continue

            self.current_run_time_start = time.time()
            self.start_progress.emit()

            # Time the synthesis operation
            start_time = time.time()

            if self.speech_engine == "OpenAI":
                self.synthesize_speech_openai(text)
            else:
                logger.error(f"Unknown speech engine: {self.speech_engine}")
                # stop the progress timer that was just started before bailing
                self.stop_progress.emit()
                self.progress_available.emit(0)
                self.running = False
                return

            end_time = time.time()

            self.stop_progress.emit()
            self.progress_available.emit(0)

            # prevent 0 time
            self.last_run_time_ms = max(100, (end_time - start_time) * 1000)
            # exponential moving average: 90% history, 10% latest sample
            self.run_time_avg_moving_window = (
                self.run_time_avg_moving_window * 0.9
            ) + (self.last_run_time_ms * 0.1)

    def _request_speech(self, provider, url, api_key, payload):
        """POST ``payload`` to ``url`` and emit the audio bytes on success.

        Returns ``None`` on success, or an ``"Error: ..."`` string when the
        request fails (network error, timeout, or non-200 status).
        """
        try:
            response = requests.post(
                url,
                headers={
                    "Authorization": f"Bearer {api_key}",
                    "Content-Type": "application/json",
                },
                json=payload,
                timeout=self.REQUEST_TIMEOUT_S,
            )
        except requests.RequestException:
            # a network failure must not kill the worker thread
            logger.exception(f"{provider} API request failed")
            return f"Error: {provider} API request failed"
        if response.status_code != 200:
            logger.error(f"{provider} API request failed: {response.status_code}")
            return f"Error: {provider} API request failed"
        # the response should be a .mp3 file
        self.speech_available.emit(response.content)
        return None

    def synthesize_speech_openai(self, text):
        """Synthesize ``text`` with the OpenAI speech endpoint.

        Returns ``None`` when the audio was emitted or the API key is missing,
        and an error string when the request failed.
        """
        if self.openai_api_key is None:
            self.openai_api_key = fetch_data("settings.json", "settings", {}).get(
                "openai_api_key"
            )
        if self.openai_api_key is None:
            logger.error("OpenAI API key not found")
            return
        data = {"model": "tts-1", "input": text, "voice": "alloy"}
        return self._request_speech(
            "OpenAI",
            "https://api.openai.com/v1/audio/speech",
            self.openai_api_key,
            data,
        )

    def synthesize_speech_elevenlabs(self, text):
        """Synthesize ``text`` with the ElevenLabs endpoint.

        Returns ``None`` when the audio was emitted or the API key is missing,
        and an error string when the request failed.
        """
        if self.elevenlabs_api_key is None:
            self.elevenlabs_api_key = fetch_data("settings.json", "settings", {}).get(
                "elevenlabs_api_key"
            )
        if self.elevenlabs_api_key is None:
            logger.error("Elevenlabs API key not found")
            return
        data = {"text": text}
        # NOTE(review): run() never dispatches to this method, and the URL and
        # Bearer-token header should be confirmed against the current
        # ElevenLabs API reference before this engine is enabled.
        return self._request_speech(
            "Elevenlabs",
            "https://api.eleven-labs.com/text-to-speech/v1/synthesize",
            self.elevenlabs_api_key,
            data,
        )

    def progressCallback(self):
        """Emit the estimated progress (0-100) of the request in flight."""
        # time in ms since the start of the current synthesis
        current_run_time_elapsed = (time.time() - self.current_run_time_start) * 1000
        # progress relative to the average duration, capped at 100%
        progress = min(
            100, int(current_run_time_elapsed / self.run_time_avg_moving_window * 100)
        )
        self.progress_available.emit(progress)
def get_all_text_sources(obs_client: obs.ReqClient):
    """Return the scene items that are text inputs exposing a ``text`` setting.

    Args:
        obs_client: connected OBS request client.

    Returns:
        A list of scene-item dicts (as produced by ``get_all_sources``), or
        ``None`` when the sources could not be listed or an OBS request
        failed. This matches the "log and return None" contract of the other
        helpers in this module; previously a failing ``get_input_settings``
        call raised into the caller.
    """
    sources = get_all_sources(obs_client)
    if sources is None:
        return None
    try:
        text_sources = []
        for source in sources:
            # items without an input kind stringify to "None" and are skipped
            if not str(source.get("inputKind")).startswith("text_"):
                continue
            source_settings = obs_client.get_input_settings(
                source["sourceName"]
            ).input_settings
            # keep only sources that actually carry a text value
            if "text" in source_settings:
                text_sources.append(source)
        return text_sources
    except Exception:
        logger.exception("Error: unable to get all text sources")
        return None
class OBSPoller(QThread):
    """Thread that polls an OBS text source and queues its content on change.

    Args:
        obs_client: connected OBS request client.
        obs_source_name: name of the source whose ``text`` setting is polled.
        queue: queue that receives each new, non-empty text value.
        polling_freq: delay between polls in milliseconds. Numeric strings
            are accepted because the settings dialog stores this value as
            text.
    """

    def __init__(
        self,
        obs_client: obs.ReqClient,
        obs_source_name: str,
        queue: Queue,
        polling_freq=1000,
    ):
        super().__init__()
        self.obs_client = obs_client
        self.obs_source_name = obs_source_name
        self.queue = queue
        self.polling_freq = polling_freq
        self.running = False
        # last value pushed to the queue, used to suppress duplicates
        self.last_content = None

    def stop(self):
        """Ask the polling loop to exit after the current iteration."""
        self.running = False

    def run(self):
        """Poll the source until stopped or until the source disappears."""
        logger.info("OBS polling thread started")
        self.running = True
        while self.running:
            try:
                # get the value of the source
                source = get_source_by_name(self.obs_client, self.obs_source_name)
                if source is None:
                    logger.error(f"Source {self.obs_source_name} not found")
                    break
                source_settings = self.obs_client.get_input_settings(
                    source["sourceName"]
                ).input_settings
                source_content = (
                    source_settings["text"] if "text" in source_settings else None
                )
                # only forward non-empty content that differs from the last one
                if source_content and source_content != self.last_content:
                    self.queue.put_nowait(source_content)
                    self.last_content = source_content
            except Exception as e:
                # keep polling: a transient OBS error should not end the thread
                logger.exception(f"Error: {e}")
            # float() tolerates a frequency passed as a numeric string
            time.sleep(float(self.polling_freq) / 1000)
        # reflect the real state when the loop ended through `break`
        self.running = False
        logger.info("OBS polling thread stopped")
class AudioRecorder(QtCore.QThread):
    """Thread that captures audio from a file or an input device.

    Audio is read in blocks of ``block_read_freq_ms`` milliseconds. Blocks
    are buffered until ``chunk_size_ms`` worth of audio is available, then
    the concatenated mono chunk is emitted through ``data_available``.
    ``progress_and_volume`` reports ``(buffered_ms, peak_magnitude)`` after
    each read.

    Args:
        audio_source: file path or device to capture from.
        chunk_size_ms: amount of audio (ms) per emitted chunk.
        fs: sample rate for device capture; replaced by the file's own rate
            for file sources.
        channels: number of device channels to open.
        dtype: sample type requested from the device.
    """

    data_available = QtCore.pyqtSignal(np.ndarray)
    progress_and_volume = QtCore.pyqtSignal(tuple)

    def __init__(
        self,
        audio_source: AudioSource,
        chunk_size_ms,
        fs=44100,
        channels=1,
        dtype="float32",
    ):
        super().__init__()
        self.chunk_size_ms = chunk_size_ms
        self.fs = fs
        self.channels = channels
        self.dtype = dtype
        self.stream = None
        self.audio_source = audio_source
        self.block_read_freq_ms = 33  # 33ms
        # Whole number of blocks per chunk. Rounding up keeps the capacity the
        # queue effectively had with the previous fractional maxsize.
        self.number_of_blocks = max(
            1, int(np.ceil(chunk_size_ms / self.block_read_freq_ms))
        )
        self.q = queue.Queue(maxsize=self.number_of_blocks)
        self.soundfile = None
        self.running = False
        self.last_run_time = time.time()
        self.output_queue = None

    @staticmethod
    def _to_mono(data):
        """Average the channels of a (frames, channels) block into one."""
        if len(data.shape) > 1:
            return np.mean(data, axis=1)
        return data

    def _emit_buffered_chunk(self):
        """Drain the block queue and emit its content as one chunk."""
        blocks = []
        while True:
            try:
                blocks.append(self.q.get_nowait())
            except queue.Empty:
                break
        if blocks:
            self.data_available.emit(np.concatenate(blocks, axis=0))

    def run(self) -> None:
        """Read blocks at a fixed cadence until stopped or the file ends."""
        self.running = True
        block_period_s = float(self.block_read_freq_ms) / 1000.0
        while self.running:
            # check if enough time passed since the last run
            if (time.time() - self.last_run_time) < block_period_s:
                # sleep to avoid busy waiting
                time.sleep(0.001)
                continue
            self.last_run_time = time.time()

            magnitude = 0
            new_data = False
            if self.audio_source.sourceType == AudioSource.SourceType.FILE:
                if self.soundfile is None:
                    logger.error("Soundfile is not initialized")
                    break
                # read a block of data from the soundfile; stereo files are
                # downmixed exactly like device input
                data = self._to_mono(self.soundfile.read(self.read_size_frames()))
                if not len(data):
                    # End of file: flush what is buffered and finish instead
                    # of spinning and logging this warning on every tick.
                    logger.warning("File data is empty. End of file?")
                    self._emit_buffered_chunk()
                    break
                magnitude = np.max(np.abs(data))
                self.q.put_nowait(data)
                new_data = True
            elif self.audio_source.sourceType == AudioSource.SourceType.DEVICE:
                while (
                    self.stream.read_available >= self.read_size_frames()
                    and not self.q.full()
                ):
                    # read a block of data from the sounddevice
                    data, overflowed = self.stream.read(self.read_size_frames())
                    # merge the channels by averaging
                    data = self._to_mono(data)
                    if overflowed:
                        logger.warning(f"Overflowed (got {len(data)})")
                    magnitude = np.max(np.abs(data))
                    self.q.put_nowait(data)
                    new_data = True
            else:
                logger.error("Unknown audio source type")
                break

            if new_data:
                # emit the buffered duration in milliseconds and the volume
                # of the latest block
                self.progress_and_volume.emit(
                    (self.q.qsize() * self.block_read_freq_ms, magnitude)
                )
                # once a full chunk is buffered, emit it in one piece
                if self.q.full():
                    self._emit_buffered_chunk()

        self.running = False
        logger.info("Audio capture thread stopped")

    def start(self):
        """Open the audio source and start the capture thread."""
        logger.info(
            f"Starting audio capture with {self.fs} Hz, {self.channels} channels, and {self.dtype} data type"
        )
        # if this is a file source, stream the file progressively with soundfile
        if self.audio_source.sourceType == AudioSource.SourceType.FILE:
            logger.info(f"Opening file {self.audio_source.sourceName}")
            self.soundfile = sf.SoundFile(self.audio_source.sourceName)
            # the file dictates the sample rate
            self.fs = self.soundfile.samplerate
            logger.debug(f"File info: {self.soundfile}")

        # if this is a device source, stream the device with sounddevice
        elif self.audio_source.sourceType == AudioSource.SourceType.DEVICE:
            logger.info(f"Opening device {self.audio_source.sourceName}")
            self.stream = sd.InputStream(
                device=self.audio_source.sourceName,
                samplerate=self.fs,
                blocksize=self.read_size_frames(),
                channels=self.channels,
                dtype=self.dtype,
            )
            logger.info(f"Stream samplerate: {self.stream.samplerate}")
            self.stream.start()
        else:
            logger.error("Unknown audio source type")
            return

        super().start()

    def stop(self):
        """Stop capturing and release the file or device."""
        logger.info("Stopping audio capture")
        self.running = False
        # Let run() leave its loop before closing what it reads from; the
        # loop polls `running` every millisecond so this returns quickly.
        if self.isRunning():
            self.wait(1000)
        if self.soundfile:
            self.soundfile.close()
        if self.stream:
            self.stream.stop()
            # stop() alone keeps the device open; close() releases it
            self.stream.close()

    def read_size_frames(self):
        """Return the number of frames in one read block."""
        return int(self.fs * self.block_read_freq_ms / 1000)

    def get_chunk_size_frames(self):
        """Return the number of frames in one emitted chunk."""
        return int(self.fs * self.chunk_size_ms / 1000)

    @staticmethod
    def get_audio_devices() -> list[AudioSource]:
        """Return every device that has at least one input channel."""
        devices = sd.query_devices()
        if isinstance(devices, dict):
            # a single device is reported as a bare dict
            devices = [devices]
        sources = []
        for device in devices:
            if device["max_input_channels"] > 0:
                logger.debug(f"Audio device: {device}")
                sources.append(
                    AudioSource(
                        sourceName=device["name"],
                        sourceType=AudioSource.SourceType.DEVICE,
                    )
                )
        return sources
def linear_interpolate_audio(audio_frame, original_rate, target_rate):
    """Resample a mono audio signal by linear interpolation.

    Args:
        audio_frame: one-dimensional sequence of samples.
        original_rate: sample rate of ``audio_frame`` in Hz.
        target_rate: desired sample rate in Hz.

    Returns:
        A float64 array of ``int(len(audio_frame) / original_rate *
        target_rate)`` samples spanning the same time range. An empty input
        yields an empty array (previously ``np.interp`` raised ``ValueError``
        on it).
    """
    audio_frame = np.asarray(audio_frame)
    sample_count = audio_frame.shape[0]
    if sample_count == 0:
        # nothing to interpolate; np.interp rejects empty sample points
        return np.zeros(0, dtype=np.float64)

    # Calculate the duration of the audio in seconds
    duration = sample_count / original_rate

    # Calculate the number of samples in the resampled audio
    target_length = int(duration * target_rate)

    # Sample positions of the original signal and, on the same axis, the
    # evenly spaced positions at which the output is evaluated
    original_samples = np.arange(sample_count)
    target_samples = np.linspace(0, sample_count - 1, target_length)

    # Use numpy's interpolation function
    return np.interp(target_samples, original_samples, audio_frame)
class AudioTranscriber(QThread):
    """Thread that transcribes queued audio chunks with faster-whisper.

    Audio chunks are queued with :meth:`queue_audio_data`, resampled to
    16 kHz and transcribed; the text of the first resulting segment is
    emitted through ``text_available``.
    """

    text_available = QtCore.pyqtSignal(str)

    # UI label -> (model download info, short name used in log messages)
    _MODEL_SIZES = {
        "Tiny (75Mb)": (ModelDownloadInfo.FASTER_WHISPER_TINY_CT2, "tiny"),
        "Small (400Mb)": (ModelDownloadInfo.FASTER_WHISPER_SMALL_CT2, "small"),
        "Base (140Mb)": (ModelDownloadInfo.FASTER_WHISPER_BASE_CT2, "base"),
    }

    def __init__(self):
        super().__init__()
        self.input_queue = queue.Queue()
        self.model = None
        self.running = False
        self.language = None
        # Sample rate (Hz) of the queued audio. Defaults to the previously
        # hard-coded 44100; set it to the recorder's rate when that differs
        # (a file source adopts the file's own sample rate).
        self.input_sample_rate = 44100
        # check if model has been downloaded already
        checkAndDownloadModel(ModelDownloadInfo.FASTER_WHISPER_TINY_CT2)

    def set_language(self, language: str):
        """Select the transcription language.

        Accepts a language code, a language name, ``"Auto"`` or ``None``.
        ``"Auto"``, ``None`` and unknown values select auto-detection.
        """
        if language is None or language == "Auto":
            self.language = None
            return
        if language in LanguageCodes.getLanguageCodes():
            self.language = language
            return
        if language in LanguageCodes.getLanguageNames():
            self.language = LanguageCodes.getLanguageCode(language)
            return
        logger.error(f"Language {language} not found")
        self.language = None

    def _load_model(self, model_info, label):
        """Load the already-downloaded model ``model_info`` on the CPU."""
        self.model = WhisperModel(
            getAbsoluteModelPath(model_info),
            device="cpu",
            compute_type="int8",
        )
        logger.info(f"Model loaded: {label}")

    def set_model_size(self, model_size: str):
        """Download (if needed) and load the model for a UI size label.

        ``None`` is ignored; an unknown label is logged and leaves the
        current model unchanged.
        """
        if model_size is None:
            return
        entry = self._MODEL_SIZES.get(model_size)
        if entry is None:
            logger.error(f"Model size {model_size} not found")
            return
        model_info, label = entry
        checkAndDownloadModel(model_info)
        self._load_model(model_info, label)

    def stop(self):
        """Ask the transcription loop to exit after the current chunk."""
        self.running = False

    def run(self):
        """Transcribe queued audio until :meth:`stop` is called."""
        logger.info("Transcription thread started")
        if self.model is None:
            # Fall back to the tiny model. No download dialog is shown here
            # because this code runs outside the GUI thread. The log label is
            # now "tiny", matching what set_model_size logs for this model.
            self._load_model(ModelDownloadInfo.FASTER_WHISPER_TINY_CT2, "tiny")

        self.running = True
        while self.running:
            # the timeout keeps the loop responsive to stop() without
            # busy waiting
            try:
                audio_data = self.input_queue.get(timeout=0.1)
            except queue.Empty:
                continue
            if audio_data is None or len(audio_data) == 0:
                continue

            try:
                # resample the audio data to 16kHz
                resampled_audio_data = linear_interpolate_audio(
                    audio_data, self.input_sample_rate, 16000
                ).astype(np.float32)

                # transcribe the audio data
                segments, _ = self.model.transcribe(
                    resampled_audio_data,
                    language=self.language,
                    max_new_tokens=40,
                    vad_filter=True,
                    vad_parameters=dict(min_silence_duration_ms=500),
                    temperature=0.0,
                )
                # Only the first segment is used, so take just that one
                # instead of decoding every segment of the lazy iterator.
                segment = next(iter(segments), None)
            except Exception:
                # a bad chunk must not terminate the transcription thread
                logger.exception("Error: transcription failed")
                continue

            if segment is None:
                logger.debug("No segments found")
                continue

            repetition = find_point_of_repetition(segment.text)
            result_text = segment.text.strip()
            if repetition:
                # keep only the words before the repeated sequence restarts
                result_text = " ".join(segment.text.split()[: repetition[1]])

            self.text_available.emit(result_text)

        logger.info("Transcription thread stopped")

    def queue_audio_data(self, audio_data):
        """Queue one chunk of audio samples for transcription."""
        self.input_queue.put_nowait(audio_data)
./.github/workflows/check-format.yaml 12 | permissions: 13 | contents: read 14 | 15 | build: 16 | needs: check-format 17 | strategy: 18 | matrix: 19 | os: [macos-latest, windows-latest] # ubuntu-latest, 20 | include: 21 | - os: macos-latest 22 | python-version: '3.11' 23 | target: macos-x86 24 | runs-on: macos-12 25 | - os: ubuntu-latest 26 | python-version: '3.11' 27 | target: linux 28 | runs-on: ubuntu-latest 29 | - os: windows-latest 30 | python-version: '3.11' 31 | target: windows 32 | runs-on: windows-latest 33 | 34 | runs-on: ${{ matrix.runs-on }} 35 | 36 | steps: 37 | - uses: actions/checkout@v4 38 | 39 | - name: Set up Python 40 | if: matrix.os != 'windows-latest' 41 | uses: actions/setup-python@v5 42 | with: 43 | python-version: ${{ matrix.python-version }} 44 | 45 | - name: Install dependencies for Linux 46 | if: matrix.os == 'ubuntu-latest' 47 | run: | 48 | sudo apt-get update 49 | sudo apt-get install -y portaudio19-dev 50 | pip install --upgrade setuptools wheel 51 | 52 | - name: Install dependencies for MacOS 53 | if: matrix.os == 'macos-latest' || matrix.os == 'macos-latest-xlarge' 54 | run: | 55 | brew install portaudio 56 | 57 | - name: Install dependencies 58 | run: | 59 | python -m pip install -r requirements.txt 60 | 61 | 62 | - name: Import Apple Certificate 63 | if: matrix.os == 'macos-latest' || matrix.os == 'macos-latest-xlarge' && github.runner != 'self-hosted' 64 | run: | 65 | if security list-keychains | grep -q "github_build.keychain"; then 66 | security delete-keychain github_build.keychain 67 | fi 68 | security create-keychain -p "" github_build.keychain 69 | security default-keychain -s github_build.keychain 70 | security set-keychain-settings -lut 21600 github_build.keychain 71 | echo "${{ secrets.APPLE_CERTIFICATE }}" | base64 --decode > apple_certificate.p12 72 | security import apple_certificate.p12 -k github_build.keychain -P "${{ secrets.APPLE_CERTIFICATE_PASSWORD }}" \ 73 | -t cert -f pkcs12 -T /usr/bin/codesign -T 
/usr/bin/security -T /usr/bin/xcrun 74 | security unlock-keychain -p "" github_build.keychain 75 | security set-key-partition-list -S 'apple-tool:,apple:' -s -k "" github_build.keychain 76 | security list-keychain -d user -s github_build.keychain 'login-keychain' 77 | env: 78 | APPLE_CERTIFICATE: ${{ secrets.APPLE_CERTIFICATE }} 79 | APPLE_CERTIFICATE_PASSWORD: ${{ secrets.APPLE_CERTIFICATE_PASSWORD }} 80 | 81 | - name: Unlock keychain on Mac 82 | if: matrix.os == 'macos-latest' || matrix.os == 'macos-latest-xlarge' 83 | run: | 84 | security unlock-keychain -p "" github_build.keychain 85 | security set-key-partition-list -S apple-tool:,apple: -k "" -D "Developer" -t private github_build.keychain 86 | 87 | - name: List available signing identities 88 | if: matrix.os == 'macos-latest' || matrix.os == 'macos-latest-xlarge' 89 | run: | 90 | security find-identity -v -p codesigning 91 | 92 | # write a .env file with the secrets 93 | - name: Write .env file Mac & Linux 94 | if: matrix.os != 'windows-latest' 95 | run: | 96 | echo "LOCAL_RELEASE_TAG=${GITHUB_REF_NAME}" >> .env 97 | echo "LOCAL_RELEASE_DATE=$(date -u +"%Y-%m-%dT%H:%M:%SZ")" >> .env 98 | echo "KMP_DUPLICATE_LIB_OK=TRUE" >> .env 99 | 100 | - name: Write .env file Windows 101 | if: matrix.os == 'windows-latest' 102 | run: | 103 | @" 104 | LOCAL_RELEASE_TAG=$env:GITHUB_REF_NAME 105 | LOCAL_RELEASE_DATE=$(Get-Date -Format 'yyyy-MM-ddTHH:mm:ssZ') 106 | KMP_DUPLICATE_LIB_OK=TRUE 107 | "@ | Out-File -FilePath .env -Encoding ASCII 108 | shell: pwsh 109 | 110 | - name: Build with PyInstaller (MacOS) 111 | if: matrix.os == 'macos-latest' || matrix.os == 'macos-latest-xlarge' 112 | run: | 113 | pyinstaller --clean --noconfirm lexisynth.spec -- --mac_osx 114 | env: 115 | APPLE_APP_DEVELOPER_ID: ${{ secrets.APPLE_APP_DEVELOPER_ID }} 116 | 117 | - name: Build with PyInstaller (Windows) 118 | if: matrix.os == 'windows-latest' 119 | run: | 120 | pyinstaller --clean --noconfirm lexisynth.spec -- --win 121 | 122 | - name: 
Build with PyInstaller (Linux) 123 | if: matrix.os == 'ubuntu-latest' 124 | run: | 125 | pyinstaller --clean --noconfirm lexisynth.spec 126 | 127 | - name: Zip Application for Notarization 128 | if: matrix.os == 'macos-latest' && github.event_name != 'pull_request' 129 | run: | 130 | ditto -c -k --keepParent dist/lexisynth.app lexisynth.zip 131 | 132 | - name: Notarize and Staple 133 | if: matrix.os == 'macos-latest' && github.event_name != 'pull_request' 134 | run: | 135 | xcrun notarytool submit lexisynth.zip --apple-id \ 136 | "${{ secrets.APPLE_DEVELOPER_ID_USER }}" --password \ 137 | "${{ secrets.APPLE_DEVELOPER_ID_PASSWORD }}" --team-id \ 138 | "${{ secrets.APPLE_DEVELOPER_ID_TEAM }}" --wait --verbose 139 | chmod 755 dist/lexisynth.app 140 | xcrun stapler staple dist/lexisynth.app 141 | 142 | - name: Verify Notarization 143 | if: matrix.os == 'macos-latest' && github.event_name != 'pull_request' 144 | run: | 145 | spctl -a -v dist/lexisynth.app 146 | rm lexisynth.zip 147 | 148 | - name: Compile .ISS to .EXE Installer 149 | if: matrix.os == 'windows-latest' 150 | uses: Minionguyjpro/Inno-Setup-Action@v1.2.4 151 | with: 152 | path: lexisynth.iss 153 | options: /O+ 154 | 155 | - name: Create tar Linux 156 | if: matrix.os == 'ubuntu-latest' 157 | # strip the folder name from the tar 158 | run: | 159 | chmod a+x dist/lexisynth 160 | tar -cvf lexisynth.tar -C dist lexisynth 161 | 162 | - name: Create dmg MacOS 163 | if: matrix.os == 'macos-latest' || matrix.os == 'macos-latest-xlarge' 164 | run: | 165 | chmod a+x dist/lexisynth.app 166 | hdiutil create -volname "LexiSynth" -srcfolder dist/lexisynth.app -ov -format UDZO lexisynth.dmg 167 | 168 | - name: Create zip on Windows 169 | if: matrix.os == 'windows-latest' 170 | run: | 171 | Compress-Archive -Path "dist/lexisynth-setup.exe" -DestinationPath "./lexisynth.zip" 172 | shell: pwsh 173 | 174 | - name: Upload artifact 175 | uses: actions/upload-artifact@v4 176 | with: 177 | name: lexisynth-${{ matrix.target }} 178 
class SettingsDialog(QDialog):
    """Dialog for editing the application settings.

    Settings are read from and written to the ``settings`` entry of
    ``settings.json``. When the dialog is accepted the settings are saved
    and ``settingsChanged`` is emitted with the saved dictionary.
    """

    settingsChanged = pyqtSignal(dict)

    def __init__(self, page=None, parent=None):
        """Build the dialog.

        Args:
            page: optional index of the tab to show initially.
            parent: optional parent widget.
        """
        super(SettingsDialog, self).__init__(parent)

        loadUi(
            path.abspath(path.join(path.dirname(__file__), "settings_dialog.ui")), self
        )

        # select the page if provided in tabWidget
        if page is not None:
            self.tabWidget.setCurrentIndex(page)

        # load data from settings
        self.loadSettings()

        # if dialog is accepted, save the settings
        self.accepted.connect(self.saveSettings)

        self.toolButton_selectLLMFolder.clicked.connect(
            lambda: self.selectFolderForLineEdit(self.lineEdit_localLLMFolder)
        )
        self.toolButton_outputsFolderSelect.clicked.connect(
            lambda: self.selectFolderForLineEdit(self.lineEdit_outputsFolder)
        )
        self.comboBox_localLLMSelect.currentIndexChanged.connect(
            self.localLLMSelectChanged
        )
        self.pushButton_obsTestConnection.clicked.connect(self.testObsConnection)
        # polling frequencies are restricted to 100..100000 (milliseconds)
        self.lineEdit_inputFilePollingFreq.setValidator(
            QtGui.QIntValidator(100, 100000, self)
        )
        self.lineEdit_obsPollingFreq.setValidator(
            QtGui.QIntValidator(100, 100000, self)
        )

    def localLLMSelectChanged(self, index):
        """React to a change of the local LLM selection.

        The folder widgets are only enabled for the "Custom" entry. Selecting
        "M2M-100 Translation" offers to download the model when it is
        missing; if the download is cancelled or the model is still missing
        afterwards, the selection is reset to the first entry.
        """
        selection = self.comboBox_localLLMSelect.currentText()
        is_custom = selection == "Custom"
        self.lineEdit_localLLMFolder.setEnabled(is_custom)
        self.toolButton_selectLLMFolder.setEnabled(is_custom)
        if is_custom or selection != "M2M-100 Translation":
            return

        # check if model has been downloaded already
        if checkForModelDownload(ModelDownloadInfo.M2M_100):
            return
        # show the download dialog
        modelDownloadDialog = ModelDownloadDialog(ModelDownloadInfo.M2M_100, self)
        if modelDownloadDialog.exec() == QDialog.DialogCode.Rejected:
            # the download was cancelled: fall back to the first entry
            self.comboBox_localLLMSelect.setCurrentIndex(0)
            return

        if not checkForModelDownload(ModelDownloadInfo.M2M_100):
            # the model was not downloaded: fall back to the first entry
            self.comboBox_localLLMSelect.setCurrentIndex(0)
            return

    def selectFolderForLineEdit(self, lineEdit):
        """Let the user pick a folder and write it into ``lineEdit``.

        The dialog starts at the folder currently shown in the line edit;
        cancelling leaves the text unchanged.
        """
        folder = lineEdit.text()
        folder = QFileDialog.getExistingDirectory(self, "Select a folder", folder)
        if folder:
            lineEdit.setText(folder)

    def loadSettings(self):
        """Populate the widgets from the stored settings.

        When no outputs folder is stored, a default under the user data
        directory is chosen and written back to storage.
        """
        settings = fetch_data("settings.json", "settings", {})
        self.lineEdit_localLLMFolder.setText(settings.get("local_llm_folder", ""))
        self.lineEdit_openaiapikey.setText(settings.get("openai_api_key", ""))
        self.lineEdit_deeplapikey.setText(settings.get("deepl_api_key", ""))
        self.lineEdit_obsHost.setText(settings.get("obs_host", "localhost"))
        self.lineEdit_obsPort.setText(settings.get("obs_port", "4455"))
        self.lineEdit_obsPassword.setText(settings.get("obs_password", ""))
        self.lineEdit_obsPollingFreq.setText(settings.get("obs_polling_freq", "1000"))
        self.lineEdit_inputFilePollingFreq.setText(
            settings.get("input_file_polling_freq", "1000")
        )
        self.lineEdit_elevenlabsAPIKey.setText(settings.get("elevenlabs_api_key", ""))

        if settings.get("local_llm_select") is not None:
            self.comboBox_localLLMSelect.setCurrentIndex(
                settings.get("local_llm_select")
            )

        if settings.get("outputs_folder", "") == "":
            settings["outputs_folder"] = path.join(
                user_data_dir("lexisynth"), "outputs"
            )
            store_data("settings.json", "settings", settings)
        self.lineEdit_outputsFolder.setText(settings.get("outputs_folder", ""))

    def saveSettings(self):
        """Store the widget values and emit ``settingsChanged``.

        The settings dictionary is rebuilt from scratch: the outputs folder
        is always stored, every other text value only when it is not empty,
        and the local LLM selection only when it is not the first entry.
        """
        settings = {"outputs_folder": self.lineEdit_outputsFolder.text()}
        if self.lineEdit_localLLMFolder.text() != "":
            settings["local_llm_folder"] = self.lineEdit_localLLMFolder.text()
        if self.lineEdit_openaiapikey.text() != "":
            settings["openai_api_key"] = self.lineEdit_openaiapikey.text()
        if self.lineEdit_deeplapikey.text() != "":
            settings["deepl_api_key"] = self.lineEdit_deeplapikey.text()
        if self.lineEdit_obsHost.text() != "":
            settings["obs_host"] = self.lineEdit_obsHost.text()
        if self.lineEdit_obsPort.text() != "":
            settings["obs_port"] = self.lineEdit_obsPort.text()
        # FIX: this tested label_obsPollingFreq (the caption, never empty),
        # so an empty frequency was stored as "" instead of being omitted.
        if self.lineEdit_obsPollingFreq.text() != "":
            settings["obs_polling_freq"] = self.lineEdit_obsPollingFreq.text()
        if self.lineEdit_obsPassword.text() != "":
            settings["obs_password"] = self.lineEdit_obsPassword.text()
        if self.lineEdit_inputFilePollingFreq.text() != "":
            settings["input_file_polling_freq"] = (
                self.lineEdit_inputFilePollingFreq.text()
            )
        if self.comboBox_localLLMSelect.currentIndex() != 0:
            settings["local_llm_select"] = self.comboBox_localLLMSelect.currentIndex()
        if self.lineEdit_elevenlabsAPIKey.text() != "":
            settings["elevenlabs_api_key"] = self.lineEdit_elevenlabsAPIKey.text()

        store_data("settings.json", "settings", settings)

        # emit a signal to notify the main window that settings have changed
        self.settingsChanged.emit(settings)

    def testObsConnection(self):
        """Try to connect to OBS with the entered values and show the result."""
        obs_host = self.lineEdit_obsHost.text()
        obs_port = self.lineEdit_obsPort.text()
        obs_password = self.lineEdit_obsPassword.text()
        obs_client = open_obs_websocket(
            {"ip": obs_host, "port": obs_port, "password": obs_password}
        )
        if obs_client is not None:
            self.label_obsConnectionStatus.setText("Connection Successful")
            # close the connection
            disconnect_obs_websocket(obs_client)
        else:
            self.label_obsConnectionStatus.setText("Failed")
For more details on licensing, please contact <a href="mailto:lexisynth@scoresight.live"><span style=" text-decoration: underline; color:#007af4;">lexisynth@scoresight.live</span></a>.</p><p><span style=" font-weight:600;">Third-Party Software</span><br/>LexiSynth incorporates components from third-party sources under their respective licenses:</p><ul style="margin-top: 0px; margin-bottom: 0px; margin-left: 0px; margin-right: 0px; -qt-list-indent: 1;"><li style=" margin-top:12px; margin-bottom:0px; margin-left:0px; margin-right:0px; -qt-block-indent:0; text-indent:0px;">CTranslate2: <a href=" https://github.com/OpenNMT/CTranslate2"><span style=" text-decoration: underline; color:#007af4;">https://github.com/OpenNMT/CTranslate2</span></a> MIT License</li><li style=" margin-top:12px; margin-bottom:0px; margin-left:0px; margin-right:0px; -qt-block-indent:0; text-indent:0px;">faster-whisper: <a href=" https://github.com/SYSTRAN/faster-whisper"><span style=" text-decoration: underline; color:#007af4;">https://github.com/SYSTRAN/faster-whisper</span></a> MIT License</li><li style=" margin-top:12px; margin-bottom:12px; margin-left:0px; margin-right:0px; -qt-block-indent:0; text-indent:0px;"><a href=" https://huggingface.co/jncraton/m2m100_418M-ct2-int8"><span style=" text-decoration: underline; color:#007af4;">https://huggingface.co/jncraton/m2m100_418M-ct2-int8</span></a> MIT License</li></ul><p>Detailed licensing information for these components is included within the software distribution.</p><p><span style=" font-weight:700;">Qt Application Framework</span></p><p>This application uses the Qt application framework, which is a comprehensive C++ library for cross-platform development of GUI applications. Qt is used under the terms of the GNU Lesser General Public License (LGPL) version 3. 
Qt is a registered trademark of The Qt Company Ltd and is developed and maintained by The Qt Project and various contributors.</p><p>For more information about Qt, including source code of Qt libraries used by this application and guidance on how to obtain or replace Qt libraries, please visit the Qt Project's official website at <a href="http://www.qt.io/"><span style=" text-decoration: underline; color:#007af4;">http://www.qt.io</span></a>.</p><p>We are committed to ensuring compliance with the LGPL v3 license and support the principles of open source software development. If you have any questions or concerns regarding our use of Qt, please contact us directly.</p><p><span style=" font-weight:600;">Disclaimer of Warranty</span><br/>LexiSynth is provided &quot;AS IS&quot;, without warranty of any kind, express or implied, including but not limited to the warranties of merchantability, fitness for a particular purpose, and noninfringement. In no event shall Roy Shilkrot be liable for any claim, damages, or other liability, whether in an action of contract, tort or otherwise, arising from, out of, or in connection with the software or the use or other dealings in the software.</p><p><span style=" font-weight:600;">Limitation of Liability</span><br/>To the maximum extent permitted by applicable law, in no event will Roy Shilkrot, or its suppliers or licensors, be liable for any indirect, special, incidental, consequential, or punitive damages arising out of the use or inability to use LexiSynth, including, without limitation, damages for loss of goodwill, work stoppages, computer failure or malfunction, or any and all other commercial damages or losses, even if advised of the possibility thereof.</p><p><span style=" font-weight:600;">Contact Information</span><br/>For support, feedback, or more information, please visit <a href="https://scoresight.live/pages/lexisynth"><span style=" text-decoration: underline; 
class TranslationThread(QThread):
    """Worker thread that translates queued text with the selected engine.

    Text is taken from ``input_queue`` and translated with the engine chosen
    through ``setTranslationEngine`` ("Local LLM", "OpenAI API" or
    "DeepL API"); the result is emitted on ``text_available``. While a
    translation runs, ``progress_available`` reports an estimate based on a
    moving average of previous run times.
    """

    text_available = QtCore.pyqtSignal(str)
    progress_available = QtCore.pyqtSignal(int)
    start_progress = QtCore.pyqtSignal()
    stop_progress = QtCore.pyqtSignal()

    # seconds to wait for a remote translation API before giving up, so a
    # stalled connection cannot block the worker loop forever
    REQUEST_TIMEOUT_S = 30

    def __init__(self):
        super().__init__()
        self.input_queue = queue.Queue()
        self.translator = None
        self.tokenizer = None
        self.source_language = "English"
        self.target_language = "Spanish"
        self.running = False
        # The timer drives progressCallback while a translation is running.
        # It is started and stopped through the start_progress and
        # stop_progress signals emitted from run().
        self.progressTimer = QtCore.QTimer()
        self.progressTimer.timeout.connect(self.progressCallback)
        self.last_run_time_ms = 1000
        # moving average of the translation run time, in milliseconds
        self.run_time_avg_moving_window = 500
        self.current_run_time_start = time.time()
        self.start_progress.connect(self.progressTimer.start)
        self.stop_progress.connect(self.progressTimer.stop)
        self.translationEngine = None
        self.openai_api_key = None
        self.deepl_api_key = None

    def setTranslationEngine(self, translationEngine):
        """Select the engine used by ``run`` (compared by its display name)."""
        self.translationEngine = translationEngine

    def setupModel(self):
        """Load the local translation model and tokenizer named in the settings.

        ``local_llm_select == 1`` selects the M2M-100 model folder inside the
        user data directory; any other value uses the ``local_llm_folder``
        setting.

        Returns:
            True when the translator and tokenizer were created, False (after
            logging the reason) when the selection or the folder is missing.
        """
        settings = fetch_data("settings.json", "settings", {})
        local_llm_select = settings.get("local_llm_select")
        if local_llm_select is None:
            logger.error("Local LLM select is not set")
            return False

        if local_llm_select == 1:
            model_path = path.join(
                user_data_dir("lexisynth"),
                ModelDownloadInfo.M2M_100["model_folder_name"],
            )
            if not path.exists(model_path):
                logger.error("M2M-100 model is not downloaded")
                return False
        else:
            model_path = settings.get("local_llm_folder")
            if model_path is None:
                logger.error("Custom Local LLM folder is not set")
                return False
            if not path.exists(model_path):
                logger.error("Custom Local LLM folder does not exist")
                return False

        self.translator = ctranslate2.Translator(model_path)
        self.tokenizer = spm.SentencePieceProcessor(
            path.join(model_path, "sentencepiece.bpe.model")
        )
        return True

    def setLanguages(self, source_language, target_language):
        """Set the source and target languages by their English names."""
        self.source_language = source_language
        self.target_language = target_language

    def stop(self):
        """Ask the worker loop in ``run`` to exit after its current iteration."""
        self.running = False

    def progressCallback(self):
        """Emit the estimated progress (0-100) of the running translation."""
        # time in ms since the start of the current translation
        current_run_time_elapsed = (time.time() - self.current_run_time_start) * 1000
        # progress relative to the average run time, capped at 100 percent
        progress = min(
            100, int(current_run_time_elapsed / self.run_time_avg_moving_window * 100)
        )
        self.progress_available.emit(progress)

    def translateLocalLLM(self, text):
        """Translate ``text`` with the loaded CTranslate2 model.

        Requires ``setupModel`` to have succeeded. The source and target
        language codes are passed to the model as ``__code__`` tokens.
        """
        src_language_code = LanguageCodes.getLanguageCode(self.source_language)
        tgt_language_code = LanguageCodes.getLanguageCode(self.target_language)

        source = [f"__{src_language_code}__"] + self.tokenizer.EncodeAsPieces(
            text, add_eos=True
        )
        results = self.translator.translate_batch(
            [source], target_prefix=[[f"__{tgt_language_code}__"]]
        )
        # the first output token is the target language prefix, drop it
        output_tokens = results[0].hypotheses[0][1:]
        return self.tokenizer.Decode(output_tokens)

    def _postJson(self, service, url, headers, payload):
        """POST ``payload`` as JSON and return the decoded response body.

        Returns None after logging when the request cannot be sent, times
        out, answers with a status other than 200, or the body is not valid
        JSON. ``service`` is only used in the log messages.
        """
        try:
            response = requests.post(
                url, headers=headers, json=payload, timeout=self.REQUEST_TIMEOUT_S
            )
        except requests.RequestException as error:
            logger.error(f"{service} API request failed: {error}")
            return None
        if response.status_code != 200:
            logger.error(f"{service} API request failed: {response.status_code}")
            logger.error(response.text)
            return None
        try:
            return response.json()
        except ValueError:
            logger.error(f"{service} API response is not valid JSON")
            return None

    def translateOpenAI(self, text):
        """Translate ``text`` through the OpenAI chat completions API.

        Returns the translated text, or a string starting with "Error:" when
        the API key is missing or the request fails.
        """
        if self.openai_api_key is None:
            self.openai_api_key = fetch_data("settings.json", "settings", {}).get(
                "openai_api_key"
            )
        if not self.openai_api_key:
            logger.error("OpenAI API key is not set")
            return "Error: OpenAI API key is not set"
        # build API request
        data = {
            "model": "gpt-3.5-turbo",
            "messages": [
                {
                    "role": "user",
                    "content": f"translate from {self.source_language} to {self.target_language}: {text}",
                }
            ],
        }
        response_json = self._postJson(
            "OpenAI",
            "https://api.openai.com/v1/chat/completions",
            {
                "Authorization": f"Bearer {self.openai_api_key}",
                "Content-Type": "application/json",
            },
            data,
        )
        if response_json is None:
            return "Error: OpenAI API request failed"
        if "choices" not in response_json or len(response_json["choices"]) == 0:
            logger.error("OpenAI API response is empty")
            return "Error: OpenAI API response is empty"
        return response_json["choices"][0]["message"]["content"]

    def translateDeepL(self, text):
        """Translate ``text`` through the DeepL (free tier endpoint) API.

        Returns the translated text, or a string starting with "Error:" when
        the API key is missing or the request fails.
        """
        if self.deepl_api_key is None:
            self.deepl_api_key = fetch_data("settings.json", "settings", {}).get(
                "deepl_api_key"
            )
        if not self.deepl_api_key:
            logger.error("DeepL API key is not set")
            return "Error: DeepL API key is not set"
        # build API request
        data = {
            "text": [text],
            "source_lang": LanguageCodes.getLanguageCode(self.source_language),
            "target_lang": LanguageCodes.getLanguageCode(self.target_language),
        }
        response_json = self._postJson(
            "DeepL",
            "https://api-free.deepl.com/v2/translate",
            {
                "Authorization": f"DeepL-Auth-Key {self.deepl_api_key}",
                "Content-Type": "application/json",
                "User-Agent": "LexiSynth/1.0 (+https://scoresight.live/lexisynth)",
                "Accept": "application/json",
            },
            data,
        )
        if response_json is None:
            return "Error: DeepL API request failed"
        if (
            "translations" not in response_json
            or len(response_json["translations"]) == 0
        ):
            logger.error("DeepL API response is empty")
            return "Error: DeepL API response is empty"
        return response_json["translations"][0]["text"]

    def _translate(self, text):
        """Dispatch ``text`` to the selected engine.

        Returns the engine's output, or None (after logging) when the thread
        cannot continue: the local model cannot be set up or the engine name
        is unknown.
        """
        if self.translationEngine == "Local LLM":
            if self.translator is None or self.tokenizer is None:
                if not self.setupModel():
                    logger.error(
                        "Cannot start translation thread, model is not set up"
                    )
                    return None
            return self.translateLocalLLM(text)
        if self.translationEngine == "OpenAI API":
            return self.translateOpenAI(text)
        if self.translationEngine == "DeepL API":
            return self.translateDeepL(text)
        logger.error(f"Unknown translation engine: {self.translationEngine}")
        return None

    def run(self):
        """Translate queued texts until ``stop`` is called or a fatal error occurs."""
        if self.translationEngine is None:
            logger.error("Translation engine is not set")
            self.running = False
            return

        logger.info("Translation thread started")
        self.running = True
        while self.running:
            # Wait briefly for the next text; the timeout keeps the loop
            # responsive to stop() without busy waiting.
            try:
                text = self.input_queue.get(timeout=0.1)
            except queue.Empty:
                continue

            if text is None:
                continue

            self.current_run_time_start = time.time()
            self.start_progress.emit()

            # Time the translation operation. The progress indicator is
            # always stopped and reset, including on fatal errors and
            # exceptions, so the timer never keeps running after the thread
            # has given up.
            start_time = time.time()
            try:
                output_text = self._translate(text)
            finally:
                end_time = time.time()
                self.stop_progress.emit()
                self.progress_available.emit(0)

            if output_text is None:
                self.running = False
                return

            # prevent 0 time
            self.last_run_time_ms = max(100, (end_time - start_time) * 1000)
            # exponential moving average: 90% history, 10% latest run
            self.run_time_avg_moving_window = (
                self.run_time_avg_moving_window * 0.9
            ) + (self.last_run_time_ms * 0.1)

            # Emit the translated text
            self.text_available.emit(output_text)

        logger.info("Translation thread stopped")
Polling (ms) 86 | 87 | 88 | 89 | 90 | 91 | 92 | 1000 93 | 94 | 95 | 96 | 97 | 98 | 99 | 100 | LLM 101 | 102 | 103 | 104 | QFormLayout::ExpandingFieldsGrow 105 | 106 | 107 | 108 | 109 | Local LLM Folder 110 | 111 | 112 | 113 | 114 | 115 | 116 | 117 | 0 118 | 119 | 120 | 0 121 | 122 | 123 | 0 124 | 125 | 126 | 0 127 | 128 | 129 | 130 | 131 | false 132 | 133 | 134 | 135 | 136 | 137 | 138 | ... 139 | 140 | 141 | 142 | 143 | 144 | 145 | 146 | 147 | 148 | OpenAI API Key 149 | 150 | 151 | 152 | 153 | 154 | 155 | QLineEdit::Password 156 | 157 | 158 | 159 | 160 | 161 | 162 | DeepL API Key 163 | 164 | 165 | 166 | 167 | 168 | 169 | QLineEdit::Password 170 | 171 | 172 | 173 | 174 | 175 | 176 | Local LLM 177 | 178 | 179 | 180 | 181 | 182 | 183 | 184 | 0 185 | 0 186 | 187 | 188 | 189 | 190 | Select Local LLM 191 | 192 | 193 | 194 | 195 | M2M-100 Translation 196 | 197 | 198 | 199 | 200 | Custom 201 | 202 | 203 | 204 | 205 | 206 | 207 | 208 | Qt::Vertical 209 | 210 | 211 | 212 | 20 213 | 40 214 | 215 | 216 | 217 | 218 | 219 | 220 | 221 | 222 | OBS 223 | 224 | 225 | 226 | 227 | 228 | Hostname 229 | 230 | 231 | 232 | 233 | 234 | 235 | localhost 236 | 237 | 238 | 239 | 240 | 241 | 242 | Port 243 | 244 | 245 | 246 | 247 | 248 | 249 | 4455 250 | 251 | 252 | 253 | 254 | 255 | 256 | Password 257 | 258 | 259 | 260 | 261 | 262 | 263 | QLineEdit::Password 264 | 265 | 266 | 267 | 268 | 269 | 270 | true 271 | 272 | 273 | Test Connection 274 | 275 | 276 | 277 | 278 | 279 | 280 | 281 | 0 282 | 0 283 | 284 | 285 | 286 | Not Connected 287 | 288 | 289 | 290 | 291 | 292 | 293 | Qt::Vertical 294 | 295 | 296 | 297 | 20 298 | 40 299 | 300 | 301 | 302 | 303 | 304 | 305 | 306 | 1000 307 | 308 | 309 | 310 | 311 | 312 | 313 | Polling Freq. 
class LanguageCodes:
    """Language-code constants plus lookups between codes and English names.

    All helpers are static and may be called on the class or on an instance.
    """

    ENGLISH = "en"
    FRENCH = "fr"
    SPANISH = "es"
    GERMAN = "de"
    ITALIAN = "it"
    DUTCH = "nl"
    PORTUGUESE = "pt"
    RUSSIAN = "ru"
    CHINESE = "zh"
    JAPANESE = "ja"
    KOREAN = "ko"
    ARABIC = "ar"
    HINDI = "hi"
    TURKISH = "tr"
    GREEK = "el"
    HEBREW = "he"
    POLISH = "pl"
    UKRAINIAN = "uk"
    CZECH = "cs"
    SLOVAK = "sk"
    BULGARIAN = "bg"
    ROMANIAN = "ro"
    HUNGARIAN = "hu"
    FINNISH = "fi"
    SWEDISH = "sv"
    DANISH = "da"
    NORWEGIAN = "no"
    ICELANDIC = "is"
    ESTONIAN = "et"
    LATVIAN = "lv"
    LITHUANIAN = "lt"
    MALTESE = "mt"
    CROATIAN = "hr"
    SERBIAN = "sr"
    BOSNIAN = "bs"
    SLOVENIAN = "sl"
    ALBANIAN = "sq"
    MACEDONIAN = "mk"
    MONTENEGRIN = "me"
    KURDISH = "ku"
    PERSIAN = "fa"
    PASHTO = "ps"
    URDU = "ur"
    BENGALI = "bn"
    TAMIL = "ta"
    TELUGU = "te"
    MARATHI = "mr"
    GUJARATI = "gu"
    PUNJABI = "pa"
    NEPALI = "ne"
    SINHALA = "si"
    BURMESE = "my"
    KHMER = "km"
    LAO = "lo"
    THAI = "th"
    VIETNAMESE = "vi"
    INDONESIAN = "id"
    MALAY = "ms"
    FILIPINO = "fil"
    JAVANESE = "jv"

    # Single source of truth for every lookup below. Insertion order is the
    # order in which getLanguageCodes() and getLanguageNames() list the
    # languages.
    _NAME_BY_CODE = {
        ENGLISH: "English",
        FRENCH: "French",
        SPANISH: "Spanish",
        GERMAN: "German",
        ITALIAN: "Italian",
        DUTCH: "Dutch",
        PORTUGUESE: "Portuguese",
        RUSSIAN: "Russian",
        CHINESE: "Chinese",
        JAPANESE: "Japanese",
        KOREAN: "Korean",
        ARABIC: "Arabic",
        HINDI: "Hindi",
        TURKISH: "Turkish",
        GREEK: "Greek",
        HEBREW: "Hebrew",
        POLISH: "Polish",
        UKRAINIAN: "Ukrainian",
        CZECH: "Czech",
        SLOVAK: "Slovak",
        BULGARIAN: "Bulgarian",
        ROMANIAN: "Romanian",
        HUNGARIAN: "Hungarian",
        FINNISH: "Finnish",
        SWEDISH: "Swedish",
        DANISH: "Danish",
        NORWEGIAN: "Norwegian",
        ICELANDIC: "Icelandic",
        ESTONIAN: "Estonian",
        LATVIAN: "Latvian",
        LITHUANIAN: "Lithuanian",
        MALTESE: "Maltese",
        CROATIAN: "Croatian",
        SERBIAN: "Serbian",
        BOSNIAN: "Bosnian",
        SLOVENIAN: "Slovenian",
        ALBANIAN: "Albanian",
        MACEDONIAN: "Macedonian",
        MONTENEGRIN: "Montenegrin",
        KURDISH: "Kurdish",
        PERSIAN: "Persian",
        PASHTO: "Pashto",
        URDU: "Urdu",
        BENGALI: "Bengali",
        TAMIL: "Tamil",
        TELUGU: "Telugu",
        MARATHI: "Marathi",
        GUJARATI: "Gujarati",
        PUNJABI: "Punjabi",
        NEPALI: "Nepali",
        SINHALA: "Sinhala",
        BURMESE: "Burmese",
        KHMER: "Khmer",
        LAO: "Lao",
        THAI: "Thai",
        VIETNAMESE: "Vietnamese",
        INDONESIAN: "Indonesian",
        MALAY: "Malay",
        FILIPINO: "Filipino",
        JAVANESE: "Javanese",
    }
    # inverse lookup; codes and names are both unique, so nothing is lost
    _CODE_BY_NAME = {name: code for code, name in _NAME_BY_CODE.items()}

    @staticmethod
    def getLanguageName(code) -> str:
        """Return the English name for a language code, or "Unknown"."""
        try:
            return LanguageCodes._NAME_BY_CODE.get(code, "Unknown")
        except TypeError:
            # an unhashable argument cannot equal any code
            return "Unknown"

    @staticmethod
    def getLanguageCode(name) -> str:
        """Return the language code for an English name, or "Unknown"."""
        try:
            return LanguageCodes._CODE_BY_NAME.get(name, "Unknown")
        except TypeError:
            # an unhashable argument cannot equal any name
            return "Unknown"

    @staticmethod
    def getLanguageCodes() -> list:
        """Return a new list of all language codes in declaration order."""
        return list(LanguageCodes._NAME_BY_CODE)

    @staticmethod
    def getLanguageNames() -> list:
        """Return a new list of all language names in declaration order."""
        return list(LanguageCodes._NAME_BY_CODE.values())
def disable_dropdown_options_by_text(combo_box, text, negative_case=False):
    """Disable combo-box entries selected by their display text.

    Every disabled entry also receives the ``NOT_IMPLEMENTED`` tooltip.

    Args:
        combo_box: Combo box whose ``model().item(i)`` entries support
            ``setEnabled`` and ``setToolTip``.
        text: A single item text, or a list/tuple/set of item texts.
        negative_case: When False the entries matching ``text`` are disabled;
            when True every entry that does NOT match is disabled instead.
    """
    if isinstance(text, str):
        targets = (text,)
    elif isinstance(text, (list, tuple, set, frozenset)):
        targets = text
    else:
        # Unsupported selector: nothing matches (so with negative_case=True
        # every entry is disabled, exactly as before).
        targets = ()

    invert = bool(negative_case)
    model = combo_box.model()
    for index in range(combo_box.count()):
        matches = combo_box.itemText(index) in targets
        # Disable on "match" normally, on "no match" when inverted.
        if matches != invert:
            item = model.item(index)
            item.setEnabled(False)
            item.setToolTip(NOT_IMPLEMENTED)


def toggle_all_widgets_in_a_groupbox(group_box, enabled):
    """Show or hide everything laid out directly inside ``group_box``.

    Form layouts are toggled row by row (so labels and fields hide together);
    any other layout is toggled widget by widget.

    Args:
        group_box: Widget whose ``layout()`` holds the children to toggle.
        enabled: True to show the children, False to hide them.
    """
    layout = group_box.layout()
    if layout is None:
        # A group box without a layout has nothing to toggle.
        return

    if isinstance(layout, QtWidgets.QFormLayout):
        for row in range(layout.rowCount()):
            layout.setRowVisible(row, enabled)
        return

    for index in range(layout.count()):
        # Spacers and nested layouts report no widget; skip them.
        widget = layout.itemAt(index).widget()
        if widget is not None:
            widget.setVisible(enabled)


class MainWindow(QtWidgets.QMainWindow):
    """Main application window.

    Loads ``mainwindow.ui`` and wires its widgets to the audio capture,
    transcription, translation, text-to-speech and audio playback workers
    created in ``__init__``. UI choices are persisted under the ``"main"``
    key of ``settings.json`` through ``store_data``.
    """

    # Entries of comboBox_translationSourceSelect that come from the .ui file
    # ("<-- Transcription", "File", "URL", "--- Get OBS Sources ---").
    # Everything after them is an "[OBS] ..." entry added at runtime.
    _BUILTIN_TRANSLATION_SOURCES = 4

    def __init__(self):
        super().__init__()
        uic.loadUi(
            path.abspath(path.join(path.dirname(__file__), "mainwindow.ui")), self
        )

        # add File -> Settings menu
        menubar = self.menuBar()
        file_menu = menubar.addMenu("File")
        file_menu.addAction("Settings", self.openSettingsDialog)
        file_menu.addAction("About", self.openAboutDialog)
        file_menu.addAction("View Current Log", self.openLogsDialog)
        self.log_dialog = None

        # populate audio sources
        self.populateAudioSources()
        self.comboBox_audioSources.currentIndexChanged.connect(self.audioSourceChanged)
        self.audioSource = None
        self.audioCapture = None
        self.audioTranscriber = AudioTranscriber()
        self.audioTranscriber.text_available.connect(self.transcriptionAvailable)
        self.translator = TranslationThread()
        self.translator.text_available.connect(self.translationTextAvailable)
        self.translator.progress_available.connect(
            lambda progress: self.progressBar_translationProgress.setValue(progress)
        )
        self.translation_poller = None
        self.textToSpeech = TextToSpeechThread()
        self.textToSpeech.progress_available.connect(
            lambda progress: self.progressBar_ttsProgress.setValue(progress)
        )
        self.audioPlayer = AudioPlayer()
        self.textToSpeech.speech_available.connect(
            lambda audio: self.audioPlayer.add_to_queue(audio)
        )
        self.audioPlayer.start()

        # default chunk size is 3000ms (the slider value is in seconds)
        self.horizontalSlider_chunkSize.setValue(3)
        self.comboBox_modelSize.currentTextChanged.connect(
            self.transcriptionModelSizeChanged
        )

        # Checkable group boxes collapse/expand their content when toggled.
        # The second element says whether the box starts collapsed.
        collapsible_boxes = (
            (self.groupBox_statusTranscription, False),
            (self.groupBox_statusTranslate, False),
            (self.groupBox_cleanStream, True),
            (self.groupBox_output, True),
            (self.groupBox_transcriptionOpts, True),
            (self.groupBox_langOutputs, True),
            (self.groupBox_ttsOutput, True),
            (self.groupBox_analyze, True),
            (self.groupBox_translation, True),
        )
        for group_box, start_collapsed in collapsible_boxes:
            # ``box=group_box`` binds the current box; a plain closure would
            # see only the last box of the loop.
            group_box.toggled.connect(
                lambda checked, box=group_box: toggle_all_widgets_in_a_groupbox(
                    box, checked
                )
            )
            if start_collapsed:
                toggle_all_widgets_in_a_groupbox(group_box, False)

        # language engine change
        self.comboBox_languageEngine.currentIndexChanged.connect(
            self.languageEngineChanged
        )

        # populate languages
        self.comboBox_fromLanguage.addItems(LanguageCodes.getLanguageNames())
        self.comboBox_toLanguage.addItems(LanguageCodes.getLanguageNames())
        self.comboBox_toLanguage.setCurrentIndex(1)

        self.comboBox_transcriptionLanguage.addItem("Auto")
        self.comboBox_transcriptionLanguage.addItems(LanguageCodes.getLanguageNames())
        self.comboBox_transcriptionLanguage.setCurrentIndex(0)

        self.comboBox_transcriptionLanguage.currentTextChanged.connect(
            self.transcriptionLanguageChanged
        )
        self.comboBox_toLanguage.currentIndexChanged.connect(
            self.setTranslationLanguages
        )
        self.comboBox_fromLanguage.currentIndexChanged.connect(
            self.setTranslationLanguages
        )
        self.groupBox_translation.toggled.connect(self.startTranslation)

        # speech engine
        self.comboBox_speechEngine.currentIndexChanged.connect(self.speechEngineChanged)

        # disable everything on the text-output combo boxes except for
        # "Text File" and "No text output"
        disable_dropdown_options_by_text(
            self.comboBox_transcriptionOutputText,
            ["No text output", "Text File"],
            negative_case=True,
        )
        disable_dropdown_options_by_text(
            self.comboBox_translationOutputTextOptions,
            ["No text output", "Text File"],
            negative_case=True,
        )
        self.comboBox_transcriptionOutputText.currentIndexChanged.connect(
            self.transcriptionOutputTextChanged
        )
        self.comboBox_translationOutputTextOptions.currentIndexChanged.connect(
            self.translationOutputTextChanged
        )
        self.comboBox_translationSourceSelect.currentIndexChanged.connect(
            self.translationSourceChanged
        )
        disable_dropdown_options_by_text(self.comboBox_translationSourceSelect, "URL")

        self.outputsFolder = None
        self.transcriptionOutputTextFilePath = None
        self.translationOutputTextFilePath = None
        self.obs_client = None

        # Restore persisted choices shortly after construction, via a
        # single-shot timer instead of a direct call.
        QTimer.singleShot(10, self.load_settings)

    def load_settings(self):
        """Restore the UI state persisted under ``"main"`` in settings.json.

        Setting a combo box's text fires its change handler, so the order of
        restoration below is significant and mirrors the persisted keys.
        """
        main_settings = fetch_data("settings.json", "main", {})

        # NOTE(review): "translation_source" is persisted as
        # "file"/"transcription"/"obs", which are not item texts of the combo
        # box, so restoring it is presumably a no-op - confirm intent.
        leading_combos = (
            ("language_engine", self.comboBox_languageEngine),
            ("transcription_output", self.comboBox_transcriptionOutputText),
            ("translation_output", self.comboBox_translationOutputTextOptions),
            ("translation_source", self.comboBox_translationSourceSelect),
            ("transcription_language", self.comboBox_transcriptionLanguage),
            ("transcription_model_size", self.comboBox_modelSize),
        )
        for key, combo_box in leading_combos:
            value = main_settings.get(key)
            if value is not None:
                combo_box.setCurrentText(value)

        audio_source = main_settings.get("audio_source")
        if audio_source is not None:
            if audio_source == "device":
                audio_device = main_settings.get("audio_device")
                self.comboBox_audioSources.setCurrentText(audio_device)
                self.audioSource = AudioSource(
                    AudioSource.SourceType.DEVICE, audio_device
                )
            else:
                # File sources are not re-opened automatically.
                self.comboBox_audioSources.setCurrentText("Select Audio Source")

        from_language = main_settings.get("from_language")
        if from_language is not None:
            self.comboBox_fromLanguage.setCurrentText(from_language)
        to_language = main_settings.get("to_language")
        if to_language is not None:
            self.comboBox_toLanguage.setCurrentText(to_language)
        translation_on = main_settings.get("translation_on")
        if translation_on is not None:
            self.groupBox_translation.setChecked(translation_on)
        speech_engine = main_settings.get("speech_engine")
        if speech_engine is not None:
            self.comboBox_speechEngine.setCurrentText(speech_engine)

    def openLogsDialog(self):
        """Show the (lazily created, non-modal) log viewer dialog."""
        if self.log_dialog is None:
            # open the logs dialog
            self.log_dialog = LogViewerDialog()
            self.log_dialog.setWindowTitle("Logs")

        # show the dialog, non modal
        self.log_dialog.show()

    def openAboutDialog(self):
        """Show the modal "About" dialog loaded from ``about.ui``."""
        about_dialog = QDialog()
        loadUi(
            path.abspath(path.join(path.dirname(__file__), "about.ui")),
            about_dialog,
        )
        about_dialog.setWindowTitle("About Lexis")
        about_dialog.exec()

    def ensure_output_folder(self):
        """Make sure the configured outputs folder exists.

        The folder path is read once from the ``"settings"`` section of
        settings.json (key ``outputs_folder``) and cached on the instance.

        Returns:
            True when a folder is configured and exists (or was created),
            False when none is configured or it could not be created.
        """
        if self.outputsFolder is None:
            self.outputsFolder = fetch_data("settings.json", "settings", {}).get(
                "outputs_folder", None
            )
        if self.outputsFolder is None:
            return False

        if not path.exists(self.outputsFolder):
            try:
                os.makedirs(self.outputsFolder)
            except Exception as e:
                logger.error(f"Error creating outputs folder: {e}")
                # Forget the bad path so the next call re-reads the settings.
                self.outputsFolder = None
                return False
        return True

    def speechEngineChanged(self):
        """Switch the text-to-speech engine to the one selected in the UI.

        Engines that need an API key fall back to the first entry and open
        the settings dialog when the key is missing. The resulting selection
        is persisted as ``speech_engine``.
        """
        self.textToSpeech.stop()
        engine = self.comboBox_speechEngine.currentText()
        # Engine name -> settings key holding its API key.
        api_key_settings = {
            "OpenAI": "openai_api_key",
            "ElevenLabs": "elevenlabs_api_key",
        }
        api_key_setting = api_key_settings.get(engine)
        if api_key_setting is not None:
            if not fetch_data("settings.json", "settings", {}).get(api_key_setting):
                self.comboBox_speechEngine.setCurrentIndex(0)
                self.openSettingsDialog(1)
                return
            self.textToSpeech.speech_engine = engine
            self.textToSpeech.start()
        elif engine != "Select TTS Engine":
            # The placeholder entry simply means "TTS off"; only genuinely
            # unexpected entries are reported.
            logger.error(f"Unknown speech engine: {engine}")
            self.comboBox_speechEngine.setCurrentIndex(0)

        store_data(
            "settings.json",
            "main",
            {"speech_engine": self.comboBox_speechEngine.currentText()},
        )

    def transcriptionLanguageChanged(self):
        """Forward the selected transcription language and persist it."""
        language = self.comboBox_transcriptionLanguage.currentText()
        logger.debug("transcription language changed to:" + language)
        self.audioTranscriber.set_language(language)
        store_data(
            "settings.json",
            "main",
            {"transcription_language": language},
        )

    def transcriptionModelSizeChanged(self):
        """Forward the selected transcription model size and persist it."""
        model_size = self.comboBox_modelSize.currentText()
        self.audioTranscriber.set_model_size(model_size)
        store_data(
            "settings.json",
            "main",
            {"transcription_model_size": model_size},
        )

    def transcriptionOutputTextChanged(self):
        """React to a change of the transcription text-output target.

        Selecting "Text File" requires an outputs folder; when it is missing
        the selection is reset and the settings dialog opens.
        """
        self.transcriptionOutputTextFilePath = None
        selection = self.comboBox_transcriptionOutputText.currentText()
        if selection == "Text File":
            if not self.ensure_output_folder():
                self.comboBox_transcriptionOutputText.setCurrentIndex(0)
                self.openSettingsDialog(0)
                return
            self.transcriptionOutputTextFilePath = path.join(
                self.outputsFolder, "captions.txt"
            )
        # Persist the item text itself so load_settings() can restore it with
        # setCurrentText(); a fixed "text_file" marker never matched an item.
        store_data("settings.json", "main", {"transcription_output": selection})

    def translationOutputTextChanged(self):
        """React to a change of the translation text-output target.

        Selecting "Text File" requires an outputs folder; when it is missing
        the selection is reset and the settings dialog opens.
        """
        self.translationOutputTextFilePath = None
        selection = self.comboBox_translationOutputTextOptions.currentText()
        if selection == "Text File":
            if not self.ensure_output_folder():
                # Reset THIS combo box (not the transcription one).
                self.comboBox_translationOutputTextOptions.setCurrentIndex(0)
                self.openSettingsDialog(0)
                return
            self.translationOutputTextFilePath = path.join(
                self.outputsFolder, "translation.txt"
            )
        # Persist the item text itself so load_settings() can restore it.
        store_data("settings.json", "main", {"translation_output": selection})

    def _stop_translation_poller(self):
        """Stop and discard the active translation input poller, if any."""
        if self.translation_poller is not None:
            self.translation_poller.stop()
            self.translation_poller.wait()
            self.translation_poller = None

    def translationSourceChanged(self):
        """Switch where the translator takes its input text from.

        Supported selections: the live transcription, a polled text file, the
        "--- Get OBS Sources ---" action (which refreshes the list), or one
        of the "[OBS] scene - source" entries added by that action.
        """
        self.textBrowser_transformedTextOutput.setText("")
        # Whatever was feeding the translator before must stop, otherwise two
        # sources would push into the same input queue.
        self._stop_translation_poller()

        selection = self.comboBox_translationSourceSelect.currentText()
        if selection == "File":
            fileDialog = QtWidgets.QFileDialog()
            fileDialog.setFileMode(QtWidgets.QFileDialog.FileMode.ExistingFile)
            fileDialog.setNameFilter("Text Files (*.txt)")
            fileDialog.setViewMode(QtWidgets.QFileDialog.ViewMode.List)
            fileDialog.exec()
            fileNames = fileDialog.selectedFiles()
            if fileNames:
                self.translation_poller = FilePoller(
                    fileNames[0],
                    cadence=fetch_data("settings.json", "settings", {}).get(
                        "input_file_polling_freq", 1000
                    ),
                    queue=self.translator.input_queue,
                )
                self.translation_poller.start()
                store_data(
                    "settings.json",
                    "main",
                    {"translation_source": "file", "translation_file": fileNames[0]},
                )
        elif selection == "<-- Transcription":
            logger.info("transcription selected as translation source")
            store_data("settings.json", "main", {"translation_source": "transcription"})
        elif selection == "--- Get OBS Sources ---":
            logger.info("Get OBS sources from websocket")
            self.getOBSSourcesForTranslation()
            self.comboBox_translationSourceSelect.setCurrentIndex(0)
        elif self.obs_client is not None:
            # obs source selected: create an OBSPoller
            if selection.startswith("[OBS]"):
                # Entries look like "[OBS] <scene> - <source>".
                source_name = selection.split(" - ")[1]
                self.translation_poller = OBSPoller(
                    self.obs_client,
                    source_name,
                    self.translator.input_queue,
                    int(
                        fetch_data("settings.json", "settings", {}).get(
                            "obs_polling_freq", 1000
                        )
                    ),
                )
                self.translation_poller.start()
                store_data(
                    "settings.json",
                    "main",
                    {"translation_source": "obs", "obs_source": source_name},
                )
            else:
                logger.error("Invalid OBS source selected")
                self.comboBox_translationSourceSelect.setCurrentIndex(0)

    def getOBSSourcesForTranslation(self):
        """Refresh the "[OBS] ..." entries of the translation source list.

        Connects to OBS on first use; when no connection can be made the
        settings dialog is opened instead.
        """
        if self.obs_client is None:
            self.obs_client = open_obs_websocket_from_settings()
        if self.obs_client is None:
            logger.error("OBS client is not connected")
            # open settings dialog
            self.openSettingsDialog(2)
            return

        sources = get_all_text_sources(self.obs_client)
        if sources is None or len(sources) == 0:
            logger.warning("Can't get OBS sources or no sources available")
            return

        combo_box = self.comboBox_translationSourceSelect
        # Remove the OBS entries from a previous refresh; they follow the
        # built-in entries, i.e. they start at index 4.
        while combo_box.count() > self._BUILTIN_TRANSLATION_SOURCES:
            combo_box.removeItem(self._BUILTIN_TRANSLATION_SOURCES)
        # add the new sources
        for source in sources:
            combo_box.addItem(f"[OBS] {source['sceneName']} - {source['sourceName']}")
        combo_box.setCurrentIndex(0)

    def openSettingsDialog(self, page=None):
        """Open the modal settings dialog.

        Args:
            page: Forwarded to ``SettingsDialog``. Callers in this file pass
                0 (outputs folder missing), 1 (engine key/model missing) or
                2 (OBS not connected) - presumably a page index.
        """
        settingsDialog = SettingsDialog(page, self)
        settingsDialog.exec()

    def languageEngineChanged(self):
        """Apply the selected translation engine.

        The translation widgets are enabled only once the engine's required
        setting is present; otherwise the selection is reset and the settings
        dialog opens. Choosing the placeholder entry stops translation.
        """
        # disable the widgets until the selection is validated
        self.widget_textSourceSelect.setEnabled(False)
        self.groupBox_translation.setEnabled(False)

        engine = self.comboBox_languageEngine.currentText()
        if engine == "Select Language Engine":
            self.startTranslation(False)
            self.translator.setTranslationEngine(None)
            return

        logger.info(f"language engine changed to: {engine}")
        settings = fetch_data("settings.json", "settings", {})
        # Engine name -> setting that must be filled in before it can be used.
        required_settings = {
            "Local LLM": "local_llm_select",
            "OpenAI API": "openai_api_key",
            "DeepL API": "deepl_api_key",
        }
        required = required_settings.get(engine)
        if required is not None and not settings.get(required):
            self.comboBox_languageEngine.setCurrentIndex(0)
            self.openSettingsDialog(1)
            return

        # enable the widgets
        self.widget_textSourceSelect.setEnabled(True)
        self.groupBox_translation.setEnabled(True)
        self.translator.setTranslationEngine(engine)
        store_data(
            "settings.json",
            "main",
            {"language_engine": engine},
        )

    def setTranslationLanguages(self):
        """Push the selected from/to languages to the translator and persist."""
        from_language = self.comboBox_fromLanguage.currentText()
        to_language = self.comboBox_toLanguage.currentText()
        self.translator.setLanguages(from_language, to_language)
        store_data(
            "settings.json",
            "main",
            {
                "from_language": from_language,
                "to_language": to_language,
            },
        )

    def startTranslation(self, checked):
        """Start or stop the translator and persist the on/off state.

        Args:
            checked: True to start the translator, False to stop it.
        """
        store_data("settings.json", "main", {"translation_on": checked})
        if checked:
            self.translator.start()
        else:
            self.translator.stop()

    def populateAudioSources(self):
        """Rebuild the audio source list: placeholder, devices, NDI, file, stream."""
        self.comboBox_audioSources.clear()
        # add select audio source option
        self.comboBox_audioSources.insertItem(0, "Select Audio Source")
        self.comboBox_audioSources.setCurrentIndex(0)
        for device in AudioRecorder.get_audio_devices():
            self.comboBox_audioSources.addItem(device.sourceName)
        self.comboBox_audioSources.addItem("--- NDI Sources ---")
        disable_dropdown_options_by_text(
            self.comboBox_audioSources, "--- NDI Sources ---"
        )
        # add file input option
        self.comboBox_audioSources.addItem("File")
        # add stream option (not implemented yet, hence disabled)
        self.comboBox_audioSources.addItem("Stream")
        disable_dropdown_options_by_text(self.comboBox_audioSources, "Stream")

    def audioSourceChanged(self):
        """Select a new audio source from the UI and restart capture.

        "File" opens a file picker; any other entry except the placeholder is
        treated as a device name.
        """
        logger.info("audio source changed")
        self.audioSource = None
        selection = self.comboBox_audioSources.currentText()
        # if file input selected, open file dialog
        if selection == "File":
            logger.info("file input selected")
            fileDialog = QtWidgets.QFileDialog()
            fileDialog.setFileMode(QtWidgets.QFileDialog.FileMode.ExistingFile)
            fileDialog.setNameFilter(
                "Audio Files (*.mp3 *.wav *.ogg *.flac *.m4a *.aac)"
            )
            fileDialog.setViewMode(QtWidgets.QFileDialog.ViewMode.List)
            fileDialog.exec()
            fileNames = fileDialog.selectedFiles()
            if fileNames:
                logger.info(f"file selected: {fileNames[0]}")
                self.audioSource = AudioSource(
                    AudioSource.SourceType.FILE, fileNames[0]
                )
                store_data(
                    "settings.json",
                    "main",
                    {"audio_source": "file", "audio_file": fileNames[0]},
                )
        else:
            logger.info("device input selected")
            if selection != "Select Audio Source":
                self.audioSource = AudioSource(
                    AudioSource.SourceType.DEVICE,
                    selection,
                )
                store_data(
                    "settings.json",
                    "main",
                    {
                        "audio_source": "device",
                        "audio_device": selection,
                    },
                )

        # Also runs with no source selected, which just stops any capture.
        self.startAudioCapture()

    def startAudioCapture(self):
        """Stop any running capture, then start one for ``self.audioSource``."""
        logger.info("stopping exsting audio capture and starting new")
        if self.audioCapture:
            self.audioTranscriber.stop()
            self.audioTranscriber.wait()
            self.audioCapture.stop()
            self.audioCapture.wait()
            self.audioCapture = None

        if not self.audioSource:
            return

        self.audioTranscriber.start()
        logger.info(f"audio source: {self.audioSource.sourceName}")
        # start audio capture
        logger.info(
            f"starting audio capture with chunk size: {self.horizontalSlider_chunkSize.value()}"
        )
        # The slider holds seconds; the recorder is given milliseconds.
        self.audioCapture = AudioRecorder(
            self.audioSource, self.horizontalSlider_chunkSize.value() * 1000
        )
        self.audioCapture.progress_and_volume.connect(self.audioCaptureProgress)
        self.audioCapture.data_available.connect(
            self.audioTranscriber.queue_audio_data
        )
        self.audioCapture.start()

    def audioCaptureProgress(self, progress):
        """Update the signal and buffer progress bars.

        Args:
            progress: Indexable pair; ``progress[0]`` is compared against the
                chunk size in milliseconds and ``progress[1]`` is scaled by
                300 for the volume bar.
        """
        # update the volume progressbar
        self.progressBar_audioSignal.setValue(int(progress[1] * 300))
        # update the buffer progressbar (percentage of one chunk)
        chunk_size_ms = float(self.horizontalSlider_chunkSize.value()) * 1000.0
        buffer_capacity = int(float(progress[0]) / chunk_size_ms * 100.0)
        self.progressBar_audioBuffer.setValue(buffer_capacity)
        # redraw the progressbars
        self.progressBar_audioSignal.repaint()
        self.progressBar_audioBuffer.repaint()

    @staticmethod
    def _write_text_output(file_path, text, what):
        """Overwrite ``file_path`` with ``text`` (UTF-8), logging any failure.

        Args:
            file_path: Destination file; its previous content is replaced.
            text: Text to write; a trailing newline is appended.
            what: Word used in the error message ("transcription"/"translation").
        """
        try:
            # save to file with utf-8 encoding
            with open(file_path, "w", encoding="utf-8") as f:
                f.write(text + "\n")
        except Exception as e:
            # Best effort: a failed write must not break the live pipeline.
            logger.error(f"Error saving {what} to file: {e}")

    def transcriptionAvailable(self, text):
        """Display new transcribed text and fan it out to its consumers.

        Args:
            text: The newly transcribed text.
        """
        logger.info(f"transcribed text available: {text}")
        self.textBrowser_output.setText(text)
        # if translation is on and fed by the transcription - send the text
        # to the translator thread
        if (
            self.groupBox_translation.isChecked()
            and self.comboBox_translationSourceSelect.currentText()
            == "<-- Transcription"
        ):
            if self.translator.running:
                self.translator.input_queue.put_nowait(text)
            else:
                logger.error("Translator thread is not running")
        if self.transcriptionOutputTextFilePath is not None:
            self._write_text_output(
                self.transcriptionOutputTextFilePath, text, "transcription"
            )

    def translationTextAvailable(self, text):
        """Display new translated text, save it and hand it to TTS if enabled.

        Args:
            text: The newly translated text.
        """
        logger.info(f"translated text available: {text}")
        self.textBrowser_transformedTextOutput.setText(text)
        if self.translationOutputTextFilePath is not None:
            self._write_text_output(
                self.translationOutputTextFilePath, text, "translation"
            )
        # check if tts is on
        if self.comboBox_speechEngine.currentText() != "Select TTS Engine":
            self.textToSpeech.add_text(text)

    def closeEvent(self, event):
        """Stop the worker threads and the OBS connection before closing."""
        logger.debug("closing")
        if self.audioCapture:
            self.audioCapture.stop()
            logger.debug("audio capture stopped, waiting for thread to finish")
            self.audioCapture.wait()
        self.audioTranscriber.stop()
        logger.debug("transcription thread stopped. waiting for thread to finish")
        self.audioTranscriber.wait()
        self.translator.stop()
        logger.debug("translation thread stopped. waiting for thread to finish")
        self.translator.wait()
        # The TTS worker may have been started by speechEngineChanged().
        self.textToSpeech.stop()
        # NOTE(review): self.audioPlayer is started in __init__ but never
        # stopped here; its shutdown API is not visible from this file.
        self._stop_translation_poller()
        if self.obs_client:
            disconnect_obs_websocket(self.obs_client)
        event.accept()


if __name__ == "__main__":
    # only attempt splash when not on Mac OSX
    os_name = platform.system()
    if os_name != "Darwin":
        try:
            # pyi_splash can only be imported when a splash screen is
            # bundled; any other run raises ImportError, which is ignored.
            import pyi_splash  # type: ignore

            pyi_splash.close()
        except ImportError:
            pass

    app = QtWidgets.QApplication(sys.argv)
    window = MainWindow()
    window.show()
    sys.exit(app.exec())
69 | 0 70 | 71 | 72 | 0 73 | 74 | 75 | 76 | 77 | 78 | 0 79 | 0 80 | 81 | 82 | 83 | 84 | 20 85 | 0 86 | 87 | 88 | 89 | 90 | 13 91 | 92 | 93 | 94 | Source 95 | 96 | 97 | 98 | 99 | 100 | 101 | 102 | 13 103 | 104 | 105 | 106 | 107 | Select Audio Source 108 | 109 | 110 | 111 | 112 | 113 | 114 | 115 | 116 | 117 | 118 | 119 | 13 120 | 121 | 122 | 123 | Transcription Options 124 | 125 | 126 | true 127 | 128 | 129 | false 130 | 131 | 132 | 133 | QFormLayout::ExpandingFieldsGrow 134 | 135 | 136 | 0 137 | 138 | 139 | 0 140 | 141 | 142 | 6 143 | 144 | 145 | 0 146 | 147 | 148 | 0 149 | 150 | 151 | 0 152 | 153 | 154 | 155 | 156 | 157 | 13 158 | 159 | 160 | 161 | Chunk Seconds 162 | 163 | 164 | 165 | 166 | 167 | 168 | 169 | 0 170 | 0 171 | 172 | 173 | 174 | 175 | 13 176 | 177 | 178 | 179 | 1 180 | 181 | 182 | 10 183 | 184 | 185 | Qt::Horizontal 186 | 187 | 188 | QSlider::NoTicks 189 | 190 | 191 | 1 192 | 193 | 194 | 195 | 196 | 197 | 198 | 199 | 0 200 | 0 201 | 202 | 203 | 204 | 205 | 206 | 207 | 208 | Language 209 | 210 | 211 | 212 | 213 | 214 | 215 | 216 | Tiny (75Mb) 217 | 218 | 219 | 220 | 221 | Base (140Mb) 222 | 223 | 224 | 225 | 226 | Small (400Mb) 227 | 228 | 229 | 230 | 231 | 232 | 233 | 234 | Model Size 235 | 236 | 237 | 238 | 239 | 240 | 241 | 242 | 243 | 244 | false 245 | 246 | 247 | 248 | 13 249 | 250 | 251 | 252 | Clean Stream 253 | 254 | 255 | true 256 | 257 | 258 | false 259 | 260 | 261 | 262 | QFormLayout::ExpandingFieldsGrow 263 | 264 | 265 | 0 266 | 267 | 268 | 6 269 | 270 | 271 | 0 272 | 273 | 274 | 0 275 | 276 | 277 | 0 278 | 279 | 280 | 281 | 282 | 283 | 13 284 | 285 | 286 | 287 | Cleanup Method 288 | 289 | 290 | 291 | 292 | 293 | 294 | 295 | 13 296 | 297 | 298 | 299 | 300 | Mute 301 | 302 | 303 | 304 | 305 | Beep 306 | 307 | 308 | 309 | 310 | 311 | 312 | 313 | 314 | 13 315 | 316 | 317 | 318 | Words 319 | 320 | 321 | 322 | 323 | 324 | 325 | 326 | 13 327 | 328 | 329 | 330 | 331 | Internal List 332 | 333 | 334 | 335 | 336 | Custom List 337 | 338 | 339 | 340 | 
341 | 342 | 343 | 344 | 345 | 346 | 347 | 348 | 16777215 349 | 120 350 | 351 | 352 | 353 | 354 | 13 355 | 356 | 357 | 358 | Transcription... 359 | 360 | 361 | 362 | 363 | 364 | 365 | Qt::Vertical 366 | 367 | 368 | 369 | 20 370 | 40 371 | 372 | 373 | 374 | 375 | 376 | 377 | 378 | 379 | 0 380 | 0 381 | 382 | 383 | 384 | 385 | 13 386 | 387 | 388 | 389 | Output Options 390 | 391 | 392 | true 393 | 394 | 395 | false 396 | 397 | 398 | 399 | 0 400 | 401 | 402 | 6 403 | 404 | 405 | 0 406 | 407 | 408 | 0 409 | 410 | 411 | 0 412 | 413 | 414 | 415 | 416 | 417 | 13 418 | 419 | 420 | 421 | Send text to 422 | 423 | 424 | 425 | 426 | 427 | 428 | 429 | 13 430 | 431 | 432 | 433 | 434 | No text output 435 | 436 | 437 | 438 | 439 | Text File 440 | 441 | 442 | 443 | 444 | OBS WS Text Source 445 | 446 | 447 | 448 | 449 | Caption stream 450 | 451 | 452 | 453 | 454 | HTTP (Browser / Overlay) 455 | 456 | 457 | 458 | 459 | SRT File 460 | 461 | 462 | 463 | 464 | 465 | 466 | 467 | 468 | 13 469 | 470 | 471 | 472 | Send audio 473 | 474 | 475 | 476 | 477 | 478 | 479 | false 480 | 481 | 482 | 483 | 13 484 | 485 | 486 | 487 | Not implemented yet 488 | 489 | 490 | 491 | No audio output 492 | 493 | 494 | 495 | 496 | File (one shot) 497 | 498 | 499 | 500 | 501 | File (continuous) 502 | 503 | 504 | 505 | 506 | -- Output Devices -- 507 | 508 | 509 | 510 | 511 | 512 | 513 | 514 | 515 | 0 516 | 517 | 518 | 0 519 | 520 | 521 | 0 522 | 523 | 524 | 0 525 | 526 | 527 | 0 528 | 529 | 530 | 531 | 532 | 533 | 534 | 535 | 536 | 537 | 538 | 539 | 13 540 | 541 | 542 | 543 | Status 544 | 545 | 546 | true 547 | 548 | 549 | 550 | 6 551 | 552 | 553 | 0 554 | 555 | 556 | 0 557 | 558 | 559 | 560 | 561 | 562 | 0 563 | 0 564 | 565 | 566 | 567 | 568 | 16777215 569 | 25 570 | 571 | 572 | 573 | 0 574 | 575 | 576 | false 577 | 578 | 579 | Qt::Vertical 580 | 581 | 582 | 583 | 584 | 585 | 586 | 587 | 0 588 | 0 589 | 590 | 591 | 592 | 593 | 10 594 | 595 | 596 | 597 | Buffer 598 | 599 | 600 | 601 | 602 | 603 | 604 | 0 605 | 606 
| 607 | false 608 | 609 | 610 | 611 | 612 | 613 | 614 | 615 | 616 | 617 | 618 | 619 | 620 | true 621 | 622 | 623 | 624 | 24 625 | 626 | 627 | 628 | Translate 629 | 630 | 631 | Qt::AlignCenter 632 | 633 | 634 | false 635 | 636 | 637 | 638 | 6 639 | 640 | 641 | 642 | 643 | 644 | 13 645 | 646 | 647 | 648 | 649 | Select Language Engine 650 | 651 | 652 | 653 | 654 | Local LLM 655 | 656 | 657 | 658 | 659 | OpenAI API 660 | 661 | 662 | 663 | 664 | DeepL API 665 | 666 | 667 | 668 | 669 | 670 | 671 | 672 | false 673 | 674 | 675 | 676 | 6 677 | 678 | 679 | 0 680 | 681 | 682 | 0 683 | 684 | 685 | 0 686 | 687 | 688 | 0 689 | 690 | 691 | 692 | 693 | 694 | 13 695 | 696 | 697 | 698 | Text Source 699 | 700 | 701 | 702 | 703 | 704 | 705 | 706 | 0 707 | 0 708 | 709 | 710 | 711 | 712 | 13 713 | 714 | 715 | 716 | 717 | <-- Transcription 718 | 719 | 720 | 721 | 722 | File 723 | 724 | 725 | 726 | 727 | URL 728 | 729 | 730 | 731 | 732 | --- Get OBS Sources --- 733 | 734 | 735 | 736 | 737 | 738 | 739 | 740 | 741 | 742 | 743 | false 744 | 745 | 746 | 747 | 0 748 | 0 749 | 750 | 751 | 752 | 753 | 13 754 | 755 | 756 | 757 | Translate 758 | 759 | 760 | true 761 | 762 | 763 | false 764 | 765 | 766 | 767 | QFormLayout::ExpandingFieldsGrow 768 | 769 | 770 | 0 771 | 772 | 773 | 0 774 | 775 | 776 | 6 777 | 778 | 779 | 0 780 | 781 | 782 | 0 783 | 784 | 785 | 0 786 | 787 | 788 | 789 | 790 | 791 | 0 792 | 0 793 | 794 | 795 | 796 | 797 | 13 798 | 799 | 800 | 801 | 802 | 803 | 804 | 805 | 806 | 13 807 | 808 | 809 | 810 | To 811 | 812 | 813 | 814 | 815 | 816 | 817 | 818 | 0 819 | 0 820 | 821 | 822 | 823 | 824 | 13 825 | 826 | 827 | 828 | 829 | 830 | 831 | 832 | 833 | 13 834 | 835 | 836 | 837 | From 838 | 839 | 840 | 841 | 842 | 843 | 844 | 845 | 846 | 847 | false 848 | 849 | 850 | 851 | 13 852 | 853 | 854 | 855 | Analyze 856 | 857 | 858 | true 859 | 860 | 861 | false 862 | 863 | 864 | 865 | QFormLayout::ExpandingFieldsGrow 866 | 867 | 868 | 0 869 | 870 | 871 | 0 872 | 873 | 874 | 6 875 | 876 | 877 | 0 
878 | 879 | 880 | 0 881 | 882 | 883 | 0 884 | 885 | 886 | 887 | 888 | 889 | 0 890 | 0 891 | 892 | 893 | 894 | 895 | 13 896 | 897 | 898 | 899 | Remove profanity 900 | 901 | 902 | 903 | 904 | 905 | 906 | 907 | 0 908 | 0 909 | 910 | 911 | 912 | 913 | 13 914 | 915 | 916 | 917 | Summarize 918 | 919 | 920 | 921 | 922 | 923 | 924 | 925 | 926 | 927 | 928 | 16777215 929 | 120 930 | 931 | 932 | 933 | 934 | 13 935 | 936 | 937 | 938 | false 939 | 940 | 941 | Transformed text... 942 | 943 | 944 | 945 | 946 | 947 | 948 | Qt::Vertical 949 | 950 | 951 | 952 | 20 953 | 40 954 | 955 | 956 | 957 | 958 | 959 | 960 | 961 | 962 | 13 963 | 964 | 965 | 966 | Output Options 967 | 968 | 969 | true 970 | 971 | 972 | false 973 | 974 | 975 | 976 | QFormLayout::ExpandingFieldsGrow 977 | 978 | 979 | 0 980 | 981 | 982 | 6 983 | 984 | 985 | 0 986 | 987 | 988 | 0 989 | 990 | 991 | 0 992 | 993 | 994 | 995 | 996 | 997 | 13 998 | 999 | 1000 | 1001 | Send to 1002 | 1003 | 1004 | 1005 | 1006 | 1007 | 1008 | 1009 | 0 1010 | 0 1011 | 1012 | 1013 | 1014 | 1015 | 13 1016 | 1017 | 1018 | 1019 | 1020 | No text output 1021 | 1022 | 1023 | 1024 | 1025 | Text File 1026 | 1027 | 1028 | 1029 | 1030 | OBS Text Source 1031 | 1032 | 1033 | 1034 | 1035 | Captions stream 1036 | 1037 | 1038 | 1039 | 1040 | HTTP (Browser / Overlay) 1041 | 1042 | 1043 | 1044 | 1045 | SRT File 1046 | 1047 | 1048 | 1049 | 1050 | 1051 | 1052 | 1053 | 1054 | 1055 | 1056 | 1057 | 13 1058 | 1059 | 1060 | 1061 | Status 1062 | 1063 | 1064 | true 1065 | 1066 | 1067 | 1068 | 0 1069 | 1070 | 1071 | 0 1072 | 1073 | 1074 | 1075 | 1076 | 0 1077 | 1078 | 1079 | false 1080 | 1081 | 1082 | 1083 | 1084 | 1085 | 1086 | 1087 | 1088 | 1089 | 1090 | 1091 | 1092 | true 1093 | 1094 | 1095 | 1096 | 24 1097 | 1098 | 1099 | 1100 | Synthesize 1101 | 1102 | 1103 | Qt::AlignCenter 1104 | 1105 | 1106 | false 1107 | 1108 | 1109 | 1110 | 1111 | 1112 | 1113 | 13 1114 | 1115 | 1116 | 1117 | 1118 | Select TTS Engine 1119 | 1120 | 1121 | 1122 | 1123 | OpenAI 1124 | 1125 | 
1126 | 1127 | 1128 | ElevenLabs 1129 | 1130 | 1131 | 1132 | 1133 | 1134 | 1135 | 1136 | Qt::Vertical 1137 | 1138 | 1139 | 1140 | 20 1141 | 40 1142 | 1143 | 1144 | 1145 | 1146 | 1147 | 1148 | 1149 | 1150 | 0 1151 | 0 1152 | 1153 | 1154 | 1155 | 1156 | 13 1157 | 1158 | 1159 | 1160 | Output Options 1161 | 1162 | 1163 | true 1164 | 1165 | 1166 | false 1167 | 1168 | 1169 | 1170 | 0 1171 | 1172 | 1173 | 0 1174 | 1175 | 1176 | 0 1177 | 1178 | 1179 | 1180 | 1181 | 1182 | 13 1183 | 1184 | 1185 | 1186 | Send to 1187 | 1188 | 1189 | 1190 | 1191 | 1192 | 1193 | 1194 | 13 1195 | 1196 | 1197 | 1198 | 1199 | No audio output 1200 | 1201 | 1202 | 1203 | 1204 | File: One shot 1205 | 1206 | 1207 | 1208 | 1209 | File: Continuous 1210 | 1211 | 1212 | 1213 | 1214 | Local stream (HTTP) 1215 | 1216 | 1217 | 1218 | 1219 | -- Output Devices -- 1220 | 1221 | 1222 | 1223 | 1224 | 1225 | 1226 | 1227 | 1228 | 1229 | 1230 | 1231 | 13 1232 | 1233 | 1234 | 1235 | Status 1236 | 1237 | 1238 | true 1239 | 1240 | 1241 | 1242 | 0 1243 | 1244 | 1245 | 0 1246 | 1247 | 1248 | 1249 | 1250 | 0 1251 | 1252 | 1253 | false 1254 | 1255 | 1256 | 1257 | 1258 | 1259 | 1260 | 1261 | 1262 | 1263 | 1264 | 1265 | 1266 | 1267 | 1268 | 0 1269 | 0 1270 | 988 1271 | 21 1272 | 1273 | 1274 | 1275 | 1276 | 1277 | Settings 1278 | 1279 | 1280 | 1281 | 1282 | Settings 1283 | 1284 | 1285 | 1286 | 1287 | 1288 | 1289 | --------------------------------------------------------------------------------