├── silero_vad.onnx
├── icons
├── bw_icon.png
├── splash.png
├── MacOS_icon.png
└── Windows-icon-open.ico
├── requirements.txt
├── lexisynth_types.py
├── .github
└── workflows
│ ├── check-format.yaml
│ ├── release.yaml
│ └── build.yaml
├── lexisynth.iss
├── entitlements.plist
├── audio_player.py
├── LICENSE
├── file_poller.py
├── README.md
├── ls_logging.py
├── models_info.py
├── storage.py
├── log_view.py
├── model_download_dialog.ui
├── log_view.ui
├── .gitignore
├── model_download_dialog.py
├── lexisynth.spec
├── text_to_speech.py
├── obs_websocket.py
├── audio_capture.py
├── transcription.py
├── settings_dialog.py
├── about.ui
├── translation.py
├── settings_dialog.ui
├── language_codes.py
├── main.py
└── mainwindow.ui
/silero_vad.onnx:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/royshil/lexisynth/main/silero_vad.onnx
--------------------------------------------------------------------------------
/icons/bw_icon.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/royshil/lexisynth/main/icons/bw_icon.png
--------------------------------------------------------------------------------
/icons/splash.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/royshil/lexisynth/main/icons/splash.png
--------------------------------------------------------------------------------
/icons/MacOS_icon.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/royshil/lexisynth/main/icons/MacOS_icon.png
--------------------------------------------------------------------------------
/icons/Windows-icon-open.ico:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/royshil/lexisynth/main/icons/Windows-icon-open.ico
--------------------------------------------------------------------------------
/requirements.txt:
--------------------------------------------------------------------------------
1 | ctranslate2
2 | faster-whisper
3 | obsws_python
4 | pillow
5 | platformdirs
6 | pyaudio
7 | pydub
8 | pyinstaller
9 | pyqt6
10 | python-dotenv
11 | sentencepiece
12 | sounddevice
13 | soundfile
14 |
--------------------------------------------------------------------------------
/lexisynth_types.py:
--------------------------------------------------------------------------------
class AudioSource:
    """Describes an audio input as a source kind plus a source name."""

    class SourceType:
        """Integer tags identifying the kind of audio source."""

        FILE, DEVICE = range(2)

    def __init__(self, sourceType, sourceName):
        """Store the source kind and name on the instance.

        Args:
            sourceType: One of the ``SourceType`` constants.
            sourceName: Name of the source (presumably a file path or a
                device name -- confirm against callers).
        """
        self.sourceName = sourceName
        self.sourceType = sourceType
9 |
--------------------------------------------------------------------------------
/.github/workflows/check-format.yaml:
--------------------------------------------------------------------------------
1 | name: Check Python Formatting
2 |
3 | on:
4 | workflow_call:
5 |
6 | jobs:
7 | check-format:
8 | runs-on: ubuntu-latest
9 |
10 | steps:
11 | - name: Checkout code
12 | uses: actions/checkout@v4
13 |
14 | - name: Set up Python
15 | uses: actions/setup-python@v5
16 | with:
17 | python-version: 3.x
18 |
19 | - name: Install dependencies
20 | run: pip install black
21 |
22 | - name: Check formatting
23 | run: black --check .
24 |
--------------------------------------------------------------------------------
/lexisynth.iss:
--------------------------------------------------------------------------------
1 | [Setup]
2 | AppName=LexiSynth
3 | AppVersion=0.0.1-beta1
4 | DefaultDirName={pf}\LexiSynth
5 | DefaultGroupName=LexiSynth
6 | OutputDir=.\dist
7 | OutputBaseFilename=lexisynth-setup
8 | Compression=lzma
9 | SolidCompression=yes
10 | ArchitecturesInstallIn64BitMode=x64
11 |
12 | [Files]
13 | Source: "dist\lexisynth\*"; DestDir: "{app}"; Flags: ignoreversion recursesubdirs createallsubdirs
14 |
15 | [Icons]
16 | Name: "{group}\LexiSynth"; Filename: "{app}\lexisynth.exe"
17 |
18 | [Run]
19 | Filename: "{app}\lexisynth.exe"; Description: "Launch LexiSynth"; Flags: nowait postinstall skipifsilent
20 |
--------------------------------------------------------------------------------
/entitlements.plist:
--------------------------------------------------------------------------------
1 |
2 |
3 |
4 |
5 |
6 | com.apple.security.cs.allow-jit
7 |
8 | com.apple.security.cs.allow-unsigned-executable-memory
9 |
10 | com.apple.security.cs.disable-library-validation
11 |
12 |
13 | com.apple.security.device.microphone
14 |
15 |
16 |
17 |
--------------------------------------------------------------------------------
/audio_player.py:
--------------------------------------------------------------------------------
1 | import queue
2 | import time
3 | from pydub import AudioSegment
4 | from pydub.playback import play
5 | import io
6 | from PyQt6.QtCore import QThread
7 |
8 |
class AudioBuffer:
    """A chunk of audio data tagged with the encoding of its payload."""

    class AudioBufferType:
        """Integer tags for the payload encoding."""

        RAW, MP3 = 0, 1

    def __init__(self, type, bytes):
        """Wrap an audio payload.

        Args:
            type: One of the ``AudioBufferType`` constants.
            bytes: The audio payload itself.
        """
        # The parameter names shadow builtins but are part of the public
        # signature, so they are kept as-is.
        self.type, self.bytes = type, bytes
        # Every instance also carries its own, initially empty, queue.
        self.buffer = queue.Queue()
18 |
19 |
class AudioPlayer(QThread):
    """Background thread that plays queued ``AudioBuffer`` items in order."""

    def __init__(self):
        super().__init__()
        self.queue = queue.Queue()
        # NOTE(review): this attribute shadows QThread.isRunning(); it is
        # kept under this name because callers may already read or set it.
        self.isRunning = False

    def add_to_queue(self, audio: AudioBuffer):
        """Schedule *audio* for playback."""
        self.queue.put(audio)

    def stop(self):
        """Ask the playback loop to exit after the current item."""
        self.isRunning = False

    def run(self):
        """Play queued buffers until ``stop()`` is called."""
        # The flag starts out False and nothing in this class ever raised
        # it, so the loop below used to exit immediately. Raise it here.
        self.isRunning = True
        while self.isRunning:
            try:
                # Block briefly instead of polling empty() and sleeping, so
                # stop() is still noticed within about 0.1 s.
                audio = self.queue.get(timeout=0.1)
            except queue.Empty:
                continue
            if audio.type == AudioBuffer.AudioBufferType.MP3:
                audio = AudioSegment.from_mp3(io.BytesIO(audio.bytes))
            # NOTE(review): buffers of any other type reach play() as the
            # unconverted AudioBuffer object -- confirm this is intended.
            play(audio)
41 |
--------------------------------------------------------------------------------
/LICENSE:
--------------------------------------------------------------------------------
1 | MIT License
2 |
3 | Copyright (c) 2024 OCC AI: Open tools for Content Creators and Streamers
4 |
5 | Permission is hereby granted, free of charge, to any person obtaining a copy
6 | of this software and associated documentation files (the "Software"), to deal
7 | in the Software without restriction, including without limitation the rights
8 | to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
9 | copies of the Software, and to permit persons to whom the Software is
10 | furnished to do so, subject to the following conditions:
11 |
12 | The above copyright notice and this permission notice shall be included in all
13 | copies or substantial portions of the Software.
14 |
15 | THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
16 | IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
17 | FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
18 | AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
19 | LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
20 | OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
21 | SOFTWARE.
22 |
--------------------------------------------------------------------------------
/file_poller.py:
--------------------------------------------------------------------------------
1 | import os
2 | import time
3 | from PyQt6.QtCore import QThread
4 | from queue import Queue
5 | from ls_logging import logger
6 |
7 |
class FilePoller(QThread):
    """Thread that re-reads a text file at a fixed cadence and queues new content."""

    def __init__(self, filename: str, cadence_ms: int, queue: Queue):
        """Set up the poller.

        Args:
            filename: Path of the file to poll.
            cadence_ms: Delay between two polls, in milliseconds.
            queue: Queue that receives the file content whenever it changes.
        """
        super().__init__()
        self.filename = filename
        self.cadence_seconds = cadence_ms / 1000.0  # Convert ms to seconds
        self.queue = queue
        self.stop_flag = False
        self.last_content = None

    def run(self):
        """Poll the file until ``stop()`` is called.

        Non-empty content that differs from the previously seen content is
        put on the queue. If the file does not exist when the thread starts,
        an error is logged and the thread exits.
        """
        if not os.path.exists(self.filename):
            logger.error(f"File {self.filename} does not exist")
            return
        while not self.stop_flag:
            content = self._read_content()
            if content and content != self.last_content:
                self.queue.put_nowait(content)
                self.last_content = content
            time.sleep(self.cadence_seconds)

    def _read_content(self):
        """Return the file's text, or None when it cannot be read right now."""
        try:
            with open(self.filename, "r") as file:
                return file.read()
        except FileNotFoundError:
            # The file can vanish between polls; just try again next time.
            return None
        except (OSError, UnicodeDecodeError) as err:
            # A failed read must not kill the polling thread.
            logger.warning(f"Could not read {self.filename}: {err}")
            return None

    def stop(self):
        """Ask the polling loop to exit after the current iteration."""
        self.stop_flag = True
33 |
--------------------------------------------------------------------------------
/README.md:
--------------------------------------------------------------------------------
1 | # LexiSynth
2 |
3 | LexiSynth is an AI speech analysis and synthesis tool built with Python. It leverages the power of PyInstaller, CTranslate2, and Faster-Whisper to provide a robust and efficient solution for speech processing tasks.
4 |
5 | ## Features
6 |
7 | - Speech Analysis: Analyze speech patterns and extract meaningful insights.
8 | - Speech Synthesis: Convert text into natural-sounding speech.
9 | - Built with Python: Leverage the power and simplicity of Python for customization and rapid development.
10 | - CTranslate2 and Faster-Whisper: Utilize these powerful libraries for efficient and high-quality speech processing.
11 |
12 | ## Build Instructions
13 |
14 | To build LexiSynth using PyInstaller, follow the steps below:
15 |
16 | 1. Ensure you have Python 3.11. You can check your Python version by running `python --version` in your terminal.
17 |
18 | 2. Install the required Python packages. In the root directory of the project, run:
19 |
20 | ```bash
21 | pip install -r requirements.txt
22 | ```
23 |
24 | 3. Build the executable using PyInstaller. In the root directory of the project, run:
25 |
26 | MacOSX:
27 | ```bash
28 | pyinstaller --clean --noconfirm lexisynth.spec -- --mac_osx
29 | ```
30 |
31 | Windows:
32 | ```bash
33 | pyinstaller --clean --noconfirm lexisynth.spec -- --win
34 | ```
35 |
36 | This will create a `dist` directory containing the executable file for LexiSynth.
37 |
38 | ## Usage
39 |
40 | To use LexiSynth, simply run the executable file created in the `dist` directory.
41 |
42 | ## License
43 |
44 | This project is released under the MIT license. See [LICENSE](LICENSE) for details.
45 |
--------------------------------------------------------------------------------
/ls_logging.py:
--------------------------------------------------------------------------------
1 | import logging
2 | import os
3 | from platformdirs import user_log_dir
4 | from datetime import datetime
5 | from dotenv import load_dotenv
6 |
# Resolve the per-user log directory and make sure it exists.
data_dir = user_log_dir("lexisynth")
# exist_ok avoids a crash if the directory appears between a check and the
# creation (e.g. two instances starting at the same time).
os.makedirs(data_dir, exist_ok=True)

# Every run logs to its own timestamped file inside that directory.
current_time = datetime.now().strftime("%Y-%m-%d_%H-%M-%S")
log_file_path = os.path.join(data_dir, f"lexisynth_{current_time}.log")

# Load the environment variables from the .env file next to this module
load_dotenv(os.path.abspath(os.path.join(os.path.dirname(__file__), ".env")))

# Create a logger
logger = logging.getLogger(__name__)
logger.setLevel(logging.DEBUG)

# Prune old log files: the timestamped names sort chronologically, so all
# but the last 10 existing files are removed. This runs before this run's
# file is created, so the new file is not counted.
log_files = sorted(
    f for f in os.listdir(data_dir) if f.startswith("lexisynth_") and f.endswith(".log")
)
_undeleted_logs = []
for f in log_files[:-10]:
    try:
        os.remove(os.path.join(data_dir, f))
    except OSError as err:
        # A file that cannot be removed must not abort application start-up;
        # remember it and report it once a handler is attached.
        _undeleted_logs.append((f, err))

# Create a file handler
file_handler = logging.FileHandler(log_file_path)
file_handler.setLevel(logging.DEBUG)

# Create a formatter
formatter = logging.Formatter("%(asctime)s - %(levelname)s - %(module)s - %(message)s")
file_handler.setFormatter(formatter)

# Add the file handler to the logger
logger.addHandler(file_handler)

# if the LEXISYNTH_DEBUG environment variable is set (e.g. via the .env
# file), also send log output to the console
if os.getenv("LEXISYNTH_DEBUG"):
    console_handler = logging.StreamHandler()
    console_handler.setLevel(logging.DEBUG)
    console_handler.setFormatter(formatter)
    logger.addHandler(console_handler)
    logger.debug("Debug mode enabled")

for _name, _err in _undeleted_logs:
    logger.warning("Could not remove old log file %s: %s", _name, _err)
51 |
--------------------------------------------------------------------------------
/models_info.py:
--------------------------------------------------------------------------------
1 | from os import path
2 | from platformdirs import user_data_dir
3 |
4 |
class ModelDownloadInfo:
    """Catalogue of downloadable models.

    Each entry is a dict with the download ``url``, the archive
    ``file_name``, the ``model_folder_name`` used on disk and a
    human-readable ``model_name``.
    """

    M2M_100 = dict(
        url="https://lexistream-downloads.s3.amazonaws.com/m2m_100_418M-ct2-int8.zip",
        file_name="m2m_100_418M-ct2-int8.zip",
        model_folder_name="M2M-100",
        model_name="M2M-100",
    )
    FASTER_WHISPER_TINY_CT2 = dict(
        url="https://lexistream-downloads.s3.amazonaws.com/faster-whisper-tiny-ct2-int8.zip",
        file_name="faster-whisper-tiny-ct2-int8.zip",
        model_folder_name="Faster-Whisper-Tiny-CT2",
        model_name="Faster-Whisper Tiny",
    )
    FASTER_WHISPER_BASE_CT2 = dict(
        url="https://lexistream-downloads.s3.amazonaws.com/faster-whisper-base-ct2-int8.zip",
        file_name="faster-whisper-base-ct2-int8.zip",
        model_folder_name="Faster-Whisper-Base-CT2",
        model_name="Faster-Whisper Base",
    )
    FASTER_WHISPER_SMALL_CT2 = dict(
        url="https://lexistream-downloads.s3.amazonaws.com/faster-whisper-small-ct2-int8.zip",
        file_name="faster-whisper-small-ct2-int8.zip",
        model_folder_name="Faster-Whisper-Small-CT2",
        model_name="Faster-Whisper Small",
    )
31 |
32 |
def checkForModelDownload(modelInfo):
    """Return True when the model's folder exists in the user data directory.

    Args:
        modelInfo: Model description dict; only ``model_folder_name`` is read.
    """
    base_dir = user_data_dir("lexisynth")
    if path.exists(base_dir):
        # The model counts as downloaded once its folder is present.
        return path.exists(path.join(base_dir, modelInfo["model_folder_name"]))
    return False
42 |
43 |
def getAbsoluteModelPath(modelInfo):
    """Return the model's folder path inside the user data directory.

    Args:
        modelInfo: Model description dict; only ``model_folder_name`` is read.
    """
    root = user_data_dir("lexisynth")
    folder_name = modelInfo["model_folder_name"]
    return path.join(root, folder_name)
48 |
--------------------------------------------------------------------------------
/storage.py:
--------------------------------------------------------------------------------
1 | import json
2 | import os
3 | from platformdirs import user_data_dir
4 |
5 |
def store_data(file_path, document_name, data):
    """Store *data* under *document_name* in a JSON file in the user data dir.

    If the document already exists and is a dict, it is updated with *data*;
    otherwise it is replaced by *data*. A missing, corrupt or non-object
    JSON file is treated as empty.

    Args:
        file_path: File name relative to the user data directory.
        document_name: Top-level key to store the data under.
        data: JSON-serialisable value to store.
    """
    data_dir = user_data_dir("lexisynth")
    # exist_ok avoids a crash if the directory is created concurrently.
    os.makedirs(data_dir, exist_ok=True)

    # prepend the user data directory
    file_path = os.path.join(data_dir, file_path)

    documents = {}
    if os.path.exists(file_path):
        with open(file_path, "r") as f:
            try:
                documents = json.load(f)
            except json.JSONDecodeError:
                documents = {}
    if not isinstance(documents, dict):
        # A non-object root (list, string, ...) cannot hold named documents
        # and would make the assignment below raise; start over instead.
        documents = {}

    if isinstance(documents.get(document_name), dict):
        documents[document_name].update(data)
    else:
        documents[document_name] = data

    with open(file_path, "w") as f:
        json.dump(documents, f, indent=2)
32 |
33 |
def remove_data(file_path, document_name):
    """Remove *document_name* from a JSON file in the user data directory.

    Does nothing when the file is missing, is not valid JSON, does not hold
    a JSON object, or does not contain the document.

    Args:
        file_path: File name relative to the user data directory.
        document_name: Top-level key to delete.
    """
    # prepend the user data directory
    file_path = os.path.join(user_data_dir("lexisynth"), file_path)

    if not os.path.exists(file_path):
        return

    with open(file_path, "r") as f:
        try:
            documents = json.load(f)
        except json.JSONDecodeError:
            # Match store_data/fetch_data: a corrupt file holds no documents.
            return

    if not isinstance(documents, dict) or document_name not in documents:
        return

    del documents[document_name]

    # The file is only rewritten when something was actually removed.
    with open(file_path, "w") as f:
        json.dump(documents, f, indent=2)
50 |
51 |
def fetch_data(file_path, document_name, default=None):
    """Read *document_name* from a JSON file in the user data directory.

    Args:
        file_path: File name relative to the user data directory.
        document_name: Top-level key to look up.
        default: Value returned when the file is missing, is not valid JSON,
            or does not contain the document.

    Returns:
        The stored document, or *default*.
    """
    full_path = os.path.join(user_data_dir("lexisynth"), file_path)

    if os.path.exists(full_path):
        with open(full_path, "r") as handle:
            try:
                documents = json.load(handle)
            except json.JSONDecodeError:
                return default
        return documents[document_name] if document_name in documents else default

    return default
70 |
--------------------------------------------------------------------------------
/log_view.py:
--------------------------------------------------------------------------------
1 | from os import path
2 | import platform
3 | from PyQt6.QtWidgets import QDialog
4 | from PyQt6.QtCore import QTimer
5 | from PyQt6.uic import loadUi
6 | from ls_logging import log_file_path
7 |
8 |
class LogViewerDialog(QDialog):
    """Dialog that shows the tail of the current log file, refreshed every second."""

    def __init__(self):
        super().__init__()
        loadUi(path.abspath(path.join(path.dirname(__file__), "log_view.ui")), self)
        # Text currently shown; used to skip redundant widget updates.
        self.current_log_data = ""
        self.timer = QTimer()
        self.timer.timeout.connect(self.update_ui)
        self.timer.start(1000)  # Update UI every 1 second
        self.pushButton_openlogfolder.clicked.connect(self.open_log_folder)

    def open_log_folder(self):
        """Open the folder containing the log file in the platform's file browser."""
        if not path.exists(log_file_path):
            return
        log_folder = path.dirname(log_file_path)
        os_name = platform.system()

        if os_name == "Windows":
            # startfile only exists on Windows, hence the local import.
            from os import startfile

            startfile(log_folder)
        elif os_name == "Linux":
            import subprocess

            subprocess.Popen(["xdg-open", log_folder])
        elif os_name == "Darwin":
            import subprocess

            subprocess.Popen(["open", log_folder])

    def update_ui(self):
        """Refresh the text box with the last 1000 lines of the log file."""
        from collections import deque

        try:
            with open(log_file_path, "r") as log_file:
                # A bounded deque keeps only the tail while streaming the
                # file, instead of materialising every line first.
                log_data = "".join(deque(log_file, maxlen=1000))
        except OSError:
            # The file may be missing or temporarily unreadable; keep the
            # current view and try again on the next timer tick.
            return
        if log_data == self.current_log_data:
            return
        self.current_log_data = log_data
        # Update the UI with the log data
        self.textEdit_log.setPlainText(log_data)
        if self.checkBox_autoScroll.isChecked():
            # scroll to the bottom
            scroll_bar = self.textEdit_log.verticalScrollBar()
            scroll_bar.setValue(scroll_bar.maximum())
            self.scrollArea.ensureWidgetVisible(self.textEdit_log)
54 |
--------------------------------------------------------------------------------
/model_download_dialog.ui:
--------------------------------------------------------------------------------
1 |
2 |
3 | Dialog
4 |
5 |
6 |
7 | 0
8 | 0
9 | 220
10 | 128
11 |
12 |
13 |
14 | Model Download
15 |
16 |
17 | -
18 |
19 |
20 | Downloading model. Please wait.
21 |
22 |
23 |
24 | -
25 |
26 |
27 | Progress
28 |
29 |
30 | Qt::AlignCenter
31 |
32 |
33 |
34 | -
35 |
36 |
37 | 0
38 |
39 |
40 |
41 | -
42 |
43 |
44 | true
45 |
46 |
47 | Qt::Horizontal
48 |
49 |
50 | QDialogButtonBox::Cancel
51 |
52 |
53 |
54 |
55 |
56 |
57 |
58 |
59 | buttonBox
60 | accepted()
61 | Dialog
62 | accept()
63 |
64 |
65 | 248
66 | 254
67 |
68 |
69 | 157
70 | 274
71 |
72 |
73 |
74 |
75 | buttonBox
76 | rejected()
77 | Dialog
78 | reject()
79 |
80 |
81 | 316
82 | 260
83 |
84 |
85 | 286
86 | 274
87 |
88 |
89 |
90 |
91 |
92 |
--------------------------------------------------------------------------------
/log_view.ui:
--------------------------------------------------------------------------------
1 |
2 |
3 | Dialog
4 |
5 |
6 |
7 | 0
8 | 0
9 | 553
10 | 300
11 |
12 |
13 |
14 | Dialog
15 |
16 |
17 | -
18 |
19 |
20 | 0
21 |
22 |
23 | 0
24 |
25 |
-
26 |
27 |
28 | Auto Scroll
29 |
30 |
31 | true
32 |
33 |
34 |
35 | -
36 |
37 |
38 | Open Log Folder
39 |
40 |
41 |
42 | -
43 |
44 |
45 | Qt::Horizontal
46 |
47 |
48 | QDialogButtonBox::Close
49 |
50 |
51 |
52 |
53 |
54 | -
55 |
56 |
57 | true
58 |
59 |
60 |
61 |
62 | 0
63 | 0
64 | 533
65 | 250
66 |
67 |
68 |
69 |
70 | 0
71 |
72 |
73 | 0
74 |
75 |
76 | 0
77 |
78 |
79 | 0
80 |
81 |
-
82 |
83 |
84 |
85 |
86 |
87 |
88 |
89 |
90 |
91 |
92 |
93 | buttonBox
94 | accepted()
95 | Dialog
96 | accept()
97 |
98 |
99 | 248
100 | 254
101 |
102 |
103 | 157
104 | 274
105 |
106 |
107 |
108 |
109 | buttonBox
110 | rejected()
111 | Dialog
112 | reject()
113 |
114 |
115 | 316
116 | 260
117 |
118 |
119 | 286
120 | 274
121 |
122 |
123 |
124 |
125 |
126 |
--------------------------------------------------------------------------------
/.gitignore:
--------------------------------------------------------------------------------
1 | # Byte-compiled / optimized / DLL files
2 | __pycache__/
3 | *.py[cod]
4 | *$py.class
5 |
6 | # C extensions
7 | *.so
8 |
9 | # Distribution / packaging
10 | .Python
11 | build/
12 | develop-eggs/
13 | dist/
14 | downloads/
15 | eggs/
16 | .eggs/
17 | lib/
18 | lib64/
19 | parts/
20 | sdist/
21 | var/
22 | wheels/
23 | share/python-wheels/
24 | *.egg-info/
25 | .installed.cfg
26 | *.egg
27 | MANIFEST
28 | output/
29 |
30 | # PyInstaller
31 | # Usually these files are written by a python script from a template
32 | # before PyInstaller builds the exe, so as to inject date/other infos into it.
33 | *.manifest
34 | *.spec
35 |
36 | # Installer logs
37 | pip-log.txt
38 | pip-delete-this-directory.txt
39 |
40 | # Unit test / coverage reports
41 | htmlcov/
42 | .tox/
43 | .nox/
44 | .coverage
45 | .coverage.*
46 | .cache
47 | nosetests.xml
48 | coverage.xml
49 | *.cover
50 | *.py,cover
51 | .hypothesis/
52 | .pytest_cache/
53 | cover/
54 |
55 | # Translations
56 | *.mo
57 | *.pot
58 |
59 | # Django stuff:
60 | *.log
61 | local_settings.py
62 | db.sqlite3
63 | db.sqlite3-journal
64 |
65 | # Flask stuff:
66 | instance/
67 | .webassets-cache
68 |
69 | # Scrapy stuff:
70 | .scrapy
71 |
72 | # Sphinx documentation
73 | docs/_build/
74 |
75 | # PyBuilder
76 | .pybuilder/
77 | target/
78 |
79 | # Jupyter Notebook
80 | .ipynb_checkpoints
81 |
82 | # IPython
83 | profile_default/
84 | ipython_config.py
85 |
86 | # pyenv
87 | # For a library or package, you might want to ignore these files since the code is
88 | # intended to run in multiple environments; otherwise, check them in:
89 | # .python-version
90 |
91 | # pipenv
92 | # According to pypa/pipenv#598, it is recommended to include Pipfile.lock in version control.
93 | # However, in case of collaboration, if having platform-specific dependencies or dependencies
94 | # having no cross-platform support, pipenv may install dependencies that don't work, or not
95 | # install all needed dependencies.
96 | #Pipfile.lock
97 |
98 | # poetry
99 | # Similar to Pipfile.lock, it is generally recommended to include poetry.lock in version control.
100 | # This is especially recommended for binary packages to ensure reproducibility, and is more
101 | # commonly ignored for libraries.
102 | # https://python-poetry.org/docs/basic-usage/#commit-your-poetrylock-file-to-version-control
103 | #poetry.lock
104 |
105 | # pdm
106 | # Similar to Pipfile.lock, it is generally recommended to include pdm.lock in version control.
107 | #pdm.lock
108 | # pdm stores project-wide configurations in .pdm.toml, but it is recommended to not include it
109 | # in version control.
110 | # https://pdm.fming.dev/#use-with-ide
111 | .pdm.toml
112 |
113 | # PEP 582; used by e.g. github.com/David-OConnor/pyflow and github.com/pdm-project/pdm
114 | __pypackages__/
115 |
116 | # Celery stuff
117 | celerybeat-schedule
118 | celerybeat.pid
119 |
120 | # SageMath parsed files
121 | *.sage.py
122 |
123 | # Environments
124 | .env
125 | .venv
126 | env/
127 | venv/
128 | ENV/
129 | env.bak/
130 | venv.bak/
131 |
132 | # Spyder project settings
133 | .spyderproject
134 | .spyproject
135 |
136 | # Rope project settings
137 | .ropeproject
138 |
139 | # mkdocs documentation
140 | /site
141 |
142 | # mypy
143 | .mypy_cache/
144 | .dmypy.json
145 | dmypy.json
146 |
147 | # Pyre type checker
148 | .pyre/
149 |
150 | # pytype static type analyzer
151 | .pytype/
152 |
153 | # Cython debug symbols
154 | cython_debug/
155 |
156 | # PyCharm
157 | # JetBrains specific template is maintained in a separate JetBrains.gitignore that can
158 | # be found at https://github.com/github/gitignore/blob/main/Global/JetBrains.gitignore
159 | # and can be added to the global gitignore or merged into this file. For a more nuclear
160 | # option (not recommended) you can uncomment the following to ignore the entire idea folder.
161 | #.idea/
162 |
163 | # macOS
164 | *.DS_Store
165 |
--------------------------------------------------------------------------------
/model_download_dialog.py:
--------------------------------------------------------------------------------
1 | from PyQt6 import uic
2 | from PyQt6.QtWidgets import QDialog
3 | import requests
4 | import os
5 | from platformdirs import user_data_dir
6 | import zipfile
7 | from ls_logging import logger
8 | from models_info import checkForModelDownload
9 | from PyQt6.QtCore import pyqtSignal
10 | from PyQt6.QtCore import QThread
11 | from os import path
12 |
13 |
class ModelDownloadDialog(QDialog):
    """Dialog that downloads a model on a worker thread and shows its progress."""

    def __init__(self, modelInfo, parent=None):
        """Load the UI and start downloading immediately.

        Args:
            modelInfo: Model description dict (``model_name`` is shown in the
                dialog; the whole dict is handed to the download thread).
            parent: Optional parent widget.
        """
        super(ModelDownloadDialog, self).__init__(parent)
        uic.loadUi(
            path.abspath(path.join(path.dirname(__file__), "model_download_dialog.ui")),
            self,
        )
        self.modelInfo = modelInfo
        self.downloadThread = None
        self.startDownload()

    def startDownload(self):
        """Create the download thread, wire up its signals and start it."""
        self.label_modelDownloading.setText(
            f"Downloading {self.modelInfo['model_name']}"
        )
        # start the download on a separate QThread
        self.downloadThread = ModelDownloadThread(self.modelInfo)
        self.downloadThread.finished.connect(self.finished)
        self.downloadThread.progressSignal.connect(self.progress)
        self.downloadThread.start()

    # NOTE(review): this method name shadows the QDialog.finished signal;
    # kept because renaming it would change the class interface.
    def finished(self):
        """Slot for the thread's ``finished`` signal: close the dialog."""
        self.downloadThread = None
        self.accept()

    def progress(self, progress: int, message: str):
        """Slot for ``progressSignal``: update the progress bar and label."""
        self.progressBar.setValue(progress)
        self.label_progress.setText(message)

    def _stopDownload(self):
        """Ask a still-running download thread to stop and wait for it."""
        thread = self.downloadThread
        if thread is None:
            return
        self.downloadThread = None
        thread.running = False
        thread.wait()

    def reject(self):
        """Stop the download when the dialog is cancelled.

        The Cancel button is wired to ``reject()``; without this override
        only ``closeEvent`` stopped the worker thread.
        """
        self._stopDownload()
        super(ModelDownloadDialog, self).reject()

    def closeEvent(self, event):
        """Stop the download thread, if any, before the dialog closes."""
        self._stopDownload()
        super(ModelDownloadDialog, self).closeEvent(event)
54 |
55 |
class ModelDownloadThread(QThread):
    """Worker thread that downloads a model archive and unpacks it.

    Progress is reported through ``progressSignal(percent, message)``.
    Setting ``running`` to False while the download is in progress cancels it.
    """

    # progress and message signal
    progressSignal = pyqtSignal(int, str)

    def __init__(self, modelInfo):
        """Remember the model description (``url`` and ``model_folder_name`` are read)."""
        super(ModelDownloadThread, self).__init__()
        self.modelInfo = modelInfo
        self.running = False

    def run(self):
        """Download the archive into the user data dir, unzip it, delete the zip."""
        # Raise the flag before any slow network work so that a cancel
        # request made while connecting is not overwritten later on.
        self.running = True

        url = self.modelInfo["url"]
        # put file in user data folder for lexisynth
        data_dir = user_data_dir("lexisynth")
        os.makedirs(data_dir, exist_ok=True)
        file_name = os.path.join(data_dir, url.split("/")[-1])
        logger.debug(f"Downloading model to {file_name}")

        if checkForModelDownload(self.modelInfo):
            # model folder already exists, no need to download.
            # The signal takes two arguments (int, str), not a single tuple.
            self.progressSignal.emit(100, "Model already downloaded")
            return

        try:
            # remove a .zip leftover from a previous, interrupted download
            if os.path.exists(file_name):
                os.remove(file_name)

            if not self._download(url, file_name):
                # Cancelled: do not leave a partial archive behind.
                os.remove(file_name)
                return

            self.progressSignal.emit(100, "Model downloaded successfully. Unzipping...")
            with zipfile.ZipFile(file_name, "r") as zip_ref:
                zip_ref.extractall(
                    os.path.join(data_dir, self.modelInfo["model_folder_name"])
                )
            # remove the zip file
            os.remove(file_name)
        except (requests.RequestException, OSError, zipfile.BadZipFile):
            # Report the failure instead of letting the exception escape run().
            logger.exception("Model download failed")
            self.progressSignal.emit(0, "Model download failed")
            return

        self.progressSignal.emit(100, "Model unzipped successfully")

    def _download(self, url, file_name):
        """Stream *url* into *file_name*; return False if cancelled part-way."""
        # The context manager releases the connection on every exit path.
        with requests.get(url, stream=True) as r:
            r.raise_for_status()
            total_size = int(r.headers.get("content-length", 0))

            with open(file_name, "wb") as f:
                for chunk in r.iter_content(chunk_size=8192):
                    if not self.running:
                        return False
                    if not chunk:
                        continue
                    f.write(chunk)
                    if total_size > 0:
                        percent = min(100.0, 100 * f.tell() / total_size)
                        self.progressSignal.emit(
                            int(percent), "Progress {0:.2f}%".format(percent)
                        )
                    else:
                        # No content-length header: a percentage cannot be
                        # computed (and would divide by zero), so report bytes.
                        self.progressSignal.emit(
                            0, "Downloaded {0} bytes".format(f.tell())
                        )
        return True
115 |
--------------------------------------------------------------------------------
/.github/workflows/release.yaml:
--------------------------------------------------------------------------------
1 | # only run this workflow on the main branch and when a tag is pushed
2 | # this workflow will create a release draft and upload the build artifacts
3 | # to the release draft
4 | name: Release
5 | run-name: ${{ github.ref_name }} release run 🚀
6 | on:
7 | push:
8 | branches:
9 | - main
10 | tags:
11 | - '*'
12 | permissions:
13 | contents: write
14 | concurrency:
15 | group: '${{ github.workflow }} @ ${{ github.ref }}'
16 | cancel-in-progress: ${{ github.ref_type == 'tag' }}
17 | jobs:
18 | build-project:
19 | name: Build Project 🧱
20 | uses: ./.github/workflows/build.yaml
21 | secrets: inherit
22 | permissions:
23 | contents: read
24 |
25 | create-release:
26 | name: Create Release 🛫
27 | if: github.ref_type == 'tag'
28 | runs-on: ubuntu-22.04
29 | needs: build-project
30 | defaults:
31 | run:
32 | shell: bash
33 | steps:
34 | - name: Check Release Tag ☑️
35 | id: check
36 | run: |
37 | : Check Release Tag ☑️
38 | if [[ "${RUNNER_DEBUG}" ]]; then set -x; fi
39 | shopt -s extglob
40 |
41 | case "${GITHUB_REF_NAME}" in
42 | +([0-9]).+([0-9]).+([0-9]) )
43 | echo 'validTag=true' >> $GITHUB_OUTPUT
44 | echo 'prerelease=false' >> $GITHUB_OUTPUT
45 | echo "version=${GITHUB_REF_NAME}" >> $GITHUB_OUTPUT
46 | ;;
47 | +([0-9]).+([0-9]).+([0-9])-@(beta|rc)*([0-9]) )
48 | echo 'validTag=true' >> $GITHUB_OUTPUT
49 | echo 'prerelease=true' >> $GITHUB_OUTPUT
50 | echo "version=${GITHUB_REF_NAME}" >> $GITHUB_OUTPUT
51 | ;;
52 | *) echo 'validTag=false' >> $GITHUB_OUTPUT ;;
53 | esac
54 |
55 | - name: Download Build Artifacts 📥
56 | uses: actions/download-artifact@v4
57 | if: fromJSON(steps.check.outputs.validTag)
58 | id: download
59 |
60 | - name: Print downloaded artifacts 📥
61 | if: fromJSON(steps.check.outputs.validTag)
62 | run: |
63 | : Print downloaded artifacts 📥
64 | if [[ "${RUNNER_DEBUG}" ]]; then set -x; fi
65 | shopt -s extglob
66 |
67 | ls -laR ${{ steps.download.outputs.artifacts }}
68 |
69 | - name: Rename Files 🏷️
70 | if: fromJSON(steps.check.outputs.validTag)
71 | run: |
72 | : Rename Files 🏷️
73 | if [[ "${RUNNER_DEBUG}" ]]; then set -x; fi
74 | shopt -s extglob
75 | shopt -s nullglob
76 |
77 | root_dir="$(pwd)"
78 | commit_hash="${GITHUB_SHA:0:9}"
79 |
80 | variants=(
81 | 'linux'
82 | 'macos-x86'
83 | 'windows'
84 | )
85 |
86 | mkdir -p "${root_dir}/uploads"
87 |
88 | for variant in "${variants[@]}"; do
89 |
90 | candidates=(*-${variant}/@(*))
91 |
92 | for candidate in "${candidates[@]}"; do
93 | cp "${candidate}" "${root_dir}/uploads/lexisynth-${variant}-${GITHUB_REF_NAME}-${commit_hash}.${candidate##*.}"
94 | done
95 | done
96 |
97 | - name: Create Latest Release Info File
98 | if: fromJSON(steps.check.outputs.validTag)
99 | run: |
100 | echo "LATEST_RELEASE_TAG=${GITHUB_REF_NAME}" > release_info.env
101 | echo "LATEST_COMMIT_HASH=${GITHUB_SHA}" >> release_info.env
102 | echo "LATEST_RELEASE_DATE=$(date -u +"%Y-%m-%dT%H:%M:%SZ")" >> release_info.env
103 | cp release_info.env "$(pwd)/uploads/lexisynth_release_info.env"
104 |
105 | - name: Generate Checksums 🪪
106 | if: fromJSON(steps.check.outputs.validTag)
107 | run: |
108 | : Generate Checksums 🪪
109 | if [[ "${RUNNER_DEBUG}" ]]; then set -x; fi
110 | shopt -s extglob
111 |
112 | echo "### Checksums" > ${{ github.workspace }}/CHECKSUMS.txt
113 | # find the files from the above step and generate checksums
114 | for file in ${{ github.workspace }}/uploads/lexisynth-*; do
115 | echo " ${file##*/}: $(sha256sum "${file}" | cut -d " " -f 1)" >> ${{ github.workspace }}/CHECKSUMS.txt
116 | done
117 |
118 | - name: Create Release 🛫
119 | if: fromJSON(steps.check.outputs.validTag)
120 | id: create_release
121 | uses: softprops/action-gh-release@v1
122 | with:
123 | draft: true
124 | body_path: ${{ github.workspace }}/CHECKSUMS.txt
125 | files: |
126 | ${{ github.workspace }}/uploads/lexisynth-*.dmg
127 | ${{ github.workspace }}/uploads/lexisynth-*.tar
128 | ${{ github.workspace }}/uploads/lexisynth-*.zip
129 |
--------------------------------------------------------------------------------
/lexisynth.spec:
--------------------------------------------------------------------------------
1 | # -*- mode: python ; coding: utf-8 -*-
2 | import os
3 |
4 | # parse command line arguments
5 | import argparse
6 |
7 | parser = argparse.ArgumentParser()
8 | parser.add_argument('--mac_osx', action='store_true')
9 | parser.add_argument('--win', action='store_true')
10 |
11 | args = parser.parse_args()
12 |
# Collect the application's scripts and the data files that must ship with it.
a = Analysis(
    [
        'audio_capture.py',
        'audio_player.py',
        'file_poller.py',
        'language_codes.py',
        'lexisynth_types.py',
        'log_view.py',
        'ls_logging.py',
        'main.py',
        'model_download_dialog.py',
        'models_info.py',
        'obs_websocket.py',
        'settings_dialog.py',
        'storage.py',
        'transcription.py',
        'translation.py',
    ],
    pathex=[],
    binaries=[],
    # Each entry is (source path, destination directory inside the bundle).
    datas=[
        ('about.ui', '.'),
        ('log_view.ui', '.'),
        ('mainwindow.ui', '.'),
        ('model_download_dialog.ui', '.'),
        ('settings_dialog.ui', '.'),
        ('.env', '.'),
        ('icons/splash.png', './icons'),
        ('icons/MacOS_icon.png', './icons'),
        # Was '.icons', which put this icon in a different directory from
        # the other two icon files.
        ('icons/Windows-icon-open.ico', './icons'),
        ('silero_vad.onnx', './faster_whisper/assets'),
    ],
    hiddenimports=[],
    hookspath=[],
    hooksconfig={},
    runtime_hooks=[],
    excludes=["botocore", "transformers", "IPython", "tensorflow", "matplotlib", "pandas", "sklearn", "skimage", "scipy", "torch", "torchvision", "torchaudio", "nltk", "cv2"],
    noarchive=False,
)
52 |
# Drop collected binaries whose name starts with one of these package names.
excluded_binary_prefixes = (
    "IPython",
    "tensorflow",
    "matplotlib",
    "pandas",
    "sklearn",
    "skimage",
    "scipy",
    "torch",
    "torchvision",
    "torchaudio",
    "nltk",
    "cv2",
)
a.binaries = [
    binary
    for binary in a.binaries
    if not binary[0].startswith(excluded_binary_prefixes)
]

pyz = PYZ(a.pure)

# Keyword arguments passed to EXE on every platform.
common_exe_options = dict(
    name='lexisynth',
    debug=False,
    bootloader_ignore_signals=False,
    strip=False,
    upx=True,
    console=False,
    disable_windowed_traceback=False,
    argv_emulation=False,
    target_arch=None,
)

# Splash-screen settings used by the Windows and Linux builds.
splash_options = dict(
    binaries=a.binaries,
    datas=a.datas,
    text_pos=(10, 20),
    text_size=10,
    text_color='black',
)

if args.win:
    # Windows: EXE plus a COLLECT step (exclude_binaries=True).
    splash = Splash('icons/splash.png', **splash_options)
    exe = EXE(
        pyz,
        a.scripts,
        splash,
        icon='icons/Windows-icon-open.ico',
        exclude_binaries=True,
        upx_exclude=[],
        **common_exe_options,
    )
    coll = COLLECT(
        exe,
        a.binaries,
        a.zipfiles,
        a.datas,
        splash.binaries,
        strip=False,
        upx=True,
        upx_exclude=[],
        name='lexisynth',
    )
elif args.mac_osx:
    # macOS: EXE + COLLECT wrapped in an .app BUNDLE; no splash screen.
    exe = EXE(
        pyz,
        a.scripts,
        [],
        exclude_binaries=True,
        upx_exclude=[],
        runtime_tmpdir=None,
        codesign_identity=os.environ.get('APPLE_APP_DEVELOPER_ID', ''),
        entitlements_file='./entitlements.plist',
        **common_exe_options,
    )
    coll = COLLECT(
        exe,
        a.binaries,
        a.zipfiles,
        a.datas,
        strip=False,
        upx=True,
        upx_exclude=[],
        name='lexisynth',
    )
    app = BUNDLE(
        exe,
        coll,
        name='lexisynth.app',
        icon='icons/MacOS_icon.png',
        bundle_identifier='com.royshilkrot.lexisynth',
        version='0.0.1',
        info_plist={
            'NSPrincipalClass': 'NSApplication',
            'NSAppleScriptEnabled': False,
            'NSMicrophoneUsageDescription': 'Getting audio from the microphone to perform speech-to-text',
        },
    )
else:
    # Linux: binaries and datas are passed straight to EXE (no COLLECT step).
    splash = Splash('icons/splash.png', **splash_options)
    exe = EXE(
        pyz,
        a.binaries,
        a.datas,
        a.scripts,
        splash,
        splash.binaries,
        icon='icons/Windows-icon-open.ico',
        **common_exe_options,
    )
159 |
--------------------------------------------------------------------------------
/text_to_speech.py:
--------------------------------------------------------------------------------
1 | import time
2 | import queue
3 | from PyQt6.QtCore import QThread, pyqtSignal, QTimer
4 | import requests
5 | from ls_logging import logger
6 |
7 | from storage import fetch_data
8 |
9 |
class TextToSpeechThread(QThread):
    """Worker thread that turns queued text into speech audio.

    Text is submitted with :meth:`add_text`. For every item the thread calls
    the configured speech engine and emits the returned audio bytes through
    ``speech_available``. While a request is in flight a timer emits an
    estimated completion percentage through ``progress_available``; the
    estimate is based on a moving average of previous request durations.
    """

    speech_available = pyqtSignal(object)
    progress_available = pyqtSignal(int)
    start_progress = pyqtSignal()
    stop_progress = pyqtSignal()

    # Seconds passed as the ``timeout`` of each HTTP request so that a stalled
    # connection cannot block the worker (and therefore stop()) forever.
    REQUEST_TIMEOUT_S = 60

    def __init__(self, parent=None):
        super(TextToSpeechThread, self).__init__(parent)
        self.input_queue = queue.Queue()
        self.running = False
        self.openai_api_key = None
        self.elevenlabs_api_key = None
        self.last_run_time_ms = 1000
        self.run_time_avg_moving_window = 500
        self.current_run_time_start = time.time()
        # The timer is started/stopped through signals rather than directly
        # from run().
        self.progressTimer = QTimer()
        self.progressTimer.timeout.connect(self.progressCallback)
        self.start_progress.connect(self.progressTimer.start)
        self.stop_progress.connect(self.progressTimer.stop)
        self.speech_engine = "OpenAI"

    def add_text(self, text):
        """Queue ``text`` for synthesis."""
        self.input_queue.put(text)

    def stop(self):
        """Ask the worker loop to exit after the current item."""
        self.running = False

    def run(self):
        """Synthesize queued text until :meth:`stop` is called."""
        # Previously the loop was ``while True`` and ``running`` was never
        # consulted, so stop() could not end the thread.
        self.running = True
        while self.running:
            try:
                # Wake up periodically so a stop() request is noticed.
                text = self.input_queue.get(timeout=0.1)
            except queue.Empty:
                continue
            if text is None:
                continue

            self.current_run_time_start = time.time()
            self.start_progress.emit()

            # Time the synthesis operation
            start_time = time.time()
            try:
                if self.speech_engine == "OpenAI":
                    self.synthesize_speech_openai(text)
                else:
                    logger.error(f"Unknown speech engine: {self.speech_engine}")
                    self.running = False
                    return
            finally:
                # Always stop and reset the progress display, including on
                # the unknown-engine exit above.
                self.stop_progress.emit()
                self.progress_available.emit(0)
            end_time = time.time()

            # prevent 0 time
            self.last_run_time_ms = max(100, (end_time - start_time) * 1000)
            self.run_time_avg_moving_window = (
                self.run_time_avg_moving_window * 0.9
            ) + (self.last_run_time_ms * 0.1)

    def _request_speech(self, provider, url, api_key, payload):
        """POST ``payload`` to ``url`` and emit the response body as audio.

        Returns ``None`` on success, or an error string when the request
        fails or the service answers with a non-200 status.
        """
        try:
            response = requests.post(
                url,
                headers={
                    "Authorization": f"Bearer {api_key}",
                    "Content-Type": "application/json",
                },
                json=payload,
                timeout=self.REQUEST_TIMEOUT_S,
            )
        except requests.RequestException as e:
            # A network failure must not kill the worker thread.
            logger.error(f"{provider} API request failed: {e}")
            return f"Error: {provider} API request failed"
        if response.status_code != 200:
            logger.error(f"{provider} API request failed: {response.status_code}")
            return f"Error: {provider} API request failed"
        # the response should be a .mp3 file
        self.speech_available.emit(response.content)
        return None

    def synthesize_speech_openai(self, text):
        """Synthesize ``text`` with the OpenAI speech endpoint.

        The API key is read from the stored settings on first use. Returns
        ``None`` on success or when no key is configured, and an error string
        when the request fails.
        """
        if not self.openai_api_key:
            self.openai_api_key = fetch_data("settings.json", "settings", {}).get(
                "openai_api_key"
            )
        if not self.openai_api_key:
            logger.error("OpenAI API key not found")
            return
        return self._request_speech(
            "OpenAI",
            "https://api.openai.com/v1/audio/speech",
            self.openai_api_key,
            {"model": "tts-1", "input": text, "voice": "alloy"},
        )

    def synthesize_speech_elevenlabs(self, text):
        """Synthesize ``text`` with the ElevenLabs endpoint.

        Same contract as :meth:`synthesize_speech_openai`.
        NOTE(review): run() never dispatches to this method and the endpoint
        URL has not been verified here - confirm before enabling it.
        """
        if not self.elevenlabs_api_key:
            self.elevenlabs_api_key = fetch_data("settings.json", "settings", {}).get(
                "elevenlabs_api_key"
            )
        if not self.elevenlabs_api_key:
            logger.error("Elevenlabs API key not found")
            return
        return self._request_speech(
            "Elevenlabs",
            "https://api.eleven-labs.com/text-to-speech/v1/synthesize",
            self.elevenlabs_api_key,
            {"text": text},
        )

    def progressCallback(self):
        """Emit the estimated progress (0-100) of the current request."""
        # calculate how much time in ms passed since the start of the current request
        current_run_time_elapsed = (time.time() - self.current_run_time_start) * 1000
        # express it as a percentage of the average request duration, capped at 100
        progress = min(
            100, int(current_run_time_elapsed / self.run_time_avg_moving_window * 100)
        )
        self.progress_available.emit(progress)
135 |
--------------------------------------------------------------------------------
/obs_websocket.py:
--------------------------------------------------------------------------------
1 | import json
2 | from os import path
3 | import time
4 | import obsws_python as obs
5 | from ls_logging import logger
6 | from queue import Queue
7 | from PyQt6.QtCore import QThread
8 |
9 | from storage import fetch_data
10 |
11 |
def open_obs_websocket(server_info):
    """Open a request client connection to OBS.

    ``server_info`` is a mapping with the keys ``"ip"``, ``"port"`` and
    ``"password"``. The connection is checked by requesting the OBS version.

    Returns the connected client, or ``None`` if connecting or the version
    request raised.
    """
    try:
        cl = obs.ReqClient(
            host=server_info["ip"],
            port=server_info["port"],
            password=server_info["password"],
            timeout=10,
        )
        resp = cl.get_version()
        logger.info(f"OBS Version: {resp.obs_version}")
        return cl
    except Exception as e:
        # logger.warn is a deprecated alias; use logger.warning.
        logger.warning(f"Error: {e}")
        return None
27 |
28 |
def open_obs_websocket_from_settings():
    """Connect to OBS using the host, port and password stored in settings.

    Missing values fall back to ``localhost``, ``"4455"`` and an empty
    password. Returns whatever ``open_obs_websocket`` returns.
    """
    stored = fetch_data("settings.json", "settings", {})
    server_info = {
        "ip": stored.get("obs_host", "localhost"),
        "port": stored.get("obs_port", "4455"),
        "password": stored.get("obs_password", ""),
    }
    return open_obs_websocket(server_info)
38 |
39 |
def disconnect_obs_websocket(obs_client: obs.ReqClient):
    """Close the websocket underlying ``obs_client``.

    Any exception raised while closing is logged as a warning and swallowed.
    """
    try:
        obs_client.base_client.ws.close()
    except Exception as e:
        # logger.warn is a deprecated alias; use logger.warning.
        logger.warning(f"Error: {e}")
46 |
47 |
def get_all_sources(obs_client: obs.ReqClient):
    """Return the scene items of every scene known to OBS.

    Each returned item gets an extra ``"sceneName"`` key holding the name of
    the scene it was listed under. Returns ``None`` (after logging the
    exception) if any OBS request fails.
    """
    try:
        collected = []
        for scene in obs_client.get_scene_list().scenes:
            scene_items = obs_client.get_scene_item_list(scene["sceneName"]).scene_items
            for item in scene_items:
                # remember which scene this item came from
                item["sceneName"] = scene["sceneName"]
                collected.append(item)
    except Exception:
        logger.exception("Error: unable to get all sources")
        return None
    return collected
66 |
67 |
def get_all_text_sources(obs_client: obs.ReqClient):
    """Return the scene items that are text inputs with a ``text`` setting.

    An item qualifies when its ``inputKind`` starts with ``"text_"`` and its
    input settings contain a ``"text"`` key. Returns ``None`` when the source
    list could not be retrieved.
    """
    candidates = get_all_sources(obs_client)
    if candidates is None:
        return None

    def has_text_setting(item):
        # Only query OBS for the settings of items whose kind looks like text.
        if not str(item["inputKind"]).startswith("text_"):
            return False
        settings = obs_client.get_input_settings(item["sourceName"]).input_settings
        return "text" in settings

    return [item for item in candidates if has_text_setting(item)]
83 |
84 |
def get_source_by_name(obs_client: obs.ReqClient, source_name):
    """Return the first scene item whose ``sourceName`` equals ``source_name``.

    Scenes are searched in the order OBS reports them and the search stops at
    the first match, so scenes after the match are not queried. Items that
    are visited get a ``"sceneName"`` key with the name of their scene.

    Returns ``None`` when no item matches or when an OBS request fails (the
    exception is logged).
    """
    try:
        for scene in obs_client.get_scene_list().scenes:
            scene_name = scene["sceneName"]
            for source in obs_client.get_scene_item_list(scene_name).scene_items:
                # add the scene name to the source
                source["sceneName"] = scene_name
                if source["sourceName"] == source_name:
                    return source
        return None
    except Exception:
        logger.exception("Error: unable to get source by name")
        return None
107 |
108 |
class OBSPoller(QThread):
    """Thread that polls the text of one OBS source and queues changes.

    Every ``polling_freq`` milliseconds the named source is looked up and its
    ``text`` setting is read. Non-empty text that differs from the previously
    queued value is put on ``queue``. Polling ends when :meth:`stop` is called
    or when the source can no longer be found.
    """

    def __init__(
        self,
        obs_client: obs.ReqClient,
        obs_source_name: str,
        queue: Queue,
        polling_freq=1000,
    ):
        super().__init__()
        self.obs_client = obs_client
        self.obs_source_name = obs_source_name
        self.queue = queue
        self.polling_freq = polling_freq
        self.running = False
        self.last_content = None

    def stop(self):
        """Request the polling loop to exit."""
        self.running = False

    def _poll_once(self):
        """Read the source once; return ``False`` if the source is missing."""
        source = get_source_by_name(self.obs_client, self.obs_source_name)
        if source is None:
            logger.error(f"Source {self.obs_source_name} not found")
            return False
        settings = self.obs_client.get_input_settings(
            source["sourceName"]
        ).input_settings
        content = settings["text"] if "text" in settings else None
        # only forward non-empty text that changed since the last poll
        if content and content != self.last_content:
            self.queue.put_nowait(content)
            self.last_content = content
        return True

    def run(self):
        """Poll until stopped or until the source disappears."""
        logger.info("OBS polling thread started")
        self.running = True
        while self.running:
            try:
                source_found = self._poll_once()
            except Exception as e:
                # errors are logged and polling carries on
                logger.exception(f"Error: {e}")
                source_found = True
            if not source_found:
                break
            time.sleep(self.polling_freq / 1000)
        logger.info("OBS polling thread stopped")
151 |
--------------------------------------------------------------------------------
/audio_capture.py:
--------------------------------------------------------------------------------
1 | import time
2 | import sounddevice as sd
3 | from PyQt6 import QtCore
4 | import numpy as np
5 | from lexisynth_types import AudioSource
6 | from ls_logging import logger
7 | import queue
8 | import soundfile as sf
9 |
10 |
class AudioRecorder(QtCore.QThread):
    """Thread that captures audio from a device or a file in fixed chunks.

    Audio is read in blocks of ``block_read_freq_ms`` milliseconds and
    buffered in a queue. Once the queue holds ``number_of_blocks`` blocks
    (about ``chunk_size_ms`` of audio) the blocks are concatenated and
    emitted through ``data_available``. After every read,
    ``progress_and_volume`` is emitted with a tuple of
    (buffered milliseconds, peak absolute amplitude of the last block).
    """

    data_available = QtCore.pyqtSignal(np.ndarray)
    progress_and_volume = QtCore.pyqtSignal(tuple)

    def __init__(
        self,
        audio_source: AudioSource,
        chunk_size_ms,
        fs=44100,
        channels=1,
        dtype="float32",
    ):
        super().__init__()
        self.chunk_size_ms = chunk_size_ms
        self.fs = fs
        self.channels = channels
        self.dtype = dtype
        self.stream = None
        self.audio_source = audio_source
        self.block_read_freq_ms = 33  # 33ms
        # Whole number of blocks needed to cover one chunk (ceiling division).
        # This used to be a float that was passed on as the queue's maxsize.
        self.number_of_blocks = max(
            1, int(-(-chunk_size_ms // self.block_read_freq_ms))
        )
        self.q = queue.Queue(maxsize=self.number_of_blocks)
        self.soundfile = None
        self.running = False
        self.last_run_time = time.time()
        self.output_queue = None

    @staticmethod
    def _to_mono(data):
        """Average the channels of multi-channel ``data``; pass mono through."""
        if len(data.shape) > 1:
            return np.mean(data, axis=1)
        return data

    def _emit_buffered_audio(self):
        """Drain the block queue and emit its content as one array, if any."""
        blocks = []
        while True:
            try:
                blocks.append(self.q.get_nowait())
            except queue.Empty:
                break
        if blocks:
            self.data_available.emit(np.concatenate(blocks, axis=0))

    def run(self) -> None:
        """Read blocks until stopped, the file ends, or an error occurs."""
        self.running = True
        block_period_s = float(self.block_read_freq_ms) / 1000.0
        while self.running:
            # check if enough time passed since the last run
            if (time.time() - self.last_run_time) < block_period_s:
                # sleep to avoid busy waiting
                time.sleep(0.001)
                continue
            self.last_run_time = time.time()

            magnitude = 0
            new_data = False
            source_type = self.audio_source.sourceType
            if source_type == AudioSource.SourceType.FILE:
                if self.soundfile is None:
                    logger.error("Soundfile is not initialized")
                    break
                # read a block of data from the soundfile
                data = self.soundfile.read(self.read_size_frames())
                if not len(data):
                    # End of file: flush the partially filled buffer and
                    # finish. The loop used to `continue` here, logging this
                    # warning on every tick and never emitting the tail.
                    logger.warning("File data is empty. End of file?")
                    self._emit_buffered_audio()
                    break
                # downmix like the device branch does
                data = self._to_mono(data)
                magnitude = np.max(np.abs(data))
                self.q.put_nowait(data)
                new_data = True
            elif source_type == AudioSource.SourceType.DEVICE:
                if self.stream is None:
                    logger.error("Stream is not initialized")
                    break
                while (
                    self.stream.read_available >= self.read_size_frames()
                    and not self.q.full()
                ):
                    # read a block of data from the sounddevice
                    data, overflowed = self.stream.read(self.read_size_frames())
                    # merge the channels by averaging
                    data = self._to_mono(data)
                    if overflowed:
                        logger.warning(f"Overflowed (got {len(data)})")
                    magnitude = np.max(np.abs(data))
                    self.q.put_nowait(data)
                    new_data = True
            else:
                logger.error("Unknown audio source type")
                break

            if new_data:
                # emit progress signal with the buffer capacity in milliseconds and the volume in the frame
                self.progress_and_volume.emit(
                    (self.q.qsize() * self.block_read_freq_ms, magnitude)
                )
                # check if q has enough data to emit according to the chunk size
                if self.q.full():
                    # emit the entire chunk of data
                    self._emit_buffered_audio()

        self.running = False
        logger.info("Audio capture thread stopped")

    def start(self):
        """Open the file or device described by the audio source and start.

        For a file source ``fs`` is replaced by the file's sample rate. The
        thread is not started when the source type is unknown.
        """
        logger.info(
            f"Starting audio capture with {self.fs} Hz, {self.channels} channels, and {self.dtype} data type"
        )
        # if this is a file source, stream the file progressively with soundfile
        if self.audio_source.sourceType == AudioSource.SourceType.FILE:
            logger.info(f"Opening file {self.audio_source.sourceName}")
            self.soundfile = sf.SoundFile(self.audio_source.sourceName)
            self.fs = self.soundfile.samplerate
            logger.debug(f"File info: {self.soundfile}")

        # if this is a device source, stream the device with sounddevice
        elif self.audio_source.sourceType == AudioSource.SourceType.DEVICE:
            logger.info(f"Opening device {self.audio_source.sourceName}")
            self.stream = sd.InputStream(
                device=self.audio_source.sourceName,
                samplerate=self.fs,
                blocksize=self.read_size_frames(),
                channels=self.channels,
                dtype=self.dtype,
            )
            logger.info(f"Stream samplerate: {self.stream.samplerate}")
            self.stream.start()
        else:
            logger.error("Unknown audio source type")
            return

        super().start()

    def stop(self):
        """Ask the capture loop to exit and stop/close the audio source."""
        logger.info("Stopping audio capture")
        self.running = False
        if self.soundfile:
            self.soundfile.close()
        if self.stream:
            self.stream.stop()

    def read_size_frames(self):
        """Return the number of frames in one read block."""
        return int(self.fs * self.block_read_freq_ms / 1000)

    def get_chunk_size_frames(self):
        """Return the number of frames in one emitted chunk."""
        return int(self.fs * self.chunk_size_ms / 1000)

    @staticmethod
    def get_audio_devices() -> list[AudioSource]:
        """Return the available input devices as ``AudioSource`` objects.

        When the device query yields a list, only devices with at least one
        input channel are kept; a single-device (dict) result is used as is.
        """
        devices = sd.query_devices()
        if isinstance(devices, dict):
            devices_list = [devices]
        else:
            devices_list = []
            for device in devices:
                if device["max_input_channels"] > 0:
                    logger.debug(f"Audio device: {device}")
                    devices_list.append(device)
        return [
            AudioSource(
                sourceName=device["name"],
                sourceType=AudioSource.SourceType.DEVICE,
            )
            for device in devices_list
        ]
161 |
--------------------------------------------------------------------------------
/transcription.py:
--------------------------------------------------------------------------------
1 | import queue
2 | import time
3 | from PyQt6 import QtCore
4 | from PyQt6.QtCore import QThread
5 | from PyQt6.QtWidgets import QDialog
6 | from faster_whisper import WhisperModel
7 | from language_codes import LanguageCodes
8 | from ls_logging import logger
9 | import numpy as np
10 | from model_download_dialog import ModelDownloadDialog
11 |
12 | from models_info import ModelDownloadInfo, checkForModelDownload, getAbsoluteModelPath
13 |
14 |
def linear_interpolate_audio(audio_frame, original_rate, target_rate):
    """Resample a 1-D audio array by linear interpolation.

    The output has ``int(len(audio_frame) / original_rate * target_rate)``
    samples, spread evenly between the first and last input sample.
    """
    source_count = audio_frame.shape[0]
    # duration in seconds times the target rate gives the output length
    resampled_count = int(source_count / original_rate * target_rate)

    # positions of the input samples and of the requested output samples,
    # both expressed in input-sample units
    source_positions = np.arange(source_count)
    query_positions = np.linspace(0, source_count - 1, resampled_count)

    return np.interp(query_positions, source_positions, audio_frame)
29 |
30 |
def find_point_of_repetition(sentence):
    """Locate the first repeated run of at least two words in ``sentence``.

    Words are compared case-insensitively after splitting on whitespace.
    Returns ``(first, second, length)`` where ``first`` is the index at which
    the run first starts, ``second`` the index at which it starts again and
    ``length`` the number of consecutive matching words counted from there
    (the two runs may overlap). Returns ``None`` when nothing repeats.
    """
    tokens = sentence.lower().split()
    total = len(tokens)
    for first in range(total):
        for second in range(first + 1, total):
            if tokens[first] != tokens[second]:
                continue
            # extend the match for as long as both runs keep agreeing
            run_length = 1
            while (
                second + run_length < total
                and tokens[first + run_length] == tokens[second + run_length]
            ):
                run_length += 1
            # a single repeated word does not count as a repetition
            if run_length > 1:
                return first, second, run_length
    return None
48 |
49 |
def checkAndDownloadModel(modelInfo):
    """Show the model download dialog unless ``checkForModelDownload`` passes."""
    if checkForModelDownload(modelInfo):
        return
    # the check failed: run the download dialog modally
    ModelDownloadDialog(modelInfo).exec()
55 |
56 |
class AudioTranscriber(QThread):
    """Thread that transcribes queued audio chunks with a Whisper model.

    Audio arrays are submitted with :meth:`queue_audio_data`. Each chunk is
    resampled to 16 kHz and transcribed, and the text of the first returned
    segment is emitted through ``text_available`` (cut at the point where a
    word sequence starts repeating, if it does).
    """

    text_available = QtCore.pyqtSignal(str)

    # Model-size label -> (model download descriptor, short name for logs).
    _MODELS_BY_LABEL = {
        "Tiny (75Mb)": (ModelDownloadInfo.FASTER_WHISPER_TINY_CT2, "tiny"),
        "Small (400Mb)": (ModelDownloadInfo.FASTER_WHISPER_SMALL_CT2, "small"),
        "Base (140Mb)": (ModelDownloadInfo.FASTER_WHISPER_BASE_CT2, "base"),
    }

    def __init__(self):
        super().__init__()
        self.input_queue = queue.Queue()
        self.model = None
        self.running = False
        self.language = None
        # Sample rate (Hz) the queued audio is assumed to have. 44100 is the
        # value that used to be hard-coded in run(); change it when the audio
        # is captured at a different rate.
        self.input_sample_rate = 44100
        # check if model has been downloaded already
        checkAndDownloadModel(ModelDownloadInfo.FASTER_WHISPER_TINY_CT2)

    def set_language(self, language: str):
        """Set the transcription language from a language code or name.

        ``None`` and ``"Auto"`` select no fixed language. An unknown value is
        logged as an error and also selects no fixed language.
        """
        if language is None or language == "Auto":
            self.language = None
            return
        if language in LanguageCodes.getLanguageCodes():
            self.language = language
            return
        if language in LanguageCodes.getLanguageNames():
            self.language = LanguageCodes.getLanguageCode(language)
            return
        logger.error(f"Language {language} not found")
        self.language = None

    @staticmethod
    def _load_model(model_info):
        """Create a CPU int8 Whisper model from the files of ``model_info``."""
        return WhisperModel(
            getAbsoluteModelPath(model_info),
            device="cpu",
            compute_type="int8",
        )

    def set_model_size(self, model_size: str):
        """Load the model for a size label such as ``"Tiny (75Mb)"``.

        The model is downloaded first when necessary. ``None`` is ignored and
        an unknown label is logged as an error; the current model is kept in
        both cases.
        """
        if model_size is None:
            return
        entry = self._MODELS_BY_LABEL.get(model_size)
        if entry is None:
            logger.error(f"Model size {model_size} not found")
            return
        model_info, short_name = entry
        checkAndDownloadModel(model_info)
        self.model = self._load_model(model_info)
        logger.info(f"Model loaded: {short_name}")

    def stop(self):
        """Request the transcription loop to exit."""
        self.running = False

    def run(self):
        """Transcribe queued audio until :meth:`stop` is called."""
        logger.info("Transcription thread started")
        if self.model is None:
            # Fall back to the tiny model. The log used to say "tiny.en"
            # although this is the same model the "Tiny (75Mb)" label loads.
            self.model = self._load_model(ModelDownloadInfo.FASTER_WHISPER_TINY_CT2)
            logger.info("Model loaded: tiny")

        self.running = True
        while self.running:
            try:
                audio_data = self.input_queue.get_nowait()
            except queue.Empty:
                # sleep for a bit to avoid busy waiting
                time.sleep(0.1)
                continue
            if audio_data is None or len(audio_data) == 0:
                # sleep for a bit to avoid busy waiting
                time.sleep(0.1)
                continue

            # resample the audio data to 16kHz
            resampled_audio_data = linear_interpolate_audio(
                audio_data, self.input_sample_rate, 16000
            ).astype(np.float32)

            # transcribe the audio data
            segments, _ = self.model.transcribe(
                resampled_audio_data,
                language=self.language,
                max_new_tokens=40,
                vad_filter=True,
                vad_parameters=dict(min_silence_duration_ms=500),
                temperature=0.0,
            )

            segments_list = list(segments)
            if len(segments_list) == 0:
                logger.debug("No segments found")
                continue

            # only the first segment is used
            segment = segments_list[0]
            if segment is None:
                logger.debug("None segment found")
                continue
            repetition = find_point_of_repetition(segment.text)
            result_text = segment.text.strip()
            if repetition:
                # keep only the words before the second occurrence starts
                result_text = " ".join(segment.text.split()[: repetition[1]])

            self.text_available.emit(result_text)

        logger.info("Transcription thread stopped")

    def queue_audio_data(self, audio_data):
        """Queue an audio array for transcription."""
        self.input_queue.put_nowait(audio_data)
181 |
--------------------------------------------------------------------------------
/.github/workflows/build.yaml:
--------------------------------------------------------------------------------
1 | name: Cross-Platform Build with PyInstaller
2 |
3 | on:
4 | pull_request:
5 | branches: [ main ]
6 | workflow_call:
7 |
8 | jobs:
9 | check-format:
10 | name: Check Formatting 🔍
11 | uses: ./.github/workflows/check-format.yaml
12 | permissions:
13 | contents: read
14 |
15 | build:
16 | needs: check-format
17 | strategy:
18 | matrix:
19 | os: [macos-latest, windows-latest] # ubuntu-latest,
20 | include:
21 | - os: macos-latest
22 | python-version: '3.11'
23 | target: macos-x86
24 | runs-on: macos-12
25 | - os: ubuntu-latest
26 | python-version: '3.11'
27 | target: linux
28 | runs-on: ubuntu-latest
29 | - os: windows-latest
30 | python-version: '3.11'
31 | target: windows
32 | runs-on: windows-latest
33 |
34 | runs-on: ${{ matrix.runs-on }}
35 |
36 | steps:
37 | - uses: actions/checkout@v4
38 |
39 | - name: Set up Python
40 | if: matrix.os != 'windows-latest'
41 | uses: actions/setup-python@v5
42 | with:
43 | python-version: ${{ matrix.python-version }}
44 |
45 | - name: Install dependencies for Linux
46 | if: matrix.os == 'ubuntu-latest'
47 | run: |
48 | sudo apt-get update
49 | sudo apt-get install -y portaudio19-dev
50 | pip install --upgrade setuptools wheel
51 |
52 | - name: Install dependencies for MacOS
53 | if: matrix.os == 'macos-latest' || matrix.os == 'macos-latest-xlarge'
54 | run: |
55 | brew install portaudio
56 |
57 | - name: Install dependencies
58 | run: |
59 | python -m pip install -r requirements.txt
60 |
61 |
62 | - name: Import Apple Certificate
63 | if: matrix.os == 'macos-latest' || matrix.os == 'macos-latest-xlarge' && github.runner != 'self-hosted'
64 | run: |
65 | if security list-keychains | grep -q "github_build.keychain"; then
66 | security delete-keychain github_build.keychain
67 | fi
68 | security create-keychain -p "" github_build.keychain
69 | security default-keychain -s github_build.keychain
70 | security set-keychain-settings -lut 21600 github_build.keychain
71 | echo "${{ secrets.APPLE_CERTIFICATE }}" | base64 --decode > apple_certificate.p12
72 | security import apple_certificate.p12 -k github_build.keychain -P "${{ secrets.APPLE_CERTIFICATE_PASSWORD }}" \
73 | -t cert -f pkcs12 -T /usr/bin/codesign -T /usr/bin/security -T /usr/bin/xcrun
74 | security unlock-keychain -p "" github_build.keychain
75 | security set-key-partition-list -S 'apple-tool:,apple:' -s -k "" github_build.keychain
76 | security list-keychain -d user -s github_build.keychain 'login-keychain'
77 | env:
78 | APPLE_CERTIFICATE: ${{ secrets.APPLE_CERTIFICATE }}
79 | APPLE_CERTIFICATE_PASSWORD: ${{ secrets.APPLE_CERTIFICATE_PASSWORD }}
80 |
81 | - name: Unlock keychain on Mac
82 | if: matrix.os == 'macos-latest' || matrix.os == 'macos-latest-xlarge'
83 | run: |
84 | security unlock-keychain -p "" github_build.keychain
85 | security set-key-partition-list -S apple-tool:,apple: -k "" -D "Developer" -t private github_build.keychain
86 |
87 | - name: List available signing identities
88 | if: matrix.os == 'macos-latest' || matrix.os == 'macos-latest-xlarge'
89 | run: |
90 | security find-identity -v -p codesigning
91 |
92 | # write a .env file with the secrets
93 | - name: Write .env file Mac & Linux
94 | if: matrix.os != 'windows-latest'
95 | run: |
96 | echo "LOCAL_RELEASE_TAG=${GITHUB_REF_NAME}" >> .env
97 | echo "LOCAL_RELEASE_DATE=$(date -u +"%Y-%m-%dT%H:%M:%SZ")" >> .env
98 | echo "KMP_DUPLICATE_LIB_OK=TRUE" >> .env
99 |
100 | - name: Write .env file Windows
101 | if: matrix.os == 'windows-latest'
102 | run: |
103 | @"
104 | LOCAL_RELEASE_TAG=$env:GITHUB_REF_NAME
105 | LOCAL_RELEASE_DATE=$(Get-Date -Format 'yyyy-MM-ddTHH:mm:ssZ')
106 | KMP_DUPLICATE_LIB_OK=TRUE
107 | "@ | Out-File -FilePath .env -Encoding ASCII
108 | shell: pwsh
109 |
110 | - name: Build with PyInstaller (MacOS)
111 | if: matrix.os == 'macos-latest' || matrix.os == 'macos-latest-xlarge'
112 | run: |
113 | pyinstaller --clean --noconfirm lexisynth.spec -- --mac_osx
114 | env:
115 | APPLE_APP_DEVELOPER_ID: ${{ secrets.APPLE_APP_DEVELOPER_ID }}
116 |
117 | - name: Build with PyInstaller (Windows)
118 | if: matrix.os == 'windows-latest'
119 | run: |
120 | pyinstaller --clean --noconfirm lexisynth.spec -- --win
121 |
122 | - name: Build with PyInstaller (Linux)
123 | if: matrix.os == 'ubuntu-latest'
124 | run: |
125 | pyinstaller --clean --noconfirm lexisynth.spec
126 |
127 | - name: Zip Application for Notarization
128 | if: matrix.os == 'macos-latest' && github.event_name != 'pull_request'
129 | run: |
130 | ditto -c -k --keepParent dist/lexisynth.app lexisynth.zip
131 |
132 | - name: Notarize and Staple
133 | if: matrix.os == 'macos-latest' && github.event_name != 'pull_request'
134 | run: |
135 | xcrun notarytool submit lexisynth.zip --apple-id \
136 | "${{ secrets.APPLE_DEVELOPER_ID_USER }}" --password \
137 | "${{ secrets.APPLE_DEVELOPER_ID_PASSWORD }}" --team-id \
138 | "${{ secrets.APPLE_DEVELOPER_ID_TEAM }}" --wait --verbose
139 | chmod 755 dist/lexisynth.app
140 | xcrun stapler staple dist/lexisynth.app
141 |
142 | - name: Verify Notarization
143 | if: matrix.os == 'macos-latest' && github.event_name != 'pull_request'
144 | run: |
145 | spctl -a -v dist/lexisynth.app
146 | rm lexisynth.zip
147 |
148 | - name: Compile .ISS to .EXE Installer
149 | if: matrix.os == 'windows-latest'
150 | uses: Minionguyjpro/Inno-Setup-Action@v1.2.4
151 | with:
152 | path: lexisynth.iss
153 | options: /O+
154 |
155 | - name: Create tar Linux
156 | if: matrix.os == 'ubuntu-latest'
157 | # strip the folder name from the tar
158 | run: |
159 | chmod a+x dist/lexisynth
160 | tar -cvf lexisynth.tar -C dist lexisynth
161 |
162 | - name: Create dmg MacOS
163 | if: matrix.os == 'macos-latest' || matrix.os == 'macos-latest-xlarge'
164 | run: |
165 | chmod a+x dist/lexisynth.app
166 | hdiutil create -volname "LexiSynth" -srcfolder dist/lexisynth.app -ov -format UDZO lexisynth.dmg
167 |
168 | - name: Create zip on Windows
169 | if: matrix.os == 'windows-latest'
170 | run: |
171 | Compress-Archive -Path "dist/lexisynth-setup.exe" -DestinationPath "./lexisynth.zip"
172 | shell: pwsh
173 |
174 | - name: Upload artifact
175 | uses: actions/upload-artifact@v4
176 | with:
177 | name: lexisynth-${{ matrix.target }}
178 | # only upload exe on windows, tar on linux, dmg on macos
179 | path: |
180 | lexisynth.dmg
181 | lexisynth.tar
182 | lexisynth.zip
183 |
--------------------------------------------------------------------------------
/settings_dialog.py:
--------------------------------------------------------------------------------
1 | from model_download_dialog import ModelDownloadDialog
2 | from models_info import ModelDownloadInfo, checkForModelDownload
3 | from obs_websocket import disconnect_obs_websocket, open_obs_websocket
4 | from os import path
5 | from platformdirs import user_data_dir
6 | from PyQt6 import QtGui
7 | from PyQt6.QtCore import pyqtSignal
8 | from PyQt6.QtWidgets import QDialog, QFileDialog
9 | from PyQt6.uic import loadUi
10 | from storage import fetch_data, store_data
11 |
12 |
class SettingsDialog(QDialog):
    """Dialog for editing the application settings.

    The widgets are loaded from ``settings_dialog.ui`` and populated from the
    "settings" entry of ``settings.json``. When the dialog is accepted the
    values are written back to storage and ``settingsChanged`` is emitted
    with the saved dict.
    """

    # Emitted with the dict that was just written to storage.
    settingsChanged = pyqtSignal(dict)

    def __init__(self, page=None, parent=None):
        """Build the dialog.

        Args:
            page: optional index of the tab to show first.
            parent: optional parent widget.
        """
        super(SettingsDialog, self).__init__(parent)

        loadUi(
            path.abspath(path.join(path.dirname(__file__), "settings_dialog.ui")), self
        )

        # select the page if provided in tabWidget
        if page is not None:
            self.tabWidget.setCurrentIndex(page)

        # Populate the widgets before the combo-box signal is connected, so
        # restoring the stored selection does not run localLLMSelectChanged.
        self.loadSettings()

        # if dialog is accepted, save the settings
        self.accepted.connect(self.saveSettings)

        self.toolButton_selectLLMFolder.clicked.connect(
            lambda: self.selectFolderForLineEdit(self.lineEdit_localLLMFolder)
        )
        self.toolButton_outputsFolderSelect.clicked.connect(
            lambda: self.selectFolderForLineEdit(self.lineEdit_outputsFolder)
        )
        self.comboBox_localLLMSelect.currentIndexChanged.connect(
            self.localLLMSelectChanged
        )
        self.pushButton_obsTestConnection.clicked.connect(self.testObsConnection)

        # Both polling frequencies are restricted to integers in [100, 100000].
        self.lineEdit_inputFilePollingFreq.setValidator(
            QtGui.QIntValidator(100, 100000, self)
        )
        self.lineEdit_obsPollingFreq.setValidator(
            QtGui.QIntValidator(100, 100000, self)
        )

    def localLLMSelectChanged(self, index):
        """React to a new selection in the local-LLM combo box.

        The folder widgets are enabled only for "Custom". Selecting
        "M2M-100 Translation" opens the download dialog when the model is not
        present yet; if the dialog is rejected or the model is still missing
        afterwards, the combo box is reset to its first entry.

        Args:
            index: new combo-box index supplied by the signal (unused; the
                current text is inspected instead).
        """
        selection = self.comboBox_localLLMSelect.currentText()

        is_custom = selection == "Custom"
        self.lineEdit_localLLMFolder.setEnabled(is_custom)
        self.toolButton_selectLLMFolder.setEnabled(is_custom)

        if selection != "M2M-100 Translation":
            return

        # check if model has been downloaded already
        if checkForModelDownload(ModelDownloadInfo.M2M_100):
            return

        # show the download dialog
        modelDownloadDialog = ModelDownloadDialog(ModelDownloadInfo.M2M_100, self)
        download_cancelled = (
            modelDownloadDialog.exec() == QDialog.DialogCode.Rejected
        )
        # The second check only runs when the dialog was not cancelled.
        if download_cancelled or not checkForModelDownload(ModelDownloadInfo.M2M_100):
            # no usable model: fall back to the first combo-box entry
            self.comboBox_localLLMSelect.setCurrentIndex(0)

    def selectFolderForLineEdit(self, lineEdit):
        """Let the user pick a folder and write it into ``lineEdit``.

        The dialog starts at the line edit's current text. The text is left
        untouched when the user cancels.
        """
        folder = QFileDialog.getExistingDirectory(
            self, "Select a folder", lineEdit.text()
        )
        if folder:
            lineEdit.setText(folder)

    def loadSettings(self):
        """Fill the widgets from the stored settings.

        When no outputs folder is stored, a default under the user data
        directory is chosen and immediately written back to storage.
        """
        settings = fetch_data("settings.json", "settings", {})
        self.lineEdit_localLLMFolder.setText(settings.get("local_llm_folder", ""))
        self.lineEdit_openaiapikey.setText(settings.get("openai_api_key", ""))
        self.lineEdit_deeplapikey.setText(settings.get("deepl_api_key", ""))
        self.lineEdit_obsHost.setText(settings.get("obs_host", "localhost"))
        self.lineEdit_obsPort.setText(settings.get("obs_port", "4455"))
        self.lineEdit_obsPassword.setText(settings.get("obs_password", ""))
        self.lineEdit_obsPollingFreq.setText(settings.get("obs_polling_freq", "1000"))
        self.lineEdit_inputFilePollingFreq.setText(
            settings.get("input_file_polling_freq", "1000")
        )
        self.lineEdit_elevenlabsAPIKey.setText(settings.get("elevenlabs_api_key", ""))

        if settings.get("local_llm_select") is not None:
            self.comboBox_localLLMSelect.setCurrentIndex(
                settings.get("local_llm_select")
            )

        if settings.get("outputs_folder", "") == "":
            settings["outputs_folder"] = path.join(
                user_data_dir("lexisynth"), "outputs"
            )
            store_data("settings.json", "settings", settings)
        self.lineEdit_outputsFolder.setText(settings.get("outputs_folder", ""))

    def saveSettings(self):
        """Write the widget values to storage and emit ``settingsChanged``.

        The outputs folder is always stored. Every other text field is stored
        only when it is non-empty, and the local-LLM selection only when it is
        not the first combo-box entry.
        """
        settings = {"outputs_folder": self.lineEdit_outputsFolder.text()}

        # (settings key, line edit) pairs that are saved only when non-empty.
        # The OBS polling frequency is read from its line edit; checking the
        # caption label instead would always pass and could store "".
        optional_text_fields = (
            ("local_llm_folder", self.lineEdit_localLLMFolder),
            ("openai_api_key", self.lineEdit_openaiapikey),
            ("deepl_api_key", self.lineEdit_deeplapikey),
            ("obs_host", self.lineEdit_obsHost),
            ("obs_port", self.lineEdit_obsPort),
            ("obs_polling_freq", self.lineEdit_obsPollingFreq),
            ("obs_password", self.lineEdit_obsPassword),
            ("input_file_polling_freq", self.lineEdit_inputFilePollingFreq),
        )
        for key, line_edit in optional_text_fields:
            value = line_edit.text()
            if value != "":
                settings[key] = value

        if self.comboBox_localLLMSelect.currentIndex() != 0:
            settings["local_llm_select"] = self.comboBox_localLLMSelect.currentIndex()
        if self.lineEdit_elevenlabsAPIKey.text() != "":
            settings["elevenlabs_api_key"] = self.lineEdit_elevenlabsAPIKey.text()

        store_data("settings.json", "settings", settings)

        # emit a signal to notify the main window that settings have changed
        self.settingsChanged.emit(settings)

    def testObsConnection(self):
        """Try to open an OBS websocket with the values currently entered.

        The outcome is shown in ``label_obsConnectionStatus``; a successful
        connection is closed again right away.
        """
        obs_client = open_obs_websocket(
            {
                "ip": self.lineEdit_obsHost.text(),
                "port": self.lineEdit_obsPort.text(),
                "password": self.lineEdit_obsPassword.text(),
            }
        )
        if obs_client is None:
            self.label_obsConnectionStatus.setText("Failed")
            return

        self.label_obsConnectionStatus.setText("Connection Successful")
        # close the connection
        disconnect_obs_websocket(obs_client)
154 |
--------------------------------------------------------------------------------
/about.ui:
--------------------------------------------------------------------------------
1 |
2 |
3 | Dialog
4 |
5 |
6 |
7 | 0
8 | 0
9 | 665
10 | 615
11 |
12 |
13 |
14 | Dialog
15 |
16 |
17 | -
18 |
19 |
20 | true
21 |
22 |
23 |
24 |
25 | 0
26 | 0
27 | 624
28 | 1062
29 |
30 |
31 |
32 |
-
33 |
34 |
35 | <html><head/><body><p><span style=" font-weight:600;">About LexiSynth</span></p><p>Version: 0.0.1-beta<br/>LexiSynth is an AI-based speech analysis and synthesis tool for real-time applications.</p><p><span style=" font-weight:600;">Copyright © 2024 Roy Shilkrot. All Rights Reserved.</span></p><p><span style=" font-weight:600;">License</span><br/>LexiSynth is proprietary software licensed by Roy Shilkrot. This license permits commercial use but strictly prohibits any form of distribution or modification of the software and its documentation. For more details on licensing, please contact <a href="mailto:lexisynth@scoresight.live"><span style=" text-decoration: underline; color:#007af4;">lexisynth@scoresight.live</span></a>.</p><p><span style=" font-weight:600;">Third-Party Software</span><br/>LexiSynth incorporates components from third-party sources under their respective licenses:</p><ul style="margin-top: 0px; margin-bottom: 0px; margin-left: 0px; margin-right: 0px; -qt-list-indent: 1;"><li style=" margin-top:12px; margin-bottom:0px; margin-left:0px; margin-right:0px; -qt-block-indent:0; text-indent:0px;">CTranslate2: <a href=" https://github.com/OpenNMT/CTranslate2"><span style=" text-decoration: underline; color:#007af4;">https://github.com/OpenNMT/CTranslate2</span></a> MIT License</li><li style=" margin-top:12px; margin-bottom:0px; margin-left:0px; margin-right:0px; -qt-block-indent:0; text-indent:0px;">faster-whisper: <a href=" https://github.com/SYSTRAN/faster-whisper"><span style=" text-decoration: underline; color:#007af4;">https://github.com/SYSTRAN/faster-whisper</span></a> MIT License</li><li style=" margin-top:12px; margin-bottom:12px; margin-left:0px; margin-right:0px; -qt-block-indent:0; text-indent:0px;"><a href=" https://huggingface.co/jncraton/m2m100_418M-ct2-int8"><span style=" text-decoration: underline; color:#007af4;">https://huggingface.co/jncraton/m2m100_418M-ct2-int8</span></a> MIT License</li></ul><p>Detailed licensing information 
for these components is included within the software distribution.</p><p><span style=" font-weight:700;">Qt Application Framework</span></p><p>This application uses the Qt application framework, which is a comprehensive C++ library for cross-platform development of GUI applications. Qt is used under the terms of the GNU Lesser General Public License (LGPL) version 3. Qt is a registered trademark of The Qt Company Ltd and is developed and maintained by The Qt Project and various contributors.</p><p>For more information about Qt, including source code of Qt libraries used by this application and guidance on how to obtain or replace Qt libraries, please visit the Qt Project's official website at <a href="http://www.qt.io/"><span style=" text-decoration: underline; color:#007af4;">http://www.qt.io</span></a>.</p><p>We are committed to ensuring compliance with the LGPL v3 license and support the principles of open source software development. If you have any questions or concerns regarding our use of Qt, please contact us directly.</p><p><span style=" font-weight:600;">Disclaimer of Warranty</span><br/>LexiSynth is provided "AS IS", without warranty of any kind, express or implied, including but not limited to the warranties of merchantability, fitness for a particular purpose, and noninfringement. 
In no event shall Roy Shilkrot be liable for any claim, damages, or other liability, whether in an action of contract, tort or otherwise, arising from, out of, or in connection with the software or the use or other dealings in the software.</p><p><span style=" font-weight:600;">Limitation of Liability</span><br/>To the maximum extent permitted by applicable law, in no event will Roy Shilkrot, or its suppliers or licensors, be liable for any indirect, special, incidental, consequential, or punitive damages arising out of the use or inability to use Lexis, including, without limitation, damages for loss of goodwill, work stoppages, computer failure or malfunction, or any and all other commercial damages or losses, even if advised of the possibility thereof.</p><p><span style=" font-weight:600;">Contact Information</span><br/>For support, feedback, or more information, please visit <a href="https://scoresight.live/pages/lexisynth"><span style=" text-decoration: underline; color:#007af4;">https://scoresight.live/pages/lexisynth</span></a> or contact us at <a href="mailto:lexisynth@scoresight.live"><span style=" text-decoration: underline; color:#007af4;">lexisynth@scoresight.live</span></a> or <a href="https://discord.gg/BedTTVnZDg"><span style=" text-decoration: underline; color:#007af4;">https://discord.gg/BedTTVnZDg</span></a>.</p></body></html>
36 |
37 |
38 | true
39 |
40 |
41 |
42 |
43 |
44 |
45 |
46 | -
47 |
48 |
49 | Qt::Horizontal
50 |
51 |
52 | QDialogButtonBox::Close
53 |
54 |
55 |
56 |
57 |
58 |
59 |
60 |
61 | buttonBox
62 | accepted()
63 | Dialog
64 | accept()
65 |
66 |
67 | 248
68 | 254
69 |
70 |
71 | 157
72 | 274
73 |
74 |
75 |
76 |
77 | buttonBox
78 | rejected()
79 | Dialog
80 | reject()
81 |
82 |
83 | 316
84 | 260
85 |
86 |
87 | 286
88 | 274
89 |
90 |
91 |
92 |
93 |
94 |
--------------------------------------------------------------------------------
/translation.py:
--------------------------------------------------------------------------------
1 | import json
2 | from os import path
3 | import queue
4 | import time
5 | import ctranslate2
6 | from PyQt6.QtCore import QThread
7 | from PyQt6 import QtCore
8 | from platformdirs import user_data_dir
9 | from ls_logging import logger
10 | import sentencepiece as spm
11 | from language_codes import LanguageCodes
12 | from models_info import ModelDownloadInfo
13 | from storage import fetch_data
14 | import requests
15 |
16 |
class TranslationThread(QThread):
    """Worker thread that translates queued text with the selected engine.

    Text is read from ``input_queue`` and translated with one of the engines
    "Local LLM", "OpenAI API" or "DeepL API" (see ``setTranslationEngine``).
    Each result is emitted through ``text_available``. While a translation is
    running, a timer drives ``progress_available`` with an estimate based on
    the running average of previous translation times.
    """

    text_available = QtCore.pyqtSignal(str)
    progress_available = QtCore.pyqtSignal(int)
    start_progress = QtCore.pyqtSignal()
    stop_progress = QtCore.pyqtSignal()

    # Upper bound, in seconds, for one HTTP call to a remote translation API,
    # so an unresponsive server cannot block the thread forever.
    REQUEST_TIMEOUT_SECONDS = 30

    def __init__(self):
        super().__init__()
        self.input_queue = queue.Queue()
        self.translator = None
        self.tokenizer = None
        self.source_language = "English"
        self.target_language = "Spanish"
        self.running = False
        # Timer that periodically reports the estimated progress. It is
        # started and stopped through signals.
        # NOTE(review): no interval is set in this class, so Qt's default
        # applies unless a caller configures it - confirm this is intended.
        self.progressTimer = QtCore.QTimer()
        self.progressTimer.timeout.connect(self.progressCallback)
        self.last_run_time_ms = 1000
        # exponential moving average of the translation time, in ms
        self.run_time_avg_moving_window = 500
        self.current_run_time_start = time.time()
        self.start_progress.connect(self.progressTimer.start)
        self.stop_progress.connect(self.progressTimer.stop)
        self.translationEngine = None
        self.openai_api_key = None
        self.deepl_api_key = None

    def setTranslationEngine(self, translationEngine):
        """Select the engine: "Local LLM", "OpenAI API" or "DeepL API"."""
        self.translationEngine = translationEngine

    def setupModel(self):
        """Load the local translation model and its tokenizer.

        The model folder is the downloaded M2M-100 folder when the stored
        ``local_llm_select`` is 1, otherwise the stored ``local_llm_folder``.

        Returns:
            True when translator and tokenizer were created, False when the
            selection or the model folder is missing (the reason is logged).
        """
        settings = fetch_data("settings.json", "settings", {})
        local_llm_select = settings.get("local_llm_select")
        if local_llm_select is None:
            logger.error("Local LLM select is not set")
            return False

        if local_llm_select == 1:
            model_path = path.join(
                user_data_dir("lexisynth"),
                ModelDownloadInfo.M2M_100["model_folder_name"],
            )
            if not path.exists(model_path):
                logger.error("M2M-100 model is not downloaded")
                return False
        else:
            model_path = settings.get("local_llm_folder")
            if model_path is None:
                logger.error("Custom Local LLM folder is not set")
                return False
            if not path.exists(model_path):
                logger.error("Custom Local LLM folder does not exist")
                return False

        self.translator = ctranslate2.Translator(model_path)
        self.tokenizer = spm.SentencePieceProcessor(
            path.join(model_path, "sentencepiece.bpe.model")
        )
        return True

    def setLanguages(self, source_language, target_language):
        """Set the source and target languages by their English names."""
        self.source_language = source_language
        self.target_language = target_language

    def stop(self):
        """Ask the run loop to exit after its current iteration."""
        self.running = False

    def progressCallback(self):
        """Emit the estimated progress (0-100) of the current translation."""
        # time in ms since the start of the current translation
        current_run_time_elapsed = (time.time() - self.current_run_time_start) * 1000
        # progress relative to the average translation time, capped at 100
        progress = min(
            100, int(current_run_time_elapsed / self.run_time_avg_moving_window * 100)
        )
        self.progress_available.emit(progress)

    def _resetProgress(self):
        """Stop the progress timer and report a progress of 0."""
        self.stop_progress.emit()
        self.progress_available.emit(0)

    def translateLocalLLM(self, text):
        """Translate ``text`` with the local model loaded by ``setupModel``."""
        src_language_code = LanguageCodes.getLanguageCode(self.source_language)
        tgt_language_code = LanguageCodes.getLanguageCode(self.target_language)

        source = [f"__{src_language_code}__"] + self.tokenizer.EncodeAsPieces(
            text, add_eos=True
        )
        results = self.translator.translate_batch(
            [source], target_prefix=[[f"__{tgt_language_code}__"]]
        )
        # drop the first output token (presumably the target-language prefix)
        output_tokens = results[0].hypotheses[0][1:]
        return self.tokenizer.Decode(output_tokens)

    def translateOpenAI(self, text):
        """Translate ``text`` through the OpenAI chat completions API.

        Returns:
            The translated text, or a string starting with "Error:" when the
            API key is missing or the request fails.
        """
        if self.openai_api_key is None:
            self.openai_api_key = fetch_data("settings.json", "settings", {}).get(
                "openai_api_key"
            )
        if not self.openai_api_key:
            logger.error("OpenAI API key is not set")
            return "Error: OpenAI API key is not set"
        # build API request
        data = {
            "model": "gpt-3.5-turbo",
            "messages": [
                {
                    "role": "user",
                    "content": f"translate from {self.source_language} to {self.target_language}: {text}",
                }
            ],
        }
        # send the request; network errors must not kill the thread
        try:
            response = requests.post(
                "https://api.openai.com/v1/chat/completions",
                headers={
                    "Authorization": f"Bearer {self.openai_api_key}",
                    "Content-Type": "application/json",
                },
                json=data,
                timeout=self.REQUEST_TIMEOUT_SECONDS,
            )
        except requests.RequestException as e:
            logger.error(f"OpenAI API request failed: {e}")
            return "Error: OpenAI API request failed"
        if response.status_code != 200:
            logger.error(f"OpenAI API request failed: {response.status_code}")
            return "Error: OpenAI API request failed"
        # parse the response
        try:
            response_json = response.json()
        except ValueError:
            logger.error("OpenAI API response is not valid JSON")
            return "Error: OpenAI API request failed"
        if "choices" not in response_json or len(response_json["choices"]) == 0:
            logger.error("OpenAI API response is empty")
            return "Error: OpenAI API response is empty"
        return response_json["choices"][0]["message"]["content"]

    def translateDeepL(self, text):
        """Translate ``text`` through the DeepL (free tier endpoint) API.

        Returns:
            The translated text, or a string starting with "Error:" when the
            API key is missing or the request fails.
        """
        if self.deepl_api_key is None:
            self.deepl_api_key = fetch_data("settings.json", "settings", {}).get(
                "deepl_api_key"
            )
        if not self.deepl_api_key:
            logger.error("DeepL API key is not set")
            return "Error: DeepL API key is not set"
        # build API request
        data = {
            "text": [text],
            "source_lang": LanguageCodes.getLanguageCode(self.source_language),
            "target_lang": LanguageCodes.getLanguageCode(self.target_language),
        }
        # send the request; network errors must not kill the thread
        try:
            response = requests.post(
                "https://api-free.deepl.com/v2/translate",
                headers={
                    "Authorization": f"DeepL-Auth-Key {self.deepl_api_key}",
                    "Content-Type": "application/json",
                    "User-Agent": "LexiSynth/1.0 (+https://scoresight.live/lexisynth)",
                    "Accept": "application/json",
                },
                json=data,
                timeout=self.REQUEST_TIMEOUT_SECONDS,
            )
        except requests.RequestException as e:
            logger.error(f"DeepL API request failed: {e}")
            return "Error: DeepL API request failed"
        if response.status_code != 200:
            logger.error(f"DeepL API request failed: {response.status_code}")
            logger.error(response.text)
            return "Error: DeepL API request failed"
        # parse the response
        try:
            response_json = response.json()
        except ValueError:
            logger.error("DeepL API response is not valid JSON")
            return "Error: DeepL API request failed"
        if (
            "translations" not in response_json
            or len(response_json["translations"]) == 0
        ):
            logger.error("DeepL API response is empty")
            return "Error: DeepL API response is empty"
        return response_json["translations"][0]["text"]

    def run(self):
        """Translate queued texts until ``stop`` is called or setup fails.

        The loop polls ``input_queue`` every 100 ms. For each text it starts
        the progress timer, translates, stops the timer, updates the running
        average of the translation time and emits ``text_available``. The
        loop exits early when the engine is unknown or the local model
        cannot be set up; the progress timer is stopped on those exits too.
        """
        if self.translationEngine is None:
            logger.error("Translation engine is not set")
            self.running = False
            return

        logger.info("Translation thread started")
        self.running = True
        while self.running:
            # Get the next text from the queue
            try:
                text = self.input_queue.get(block=False)
            except queue.Empty:
                time.sleep(0.1)
                continue

            if text is None:
                # sleep for a bit to avoid busy waiting
                time.sleep(0.1)
                continue

            self.current_run_time_start = time.time()
            self.start_progress.emit()

            # Time the translation operation
            start_time = time.time()
            if self.translationEngine == "Local LLM":
                if self.translator is None or self.tokenizer is None:
                    if not self.setupModel():
                        logger.error(
                            "Cannot start translation thread, model is not set up"
                        )
                        # the timer was already started for this text
                        self._resetProgress()
                        self.running = False
                        return

                output_text = self.translateLocalLLM(text)
            elif self.translationEngine == "OpenAI API":
                output_text = self.translateOpenAI(text)
            elif self.translationEngine == "DeepL API":
                output_text = self.translateDeepL(text)
            else:
                logger.error(f"Unknown translation engine: {self.translationEngine}")
                # the timer was already started for this text
                self._resetProgress()
                self.running = False
                return
            end_time = time.time()

            self._resetProgress()

            # clamp to at least 100 ms so the average never reaches 0
            self.last_run_time_ms = max(100, (end_time - start_time) * 1000)
            self.run_time_avg_moving_window = (
                self.run_time_avg_moving_window * 0.9
            ) + (self.last_run_time_ms * 0.1)

            # Emit the translated text
            self.text_available.emit(output_text)

        logger.info("Translation thread stopped")
242 |
--------------------------------------------------------------------------------
/settings_dialog.ui:
--------------------------------------------------------------------------------
1 |
2 |
3 | Dialog
4 |
5 |
6 |
7 | 0
8 | 0
9 | 400
10 | 308
11 |
12 |
13 |
14 | Dialog
15 |
16 |
17 | -
18 |
19 |
20 | Qt::Horizontal
21 |
22 |
23 | QDialogButtonBox::Cancel|QDialogButtonBox::Ok
24 |
25 |
26 |
27 | -
28 |
29 |
30 | 0
31 |
32 |
33 |
34 | General
35 |
36 |
37 |
38 | QFormLayout::ExpandingFieldsGrow
39 |
40 |
-
41 |
42 |
43 | Outputs Folder
44 |
45 |
46 |
47 | -
48 |
49 |
50 |
51 | 3
52 |
53 |
54 | 0
55 |
56 |
57 | 0
58 |
59 |
60 | 0
61 |
62 |
63 | 0
64 |
65 |
-
66 |
67 |
68 | false
69 |
70 |
71 |
72 | -
73 |
74 |
75 | 📂
76 |
77 |
78 |
79 |
80 |
81 |
82 | -
83 |
84 |
85 | Input File Polling (ms)
86 |
87 |
88 |
89 | -
90 |
91 |
92 | 1000
93 |
94 |
95 |
96 |
97 |
98 |
99 |
100 | LLM
101 |
102 |
103 |
104 | QFormLayout::ExpandingFieldsGrow
105 |
106 | -
107 |
108 |
109 | Local LLM Folder
110 |
111 |
112 |
113 | -
114 |
115 |
116 |
117 | 0
118 |
119 |
120 | 0
121 |
122 |
123 | 0
124 |
125 |
126 | 0
127 |
128 |
-
129 |
130 |
131 | false
132 |
133 |
134 |
135 | -
136 |
137 |
138 | ...
139 |
140 |
141 |
142 |
143 |
144 |
145 | -
146 |
147 |
148 | OpenAI API Key
149 |
150 |
151 |
152 | -
153 |
154 |
155 | QLineEdit::Password
156 |
157 |
158 |
159 | -
160 |
161 |
162 | DeepL API Key
163 |
164 |
165 |
166 | -
167 |
168 |
169 | QLineEdit::Password
170 |
171 |
172 |
173 | -
174 |
175 |
176 | Local LLM
177 |
178 |
179 |
180 | -
181 |
182 |
183 |
184 | 0
185 | 0
186 |
187 |
188 |
-
189 |
190 | Select Local LLM
191 |
192 |
193 | -
194 |
195 | M2M-100 Translation
196 |
197 |
198 | -
199 |
200 | Custom
201 |
202 |
203 |
204 |
205 | -
206 |
207 |
208 | Qt::Vertical
209 |
210 |
211 |
212 | 20
213 | 40
214 |
215 |
216 |
217 |
218 |
219 |
220 |
221 |
222 | OBS
223 |
224 |
225 | -
226 |
227 |
228 | Hostname
229 |
230 |
231 |
232 | -
233 |
234 |
235 | localhost
236 |
237 |
238 |
239 | -
240 |
241 |
242 | Port
243 |
244 |
245 |
246 | -
247 |
248 |
249 | 4455
250 |
251 |
252 |
253 | -
254 |
255 |
256 | Password
257 |
258 |
259 |
260 | -
261 |
262 |
263 | QLineEdit::Password
264 |
265 |
266 |
267 | -
268 |
269 |
270 | true
271 |
272 |
273 | Test Connection
274 |
275 |
276 |
277 | -
278 |
279 |
280 |
281 | 0
282 | 0
283 |
284 |
285 |
286 | Not Connected
287 |
288 |
289 |
290 | -
291 |
292 |
293 | Qt::Vertical
294 |
295 |
296 |
297 | 20
298 | 40
299 |
300 |
301 |
302 |
303 | -
304 |
305 |
306 | 1000
307 |
308 |
309 |
310 | -
311 |
312 |
313 | Polling Freq. (ms)
314 |
315 |
316 |
317 |
318 |
319 |
320 |
321 | Speech
322 |
323 |
324 | -
325 |
326 |
327 | QLineEdit::Password
328 |
329 |
330 |
331 | -
332 |
333 |
334 | ElevenLabs API Key
335 |
336 |
337 |
338 |
339 |
340 |
341 |
342 |
343 |
344 |
345 |
346 |
347 | buttonBox
348 | accepted()
349 | Dialog
350 | accept()
351 |
352 |
353 | 248
354 | 254
355 |
356 |
357 | 157
358 | 274
359 |
360 |
361 |
362 |
363 | buttonBox
364 | rejected()
365 | Dialog
366 | reject()
367 |
368 |
369 | 316
370 | 260
371 |
372 |
373 | 286
374 | 274
375 |
376 |
377 |
378 |
379 |
380 |
--------------------------------------------------------------------------------
/language_codes.py:
--------------------------------------------------------------------------------
1 | class LanguageCodes:
2 | ENGLISH = "en"
3 | FRENCH = "fr"
4 | SPANISH = "es"
5 | GERMAN = "de"
6 | ITALIAN = "it"
7 | DUTCH = "nl"
8 | PORTUGUESE = "pt"
9 | RUSSIAN = "ru"
10 | CHINESE = "zh"
11 | JAPANESE = "ja"
12 | KOREAN = "ko"
13 | ARABIC = "ar"
14 | HINDI = "hi"
15 | TURKISH = "tr"
16 | GREEK = "el"
17 | HEBREW = "he"
18 | POLISH = "pl"
19 | UKRAINIAN = "uk"
20 | CZECH = "cs"
21 | SLOVAK = "sk"
22 | BULGARIAN = "bg"
23 | ROMANIAN = "ro"
24 | HUNGARIAN = "hu"
25 | FINNISH = "fi"
26 | SWEDISH = "sv"
27 | DANISH = "da"
28 | NORWEGIAN = "no"
29 | ICELANDIC = "is"
30 | ESTONIAN = "et"
31 | LATVIAN = "lv"
32 | LITHUANIAN = "lt"
33 | MALTESE = "mt"
34 | CROATIAN = "hr"
35 | SERBIAN = "sr"
36 | BOSNIAN = "bs"
37 | SLOVENIAN = "sl"
38 | ALBANIAN = "sq"
39 | MACEDONIAN = "mk"
40 | MONTENEGRIN = "me"
41 | KURDISH = "ku"
42 | PERSIAN = "fa"
43 | PASHTO = "ps"
44 | URDU = "ur"
45 | BENGALI = "bn"
46 | TAMIL = "ta"
47 | TELUGU = "te"
48 | MARATHI = "mr"
49 | GUJARATI = "gu"
50 | PUNJABI = "pa"
51 | NEPALI = "ne"
52 | SINHALA = "si"
53 | BURMESE = "my"
54 | KHMER = "km"
55 | LAO = "lo"
56 | THAI = "th"
57 | VIETNAMESE = "vi"
58 | INDONESIAN = "id"
59 | MALAY = "ms"
60 | FILIPINO = "fil"
61 | JAVANESE = "jv"
62 |
def getLanguageName(code):
    """Return the English name for a language code, or "Unknown" if none matches."""
    # (code, name) pairs, compared against ``code`` in this order
    names_by_code = (
        (LanguageCodes.ENGLISH, "English"),
        (LanguageCodes.FRENCH, "French"),
        (LanguageCodes.SPANISH, "Spanish"),
        (LanguageCodes.GERMAN, "German"),
        (LanguageCodes.ITALIAN, "Italian"),
        (LanguageCodes.DUTCH, "Dutch"),
        (LanguageCodes.PORTUGUESE, "Portuguese"),
        (LanguageCodes.RUSSIAN, "Russian"),
        (LanguageCodes.CHINESE, "Chinese"),
        (LanguageCodes.JAPANESE, "Japanese"),
        (LanguageCodes.KOREAN, "Korean"),
        (LanguageCodes.ARABIC, "Arabic"),
        (LanguageCodes.HINDI, "Hindi"),
        (LanguageCodes.TURKISH, "Turkish"),
        (LanguageCodes.GREEK, "Greek"),
        (LanguageCodes.HEBREW, "Hebrew"),
        (LanguageCodes.POLISH, "Polish"),
        (LanguageCodes.UKRAINIAN, "Ukrainian"),
        (LanguageCodes.CZECH, "Czech"),
        (LanguageCodes.SLOVAK, "Slovak"),
        (LanguageCodes.BULGARIAN, "Bulgarian"),
        (LanguageCodes.ROMANIAN, "Romanian"),
        (LanguageCodes.HUNGARIAN, "Hungarian"),
        (LanguageCodes.FINNISH, "Finnish"),
        (LanguageCodes.SWEDISH, "Swedish"),
        (LanguageCodes.DANISH, "Danish"),
        (LanguageCodes.NORWEGIAN, "Norwegian"),
        (LanguageCodes.ICELANDIC, "Icelandic"),
        (LanguageCodes.ESTONIAN, "Estonian"),
        (LanguageCodes.LATVIAN, "Latvian"),
        (LanguageCodes.LITHUANIAN, "Lithuanian"),
        (LanguageCodes.MALTESE, "Maltese"),
        (LanguageCodes.CROATIAN, "Croatian"),
        (LanguageCodes.SERBIAN, "Serbian"),
        (LanguageCodes.BOSNIAN, "Bosnian"),
        (LanguageCodes.SLOVENIAN, "Slovenian"),
        (LanguageCodes.ALBANIAN, "Albanian"),
        (LanguageCodes.MACEDONIAN, "Macedonian"),
        (LanguageCodes.MONTENEGRIN, "Montenegrin"),
        (LanguageCodes.KURDISH, "Kurdish"),
        (LanguageCodes.PERSIAN, "Persian"),
        (LanguageCodes.PASHTO, "Pashto"),
        (LanguageCodes.URDU, "Urdu"),
        (LanguageCodes.BENGALI, "Bengali"),
        (LanguageCodes.TAMIL, "Tamil"),
        (LanguageCodes.TELUGU, "Telugu"),
        (LanguageCodes.MARATHI, "Marathi"),
        (LanguageCodes.GUJARATI, "Gujarati"),
        (LanguageCodes.PUNJABI, "Punjabi"),
        (LanguageCodes.NEPALI, "Nepali"),
        (LanguageCodes.SINHALA, "Sinhala"),
        (LanguageCodes.BURMESE, "Burmese"),
        (LanguageCodes.KHMER, "Khmer"),
        (LanguageCodes.LAO, "Lao"),
        (LanguageCodes.THAI, "Thai"),
        (LanguageCodes.VIETNAMESE, "Vietnamese"),
        (LanguageCodes.INDONESIAN, "Indonesian"),
        (LanguageCodes.MALAY, "Malay"),
        (LanguageCodes.FILIPINO, "Filipino"),
        (LanguageCodes.JAVANESE, "Javanese"),
    )
    # first pair whose code equals the argument wins
    return next(
        (
            language_name
            for language_code, language_name in names_by_code
            if code == language_code
        ),
        "Unknown",
    )
186 |
def getLanguageCode(name) -> str:
    """Return the language code for an English language name, or "Unknown" if none matches."""
    # (name, code) pairs, compared against ``name`` in this order
    codes_by_name = (
        ("English", LanguageCodes.ENGLISH),
        ("French", LanguageCodes.FRENCH),
        ("Spanish", LanguageCodes.SPANISH),
        ("German", LanguageCodes.GERMAN),
        ("Italian", LanguageCodes.ITALIAN),
        ("Dutch", LanguageCodes.DUTCH),
        ("Portuguese", LanguageCodes.PORTUGUESE),
        ("Russian", LanguageCodes.RUSSIAN),
        ("Chinese", LanguageCodes.CHINESE),
        ("Japanese", LanguageCodes.JAPANESE),
        ("Korean", LanguageCodes.KOREAN),
        ("Arabic", LanguageCodes.ARABIC),
        ("Hindi", LanguageCodes.HINDI),
        ("Turkish", LanguageCodes.TURKISH),
        ("Greek", LanguageCodes.GREEK),
        ("Hebrew", LanguageCodes.HEBREW),
        ("Polish", LanguageCodes.POLISH),
        ("Ukrainian", LanguageCodes.UKRAINIAN),
        ("Czech", LanguageCodes.CZECH),
        ("Slovak", LanguageCodes.SLOVAK),
        ("Bulgarian", LanguageCodes.BULGARIAN),
        ("Romanian", LanguageCodes.ROMANIAN),
        ("Hungarian", LanguageCodes.HUNGARIAN),
        ("Finnish", LanguageCodes.FINNISH),
        ("Swedish", LanguageCodes.SWEDISH),
        ("Danish", LanguageCodes.DANISH),
        ("Norwegian", LanguageCodes.NORWEGIAN),
        ("Icelandic", LanguageCodes.ICELANDIC),
        ("Estonian", LanguageCodes.ESTONIAN),
        ("Latvian", LanguageCodes.LATVIAN),
        ("Lithuanian", LanguageCodes.LITHUANIAN),
        ("Maltese", LanguageCodes.MALTESE),
        ("Croatian", LanguageCodes.CROATIAN),
        ("Serbian", LanguageCodes.SERBIAN),
        ("Bosnian", LanguageCodes.BOSNIAN),
        ("Slovenian", LanguageCodes.SLOVENIAN),
        ("Albanian", LanguageCodes.ALBANIAN),
        ("Macedonian", LanguageCodes.MACEDONIAN),
        ("Montenegrin", LanguageCodes.MONTENEGRIN),
        ("Kurdish", LanguageCodes.KURDISH),
        ("Persian", LanguageCodes.PERSIAN),
        ("Pashto", LanguageCodes.PASHTO),
        ("Urdu", LanguageCodes.URDU),
        ("Bengali", LanguageCodes.BENGALI),
        ("Tamil", LanguageCodes.TAMIL),
        ("Telugu", LanguageCodes.TELUGU),
        ("Marathi", LanguageCodes.MARATHI),
        ("Gujarati", LanguageCodes.GUJARATI),
        ("Punjabi", LanguageCodes.PUNJABI),
        ("Nepali", LanguageCodes.NEPALI),
        ("Sinhala", LanguageCodes.SINHALA),
        ("Burmese", LanguageCodes.BURMESE),
        ("Khmer", LanguageCodes.KHMER),
        ("Lao", LanguageCodes.LAO),
        ("Thai", LanguageCodes.THAI),
        ("Vietnamese", LanguageCodes.VIETNAMESE),
        ("Indonesian", LanguageCodes.INDONESIAN),
        ("Malay", LanguageCodes.MALAY),
        ("Filipino", LanguageCodes.FILIPINO),
        ("Javanese", LanguageCodes.JAVANESE),
    )
    # first pair whose name equals the argument wins
    return next(
        (
            language_code
            for language_name, language_code in codes_by_name
            if name == language_name
        ),
        "Unknown",
    )
310 |
def getLanguageCodes():
    """Return every supported language code as a freshly built list.

    Each entry is the ``LanguageCodes`` attribute with the given member name;
    the order matches the display names returned by ``getLanguageNames``.
    """
    member_names = (
        "ENGLISH FRENCH SPANISH GERMAN ITALIAN DUTCH PORTUGUESE RUSSIAN "
        "CHINESE JAPANESE KOREAN ARABIC HINDI TURKISH GREEK HEBREW "
        "POLISH UKRAINIAN CZECH SLOVAK BULGARIAN ROMANIAN HUNGARIAN FINNISH "
        "SWEDISH DANISH NORWEGIAN ICELANDIC ESTONIAN LATVIAN LITHUANIAN MALTESE "
        "CROATIAN SERBIAN BOSNIAN SLOVENIAN ALBANIAN MACEDONIAN MONTENEGRIN "
        "KURDISH PERSIAN PASHTO URDU BENGALI TAMIL TELUGU MARATHI GUJARATI "
        "PUNJABI NEPALI SINHALA BURMESE KHMER LAO THAI VIETNAMESE "
        "INDONESIAN MALAY FILIPINO JAVANESE"
    )
    return [getattr(LanguageCodes, member) for member in member_names.split()]
374 |
def getLanguageNames():
    """Return the English display names of all supported languages.

    A new list is built on every call, so callers may modify the result.
    """
    # Every name is a single word, so whitespace is a safe separator.
    names = (
        "English French Spanish German Italian Dutch Portuguese Russian "
        "Chinese Japanese Korean Arabic Hindi Turkish Greek Hebrew "
        "Polish Ukrainian Czech Slovak Bulgarian Romanian Hungarian Finnish "
        "Swedish Danish Norwegian Icelandic Estonian Latvian Lithuanian Maltese "
        "Croatian Serbian Bosnian Slovenian Albanian Macedonian Montenegrin "
        "Kurdish Persian Pashto Urdu Bengali Tamil Telugu Marathi Gujarati "
        "Punjabi Nepali Sinhala Burmese Khmer Lao Thai Vietnamese "
        "Indonesian Malay Filipino Javanese"
    )
    return names.split()
438 |
--------------------------------------------------------------------------------
/main.py:
--------------------------------------------------------------------------------
1 | import os
2 | import platform
3 | from PyQt6 import QtWidgets, uic
4 | from PyQt6.QtCore import QTimer
5 | from PyQt6.QtWidgets import QDialog
6 | from PyQt6.uic import loadUi
7 | import sys
8 | from os import path
9 | from audio_capture import AudioRecorder
10 | from audio_player import AudioPlayer
11 | from file_poller import FilePoller
12 | from language_codes import LanguageCodes
13 | from lexisynth_types import AudioSource
14 | from log_view import LogViewerDialog
15 | from ls_logging import logger
16 | from obs_websocket import (
17 | OBSPoller,
18 | disconnect_obs_websocket,
19 | get_all_sources,
20 | get_all_text_sources,
21 | open_obs_websocket,
22 | open_obs_websocket_from_settings,
23 | )
24 | from settings_dialog import SettingsDialog
25 | from storage import fetch_data, store_data
26 | from transcription import AudioTranscriber
27 | from translation import TranslationThread
28 | from text_to_speech import TextToSpeechThread
29 |
# Tooltip shown on combo-box entries that are greyed out.
NOT_IMPLEMENTED = "Not implemented yet"


def disable_dropdown_options_by_text(combo_box, text, negative_case=False):
    """Grey out combo-box entries selected by their label.

    *text* is either a single label (``str``) or a ``list`` of labels.  By
    default the entries whose label matches are disabled; with
    *negative_case* set, the entries that do NOT match are disabled instead.
    Disabled entries also receive the ``NOT_IMPLEMENTED`` tooltip.  Entries
    that are not selected for disabling are left untouched.
    """
    invert = bool(negative_case)
    for index in range(combo_box.count()):
        label = combo_box.itemText(index)
        if isinstance(text, list):
            matched = label in text
        elif isinstance(text, str):
            matched = label == text
        else:
            # any other type never matches an entry
            matched = False

        # disable on a match normally, on a non-match when inverted
        if matched == invert:
            continue

        entry = combo_box.model().item(index)
        entry.setEnabled(False)
        entry.setToolTip(NOT_IMPLEMENTED)
49 |
def toggle_all_widgets_in_a_groupbox(group_box, enabled):
    """Show or hide everything laid out inside *group_box*.

    For a form layout each row is shown/hidden as a whole; for any other
    layout the visibility of every widget item is set individually.  A group
    box without a layout is left alone.

    Args:
        group_box: widget whose ``layout()`` holds the content to toggle.
        enabled: ``True`` to show the content, ``False`` to hide it.
    """
    layout = group_box.layout()
    if layout is None:
        # nothing is laid out in this group box, so there is nothing to toggle
        return

    # isinstance (rather than an exact type comparison) so that subclasses of
    # QFormLayout also get the row-wise treatment
    if isinstance(layout, QtWidgets.QFormLayout):
        for row in range(layout.rowCount()):
            layout.setRowVisible(row, enabled)
        return

    for index in range(layout.count()):
        # layout items that are not widgets (e.g. spacers) return no widget
        widget = layout.itemAt(index).widget()
        if widget:
            widget.setVisible(enabled)
62 |
63 | class MainWindow(QtWidgets.QMainWindow):
def __init__(self):
    """Load the main-window UI, create the worker objects and wire up signals.

    Saved settings are not applied here; ``load_settings`` is scheduled with
    a single-shot timer at the very end, after all handlers are connected.
    """
    super(MainWindow, self).__init__()
    # the .ui file is looked up next to this source file
    uic.loadUi(
        path.abspath(path.join(path.dirname(__file__), "mainwindow.ui")), self
    )

    # add File -> Settings / About / View Current Log menu entries
    menubar = self.menuBar()
    file_menu = menubar.addMenu("File")
    file_menu.addAction("Settings", self.openSettingsDialog)
    file_menu.addAction("About", self.openAboutDialog)
    file_menu.addAction("View Current Log", self.openLogsDialog)
    # the log viewer is created on first use by openLogsDialog
    self.log_dialog = None

    # populate audio sources
    self.populateAudioSources()
    # connected only after the initial population above
    self.comboBox_audioSources.currentIndexChanged.connect(self.audioSourceChanged)
    self.audioSource = None
    self.audioCapture = None
    # transcription: its text is delivered to transcriptionAvailable
    self.audioTranscriber = AudioTranscriber()
    self.audioTranscriber.text_available.connect(self.transcriptionAvailable)
    # translation: text goes to translationTextAvailable, progress to its bar
    self.translator = TranslationThread()
    self.translator.text_available.connect(self.translationTextAvailable)
    self.translator.progress_available.connect(
        lambda progress: self.progressBar_translationProgress.setValue(progress)
    )
    # set by translationSourceChanged when a file or OBS source is chosen
    self.translation_poller = None
    # text to speech: produced audio is queued on the audio player
    self.textToSpeech = TextToSpeechThread()
    self.textToSpeech.progress_available.connect(
        lambda progress: self.progressBar_ttsProgress.setValue(progress)
    )
    self.audioPlayer = AudioPlayer()
    self.textToSpeech.speech_available.connect(
        lambda audio: self.audioPlayer.add_to_queue(audio)
    )
    self.audioPlayer.start()

    # default chunk size is 3000ms (the slider value is in seconds)
    self.horizontalSlider_chunkSize.setValue(3)
    self.comboBox_modelSize.currentTextChanged.connect(
        self.transcriptionModelSizeChanged
    )

    # Each group box shows/hides its content when toggled.  The two status
    # boxes are only connected; all the others are also collapsed right away.
    self.groupBox_statusTranscription.toggled.connect(
        lambda checked: toggle_all_widgets_in_a_groupbox(
            self.groupBox_statusTranscription, checked
        )
    )
    self.groupBox_statusTranslate.toggled.connect(
        lambda checked: toggle_all_widgets_in_a_groupbox(
            self.groupBox_statusTranslate, checked
        )
    )
    self.groupBox_cleanStream.toggled.connect(
        lambda checked: toggle_all_widgets_in_a_groupbox(
            self.groupBox_cleanStream, checked
        )
    )
    toggle_all_widgets_in_a_groupbox(self.groupBox_cleanStream, False)
    self.groupBox_output.toggled.connect(
        lambda checked: toggle_all_widgets_in_a_groupbox(
            self.groupBox_output, checked
        )
    )
    toggle_all_widgets_in_a_groupbox(self.groupBox_output, False)
    self.groupBox_transcriptionOpts.toggled.connect(
        lambda checked: toggle_all_widgets_in_a_groupbox(
            self.groupBox_transcriptionOpts, checked
        )
    )
    toggle_all_widgets_in_a_groupbox(self.groupBox_transcriptionOpts, False)
    self.groupBox_langOutputs.toggled.connect(
        lambda checked: toggle_all_widgets_in_a_groupbox(
            self.groupBox_langOutputs, checked
        )
    )
    toggle_all_widgets_in_a_groupbox(self.groupBox_langOutputs, False)
    self.groupBox_ttsOutput.toggled.connect(
        lambda checked: toggle_all_widgets_in_a_groupbox(
            self.groupBox_ttsOutput, checked
        )
    )
    toggle_all_widgets_in_a_groupbox(self.groupBox_ttsOutput, False)
    self.groupBox_analyze.toggled.connect(
        lambda checked: toggle_all_widgets_in_a_groupbox(
            self.groupBox_analyze, checked
        )
    )
    toggle_all_widgets_in_a_groupbox(self.groupBox_analyze, False)
    self.groupBox_translation.toggled.connect(
        lambda checked: toggle_all_widgets_in_a_groupbox(
            self.groupBox_translation, checked
        )
    )
    toggle_all_widgets_in_a_groupbox(self.groupBox_translation, False)

    # language engine change
    self.comboBox_languageEngine.currentIndexChanged.connect(
        self.languageEngineChanged
    )

    # populate languages; the target defaults to the second entry so that
    # source and target start out different
    self.comboBox_fromLanguage.addItems(LanguageCodes.getLanguageNames())
    self.comboBox_toLanguage.addItems(LanguageCodes.getLanguageNames())
    self.comboBox_toLanguage.setCurrentIndex(1)

    # the transcription language list gets an extra leading "Auto" entry
    self.comboBox_transcriptionLanguage.addItem("Auto")
    self.comboBox_transcriptionLanguage.addItems(LanguageCodes.getLanguageNames())
    self.comboBox_transcriptionLanguage.setCurrentIndex(0)

    # handlers are connected after the lists are filled
    self.comboBox_transcriptionLanguage.currentTextChanged.connect(
        self.transcriptionLanguageChanged
    )
    self.comboBox_toLanguage.currentIndexChanged.connect(
        self.setTranslationLanguages
    )
    self.comboBox_fromLanguage.currentIndexChanged.connect(
        self.setTranslationLanguages
    )
    # checking the translation group box also starts/stops the translator
    self.groupBox_translation.toggled.connect(self.startTranslation)

    # speech engine
    self.comboBox_speechEngine.currentIndexChanged.connect(self.speechEngineChanged)

    # disable everything on the text-output combo boxes except for
    # "Text File" and "No text output"
    disable_dropdown_options_by_text(
        self.comboBox_transcriptionOutputText,
        ["No text output", "Text File"],
        negative_case=True,
    )
    disable_dropdown_options_by_text(
        self.comboBox_translationOutputTextOptions,
        ["No text output", "Text File"],
        negative_case=True,
    )
    self.comboBox_transcriptionOutputText.currentIndexChanged.connect(
        self.transcriptionOutputTextChanged
    )
    self.comboBox_translationOutputTextOptions.currentIndexChanged.connect(
        self.translationOutputTextChanged
    )
    self.comboBox_translationSourceSelect.currentIndexChanged.connect(
        self.translationSourceChanged
    )
    # the "URL" translation source is greyed out
    disable_dropdown_options_by_text(self.comboBox_translationSourceSelect, "URL")

    # output locations and the OBS connection are resolved on demand
    self.outputsFolder = None
    self.transcriptionOutputTextFilePath = None
    self.translationOutputTextFilePath = None
    self.obs_client = None

    # apply the saved settings shortly after construction has finished
    QTimer.singleShot(10, self.load_settings)
216 |
def load_settings(self):
    """Apply the selections saved in the "main" section of settings.json.

    Keys that are missing (or stored as ``None``) leave the corresponding
    widget untouched.  Widgets are restored in a fixed order because setting
    a value may fire the widget's change handler.
    """
    saved = fetch_data("settings.json", "main", {})

    def restore_text(key, combo_box):
        # select the saved text on the combo box, if there is one
        value = saved.get(key)
        if value is not None:
            combo_box.setCurrentText(value)

    restore_text("language_engine", self.comboBox_languageEngine)
    restore_text("transcription_output", self.comboBox_transcriptionOutputText)
    restore_text("translation_output", self.comboBox_translationOutputTextOptions)
    restore_text("translation_source", self.comboBox_translationSourceSelect)
    restore_text("transcription_language", self.comboBox_transcriptionLanguage)
    restore_text("transcription_model_size", self.comboBox_modelSize)

    audio_source = saved.get("audio_source")
    if audio_source is not None:
        if audio_source == "device":
            self.comboBox_audioSources.setCurrentText(saved.get("audio_device"))
            self.audioSource = AudioSource(
                AudioSource.SourceType.DEVICE, saved.get("audio_device")
            )
        else:
            # only device sources are restored; anything else falls back
            # to the placeholder entry
            self.comboBox_audioSources.setCurrentText("Select Audio Source")

    restore_text("from_language", self.comboBox_fromLanguage)
    restore_text("to_language", self.comboBox_toLanguage)

    translation_on = saved.get("translation_on")
    if translation_on is not None:
        self.groupBox_translation.setChecked(translation_on)

    restore_text("speech_engine", self.comboBox_speechEngine)
265 |
def openLogsDialog(self):
    """Show the log viewer, creating it the first time it is requested."""
    if self.log_dialog is None:
        self.log_dialog = viewer = LogViewerDialog()
        viewer.setWindowTitle("Logs")

    # show() rather than exec(): the viewer is non-modal
    self.log_dialog.show()
274 |
def openAboutDialog(self):
    """Show the "About" dialog modally."""
    # about.ui sits next to this source file
    ui_file = path.abspath(path.join(path.dirname(__file__), "about.ui"))
    dialog = QDialog()
    loadUi(ui_file, dialog)
    dialog.setWindowTitle("About Lexis")
    dialog.exec()
284 |
def ensure_output_folder(self):
    """Make sure the configured outputs folder exists.

    The folder is read from the "outputs_folder" setting the first time it is
    needed and cached on ``self.outputsFolder``.

    Returns:
        ``True`` when a folder is configured and exists (creating it if
        necessary); ``False`` when none is configured or it could not be
        created, in which case the cached folder is reset to ``None``.
    """
    if self.outputsFolder is None:
        self.outputsFolder = fetch_data("settings.json", "settings", {}).get(
            "outputs_folder", None
        )

    folder = self.outputsFolder
    if folder is None:
        return False
    if path.exists(folder):
        return True

    try:
        os.makedirs(folder)
    except Exception as e:
        logger.error(f"Error creating outputs folder: {e}")
        self.outputsFolder = None
        return False
    return True
300 |
def speechEngineChanged(self):
    """Switch the text-to-speech engine to the one picked in the combo box.

    The running engine is stopped first.  A known engine is started only if
    its API key is present in the settings; otherwise the selection is reset
    and the settings dialog is opened on page 1.  Any other selection is
    logged as unknown and reset.  The resulting selection is persisted.
    """
    self.textToSpeech.stop()

    # settings key that must be filled in for each supported engine
    api_key_settings = {
        "OpenAI": "openai_api_key",
        "ElevenLabs": "elevenlabs_api_key",
    }
    engine = self.comboBox_speechEngine.currentText()
    key_name = api_key_settings.get(engine)

    if key_name is None:
        logger.error(f"Unknown speech engine: {engine}")
        self.comboBox_speechEngine.setCurrentIndex(0)
    else:
        if not fetch_data("settings.json", "settings", {}).get(key_name):
            self.comboBox_speechEngine.setCurrentIndex(0)
            self.openSettingsDialog(1)
            return
        self.textToSpeech.speech_engine = engine
        self.textToSpeech.start()

    store_data(
        "settings.json",
        "main",
        {"speech_engine": self.comboBox_speechEngine.currentText()},
    )
330 |
def transcriptionLanguageChanged(self):
    """Pass the selected transcription language on and remember it."""
    language = self.comboBox_transcriptionLanguage.currentText()
    logger.debug("transcription language changed to:" + language)
    self.audioTranscriber.set_language(language)
    store_data("settings.json", "main", {"transcription_language": language})
346 |
def transcriptionModelSizeChanged(self):
    """Pass the selected model size to the transcriber and remember it."""
    model_size = self.comboBox_modelSize.currentText()
    self.audioTranscriber.set_model_size(model_size)
    store_data("settings.json", "main", {"transcription_model_size": model_size})
354 |
def transcriptionOutputTextChanged(self):
    """Handle a new choice of where the transcription text is sent.

    Choosing "Text File" points the output at ``captions.txt`` inside the
    outputs folder.  If that folder is not available the selection is reset
    to the first entry and the settings dialog is opened on page 0.
    """
    self.transcriptionOutputTextFilePath = None
    selection = self.comboBox_transcriptionOutputText.currentText()
    if selection == "Text File":
        if not self.ensure_output_folder():
            self.comboBox_transcriptionOutputText.setCurrentIndex(0)
            self.openSettingsDialog(0)
            return
        self.transcriptionOutputTextFilePath = path.join(
            self.outputsFolder, "captions.txt"
        )
    # Persist the entry's own label: load_settings feeds this value straight
    # into setCurrentText(), so it has to match a combo-box item, and it has
    # to reflect what was actually selected (not always the text file).
    store_data("settings.json", "main", {"transcription_output": selection})
366 |
def translationOutputTextChanged(self):
    """Handle a new choice of where the translated text is sent.

    Choosing "Text File" points the output at ``translation.txt`` inside the
    outputs folder.  If that folder is not available the selection is reset
    to the first entry and the settings dialog is opened on page 0.
    """
    self.translationOutputTextFilePath = None
    selection = self.comboBox_translationOutputTextOptions.currentText()
    if selection == "Text File":
        if not self.ensure_output_folder():
            # reset THIS combo box (the translation one), not the
            # transcription output selector
            self.comboBox_translationOutputTextOptions.setCurrentIndex(0)
            self.openSettingsDialog(0)
            return
        self.translationOutputTextFilePath = path.join(
            self.outputsFolder, "translation.txt"
        )
    # Persist the entry's own label: load_settings feeds this value straight
    # into setCurrentText(), so it has to match a combo-box item, and it has
    # to reflect what was actually selected (not always the text file).
    store_data("settings.json", "main", {"translation_output": selection})
378 |
def translationSourceChanged(self):
    """Switch the translator's text input to the newly selected source.

    The previous file/OBS poller is always stopped first.  Depending on the
    selection a file poller is started (after asking for a text file), the
    transcription is used directly, the list of OBS text sources is fetched,
    or an OBS poller is started for a "[OBS] scene - source" entry.
    """
    self.textBrowser_transformedTextOutput.setText("")

    # Retire the old poller unconditionally; otherwise a poller that is
    # still running keeps feeding the translator queue after its source was
    # deselected, or is overwritten below without ever being stopped.
    if self.translation_poller:
        self.translation_poller.stop()
        self.translation_poller.wait()
        self.translation_poller = None

    selection = self.comboBox_translationSourceSelect.currentText()
    if selection == "File":
        fileDialog = QtWidgets.QFileDialog()
        fileDialog.setFileMode(QtWidgets.QFileDialog.FileMode.ExistingFile)
        fileDialog.setNameFilter("Text Files (*.txt)")
        fileDialog.setViewMode(QtWidgets.QFileDialog.ViewMode.List)
        fileDialog.exec()
        fileNames = fileDialog.selectedFiles()
        if fileNames:
            # the dialog ran its own event loop; make sure no poller was
            # started in the meantime before creating the new one
            if self.translation_poller:
                self.translation_poller.stop()
                self.translation_poller.wait()

            self.translation_poller = FilePoller(
                fileNames[0],
                cadence=fetch_data("settings.json", "settings", {}).get(
                    "input_file_polling_freq", 1000
                ),
                queue=self.translator.input_queue,
            )
            self.translation_poller.start()
            store_data(
                "settings.json",
                "main",
                {"translation_source": "file", "translation_file": fileNames[0]},
            )
    elif selection == "<-- Transcription":
        # transcriptionAvailable pushes the text itself; no poller needed
        logger.info("transcription selected as translation source")
        store_data("settings.json", "main", {"translation_source": "transcription"})
    elif selection == "--- Get OBS Sources ---":
        logger.info("Get OBS sources from websocket")
        self.getOBSSourcesForTranslation()
        self.comboBox_translationSourceSelect.setCurrentIndex(0)
    else:
        # obs source selected create an OBSPoller
        if self.obs_client is not None:
            if selection.startswith("[OBS]"):
                # Entries look like "[OBS] <scene> - <source>".  Split only
                # once so a source name containing " - " stays intact.
                # NOTE(review): a scene name containing " - " is still
                # ambiguous with this label format.
                source_name = selection.split(" - ", 1)[1]
                self.translation_poller = OBSPoller(
                    self.obs_client,
                    source_name,
                    self.translator.input_queue,
                    int(
                        fetch_data("settings.json", "settings", {}).get(
                            "obs_polling_freq", 1000
                        )
                    ),
                )
                self.translation_poller.start()
                store_data(
                    "settings.json",
                    "main",
                    {"translation_source": "obs", "obs_source": source_name},
                )
            else:
                logger.error("Invalid OBS source selected")
                self.comboBox_translationSourceSelect.setCurrentIndex(0)
446 |
def getOBSSourcesForTranslation(self):
    """Refresh the OBS text sources offered as translation inputs.

    Connects to OBS from the saved settings if there is no client yet.  When
    the connection cannot be made the settings dialog is opened on page 2.
    On success the previously listed OBS entries are replaced by the current
    ones, labelled "[OBS] <scene> - <source>", and the first entry is
    selected again.
    """
    if self.obs_client is None:
        self.obs_client = open_obs_websocket_from_settings()
    if self.obs_client is None:
        logger.error("OBS client is not connected")
        # open settings dialog
        self.openSettingsDialog(2)
        return

    sources = get_all_text_sources(self.obs_client)
    if not sources:
        # Logger.warn is a deprecated alias; warning() is the supported call
        logger.warning("Can't get OBS sources or no sources available")
        return

    source_select = self.comboBox_translationSourceSelect
    # the first four entries are fixed; OBS sources start at index 4
    for _ in range(4, source_select.count()):
        source_select.removeItem(4)
    # add the new sources
    for source in sources:
        source_select.addItem(
            f"[OBS] {source['sceneName']} - {source['sourceName']}"
        )
    source_select.setCurrentIndex(0)
469 |
def openSettingsDialog(self, page=None):
    """Open the settings dialog modally; *page* is passed on to the dialog."""
    dialog = SettingsDialog(page, self)
    dialog.exec()
473 |
def languageEngineChanged(self):
    """Activate the language engine picked in the combo box.

    The translation widgets are disabled first and only re-enabled once an
    engine is chosen whose required setting is present.  If the setting is
    missing, the selection is reset and the settings dialog is opened on
    page 1.  Choosing the placeholder entry stops translation and clears the
    engine.
    """
    # disable the widgets
    self.widget_textSourceSelect.setEnabled(False)
    self.groupBox_translation.setEnabled(False)

    engine = self.comboBox_languageEngine.currentText()
    if engine == "Select Language Engine":
        self.startTranslation(False)
        self.translator.setTranslationEngine(None)
        return

    logger.info(f"language engine changed to: {engine}")
    settings = fetch_data("settings.json", "settings", {})

    # setting that must be filled in before the engine can be used
    required_settings = {
        "Local LLM": "local_llm_select",
        "OpenAI API": "openai_api_key",
        "DeepL API": "deepl_api_key",
    }
    required = required_settings.get(engine)
    if required is not None and not settings.get(required):
        self.comboBox_languageEngine.setCurrentIndex(0)
        self.openSettingsDialog(1)
        return

    # enable the widgets
    self.widget_textSourceSelect.setEnabled(True)
    self.groupBox_translation.setEnabled(True)
    self.translator.setTranslationEngine(engine)
    store_data("settings.json", "main", {"language_engine": engine})
516 |
def setTranslationLanguages(self):
    """Pass the selected source/target languages on and remember them."""
    source_language = self.comboBox_fromLanguage.currentText()
    target_language = self.comboBox_toLanguage.currentText()
    self.translator.setLanguages(source_language, target_language)
    store_data(
        "settings.json",
        "main",
        {"from_language": source_language, "to_language": target_language},
    )
530 |
def startTranslation(self, checked):
    """Persist the on/off state, then start or stop the translator."""
    store_data("settings.json", "main", {"translation_on": checked})
    if not checked:
        self.translator.stop()
        return
    self.translator.start()
537 |
def populateAudioSources(self):
    """Rebuild the audio-source combo box from scratch.

    Order of entries: placeholder, the devices reported by ``AudioRecorder``,
    a disabled NDI header, "File", and a disabled "Stream".
    """
    sources = self.comboBox_audioSources
    sources.clear()

    # add select audio source option
    sources.insertItem(0, "Select Audio Source")
    sources.setCurrentIndex(0)

    for device in AudioRecorder.get_audio_devices():
        sources.addItem(device.sourceName)

    ndi_header = "--- NDI Sources ---"
    sources.addItem(ndi_header)
    disable_dropdown_options_by_text(sources, ndi_header)

    # add file input option
    sources.addItem("File")

    # add stream option
    sources.addItem("Stream")
    disable_dropdown_options_by_text(sources, "Stream")
555 |
def audioSourceChanged(self):
    """Adopt the audio source picked in the combo box and restart capture.

    "File" asks for an audio file; any other entry except the placeholder is
    treated as a device name.  The choice is persisted, and capture is
    restarted in every case (with no source, this just stops it).
    """
    logger.info("audio source changed")
    self.audioSource = None

    selected = self.comboBox_audioSources.currentText()
    if selected == "File":
        # if file input selected, open file dialog
        logger.info("file input selected")
        dialog = QtWidgets.QFileDialog()
        dialog.setFileMode(QtWidgets.QFileDialog.FileMode.ExistingFile)
        dialog.setNameFilter("Audio Files (*.mp3 *.wav *.ogg *.flac *.m4a *.aac)")
        dialog.setViewMode(QtWidgets.QFileDialog.ViewMode.List)
        dialog.exec()
        chosen = dialog.selectedFiles()
        if chosen and len(chosen) > 0:
            audio_file = chosen[0]
            logger.info(f"file selected: {audio_file}")
            self.audioSource = AudioSource(AudioSource.SourceType.FILE, audio_file)
            store_data(
                "settings.json",
                "main",
                {"audio_source": "file", "audio_file": audio_file},
            )
    else:
        logger.info("device input selected")
        if selected != "Select Audio Source":
            self.audioSource = AudioSource(AudioSource.SourceType.DEVICE, selected)
            store_data(
                "settings.json",
                "main",
                {"audio_source": "device", "audio_device": selected},
            )

    self.startAudioCapture()
597 |
def startAudioCapture(self):
    """Stop any running capture and start a new one for ``self.audioSource``.

    With no audio source selected, only the shutdown part runs.
    """
    logger.info("stopping exsting audio capture and starting new")
    previous = self.audioCapture
    if previous:
        # the transcriber is stopped before the recorder that feeds it
        self.audioTranscriber.stop()
        self.audioTranscriber.wait()
        previous.stop()
        previous.wait()
        self.audioCapture = None

    if not self.audioSource:
        return

    self.audioTranscriber.start()
    logger.info(f"audio source: {self.audioSource.sourceName}")

    # start audio capture
    chunk_seconds = self.horizontalSlider_chunkSize.value()
    logger.info(f"starting audio capture with chunk size: {chunk_seconds}")
    # the slider value is multiplied by 1000 for the recorder
    self.audioCapture = AudioRecorder(self.audioSource, chunk_seconds * 1000)
    self.audioCapture.progress_and_volume.connect(self.audioCaptureProgress)
    self.audioCapture.data_available.connect(self.audioTranscriber.queue_audio_data)
    self.audioCapture.start()
622 |
def audioCaptureProgress(self, progress):
    """Update the volume and buffer progress bars.

    *progress* is indexed as ``progress[0]`` (amount buffered, compared
    against the chunk size in milliseconds) and ``progress[1]`` (volume,
    scaled by 300 for display).
    """
    signal_bar = self.progressBar_audioSignal
    buffer_bar = self.progressBar_audioBuffer

    # update the volume progressbar
    volume_level = int(progress[1] * 300)
    signal_bar.setValue(volume_level)

    # update the buffer progressbar: percentage of one chunk already buffered
    chunk_size_ms = float(self.horizontalSlider_chunkSize.value()) * 1000.0
    fill_percent = int(float(progress[0]) / chunk_size_ms * 100.0)
    buffer_bar.setValue(fill_percent)

    # redraw the progressbars
    signal_bar.repaint()
    buffer_bar.repaint()
633 |
def transcriptionAvailable(self, text):
    """Display newly transcribed text and pass it on.

    The text is queued for translation when translation is switched on and
    the transcription is the selected translation source, and written to the
    transcription output file when one is configured (the file is
    overwritten each time).
    """
    logger.info(f"transcribed text available: {text}")
    self.textBrowser_output.setText(text)

    # if translation is on - send to translator thread
    feeds_translator = (
        self.groupBox_translation.isChecked()
        and self.comboBox_translationSourceSelect.currentText()
        == "<-- Transcription"
    )
    if feeds_translator:
        if self.translator.running:
            self.translator.input_queue.put_nowait(text)
        else:
            logger.error("Translator thread is not running")

    output_path = self.transcriptionOutputTextFilePath
    if output_path is None:
        return
    try:
        # save to file with utf-8 encoding
        with open(output_path, "w", encoding="utf-8") as f:
            f.write(text + "\n")
    except Exception as e:
        logger.error(f"Error saving transcription to file: {e}")
656 |
def translationTextAvailable(self, text):
    """Display newly translated text and pass it on.

    The text is written to the translation output file when one is
    configured (overwriting it each time) and handed to text-to-speech
    unless the speech-engine placeholder is selected.
    """
    logger.info(f"translated text available: {text}")
    self.textBrowser_transformedTextOutput.setText(text)

    output_path = self.translationOutputTextFilePath
    if output_path is not None:
        try:
            # save to file with utf-8 encoding
            with open(output_path, "w", encoding="utf-8") as f:
                f.write(text + "\n")
        except Exception as e:
            logger.error(f"Error saving translation to file: {e}")

    # check if tts is on
    tts_selected = self.comboBox_speechEngine.currentText() != "Select TTS Engine"
    if tts_selected:
        self.textToSpeech.add_text(text)
672 |
def closeEvent(self, event):
    """Shut the workers down before the window closes.

    Stops audio capture, transcription, translation, text-to-speech and the
    translation poller, disconnects from OBS if connected, then accepts the
    close event.
    """
    logger.debug("closing")
    if self.audioCapture:
        self.audioCapture.stop()
        logger.debug("audio capture stopped, waiting for thread to finish")
        self.audioCapture.wait()
    self.audioTranscriber.stop()
    logger.debug("transcription thread stopped. waiting for thread to finish")
    self.audioTranscriber.wait()
    self.translator.stop()
    logger.debug("translation thread stopped. waiting for thread to finish")
    self.translator.wait()
    # The text-to-speech worker is started by speechEngineChanged, so it has
    # to be told to stop here as well instead of being left running.
    # NOTE(review): self.audioPlayer is started in __init__ and is not shut
    # down here either - confirm which call AudioPlayer offers for that.
    self.textToSpeech.stop()
    logger.debug("text to speech thread stopped")
    if self.translation_poller:
        self.translation_poller.stop()
        self.translation_poller.wait()
    if self.obs_client:
        disconnect_obs_websocket(self.obs_client)
    event.accept()
691 |
692 |
if __name__ == "__main__":
    # The splash screen is only dismissed off macOS.  pyi_splash cannot be
    # imported unless the app runs from a bundle that ships a splash screen,
    # so a failed import is simply ignored.
    if platform.system() != "Darwin":
        try:
            import pyi_splash  # type: ignore

            pyi_splash.close()
        except ImportError:
            pass

    application = QtWidgets.QApplication(sys.argv)
    main_window = MainWindow()
    main_window.show()
    # hand the Qt event loop's exit status back to the shell
    sys.exit(application.exec())
708 |
--------------------------------------------------------------------------------
/mainwindow.ui:
--------------------------------------------------------------------------------
1 |
2 |
3 | MainWindow
4 |
5 |
6 |
7 | 0
8 | 0
9 | 988
10 | 612
11 |
12 |
13 |
14 | LexiSynth - Live Language Assistant
15 |
16 |
17 |
18 |
19 | 0
20 |
21 |
22 | 0
23 |
24 |
25 | 0
26 |
27 |
28 | 0
29 |
30 | -
31 |
32 |
33 |
34 | 24
35 |
36 |
37 |
38 | Caption
39 |
40 |
41 | Qt::AlignCenter
42 |
43 |
44 |
45 | 6
46 |
47 |
-
48 |
49 |
50 |
51 | 0
52 | 0
53 |
54 |
55 |
56 |
57 | QFormLayout::ExpandingFieldsGrow
58 |
59 |
60 | 0
61 |
62 |
63 | 0
64 |
65 |
66 | 0
67 |
68 |
69 | 0
70 |
71 |
72 | 0
73 |
74 |
-
75 |
76 |
77 |
78 | 0
79 | 0
80 |
81 |
82 |
83 |
84 | 20
85 | 0
86 |
87 |
88 |
89 |
90 | 13
91 |
92 |
93 |
94 | Source
95 |
96 |
97 |
98 | -
99 |
100 |
101 |
102 | 13
103 |
104 |
105 |
-
106 |
107 | Select Audio Source
108 |
109 |
110 |
111 |
112 |
113 |
114 |
115 | -
116 |
117 |
118 |
119 | 13
120 |
121 |
122 |
123 | Transcription Options
124 |
125 |
126 | true
127 |
128 |
129 | false
130 |
131 |
132 |
133 | QFormLayout::ExpandingFieldsGrow
134 |
135 |
136 | 0
137 |
138 |
139 | 0
140 |
141 |
142 | 6
143 |
144 |
145 | 0
146 |
147 |
148 | 0
149 |
150 |
151 | 0
152 |
153 |
-
154 |
155 |
156 |
157 | 13
158 |
159 |
160 |
161 | Chunk Seconds
162 |
163 |
164 |
165 | -
166 |
167 |
168 |
169 | 0
170 | 0
171 |
172 |
173 |
174 |
175 | 13
176 |
177 |
178 |
179 | 1
180 |
181 |
182 | 10
183 |
184 |
185 | Qt::Horizontal
186 |
187 |
188 | QSlider::NoTicks
189 |
190 |
191 | 1
192 |
193 |
194 |
195 | -
196 |
197 |
198 |
199 | 0
200 | 0
201 |
202 |
203 |
204 |
205 | -
206 |
207 |
208 | Language
209 |
210 |
211 |
212 | -
213 |
214 |
-
215 |
216 | Tiny (75Mb)
217 |
218 |
219 | -
220 |
221 | Base (140Mb)
222 |
223 |
224 | -
225 |
226 | Small (400Mb)
227 |
228 |
229 |
230 |
231 | -
232 |
233 |
234 | Model Size
235 |
236 |
237 |
238 |
239 |
240 |
241 | -
242 |
243 |
244 | false
245 |
246 |
247 |
248 | 13
249 |
250 |
251 |
252 | Clean Stream
253 |
254 |
255 | true
256 |
257 |
258 | false
259 |
260 |
261 |
262 | QFormLayout::ExpandingFieldsGrow
263 |
264 |
265 | 0
266 |
267 |
268 | 6
269 |
270 |
271 | 0
272 |
273 |
274 | 0
275 |
276 |
277 | 0
278 |
279 |
-
280 |
281 |
282 |
283 | 13
284 |
285 |
286 |
287 | Cleanup Method
288 |
289 |
290 |
291 | -
292 |
293 |
294 |
295 | 13
296 |
297 |
298 |
-
299 |
300 | Mute
301 |
302 |
303 | -
304 |
305 | Beep
306 |
307 |
308 |
309 |
310 | -
311 |
312 |
313 |
314 | 13
315 |
316 |
317 |
318 | Words
319 |
320 |
321 |
322 | -
323 |
324 |
325 |
326 | 13
327 |
328 |
329 |
-
330 |
331 | Internal List
332 |
333 |
334 | -
335 |
336 | Custom List
337 |
338 |
339 |
340 |
341 |
342 |
343 |
344 | -
345 |
346 |
347 |
348 | 16777215
349 | 120
350 |
351 |
352 |
353 |
354 | 13
355 |
356 |
357 |
358 | Transcription...
359 |
360 |
361 |
362 | -
363 |
364 |
365 | Qt::Vertical
366 |
367 |
368 |
369 | 20
370 | 40
371 |
372 |
373 |
374 |
375 | -
376 |
377 |
378 |
379 | 0
380 | 0
381 |
382 |
383 |
384 |
385 | 13
386 |
387 |
388 |
389 | Output Options
390 |
391 |
392 | true
393 |
394 |
395 | false
396 |
397 |
398 |
399 | 0
400 |
401 |
402 | 6
403 |
404 |
405 | 0
406 |
407 |
408 | 0
409 |
410 |
411 | 0
412 |
413 |
-
414 |
415 |
416 |
417 | 13
418 |
419 |
420 |
421 | Send text to
422 |
423 |
424 |
425 | -
426 |
427 |
428 |
429 | 13
430 |
431 |
432 |
-
433 |
434 | No text output
435 |
436 |
437 | -
438 |
439 | Text File
440 |
441 |
442 | -
443 |
444 | OBS WS Text Source
445 |
446 |
447 | -
448 |
449 | Caption stream
450 |
451 |
452 | -
453 |
454 | HTTP (Browser / Overlay)
455 |
456 |
457 | -
458 |
459 | SRT File
460 |
461 |
462 |
463 |
464 | -
465 |
466 |
467 |
468 | 13
469 |
470 |
471 |
472 | Send audio
473 |
474 |
475 |
476 | -
477 |
478 |
479 | false
480 |
481 |
482 |
483 | 13
484 |
485 |
486 |
487 | Not implemented yet
488 |
489 |
-
490 |
491 | No audio output
492 |
493 |
494 | -
495 |
496 | File (one shot)
497 |
498 |
499 | -
500 |
501 | File (continuous)
502 |
503 |
504 | -
505 |
506 | -- Output Devices --
507 |
508 |
509 |
510 |
511 | -
512 |
513 |
514 |
515 | 0
516 |
517 |
518 | 0
519 |
520 |
521 | 0
522 |
523 |
524 | 0
525 |
526 |
527 | 0
528 |
529 |
530 |
531 |
532 |
533 |
534 |
535 | -
536 |
537 |
538 |
539 | 13
540 |
541 |
542 |
543 | Status
544 |
545 |
546 | true
547 |
548 |
549 |
550 | 6
551 |
552 |
553 | 0
554 |
555 |
556 | 0
557 |
558 |
-
559 |
560 |
561 |
562 | 0
563 | 0
564 |
565 |
566 |
567 |
568 | 16777215
569 | 25
570 |
571 |
572 |
573 | 0
574 |
575 |
576 | false
577 |
578 |
579 | Qt::Vertical
580 |
581 |
582 |
583 | -
584 |
585 |
586 |
587 | 0
588 | 0
589 |
590 |
591 |
592 |
593 | 10
594 |
595 |
596 |
597 | Buffer
598 |
599 |
600 |
601 | -
602 |
603 |
604 | 0
605 |
606 |
607 | false
608 |
609 |
610 |
611 |
612 |
613 |
614 |
615 |
616 |
617 | -
618 |
619 |
620 | true
621 |
622 |
623 |
624 | 24
625 |
626 |
627 |
628 | Translate
629 |
630 |
631 | Qt::AlignCenter
632 |
633 |
634 | false
635 |
636 |
637 |
638 | 6
639 |
640 |
-
641 |
642 |
643 |
644 | 13
645 |
646 |
647 |
-
648 |
649 | Select Language Engine
650 |
651 |
652 | -
653 |
654 | Local LLM
655 |
656 |
657 | -
658 |
659 | OpenAI API
660 |
661 |
662 | -
663 |
664 | DeepL API
665 |
666 |
667 |
668 |
669 | -
670 |
671 |
672 | false
673 |
674 |
675 |
676 | 6
677 |
678 |
679 | 0
680 |
681 |
682 | 0
683 |
684 |
685 | 0
686 |
687 |
688 | 0
689 |
690 |
-
691 |
692 |
693 |
694 | 13
695 |
696 |
697 |
698 | Text Source
699 |
700 |
701 |
702 | -
703 |
704 |
705 |
706 | 0
707 | 0
708 |
709 |
710 |
711 |
712 | 13
713 |
714 |
715 |
-
716 |
717 | <-- Transcription
718 |
719 |
720 | -
721 |
722 | File
723 |
724 |
725 | -
726 |
727 | URL
728 |
729 |
730 | -
731 |
732 | --- Get OBS Sources ---
733 |
734 |
735 |
736 |
737 |
738 |
739 |
740 | -
741 |
742 |
743 | false
744 |
745 |
746 |
747 | 0
748 | 0
749 |
750 |
751 |
752 |
753 | 13
754 |
755 |
756 |
757 | Translate
758 |
759 |
760 | true
761 |
762 |
763 | false
764 |
765 |
766 |
767 | QFormLayout::ExpandingFieldsGrow
768 |
769 |
770 | 0
771 |
772 |
773 | 0
774 |
775 |
776 | 6
777 |
778 |
779 | 0
780 |
781 |
782 | 0
783 |
784 |
785 | 0
786 |
787 |
-
788 |
789 |
790 |
791 | 0
792 | 0
793 |
794 |
795 |
796 |
797 | 13
798 |
799 |
800 |
801 |
802 | -
803 |
804 |
805 |
806 | 13
807 |
808 |
809 |
810 | To
811 |
812 |
813 |
814 | -
815 |
816 |
817 |
818 | 0
819 | 0
820 |
821 |
822 |
823 |
824 | 13
825 |
826 |
827 |
828 |
829 | -
830 |
831 |
832 |
833 | 13
834 |
835 |
836 |
837 | From
838 |
839 |
840 |
841 |
842 |
843 |
844 | -
845 |
846 |
847 | false
848 |
849 |
850 |
851 | 13
852 |
853 |
854 |
855 | Analyze
856 |
857 |
858 | true
859 |
860 |
861 | false
862 |
863 |
864 |
865 | QFormLayout::ExpandingFieldsGrow
866 |
867 |
868 | 0
869 |
870 |
871 | 0
872 |
873 |
874 | 6
875 |
876 |
877 | 0
878 |
879 |
880 | 0
881 |
882 |
883 | 0
884 |
885 |
-
886 |
887 |
888 |
889 | 0
890 | 0
891 |
892 |
893 |
894 |
895 | 13
896 |
897 |
898 |
899 | Remove profanity
900 |
901 |
902 |
903 | -
904 |
905 |
906 |
907 | 0
908 | 0
909 |
910 |
911 |
912 |
913 | 13
914 |
915 |
916 |
917 | Summarize
918 |
919 |
920 |
921 |
922 |
923 |
924 | -
925 |
926 |
927 |
928 | 16777215
929 | 120
930 |
931 |
932 |
933 |
934 | 13
935 |
936 |
937 |
938 | false
939 |
940 |
941 | Transformed text...
942 |
943 |
944 |
945 | -
946 |
947 |
948 | Qt::Vertical
949 |
950 |
951 |
952 | 20
953 | 40
954 |
955 |
956 |
957 |
958 | -
959 |
960 |
961 |
962 | 13
963 |
964 |
965 |
966 | Output Options
967 |
968 |
969 | true
970 |
971 |
972 | false
973 |
974 |
975 |
976 | QFormLayout::ExpandingFieldsGrow
977 |
978 |
979 | 0
980 |
981 |
982 | 6
983 |
984 |
985 | 0
986 |
987 |
988 | 0
989 |
990 |
991 | 0
992 |
993 |
-
994 |
995 |
996 |
997 | 13
998 |
999 |
1000 |
1001 | Send to
1002 |
1003 |
1004 |
1005 | -
1006 |
1007 |
1008 |
1009 | 0
1010 | 0
1011 |
1012 |
1013 |
1014 |
1015 | 13
1016 |
1017 |
1018 |
-
1019 |
1020 | No text output
1021 |
1022 |
1023 | -
1024 |
1025 | Text File
1026 |
1027 |
1028 | -
1029 |
1030 | OBS Text Source
1031 |
1032 |
1033 | -
1034 |
1035 | Captions stream
1036 |
1037 |
1038 | -
1039 |
1040 | HTTP (Browser / Overlay)
1041 |
1042 |
1043 | -
1044 |
1045 | SRT File
1046 |
1047 |
1048 |
1049 |
1050 |
1051 |
1052 |
1053 | -
1054 |
1055 |
1056 |
1057 | 13
1058 |
1059 |
1060 |
1061 | Status
1062 |
1063 |
1064 | true
1065 |
1066 |
1067 |
1068 | 0
1069 |
1070 |
1071 | 0
1072 |
1073 |
-
1074 |
1075 |
1076 | 0
1077 |
1078 |
1079 | false
1080 |
1081 |
1082 |
1083 |
1084 |
1085 |
1086 |
1087 |
1088 |
1089 | -
1090 |
1091 |
1092 | true
1093 |
1094 |
1095 |
1096 | 24
1097 |
1098 |
1099 |
1100 | Synthesize
1101 |
1102 |
1103 | Qt::AlignCenter
1104 |
1105 |
1106 | false
1107 |
1108 |
1109 |
-
1110 |
1111 |
1112 |
1113 | 13
1114 |
1115 |
1116 |
-
1117 |
1118 | Select TTS Engine
1119 |
1120 |
1121 | -
1122 |
1123 | OpenAI
1124 |
1125 |
1126 | -
1127 |
1128 | ElevenLabs
1129 |
1130 |
1131 |
1132 |
1133 | -
1134 |
1135 |
1136 | Qt::Vertical
1137 |
1138 |
1139 |
1140 | 20
1141 | 40
1142 |
1143 |
1144 |
1145 |
1146 | -
1147 |
1148 |
1149 |
1150 | 0
1151 | 0
1152 |
1153 |
1154 |
1155 |
1156 | 13
1157 |
1158 |
1159 |
1160 | Output Options
1161 |
1162 |
1163 | true
1164 |
1165 |
1166 | false
1167 |
1168 |
1169 |
1170 | 0
1171 |
1172 |
1173 | 0
1174 |
1175 |
1176 | 0
1177 |
1178 |
-
1179 |
1180 |
1181 |
1182 | 13
1183 |
1184 |
1185 |
1186 | Send to
1187 |
1188 |
1189 |
1190 | -
1191 |
1192 |
1193 |
1194 | 13
1195 |
1196 |
1197 |
-
1198 |
1199 | No audio output
1200 |
1201 |
1202 | -
1203 |
1204 | File: One shot
1205 |
1206 |
1207 | -
1208 |
1209 | File: Continuous
1210 |
1211 |
1212 | -
1213 |
1214 | Local stream (HTTP)
1215 |
1216 |
1217 | -
1218 |
1219 | -- Output Devices --
1220 |
1221 |
1222 |
1223 |
1224 |
1225 |
1226 |
1227 | -
1228 |
1229 |
1230 |
1231 | 13
1232 |
1233 |
1234 |
1235 | Status
1236 |
1237 |
1238 | true
1239 |
1240 |
1241 |
1242 | 0
1243 |
1244 |
1245 | 0
1246 |
1247 |
-
1248 |
1249 |
1250 | 0
1251 |
1252 |
1253 | false
1254 |
1255 |
1256 |
1257 |
1258 |
1259 |
1260 |
1261 |
1262 |
1263 |
1264 |
1265 |
1275 |
1276 |
1277 | Settings
1278 |
1279 |
1280 |
1281 |
1282 | Settings
1283 |
1284 |
1285 |
1286 |
1287 |
1288 |
1289 |
--------------------------------------------------------------------------------