├── __init__.py ├── pytranscriber ├── __init__.py ├── control │ ├── __init__.py │ ├── thread_cancel_autosub.py │ ├── ctr_engine.py │ ├── ctr_main.py │ ├── ctr_proxy.py │ ├── thread_exec_whisper.py │ ├── ctr_db.py │ ├── thread_exec_generic.py │ ├── ctr_whisper.py │ ├── thread_exec_autosub.py │ └── ctr_autosub.py ├── gui │ ├── __init__.py │ ├── proxy │ │ ├── __init__.py │ │ ├── view_proxy.py │ │ ├── window_proxy.py │ │ └── window_proxy.ui │ ├── Português.qm │ ├── 简体中文 - Chinese Simplified.qm │ ├── 繁體中文 - Chinese Traditional.qm │ ├── message_util.py │ ├── 简体中文 - Chinese Simplified.ts │ ├── 繁體中文 - Chinese Traditional.ts │ ├── Português.ts │ ├── proxy.py │ ├── proxy.ui │ └── main │ │ ├── window_main.ui │ │ └── window_main.py ├── model │ ├── __init__.py │ ├── transcription_parameters.py │ ├── whisper.py │ └── google_speech.py └── util │ ├── __init__.py │ ├── srtparser.py │ └── util.py ├── MANIFEST.in ├── whisper ├── version.py ├── __main__.py ├── assets │ └── mel_filters.npz ├── normalizers │ ├── __init__.py │ └── basic.py ├── triton_ops.py ├── audio.py ├── __init__.py ├── utils.py └── model.py ├── nuitka-win-standalone.bat ├── doc ├── lightning.jpeg ├── pyTranscriber.png ├── screenshot1.png ├── screenshot2.png ├── screenshot3.png ├── entitlements.plist └── technical_details.md ├── deployment ├── nuitka-win-standalone.bat ├── freeze-nuitka-win.bat ├── freeze-win.sh ├── freeze-linux.sh ├── freeze-linux-nuitka.sh └── win │ ├── script-installer-windows.iss │ └── script-installer-windows-standalone.iss ├── pytranscriber.sqlite ├── patches ├── note.txt ├── autosub-0.3.13.patch └── autosub-0.4.0.patch ├── .gitignore ├── freeze-mac.sh ├── .github ├── ISSUE_TEMPLATE │ ├── feature_request.md │ └── bug_report.md ├── FUNDING.yml └── workflows │ ├── mac-pyinstaller.yml │ ├── linux-pyinstaller.yml │ ├── linux-nuitka.yml │ ├── win-nuitka.yml │ └── win-pyinstaller-dev2.yml ├── requirements.txt ├── Pipfile ├── main.py ├── script-installer-windows-standalone.iss ├── autosub ├── 
formatters.py └── constants.py └── README.md /__init__.py: -------------------------------------------------------------------------------- 1 | -------------------------------------------------------------------------------- /pytranscriber/__init__.py: -------------------------------------------------------------------------------- 1 | -------------------------------------------------------------------------------- /pytranscriber/control/__init__.py: -------------------------------------------------------------------------------- 1 | -------------------------------------------------------------------------------- /pytranscriber/gui/__init__.py: -------------------------------------------------------------------------------- 1 | -------------------------------------------------------------------------------- /pytranscriber/model/__init__.py: -------------------------------------------------------------------------------- 1 | -------------------------------------------------------------------------------- /pytranscriber/util/__init__.py: -------------------------------------------------------------------------------- 1 | -------------------------------------------------------------------------------- /pytranscriber/gui/proxy/__init__.py: -------------------------------------------------------------------------------- 1 | -------------------------------------------------------------------------------- /MANIFEST.in: -------------------------------------------------------------------------------- 1 | include README.md 2 | include LICENSE -------------------------------------------------------------------------------- /whisper/version.py: -------------------------------------------------------------------------------- 1 | __version__ = "20240930" 2 | -------------------------------------------------------------------------------- /whisper/__main__.py: -------------------------------------------------------------------------------- 1 | from .transcribe import cli 2 | 3 | 
cli() 4 | -------------------------------------------------------------------------------- /nuitka-win-standalone.bat: -------------------------------------------------------------------------------- 1 | nuitka --enable-plugin=pyqt5 main.py --disable-console --standalone -------------------------------------------------------------------------------- /doc/lightning.jpeg: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/raryelcostasouza/pyTranscriber/HEAD/doc/lightning.jpeg -------------------------------------------------------------------------------- /deployment/nuitka-win-standalone.bat: -------------------------------------------------------------------------------- 1 | nuitka --enable-plugin=pyqt5 main.py --disable-console --standalone -------------------------------------------------------------------------------- /doc/pyTranscriber.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/raryelcostasouza/pyTranscriber/HEAD/doc/pyTranscriber.png -------------------------------------------------------------------------------- /doc/screenshot1.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/raryelcostasouza/pyTranscriber/HEAD/doc/screenshot1.png -------------------------------------------------------------------------------- /doc/screenshot2.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/raryelcostasouza/pyTranscriber/HEAD/doc/screenshot2.png -------------------------------------------------------------------------------- /doc/screenshot3.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/raryelcostasouza/pyTranscriber/HEAD/doc/screenshot3.png -------------------------------------------------------------------------------- 
/pytranscriber.sqlite: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/raryelcostasouza/pyTranscriber/HEAD/pytranscriber.sqlite -------------------------------------------------------------------------------- /pytranscriber/gui/Português.qm: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/raryelcostasouza/pyTranscriber/HEAD/pytranscriber/gui/Português.qm -------------------------------------------------------------------------------- /whisper/assets/mel_filters.npz: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/raryelcostasouza/pyTranscriber/HEAD/whisper/assets/mel_filters.npz -------------------------------------------------------------------------------- /pytranscriber/gui/简体中文 - Chinese Simplified.qm: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/raryelcostasouza/pyTranscriber/HEAD/pytranscriber/gui/简体中文 - Chinese Simplified.qm -------------------------------------------------------------------------------- /pytranscriber/gui/繁體中文 - Chinese Traditional.qm: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/raryelcostasouza/pyTranscriber/HEAD/pytranscriber/gui/繁體中文 - Chinese Traditional.qm -------------------------------------------------------------------------------- /whisper/normalizers/__init__.py: -------------------------------------------------------------------------------- 1 | from .basic import BasicTextNormalizer as BasicTextNormalizer 2 | from .english import EnglishTextNormalizer as EnglishTextNormalizer 3 | -------------------------------------------------------------------------------- /deployment/freeze-nuitka-win.bat: -------------------------------------------------------------------------------- 1 | nuitka 
--enable-plugin=pyqt5 --include-data-files="ffmpeg.exe"="./" --include-data-files="pytranscriber/gui/*.qm"="pytranscriber/gui/" main.py --onefile --disable-console -------------------------------------------------------------------------------- /deployment/freeze-win.sh: -------------------------------------------------------------------------------- 1 | #!/bin/bash 2 | 3 | pipenv shell 4 | pyinstaller main.py --path=$pwd --add-binary="ffmpeg.exe;." --add-data="pytranscriber/gui/*.qm;pytranscriber/gui/" --onefile --clean 5 | -------------------------------------------------------------------------------- /deployment/freeze-linux.sh: -------------------------------------------------------------------------------- 1 | #!/bin/bash 2 | 3 | pipenv shell 4 | pyinstaller main.py main.spec --path="$(pwd)" --add-binary="ffmpeg:." --add-data="pytranscriber/gui/*.qm:pytranscriber/gui/" --onefile --clean 5 | -------------------------------------------------------------------------------- /patches/note.txt: -------------------------------------------------------------------------------- 1 | The autosub version used for pyTranscriber had to be customized a little bit. 2 | The patch in this folder was made comparing the original autosub/__init__.py file from version 0.4.0 to the customized version I made. 
3 | -------------------------------------------------------------------------------- /.gitignore: -------------------------------------------------------------------------------- 1 | lib/ 2 | python-libs/ 3 | bin/ 4 | *.spec 5 | *pyc 6 | *.egg-info 7 | *html 8 | build/ 9 | tests/ 10 | dist/ 11 | .DS_Store 12 | MANIFEST 13 | *#* 14 | ffmpeg* 15 | notes.txt 16 | 17 | Pipfile.lock 18 | 19 | Pipfile 20 | -------------------------------------------------------------------------------- /deployment/freeze-linux-nuitka.sh: -------------------------------------------------------------------------------- 1 | #!/bin/bash 2 | 3 | pipenv shell 4 | nuitka3 --enable-plugin=pyqt5 --include-data-files="ffmpeg"="./" \ 5 | --include-data-files="pytranscriber/gui/*.qm"="pytranscriber/gui/" \ 6 | --include-data-files="venv/lib/python3.8/site-packages/whisper/assets" \ 7 | main.py \ 8 | --onefile -------------------------------------------------------------------------------- /freeze-mac.sh: -------------------------------------------------------------------------------- 1 | #!/bin/bash 2 | 3 | pipenv shell 4 | pyinstaller main.py \ 5 | --path="$(pwd)" \ 6 | --add-binary="ffmpeg-bin/ffmpeg:." \ 7 | --add-binary="pytranscriber.sqlite:." 
\ 8 | --add-data="pytranscriber/gui/*.qm:pytranscriber/gui/" \ 9 | --add-data="venv/lib/python3.8/site-packages/whisper/assets:whisper/assets" \ 10 | --clean \ 11 | --windowed \ 12 | --noconfirm 13 | 14 | -------------------------------------------------------------------------------- /doc/entitlements.plist: -------------------------------------------------------------------------------- 1 | 2 | 3 | 4 | 5 | 6 | com.apple.security.cs.allow-jit 7 | 8 | com.apple.security.cs.allow-unsigned-executable-memory 9 | 10 | com.apple.security.cs.disable-library-validation 11 | 12 | 13 | -------------------------------------------------------------------------------- /.github/ISSUE_TEMPLATE/feature_request.md: -------------------------------------------------------------------------------- 1 | --- 2 | name: Feature request 3 | about: Suggest an idea for this project 4 | title: '' 5 | labels: '' 6 | assignees: '' 7 | 8 | --- 9 | 10 | **Is your feature request related to a problem? Please describe.** 11 | A clear and concise description of what the problem is. Ex. I'm always frustrated when [...] 12 | 13 | **Describe the solution you'd like** 14 | A clear and concise description of what you want to happen. 15 | 16 | **Describe alternatives you've considered** 17 | A clear and concise description of any alternative solutions or features you've considered. 18 | 19 | **Additional context** 20 | Add any other context or screenshots about the feature request here. 
21 | -------------------------------------------------------------------------------- /.github/FUNDING.yml: -------------------------------------------------------------------------------- 1 | # These are supported funding model platforms 2 | 3 | github: @raryelcostasouza # Replace with up to 4 GitHub Sponsors-enabled usernames e.g., [user1, user2] 4 | patreon: # Replace with a single Patreon username 5 | open_collective: # Replace with a single Open Collective username 6 | ko_fi: pytranscriber # Replace with a single Ko-fi username 7 | tidelift: # Replace with a single Tidelift platform-name/package-name e.g., npm/babel 8 | community_bridge: # Replace with a single Community Bridge project-name e.g., cloud-foundry 9 | liberapay: # Replace with a single Liberapay username 10 | issuehunt: # Replace with a single IssueHunt username 11 | otechie: # Replace with a single Otechie username 12 | lfx_crowdfunding: # Replace with a single LFX Crowdfunding project-name e.g., cloud-foundry 13 | custom: # Replace with up to 4 custom sponsorship URLs e.g., ['link1', 'link2'] 14 | -------------------------------------------------------------------------------- /requirements.txt: -------------------------------------------------------------------------------- 1 | # CUDA-enabled PyTorch packages (CUDA 12.6) 2 | torch==2.3.0 3 | torchvision==0.18.0 4 | torchaudio==2.3.0 5 | 6 | # Ensure the extra index for PyTorch CUDA wheels 7 | --extra-index-url https://download.pytorch.org/whl/cu126 8 | 9 | # Other dependencies 10 | cachetools==4.2.4 11 | certifi==2024.7.4 12 | chardet==4.0.0 13 | charset-normalizer==2.0.6 14 | google-api-core==2.1.0 15 | google-api-python-client==2.24.0 16 | google-auth==2.3.0 17 | google-auth-httplib2==0.1.0 18 | google-auth-oauthlib==0.4.6 19 | googleapis-common-protos==1.53.0 20 | httplib2==0.20.1 21 | idna==3.7 22 | oauthlib==3.2.2 23 | progressbar==2.5 24 | protobuf==4.21.6 25 | pyasn1==0.4.8 26 | pyasn1-modules==0.2.8 27 | pyparsing==2.4.7 28 | 
pyqt5==5.15.10 29 | pyqt5-sip==12.13.0 30 | pysrt==1.1.2 31 | requests==2.32.0 32 | requests-oauthlib==1.3.0 33 | rsa==4.7.2 34 | six==1.16.0 35 | uritemplate==3.0.1 36 | urllib3==2.2.2 37 | openai-whisper 38 | platformdirs 39 | -------------------------------------------------------------------------------- /.github/ISSUE_TEMPLATE/bug_report.md: -------------------------------------------------------------------------------- 1 | --- 2 | name: Bug report 3 | about: Create a report to help us improve 4 | title: '' 5 | labels: '' 6 | assignees: '' 7 | 8 | --- 9 | 10 | **Describe the bug** 11 | A clear and concise description of what the bug is. 12 | 13 | **To Reproduce** 14 | Steps to reproduce the behavior: 15 | 1. Go to '...' 16 | 2. Click on '....' 17 | 3. Scroll down to '....' 18 | 4. See error 19 | 20 | **Expected behavior** 21 | A clear and concise description of what you expected to happen. 22 | 23 | **Screenshots** 24 | If applicable, add screenshots to help explain your problem. 25 | 26 | **Desktop (please complete the following information):** 27 | - OS: [e.g. iOS] 28 | - Browser [e.g. chrome, safari] 29 | - Version [e.g. 22] 30 | 31 | **Smartphone (please complete the following information):** 32 | - Device: [e.g. iPhone6] 33 | - OS: [e.g. iOS8.1] 34 | - Browser [e.g. stock browser, safari] 35 | - Version [e.g. 22] 36 | 37 | **Additional context** 38 | Add any other context about the problem here. 
39 | -------------------------------------------------------------------------------- /Pipfile: -------------------------------------------------------------------------------- 1 | [[source]] 2 | url = "https://pypi.org/simple" 3 | verify_ssl = true 4 | name = "pypi" 5 | 6 | [packages] 7 | autosub = "*" 8 | pyqt5 = "==5.15.4" 9 | pyinstaller = "*" 10 | macholib = "*" 11 | cachetools = "==4.2.4" 12 | certifi = "==2021.10.8" 13 | chardet = "==4.0.0" 14 | charset-normalizer = "==2.0.6" 15 | google-api-core = "==2.1.0" 16 | google-api-python-client = "==2.24.0" 17 | google-auth = "==2.3.0" 18 | google-auth-httplib2 = "==0.1.0" 19 | google-auth-oauthlib = "==0.4.6" 20 | googleapis-common-protos = "==1.53.0" 21 | httplib2 = "==0.20.1" 22 | idna = "==3.2" 23 | oauthlib = "==3.1.1" 24 | progressbar = "==2.5" 25 | protobuf = "==3.18.1" 26 | pyasn1 = "==0.4.8" 27 | pyasn1-modules = "==0.2.8" 28 | pyparsing = "==2.4.7" 29 | pyqt5-qt5 = "==5.15.2" 30 | pyqt5-sip = "==12.9.0" 31 | pysrt = "==1.1.2" 32 | requests = "==2.26.0" 33 | requests-oauthlib = "==1.3.0" 34 | rsa = "==4.7.2" 35 | six = "==1.16.0" 36 | uritemplate = "==3.0.1" 37 | urllib3 = "==1.26.7" 38 | nuitka = "*" 39 | orderedset = "*" 40 | zstandard = "*" 41 | 42 | [dev-packages] 43 | 44 | [requires] 45 | python_version = "3.8" 46 | -------------------------------------------------------------------------------- /main.py: -------------------------------------------------------------------------------- 1 | # (C) 2019 Raryel C. Souza 2 | # This program is free software: you can redistribute it and/or modify 3 | # it under the terms of the GNU General Public License as published by 4 | # the Free Software Foundation, either version 3 of the License, or 5 | # (at your option) any later version. 6 | # This program is distributed in the hope that it will be useful, 7 | # but WITHOUT ANY WARRANTY; without even the implied warranty of 8 | # MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. 
if __name__ == '__main__':
    # Must run first in the entry module so frozen executables can use
    # multiprocessing (see the multiprocessing.freeze_support docs).
    multiprocessing.freeze_support()

    try:
        # Keep the controller bound to a name so it stays referenced
        # until the interpreter exits.
        ctrMain = Ctr_Main()
    except Exception as ex:
        # Any start-up failure is reported in a dialog, then signalled
        # to the caller through a non-zero exit status.
        MessageUtil.show_error_message(str(ex), "Main Error")
        sys.exit(1)
    else:
        sys.exit(0)
class CtrEngine:
    """Static holder for a cancel flag plus a helper that writes text to disk.

    All members are static; the flag is stored on the class itself, so it is
    shared by every caller in the process.
    """

    # Default value so is_operation_canceled() is safe to call even if
    # init() has not been invoked yet.
    cancel = False

    @staticmethod
    def init():
        """Reset the cancel flag to False."""
        CtrEngine.cancel = False

    @staticmethod
    def is_operation_canceled():
        """Return the current value of the cancel flag."""
        return CtrEngine.cancel

    @staticmethod
    def cancel_operation():
        """Set the cancel flag to True."""
        CtrEngine.cancel = True

    @staticmethod
    def save_output_file(output_path, file_content):
        """Write ``file_content`` (a str) to ``output_path`` as UTF-8 bytes.

        An existing file is overwritten. The file is opened in binary mode so
        the bytes written are exactly the UTF-8 encoding, with no newline
        translation.
        """
        # Context manager guarantees the handle is closed even if the
        # encode or write step raises.
        with open(output_path, 'wb') as f:
            f.write(file_content.encode("utf-8"))
class Transcription_Parameters():
    """Plain container for the settings of one transcription request."""

    def __init__(self, listFiles, outputFolder, langCode,
                 boolOpenOutputFilesAuto, proxies=None):
        """Store the given settings as same-named attributes.

        ``proxies`` is optional and defaults to None. The whisper model is
        not a constructor argument; it starts as None and is attached later
        through set_model_whisper().
        """
        (self.listFiles,
         self.outputFolder,
         self.langCode) = (listFiles, outputFolder, langCode)
        self.boolOpenOutputFilesAuto, self.proxies = (
            boolOpenOutputFilesAuto, proxies)
        self.model_whisper = None

    def set_model_whisper(self, model):
        """Remember ``model`` as the whisper model for this request."""
        self.model_whisper = model

    def get_model_whisper(self):
        """Return the stored whisper model, or None if none was set."""
        return self.model_whisper
class MessageUtil:
    """Static helpers that display a QMessageBox with a given icon and text."""

    @staticmethod
    def _show(icon, text, title):
        """Build a QMessageBox with the given icon/title/text and run it."""
        box = QMessageBox()
        box.setIcon(icon)

        box.setWindowTitle(title)
        box.setText(text)
        box.exec()

    @staticmethod
    def show_info_message(info_msg, title=""):
        """Show ``info_msg`` in a message box with the Information icon."""
        MessageUtil._show(QMessageBox.Information, info_msg, title)

    @staticmethod
    def show_error_message(error_msg, title="Error"):
        """Show ``error_msg`` in a message box with the Critical icon."""
        MessageUtil._show(QMessageBox.Critical, error_msg, title)
class Ctr_Main():
    """Top-level controller wiring the DB controller, proxy controller and main view."""

    def __init__(self):
        """Create the collaborators, restore the last GUI language, show the view."""
        # Construction order is kept as-is: the proxy controller and the
        # view both receive this (partially built) controller.
        self.ctrDB = CtrDB()
        self.ctrProxy = Ctr_Proxy(self)

        self.last_language = None

        self.viewMain = ViewMain(self)

        self._load_last_language()
        self.viewMain.show()

    def save_last_language(self, language):
        """Replace the stored last-language record with ``language``."""
        db = self.ctrDB
        # Clear first so only one record is kept.
        db.clear_last_language()
        db.save_last_language(language)

    def _load_last_language(self):
        """Apply the stored language to the view, if a record exists."""
        row = self.ctrDB.load_last_language()
        if row is None:
            return

        # Second field of the stored record -- presumably the language
        # name; confirm against CtrDB.
        self.last_language = row[1]
        self.viewMain.set_gui_language(self.last_language)
Souza 13 | AppPublisherURL=https://github.com/raryelcostasouza/pyTranscriber 14 | AppSupportURL=https://github.com/raryelcostasouza/pyTranscriber 15 | AppUpdatesURL=https://github.com/raryelcostasouza/pyTranscriber 16 | DefaultDirName={pf}\pyTranscriber 17 | DisableDirPage=yes 18 | DisableProgramGroupPage=yes 19 | LicenseFile=.\LICENSE 20 | OutputBaseFilename=setup 21 | Compression=lzma 22 | SolidCompression=yes 23 | 24 | [Languages] 25 | Name: "english"; MessagesFile: "compiler:Default.isl" 26 | 27 | [Tasks] 28 | Name: "desktopicon"; Description: "{cm:CreateDesktopIcon}"; GroupDescription: "{cm:AdditionalIcons}"; Flags: unchecked 29 | 30 | [Files] 31 | Source: ".\dist\pyTranscriber.exe"; DestDir: "{app}"; Flags: ignoreversion 32 | ; NOTE: Don't use "Flags: ignoreversion" on any shared system files 33 | 34 | [Icons] 35 | Name: "{commonprograms}\pyTranscriber"; Filename: "{app}\pyTranscriber.exe" 36 | Name: "{commondesktop}\pyTranscriber"; Filename: "{app}\pyTranscriber.exe"; Tasks: desktopicon 37 | 38 | [Run] 39 | Filename: "{app}\pyTranscriber.exe"; Description: "{cm:LaunchProgram,pyTranscriber}"; Flags: nowait postinstall skipifsilent 40 | 41 | -------------------------------------------------------------------------------- /script-installer-windows-standalone.iss: -------------------------------------------------------------------------------- 1 | ; Script generated by the Inno Setup Script Wizard. 2 | ; SEE THE DOCUMENTATION FOR DETAILS ON CREATING INNO SETUP SCRIPT FILES! 3 | 4 | [Setup] 5 | ; NOTE: The value of AppId uniquely identifies this application. 6 | ; Do not use the same AppId value in installers for other applications. 7 | ; (To generate a new GUID, click Tools | Generate GUID inside the IDE.) 8 | AppId={{5240AB76-FC62-4BFA-A1EF-FA49AF701F80} 9 | AppName=pyTranscriber 10 | AppVersion=1.9 11 | AppVerName=pyTranscriber 1.9 12 | AppPublisher=Raryel C. 
Souza 13 | AppPublisherURL=https://github.com/raryelcostasouza/pyTranscriber 14 | AppSupportURL=https://github.com/raryelcostasouza/pyTranscriber 15 | AppUpdatesURL=https://github.com/raryelcostasouza/pyTranscriber 16 | DefaultDirName={pf}\pyTranscriber 17 | DisableDirPage=yes 18 | DisableProgramGroupPage=yes 19 | LicenseFile=.\LICENSE 20 | OutputBaseFilename=setup 21 | Compression=lzma 22 | SolidCompression=yes 23 | 24 | [Languages] 25 | Name: "english"; MessagesFile: "compiler:Default.isl" 26 | 27 | [Tasks] 28 | Name: "desktopicon"; Description: "{cm:CreateDesktopIcon}"; GroupDescription: "{cm:AdditionalIcons}"; Flags: unchecked 29 | 30 | [Files] 31 | Source: ".\main.dist\*"; DestDir: "{app}"; Flags: ignoreversion recursesubdirs 32 | ; NOTE: Don't use "Flags: ignoreversion" on any shared system files 33 | 34 | [Icons] 35 | Name: "{commonprograms}\pyTranscriber"; Filename: "{app}\pyTranscriber.exe" 36 | Name: "{commondesktop}\pyTranscriber"; Filename: "{app}\pyTranscriber.exe"; Tasks: desktopicon 37 | 38 | [Run] 39 | Filename: "{app}\pyTranscriber.exe"; Description: "{cm:LaunchProgram,pyTranscriber}"; Flags: nowait postinstall skipifsilent 40 | 41 | -------------------------------------------------------------------------------- /deployment/win/script-installer-windows-standalone.iss: -------------------------------------------------------------------------------- 1 | ; Script generated by the Inno Setup Script Wizard. 2 | ; SEE THE DOCUMENTATION FOR DETAILS ON CREATING INNO SETUP SCRIPT FILES! 3 | 4 | [Setup] 5 | ; NOTE: The value of AppId uniquely identifies this application. 6 | ; Do not use the same AppId value in installers for other applications. 7 | ; (To generate a new GUID, click Tools | Generate GUID inside the IDE.) 8 | AppId={{5240AB76-FC62-4BFA-A1EF-FA49AF701F80} 9 | AppName=pyTranscriber 10 | AppVersion=1.9 11 | AppVerName=pyTranscriber 1.9 12 | AppPublisher=Raryel C. 
def srt_formatter(subtitles, padding_before=0, padding_after=0):
    """
    Serialize a list of subtitles according to the SRT format, with optional time padding.

    ``subtitles`` is an iterable of ``((start, end), text)`` pairs, where
    ``start`` and ``end`` are assigned to the item's ``seconds`` fields.
    ``padding_before`` is subtracted from each start (clamped at 0) and
    ``padding_after`` is added to each end. Items are numbered from 1 and
    returned joined by newlines.
    """
    sub_rip_file = pysrt.SubRipFile()
    for i, ((start, end), text) in enumerate(subtitles, start=1):
        item = pysrt.SubRipItem()
        item.index = i
        item.text = six.text_type(text)
        # Never let the padding push a start time below zero.
        item.start.seconds = max(0, start - padding_before)
        item.end.seconds = end + padding_after
        sub_rip_file.append(item)
    return '\n'.join(six.text_type(item) for item in sub_rip_file)


def vtt_formatter(subtitles, padding_before=0, padding_after=0):
    """
    Serialize a list of subtitles according to the VTT format, with optional time padding.

    The SRT serialization is reused; only the millisecond separator of each
    timing line is changed from ',' to '.', and a ``WEBVTT`` header is
    prepended. Commas inside the subtitle text itself are left untouched.
    """
    import re  # local import: only this formatter needs it

    text = srt_formatter(subtitles, padding_before, padding_after)
    # Rewrite "HH:MM:SS,mmm --> HH:MM:SS,mmm" at the start of a line only,
    # instead of a blanket replace(',', '.') that also hit the cue text.
    text = re.sub(
        r'^(\d+:\d{2}:\d{2}),(\d{3}) --> (\d+:\d{2}:\d{2}),(\d{3})',
        r'\1.\2 --> \3.\4',
        text,
        flags=re.MULTILINE
    )
    return 'WEBVTT\n\n' + text


def json_formatter(subtitles):
    """
    Serialize a list of subtitles as a JSON blob.

    Each ``((start, end), text)`` pair becomes an object with the keys
    ``start``, ``end`` and ``content``.
    """
    subtitle_dicts = [
        {
            'start': start,
            'end': end,
            'content': text,
        }
        for ((start, end), text)
        in subtitles
    ]
    return json.dumps(subtitle_dicts)


def raw_formatter(subtitles):
    """
    Serialize a list of subtitles as a single space-separated string.

    Timing information is discarded; only the text of each entry is kept.
    """
    return ' '.join(text for (_rng, text) in subtitles)


# Maps an output-format name to the function that produces it.
FORMATTERS = {
    'srt': srt_formatter,
    'vtt': vtt_formatter,
    'json': json_formatter,
    'raw': raw_formatter,
}
class SRTParser(object):
    """Convert SubRip (.srt) subtitle files into plain-text transcripts."""

    @staticmethod
    def extractTextFromSRT(fileSRT):
        """
        Write the spoken text of ``fileSRT`` to a sibling ``.txt`` file.

        The file is read as UTF-8 (undecodable bytes are replaced), cleaned
        with ``clean_up`` and written next to the source with the extension
        swapped for ``.txt``.
        """
        # Local import: the module header only imports ``re`` and ``sys``.
        import os

        file_encoding = 'utf-8'

        with open(fileSRT, encoding=file_encoding, errors='replace') as f:
            lines = f.readlines()
        new_lines = SRTParser.clean_up(lines)

        # splitext instead of slicing off 4 characters, so names whose
        # extension is not exactly 4 characters long are not truncated.
        new_file_name = os.path.splitext(fileSRT)[0] + '.txt'

        with open(new_file_name, 'w', encoding=file_encoding) as f:
            f.writelines(new_lines)

    @staticmethod
    def clean_up(lines):
        """
        Return only the text lines of an SRT document.

        The first line is always skipped (as in the original implementation).
        Blank lines, timing lines (containing ``-->``) and cue index lines
        are then dropped.

        A line counts as a cue index only when it consists entirely of
        digits. Matching just a leading digit would also discard subtitle
        text such as "2019 was a good year". A text line made up solely of
        digits is still indistinguishable from an index and is dropped.
        """
        index_pattern = re.compile(r"[0-9]+")

        new_lines = []
        for line in lines[1:]:
            stripped = line.strip()
            if not stripped or "-->" in line or index_pattern.fullmatch(stripped):
                continue
            new_lines.append(line)
        return new_lines
def remove_symbols_and_diacritics(s: str, keep=""):
    """
    Replace any other markers, symbols, and punctuations with a space,
    and drop any diacritics (category 'Mn' and some manual mappings)

    Characters contained in ``keep`` are passed through untouched.
    """
    pieces = []
    for ch in unicodedata.normalize("NFKD", s):
        if ch in keep:
            pieces.append(ch)
        elif ch in ADDITIONAL_DIACRITICS:
            # letters that NFKD does not decompose get a manual replacement
            pieces.append(ADDITIONAL_DIACRITICS[ch])
        else:
            category = unicodedata.category(ch)
            if category == "Mn":
                # combining mark: dropped entirely
                continue
            pieces.append(" " if category[0] in "MSP" else ch)
    return "".join(pieces)


def remove_symbols(s: str):
    """
    Replace any other markers, symbols, punctuations with a space, keeping diacritics
    """
    normalized = unicodedata.normalize("NFKC", s)
    out = []
    for ch in normalized:
        out.append(" " if unicodedata.category(ch)[0] in "MSP" else ch)
    return "".join(out)


class BasicTextNormalizer:
    """
    Callable that lower-cases text, strips bracketed/parenthesised spans and
    symbols, and collapses whitespace runs into single spaces.
    """

    def __init__(self, remove_diacritics: bool = False, split_letters: bool = False):
        if remove_diacritics:
            self.clean = remove_symbols_and_diacritics
        else:
            self.clean = remove_symbols
        self.split_letters = split_letters

    def __call__(self, s: str):
        text = s.lower()
        # remove words between brackets
        text = re.sub(r"[<\[][^>\]]*[>\]]", "", text)
        # remove words between parenthesis
        text = re.sub(r"\(([^)]+?)\)", "", text)
        text = self.clean(text).lower()

        if self.split_letters:
            graphemes = regex.findall(r"\X", text, regex.U)
            text = " ".join(graphemes)

        # replace any successive whitespace characters with a space
        return re.sub(r"\s+", " ", text)
class Ctr_Proxy():
    """
    Controller for the proxy settings: keeps the current proxy mapping,
    persists it through ``ctrMain.ctrDB`` and drives the ``ViewProxy`` dialog.
    """

    # Default mapping (no proxy); replaced per instance by set_proxy_setting.
    proxy = dict.fromkeys(('http', 'https'))

    def __init__(self, ctrMain):
        self.ctrMain = ctrMain
        # The dialog is created lazily, on first show()/load_data().
        self.viewProxy = None

    def show(self):
        """Create the dialog if needed and display it."""
        if self.viewProxy is None:
            self.viewProxy = ViewProxy(self)
        self.viewProxy.show()

    def save(self):
        """Clear the stored proxy, then store the current one if it is set."""
        db = self.ctrMain.ctrDB
        db.clear_proxy()
        if not self.proxy['https']:
            # nothing to store: the proxy has been disabled
            MessageUtil.show_info_message('Proxy disabled successfully', 'Proxy disabled')
            return
        # saving the proxy address
        db.save_proxy(self.proxy)

    def load_data(self):
        """Load the stored proxy (if any) and push it to the dialog."""
        if self.viewProxy is None:
            self.viewProxy = ViewProxy(self)

        stored = self.ctrMain.ctrDB.load_proxy()
        if stored is None:
            return
        self.set_proxy_setting(stored[1], False)

    def test_proxy_setting(self, proxy_addr):
        """Check connectivity through ``proxy_addr`` and report the outcome."""
        candidate = dict.fromkeys(('http', 'https'), proxy_addr)

        if MyUtil.is_internet_connected(candidate):
            MessageUtil.show_info_message('Successfully connected to Google.', 'Success')
        else:
            MessageUtil.show_error_message('Error connecting to Google.','Error')

    def set_proxy_setting(self, proxy_addr, frontend_request=False):
        """
        Use ``proxy_addr`` for both http and https.

        When the request comes from the dialog (``frontend_request``) the
        value is saved; otherwise the dialog is refreshed to display it.
        """
        self.proxy = dict.fromkeys(('http', 'https'), proxy_addr)
        if not frontend_request:
            self.viewProxy.refresh_gui(proxy_addr)
            return
        self.save()

    def get_proxy_setting(self):
        """Return the current proxy mapping."""
        return self.proxy
class ViewProxy:
    """Dialog wrapper for the proxy settings; forwards user actions to the controller."""

    def __init__(self, ctr_proxy):
        self.ctr_proxy = ctr_proxy
        self.proxy_dialog = QDialog()
        ui = Ui_Dialog()
        ui.setupUi(self.proxy_dialog)

        # Keep direct references to the widgets this view manipulates.
        self.radioButtonNone = ui.radioButtonNone
        self.radioButtonHTTP = ui.radioButtonHTTP
        self.lineEditHttpProxy = ui.lineEditHttpProxy
        self.pushButtonTest = ui.pushButtonTest
        self.bSave = ui.bSave

        # Wire the widget signals to their handlers.
        self.radioButtonNone.clicked.connect(self.__listener_rbOnClicked)
        self.lineEditHttpProxy.textChanged.connect(self.__listenerLineEditInput)
        self.pushButtonTest.clicked.connect(self.__listener_test)
        self.bSave.clicked.connect(self.__listener_save)

        self.__clear_proxy_settings()

    def show(self):
        """Load the stored settings through the controller, then run the dialog."""
        self.ctr_proxy.load_data()
        self.proxy_dialog.exec_()

    def __clear_proxy_settings(self):
        # "No proxy" state: None selected, address field and test button disabled.
        self.radioButtonNone.setChecked(True)
        for widget in (self.lineEditHttpProxy, self.pushButtonTest):
            widget.setEnabled(False)

    def refresh_gui(self, proxy_address=None):
        """Show ``proxy_address`` in the dialog, or the "no proxy" state if it is empty."""
        if not proxy_address:
            self.__clear_proxy_settings()
            return

        self.radioButtonHTTP.setChecked(True)
        self.lineEditHttpProxy.setEnabled(True)
        self.pushButtonTest.setEnabled(True)
        self.lineEditHttpProxy.setText(str(proxy_address))

    def __listener_test(self):
        address = self.lineEditHttpProxy.text()

        # Only test when an address was typed and the HTTP option is selected.
        if address and self.radioButtonHTTP.isChecked():
            self.ctr_proxy.test_proxy_setting(address)

    def __listener_save(self):
        address = self.lineEditHttpProxy.text()

        if address and self.radioButtonHTTP.isChecked():
            self.ctr_proxy.set_proxy_setting(address, True)
            return
        if self.radioButtonNone.isChecked():
            # An empty address tells the controller that the proxy is disabled.
            self.ctr_proxy.set_proxy_setting('',True)

    def __listener_rbOnClicked(self):
        # Selecting "None" wipes whatever address was typed.
        if self.radioButtonNone.isChecked():
            self.lineEditHttpProxy.setText('')

    def __listenerLineEditInput(self):
        # The test button is only usable while the address field is non-empty.
        has_text = bool(self.lineEditHttpProxy.text())
        self.pushButtonTest.setEnabled(has_text)
class Thread_Exec_Whisper(ThreadExecGeneric):
    """Worker thread that transcribes the selected media files with the Whisper engine."""

    def run(self):
        """Initialise Whisper, process every selected file, then mark the thread as stopped."""
        CtrWhisper.init()
        super()._loopSelectedFiles()
        self.running = False

    def _run_engine_for_media(self, index, langCode):
        """
        Transcribe the file at position ``index`` of the selected files.

        Errors are reported through ``signalErrorMsg``. On success the
        progress listener is notified and, if enabled in the parameters,
        both output files are opened.
        """
        params = self.obj_transcription_parameters
        sourceFile = params.listFiles[index]
        outputs = self._generatePathOutputFile(sourceFile)
        srt_path, txt_path = outputs[0], outputs[1]

        result = None
        try:
            result = CtrWhisper.generate_subtitles(
                source_path=sourceFile,
                outputSRT=srt_path,
                outputTXT=txt_path,
                src_language=langCode,
                model=params.get_model_whisper(),
            )
        except Exception:
            # Emit the full traceback
            self.signalErrorMsg.emit(
                f"Error! Unable to generate subtitles: {traceback.format_exc()}"
            )

        # if nothing was returned
        if not result:
            self.signalErrorMsg.emit("Error! Unable to generate subtitles for file " + sourceFile + ".")
            return

        # -1 is treated as "operation canceled": nothing more to do
        if result == -1:
            return

        # update the progress message
        self.listenerProgress("Finished", 100)

        if params.boolOpenOutputFilesAuto:
            # open both SRT and TXT output files
            MyUtil.open_file(txt_path)
            MyUtil.open_file(srt_path)

For Developers - Technical Details

2 | 3 | This app consists basically of a friendly pyQt5 graphical interface for a customized version of Autosub 0.4.0 that can run on Linux, Windows and MacOS. All the hard work of processing the audio and generating the subtitles is done by Autosub. 4 | 5 |

Dependencies to build

6 | 7 |
    8 |
  1. pip3 install pipenv 9 |
  2. pipenv install (install all dependencies from Pipfile) 10 |
  3. Download the static ffmpeg binary and move it to project root folder 11 | 12 | # How to run? 13 | $ pipenv shell 14 | $ python3 main.py 15 | 16 | 17 | # How to edit the GUI? 18 | Install Qt5 Designer and open the file pytranscriber/gui/gui.ui 19 | 20 | # How to convert the .ui file (qt5designer project file) to .py? 21 | $ pyuic5 gui.ui -o gui.py 22 | 23 | # How to generate the python bundled binary package version with ffmpeg included? 24 | 25 | # Linux: 26 | $ pyinstaller main.py --path="$(pwd)" --add-binary="ffmpeg:." --onefile --clean 27 | 28 | # Windows: 29 | $ pyinstaller main.py --path=$pwd --add-binary="ffmpeg.exe;." --onefile --clean 30 | 31 | # Mac: 32 | $ pyinstaller main.py --path="$(pwd)" --add-binary="ffmpeg:." --clean --windowed 33 | 34 | 35 | The output binary will be on subfolder dist/main and has all dependencies included. For more details check pyinstaller documentation 36 | 37 | # On Linux how to generate a statically linked binary so it can run even on systems with older glibc installed? 38 | 39 | As explained in pyInstaller FAQ: 40 | > The executable that PyInstaller builds is not fully static, in that it still depends on the system libc. Under Linux, the ABI of GLIBC is backward compatible, but not forward compatible. So if you link against a newer GLIBC, you can't run the resulting executable on an older system. 41 | 42 | > Solution 1)To compile the Python interpreter with its modules (and also probably bootloader) on the oldest system you have around, so that it gets linked with the oldest version of GLIBC. 43 | 44 | > Solution 2) to use a tool like StaticX to create a fully-static bundled version of your PyInstaller application. StaticX bundles all dependencies, including libc and ld.so. 
(Python code :arrow_right: PyInstaller :arrow_right: StaticX :arrow_right: Fully-static application)" 45 | 46 | Install staticx and patchelf (dependency) 47 | 48 | $ pip3 install --user patchelf-wrapper 49 | 50 | $ pip3 install --user staticx 51 | 52 | After generating the binary with pyinstaller, open the dist folder and run: 53 | 54 | $ staticx main main-static 55 | 56 | The newly created main-static contains all library dependencies, including glibc, so it should be able to run even on very old systems. 57 | 58 | Note: On my Manjaro system, the first time I ran this command I got an error related to "libmpdec.so.2 => not found". Installing the package mpdecimal with the package manager solved the issue. 59 | -------------------------------------------------------------------------------- /README.md: -------------------------------------------------------------------------------- 1 | # pyTranscriber 2 | 3 | [![Tip Me via PayPal](https://img.shields.io/badge/PayPal-tip%20me-1462ab.svg?logo=paypal)](https://www.paypal.com/cgi-bin/webscr?cmd=_donations&business=YHB854YHPJCU8&item_name=Donation+pyTranscriber&currency_code=BRL) 4 | [![Tip Me via Bitcoin Lightning](https://img.shields.io/badge/Bitcoin%20Lightning-tip%20me-f7931a.svg?logo=lightning)](https://github.com/raryelcostasouza/pyTranscriber/raw/master/doc/lightning.jpeg) 5 | 6 | [![ko-fi](https://ko-fi.com/img/githubbutton_sm.svg)](https://ko-fi.com/A0A6AIR3D) 7 | 8 | MOVED TO NEW WEBSITE - https://pytranscriber.github.io 9 | 10 | More than 640k downloads since first version. Thanks! 11 | Check live statistics at GitHub Release Stats 12 | 13 | # Thanks to the people helping with funding 14 | Jixing Zhao, Narsu Narsu, Lucas Thorn, Soonkj Jung, Sergio Moreno, Yooki Adair, Adrien Jarton, YP, JOY_EASY, RodrigoRios, Zhou Mi, Dongmei Chen, Jung Yi Hung, Tah Kwang Tomas Tso 15 | 16 | # UPDATE - v2.1-stable - 13/07/2025 17 | 1. 
Compiled torch with CUDA support enabled for faster whisper processing for those who have NVidia GPUs 18 | 19 | # UPDATE - v2.0-stable - 07/07/2025 20 | 1. Added binary for Linux (GLIBC 2.35 or newer) 21 | 22 | # UPDATE - v2.0-stable - 22/05/2025 23 | 1. Fixed issue with cantonese language not working using whisper engine 24 | 2. Fixed srt file generation not being compliant with srt syntax 25 | 26 | # UPDATE - v2.0-RC_1 - 04/03/2025 27 | 1. Added support for openAI Whisper with local processing of media files as alternative to Google Speech API (where all media file is uploaded to Google servers for processing) 28 | 2. Added saving/load settings to sqlite local db 29 | 30 | 31 | # UPDATE - v1.9 - 22/12/2022 32 | 1. Windows/Linux version compiled with Nuitka (https://github.com/Nuitka/Nuitka) instead of pyInstaller to improve stability and fix random crashes while transcribing audio. If you still experience issues please report at Issues section. 33 | 2. Support for Ogg/ogv/mkv/webm media files on file selector 34 | 35 | # UPDATE - v1.8 - 17/08/2022 36 | 1. Fixed bug: language codes for Chinese Languages updated accordingly to Speech API. Changed to "cmn-Hans-CN" and "cmn-Hant-TW" instead of "zh / zh-TW").. The output was always mistakenly coming in Cantonese (yue-Hant-HK). Now they come properly in Traditional Chinese and Simplified Chinese. Thanks to "Specter Hi" for reporting! 37 | 2. Added GUI language switch feature 38 | 3. Updated link to funding campaign at GitHub Sponsors 39 | 40 | # UPDATE - v1.7 - 08/08/2022 41 | 1. add proxy setting 42 | 2. change the function 'pytranscriber.util.MyUtil.is_internet_connected' 43 | 3. add requirements.txt 44 | 4. rebuilt using pyInstaller 5.3 - more stability to prevent multithreading crashes on Windows 45 | 5. 
class MyUtil(object):
    """Stateless helpers: opening files, connectivity checks and progress math."""

    @staticmethod
    def open_file(path):
        """Open ``path`` with the platform's default application."""
        system = platform.system()
        if system == "Windows":
            os.startfile(path)
        elif system == "Darwin":
            subprocess.Popen(["open", path])
        else:
            subprocess.Popen(["xdg-open", path])

    @staticmethod
    def is_internet_connected(proxies=None):
        """
        Return True when https://www.google.com answers with HTTP 200.

        ``proxies`` is forwarded to ``send_request``. Any unexpected error is
        printed and reported as "not connected" (best-effort check).
        """
        headers = {'User-Agent': 'Mozilla/5.0 (X11; Linux x86_64; rv:91.0) Gecko/20100101 Firefox/91.0'}
        try:
            # connect to the host -- tells us if the host is actually
            # reachable
            res = MyUtil.send_request('https://www.google.com', proxies=proxies, headers=headers)
        except Exception as e:
            print("Error Name: ", e.__class__.__name__)
            print("Error Message: ", e)
            return False
        return res == 200

    @staticmethod
    def send_request(url,
                     n_retries=0,
                     backoff_factor=0.9,
                     status_codes=(504, 503, 502, 500, 429, 302, 408, 425),
                     proxies=None,
                     headers=None):
        """
        GET ``url`` and return the HTTP status code, or -1 on failure.

        ``n_retries``, ``backoff_factor`` and ``status_codes`` configure the
        ``Retry`` policy mounted for both http and https. Timeouts (5 s) and
        request errors are printed and reported as -1.

        ``status_codes`` defaults to a tuple instead of a list so the default
        cannot be mutated between calls; callers may still pass a list.
        """
        retries = Retry(connect=n_retries, backoff_factor=backoff_factor,
                        status_forcelist=status_codes)
        # The context manager closes the session (and its pooled connections)
        # on every exit path; previously the session was never closed.
        with requests.Session() as sess:
            sess.mount("https://", HTTPAdapter(max_retries=retries))
            sess.mount("http://", HTTPAdapter(max_retries=retries))
            try:
                response = sess.get(url, timeout=5, proxies=proxies, headers=headers)
                response.raise_for_status()  # Raises an HTTPError for bad responses
                return response.status_code
            except requests.Timeout:
                print("The request timed out")
            except requests.RequestException as e:
                print(f"An error occurred: {e}")
        return -1

    @staticmethod
    def percentage(currentval, maxval):
        """Return ``currentval`` as a percentage of ``maxval`` (as a float)."""
        return 100 * currentval / float(maxval)
3 | """ 4 | 5 | from __future__ import unicode_literals 6 | 7 | GOOGLE_SPEECH_API_KEY = "AIzaSyBOti4mM-6x9WDnZIjIeyEU21OpBXqWBgw" 8 | GOOGLE_SPEECH_API_URL = "http://www.google.com/speech-api/v2/recognize?client=chromium&lang={lang}&key={key}" # pylint: disable=line-too-long 9 | 10 | LANGUAGE_CODES = { 11 | 'af': 'Afrikaans', 12 | 'ar': 'Arabic', 13 | 'az': 'Azerbaijani', 14 | 'be': 'Belarusian', 15 | 'bg': 'Bulgarian', 16 | 'bn': 'Bengali', 17 | 'bs': 'Bosnian', 18 | 'ca': 'Catalan', 19 | 'ceb': 'Cebuano', 20 | 'cs': 'Czech', 21 | 'cy': 'Welsh', 22 | 'da': 'Danish', 23 | 'de': 'German', 24 | 'el': 'Greek', 25 | 'en-AU': 'English (Australia)', 26 | 'en-CA': 'English (Canada)', 27 | 'en-GB': 'English (United Kingdom)', 28 | 'en-IN': 'English (India)', 29 | 'en-IE': 'English (Ireland)', 30 | 'en-NZ': 'English (New Zealand)', 31 | 'en-PH': 'English (Philippines)', 32 | 'en-SG': 'English (Singapore)', 33 | 'en-US': 'English (United States)', 34 | 'eo': 'Esperanto', 35 | 'es-AR': 'Spanish (Argentina)', 36 | 'es-CL': 'Spanish (Chile)', 37 | 'es-ES': 'Spanish (Spain)', 38 | 'es-US': 'Spanish (United States)', 39 | 'es-MX': 'Spanish (Mexico)', 40 | 'es': 'Spanish', 41 | 'et': 'Estonian', 42 | 'eu': 'Basque', 43 | 'fa': 'Persian', 44 | 'fi': 'Finnish', 45 | 'fr': 'French', 46 | 'ga': 'Irish', 47 | 'gl': 'Galician', 48 | 'gu': 'Gujarati', 49 | 'ha': 'Hausa', 50 | 'hi': 'Hindi', 51 | 'hmn': 'Hmong', 52 | 'hr': 'Croatian', 53 | 'ht': 'Haitian Creole', 54 | 'hu': 'Hungarian', 55 | 'hy': 'Armenian', 56 | 'id': 'Indonesian', 57 | 'ig': 'Igbo', 58 | 'is': 'Icelandic', 59 | 'it': 'Italian', 60 | 'iw': 'Hebrew', 61 | 'ja': 'Japanese', 62 | 'jw': 'Javanese', 63 | 'ka': 'Georgian', 64 | 'kk': 'Kazakh', 65 | 'km': 'Khmer', 66 | 'kn': 'Kannada', 67 | 'ko': 'Korean', 68 | 'la': 'Latin', 69 | 'lo': 'Lao', 70 | 'lt': 'Lithuanian', 71 | 'lv': 'Latvian', 72 | 'mg': 'Malagasy', 73 | 'mi': 'Maori', 74 | 'mk': 'Macedonian', 75 | 'ml': 'Malayalam', 76 | 'mn': 'Mongolian', 77 | 'mr': 'Marathi', 
78 | 'ms': 'Malay', 79 | 'mt': 'Maltese', 80 | 'my': 'Myanmar (Burmese)', 81 | 'ne': 'Nepali', 82 | 'nl': 'Dutch', 83 | 'no': 'Norwegian', 84 | 'ny': 'Chichewa', 85 | 'pa': 'Punjabi', 86 | 'pl': 'Polish', 87 | 'pt-BR': 'Portuguese (Brazil)', 88 | 'pt-PT': 'Portuguese (Portugal)', 89 | 'ro': 'Romanian', 90 | 'ru': 'Russian', 91 | 'si': 'Sinhala', 92 | 'sk': 'Slovak', 93 | 'sl': 'Slovenian', 94 | 'so': 'Somali', 95 | 'sq': 'Albanian', 96 | 'sr': 'Serbian', 97 | 'st': 'Sesotho', 98 | 'su': 'Sudanese', 99 | 'sv': 'Swedish', 100 | 'sw': 'Swahili', 101 | 'ta': 'Tamil', 102 | 'te': 'Telugu', 103 | 'tg': 'Tajik', 104 | 'th': 'Thai', 105 | 'tl': 'Filipino', 106 | 'tr': 'Turkish', 107 | 'uk': 'Ukrainian', 108 | 'ur': 'Urdu', 109 | 'uz': 'Uzbek', 110 | 'vi': 'Vietnamese', 111 | 'yi': 'Yiddish', 112 | 'yo': 'Yoruba', 113 | 'yue-Hant-HK': 'Cantonese, (Traditional HK)', 114 | 'zh': 'Chinese (Simplified, China)', 115 | 'zh-HK': 'Chinese (Simplified, Hong Kong)', 116 | 'zh-TW': 'Chinese (Traditional, Taiwan)', 117 | 'zu': 'Zulu', 118 | } 119 | -------------------------------------------------------------------------------- /.github/workflows/mac-pyinstaller.yml: -------------------------------------------------------------------------------- 1 | name: MacOS PyInstaller 2 | 3 | on: 4 | workflow_dispatch: 5 | push: 6 | branches: 7 | - master 8 | - develop 9 | 10 | 11 | jobs: 12 | build: 13 | runs-on: macos-14 # Use macOS ARM64 runner 14 | steps: 15 | - name: Checkout repository 16 | uses: actions/checkout@v4 17 | with: 18 | fetch-depth: 0 # Ensure full history and tags are available 19 | 20 | - name: Get latest Git tag 21 | id: get_version 22 | run: | 23 | VERSION=$(git describe --tags --abbrev=0 2>/dev/null || echo "v0.1.0") 24 | echo "VERSION=$VERSION" >> $GITHUB_ENV 25 | echo "Resolved version: $VERSION" 26 | 27 | - name: Setup FFmpeg 28 | uses: federicocarboni/setup-ffmpeg@v3.1 29 | with: 30 | ffmpeg-version: release 31 | architecture: x64 32 | 33 | - name: Verify FFmpeg 
installation 34 | run: | 35 | which ffmpeg 36 | ffmpeg -version 37 | 38 | - name: Set up Python 3.8 39 | uses: actions/setup-python@v4 40 | with: 41 | python-version: "3.8" 42 | 43 | - name: Set up Python virtual environment 44 | run: | 45 | python -m venv .venv 46 | 47 | - name: Activate virtual environment and install dependencies 48 | run: | 49 | source .venv/bin/activate 50 | pip install --upgrade pip 51 | pip install -r requirements.txt 52 | pip install pyinstaller 53 | 54 | - name: Verify existence of Whisper assets directory 55 | run: | 56 | source .venv/bin/activate # Activate the virtual environment 57 | ASSETS_PATH=$(python -c "import whisper; import os; print(os.path.join(os.path.dirname(whisper.__file__), 'assets'))") 58 | if [ -d "$ASSETS_PATH" ]; then 59 | echo "The 'assets' directory exists at: $ASSETS_PATH" 60 | echo "ASSETS_PATH=$ASSETS_PATH" >> $GITHUB_ENV 61 | else 62 | echo "The 'assets' directory does NOT exist." 63 | exit 1 64 | fi 65 | 66 | - name: Compile with pyInstaller 67 | run: | 68 | source .venv/bin/activate 69 | FFMPPEG_PATH=$(which ffmpeg) 70 | pyinstaller main.py \ 71 | --windowed \ 72 | --path="$(pwd)" \ 73 | --add-binary="$FFMPPEG_PATH:." \ 74 | --add-binary="pytranscriber.sqlite:." 
\ 75 | --add-data="pytranscriber/gui/*.qm:pytranscriber/gui/" \ 76 | --add-data="$ASSETS_PATH:whisper/assets" 77 | 78 | - name: Zip the .app bundle with version number 79 | run: | 80 | cd dist 81 | mv main.app "pyTranscriber-${VERSION}.app" 82 | zip -r "pyTranscriber-macos-${VERSION}.zip" "pyTranscriber-${VERSION}.app" 83 | 84 | - name: Upload built executable with version number 85 | uses: actions/upload-artifact@v4 86 | with: 87 | name: pyTranscriber-macos-${{ env.VERSION }} 88 | path: ./dist/pyTranscriber-macos-${{ env.VERSION }}.zip # Path adjusted for macOS 89 | 90 | download: 91 | runs-on: macos-14 # macOS ARM64 runner for downloading 92 | needs: build 93 | steps: 94 | - name: Download built executable 95 | uses: actions/download-artifact@v4 96 | with: 97 | path: ./output 98 | 99 | - name: List downloaded files 100 | run: ls -la ./output 101 | -------------------------------------------------------------------------------- /pytranscriber/gui/简体中文 - Chinese Simplified.ts: -------------------------------------------------------------------------------- 1 | 2 | 3 | 4 | 5 | window 6 | 7 | 8 | pyTranscriber - v1.7 - 08/08/2020 9 | pyTranscriber -v1.8 - 20/08/2022 10 | 11 | 12 | 13 | Select file(s) 14 | 选择文件 15 | 16 | 17 | 18 | Transcribe Audio / Generate Subtitles 19 | 转译音频 / 生成字幕 20 | 21 | 22 | 23 | Open Output Folder 24 | 打开导出文件夹 25 | 26 | 27 | 28 | Output Location 29 | 导出位置 30 | 31 | 32 | 33 | &List of files to generate transcribe audio / generate subtitles 34 | &转译 / 生成字幕文件列表 35 | 36 | 37 | 38 | Remove file(s) 39 | 移除文件 40 | 41 | 42 | 43 | Cancel 44 | 取消 45 | 46 | 47 | 48 | Open output files automatically 49 | 完成后自动打开文件夹 50 | 51 | 52 | 53 | Audio Language: 54 | 选择音频语言 55 | 56 | 57 | 58 | Abo&ut 59 | Abo&ut 60 | 61 | 62 | 63 | Settings 64 | Settings 65 | 66 | 67 | 68 | &Language 69 | 70 | 71 | 72 | 73 | &License 74 | &License 75 | 76 | 77 | 78 | &Funding at Github Sponsors 79 | 资助 GitHub 上的项目 80 | 81 | 82 | 83 | &More about pyTranscriber 84 | &关于 pyTranscriber 
85 | 86 | 87 | 88 | Proxy 89 | Proxy 90 | 91 | 92 | 93 | Proxy settings 94 | Proxy settings 95 | 96 | 97 | 98 | -------------------------------------------------------------------------------- /.github/workflows/linux-pyinstaller.yml: -------------------------------------------------------------------------------- 1 | name: Linux PyInstaller 2 | 3 | on: 4 | push: 5 | branches: 6 | - master 7 | - develop 8 | pull_request: 9 | 10 | jobs: 11 | build: 12 | runs-on: ubuntu-22.04 13 | 14 | steps: 15 | - name: Checkout repository 16 | uses: actions/checkout@v3 17 | with: 18 | fetch-depth: 0 # Fetch all tags 19 | 20 | - name: Get latest Git tag 21 | id: get_version 22 | run: | 23 | VERSION=$(git describe --tags --abbrev=0 2>/dev/null || echo "v0.1.0") 24 | echo "VERSION=$VERSION" >> $GITHUB_ENV 25 | echo "Resolved version: $VERSION" 26 | 27 | - name: Install missing system libraries (XCB, TBB, etc.) 28 | run: | 29 | sudo apt-get update 30 | sudo apt-get install -y \ 31 | libxcb1 \ 32 | libxcb-keysyms1 \ 33 | libxcb-shape0 \ 34 | libxcb-xkb1 \ 35 | libxcb-render-util0 \ 36 | libxcb-image0 \ 37 | libxcb-xinerama0 \ 38 | libxkbcommon-x11-0 \ 39 | libxcb-icccm4 \ 40 | libtbb12 \ 41 | libsox-dev 42 | 43 | - name: Install FFmpeg 44 | run: sudo apt update && sudo apt install -y ffmpeg 45 | 46 | - name: Verify FFmpeg installation 47 | run: | 48 | which ffmpeg 49 | ffmpeg -version 50 | 51 | - name: Set up Python 3.8 52 | uses: actions/setup-python@v4 53 | with: 54 | python-version: "3.8" 55 | 56 | - name: Set up Python virtual environment 57 | run: | 58 | python -m venv .venv 59 | 60 | - name: Activate virtual environment and install dependencies 61 | run: | 62 | source .venv/bin/activate 63 | pip install --upgrade pip 64 | pip install -r requirements.txt 65 | pip install pyinstaller 66 | 67 | - name: Verify existence of Whisper assets directory 68 | run: | 69 | source .venv/bin/activate # Activate the virtual environment 70 | ASSETS_PATH=$(python -c "import whisper; import os; 
# Row-by-row dynamic-programming sweep over `cost`/`trace` (presumably the
# dynamic-time-warping recurrence, going by the name -- confirm with the caller).
#
# cost, trace, x                      -- base pointers; rows are addressed as
#                                        ``base + row * <name>_stride``.
# x_stride, cost_stride, trace_stride -- element distance between consecutive rows.
# N, M                                -- the loop runs k = 1 .. N + M; only the first M
#                                        lanes of each row are active.
# BLOCK_SIZE                          -- compile-time lane count (tl.constexpr).
@triton.jit
def dtw_kernel(
    cost, trace, x, x_stride, cost_stride, trace_stride, N, M, BLOCK_SIZE: tl.constexpr
):
    offsets = tl.arange(0, BLOCK_SIZE)
    # Lanes at or beyond M neither load nor store.
    mask = offsets < M

    for k in range(1, N + M + 1):  # k = i + j
        # Barrier before reading rows k-1 and k, which were written by earlier iterations.
        tl.debug_barrier()

        # Three candidate predecessors: row k-1, row k, and row k shifted by one element.
        p0 = cost + (k - 1) * cost_stride
        p1 = cost + k * cost_stride
        p2 = cost + k * cost_stride + 1

        c0 = tl.load(p0 + offsets, mask=mask)
        c1 = tl.load(p1 + offsets, mask=mask)
        c2 = tl.load(p2 + offsets, mask=mask)

        # New cost = local cost from x (0 for masked lanes) + cheapest predecessor.
        x_row = tl.load(x + (k - 1) * x_stride + offsets, mask=mask, other=0)
        cost_row = x_row + tl.minimum(tl.minimum(c0, c1), c2)

        # Results land in row k+1, shifted by one element.
        cost_ptr = cost + (k + 1) * cost_stride + 1
        tl.store(cost_ptr + offsets, cost_row, mask=mask)

        # Record which predecessor won. The stores are issued 2, 1, 0 with
        # non-strict comparisons, so on ties the later store (lower code) wins.
        trace_ptr = trace + (k + 1) * trace_stride + 1
        tl.store(trace_ptr + offsets, 2, mask=mask & (c2 <= c0) & (c2 <= c1))
        tl.store(trace_ptr + offsets, 1, mask=mask & (c1 <= c0) & (c1 <= c2))
        tl.store(trace_ptr + offsets, 0, mask=mask & (c0 <= c1) & (c0 <= c2))


@lru_cache(maxsize=None)
def median_kernel(filter_width: int):
    """Build (and cache per ``filter_width``) a Triton kernel computing a sliding median.

    The inner ``kernel`` is a template: its source contains three placeholder
    names that are not valid code on their own. After wrapping it in
    ``triton.JITFunction`` the placeholders are replaced, as plain text in
    ``kernel.src``, with generated statements:

    * the "load" placeholder becomes ``filter_width`` loads, ``row0 .. row{w-1}``,
      each reading from ``x_ptr + offsets + i``;
    * the "bubblesort" placeholder becomes ``filter_width // 2 + 1`` passes of
      adjacent compare-and-swap over those rows, which is enough passes for
      ``row{filter_width // 2}`` to hold its final sorted value;
    * the "middle row" placeholder becomes ``row{filter_width // 2}``, the value
      that is stored to ``y``.

    Because the substitution is textual, do not mention the placeholder names
    in comments inside ``kernel``; the last replacement has no leading
    whitespace and would rewrite them too.
    """

    @triton.jit
    def kernel(
        y, x, x_stride, y_stride, BLOCK_SIZE: tl.constexpr
    ):  # x.shape[-1] == filter_width
        row_idx = tl.program_id(0)
        offsets = tl.arange(0, BLOCK_SIZE)
        mask = offsets < y_stride

        x_ptr = x + row_idx * x_stride  # noqa: F841
        y_ptr = y + row_idx * y_stride

        LOAD_ALL_ROWS_HERE  # noqa: F821

        BUBBLESORT_HERE  # noqa: F821

        tl.store(y_ptr + offsets, MIDDLE_ROW_HERE, mask=mask)  # noqa: F821

    kernel = triton.JITFunction(kernel.fn)
    # NOTE(review): the leading spaces inside the literals below must equal the
    # indentation of the placeholder lines as they appear in kernel.src. The
    # reviewed extract had runs of whitespace collapsed, so confirm the exact
    # count against the checked-in file before relying on this copy.
    kernel.src = kernel.src.replace(
        "    LOAD_ALL_ROWS_HERE",
        "\n".join(
            [
                f"    row{i} = tl.load(x_ptr + offsets + {i}, mask=mask)"
                for i in range(filter_width)
            ]
        ),
    )
    kernel.src = kernel.src.replace(
        "    BUBBLESORT_HERE",
        "\n\n".join(
            [
                "\n\n".join(
                    [
                        "\n".join(
                            [
                                f"    smaller = tl.where(row{j} < row{j + 1}, row{j}, row{j + 1})",
                                f"    larger = tl.where(row{j} > row{j + 1}, row{j}, row{j + 1})",
                                f"    row{j} = smaller",
                                f"    row{j + 1} = larger",
                            ]
                        )
                        for j in range(filter_width - i - 1)
                    ]
                )
                for i in range(filter_width // 2 + 1)
            ]
        ),
    )
    kernel.src = kernel.src.replace("MIDDLE_ROW_HERE", f"row{filter_width // 2}")

    return kernel


def median_filter_cuda(x: torch.Tensor, filter_width: int):
    """Apply a median filter of given width along the last dimension of x"""
    # Sliding windows of length filter_width (step 1) over the last dimension.
    slices = x.contiguous().unfold(-1, filter_width, 1)
    # One kernel program per leading index (everything except the last two dims of `slices`).
    grid = np.prod(slices.shape[:-2])

    kernel = median_kernel(filter_width)
    # Output has the shape of `slices` with the window dimension dropped.
    y = torch.empty_like(slices[..., 0])

    # Smallest power of two >= y.stride(-2).
    BLOCK_SIZE = 1 << (y.stride(-2) - 1).bit_length()
    # NOTE(review): the kernel is handed `x` and `x.stride(-2)`, not the
    # contiguous copy that `slices` was built from -- confirm callers always
    # pass contiguous input.
    kernel[(grid,)](y, x, x.stride(-2), y.stride(-2), BLOCK_SIZE=BLOCK_SIZE)

    return y
31 | run: | 32 | sudo apt-get update 33 | sudo apt-get install -y \ 34 | libxcb1 \ 35 | libxcb-keysyms1 \ 36 | libxcb-shape0 \ 37 | libxcb-xkb1 \ 38 | libxcb-render-util0 \ 39 | libxcb-image0 \ 40 | libxcb-xinerama0 \ 41 | libxkbcommon-x11-0 \ 42 | libxcb-icccm4 \ 43 | libtbb12 \ 44 | ccache \ 45 | libsox-dev 46 | 47 | 48 | - name: Install FFmpeg 49 | run: sudo apt update && sudo apt install -y ffmpeg 50 | 51 | - name: Verify FFmpeg installation 52 | run: | 53 | which ffmpeg 54 | ffmpeg -version 55 | 56 | - name: Set up Python 3.8 57 | uses: actions/setup-python@v4 58 | with: 59 | python-version: "3.8" 60 | 61 | - name: Set up Python virtual environment 62 | run: | 63 | python -m venv .venv 64 | 65 | - name: Install dependencies 66 | run: | 67 | source .venv/bin/activate 68 | pip install --upgrade pip 69 | pip install -r requirements.txt 70 | pip install nuitka 71 | 72 | - name: Verify Whisper assets directory 73 | run: | 74 | source .venv/bin/activate 75 | whisperPath=$(python -c "import whisper; print(whisper.__file__)") 76 | assetsPath=$(dirname $whisperPath)/assets 77 | if [ -d "$assetsPath" ]; then 78 | echo "The 'assets' directory exists at: $assetsPath" 79 | else 80 | echo "The 'assets' directory DOES NOT exist." 
81 | exit 1 82 | fi 83 | 84 | - name: Compile with Nuitka 85 | run: | 86 | source .venv/bin/activate 87 | ffmpegPath=$(which ffmpeg) 88 | nuitka \ 89 | --assume-yes-for-downloads \ 90 | --enable-plugin=pyqt5 \ 91 | --include-data-files="pytranscriber.sqlite=pytranscriber.sqlite" \ 92 | --include-data-files="$ffmpegPath=ffmpeg" \ 93 | --include-data-files="pytranscriber/gui/*.qm=pytranscriber/gui/" \ 94 | --include-package-data="whisper:assets/*=whisper/assets" \ 95 | main.py \ 96 | --onefile \ 97 | --output-dir=dist 98 | 99 | - name: Zip the binary with version number 100 | run: | 101 | cd dist 102 | mv main.bin "pyTranscriber-${VERSION}" 103 | 104 | - name: Upload built executable 105 | uses: actions/upload-artifact@v4 106 | with: 107 | name: pyTranscriber-linux-nuitka-${{ env.VERSION }} 108 | path: ./dist/pyTranscriber-${{ env.VERSION }} # Adjust this path if Nuitka outputs elsewhere 109 | 110 | download: 111 | runs-on: ubuntu-22.04 112 | needs: build 113 | steps: 114 | - name: Download built executable 115 | uses: actions/download-artifact@v4 116 | with: 117 | path: ./output 118 | 119 | - name: List downloaded files 120 | run: dir ./output 121 | -------------------------------------------------------------------------------- /pytranscriber/control/ctr_db.py: -------------------------------------------------------------------------------- 1 | ''' 2 | (C) 2025 Raryel C. Souza 3 | This program is free software: you can redistribute it and/or modify 4 | it under the terms of the GNU General Public License as published by 5 | the Free Software Foundation, either version 3 of the License, or 6 | (at your option) any later version. 7 | This program is distributed in the hope that it will be useful, 8 | but WITHOUT ANY WARRANTY; without even the implied warranty of 9 | MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the 10 | GNU General Public License for more details. 
class CtrDB:
    """Thin persistence helper around the bundled ``pytranscriber.sqlite`` file.

    The database stores two single-row settings tables, ``Language`` and
    ``Proxy``. A connection is opened lazily by :meth:`connect`; every write
    helper commits and then closes it again, while reads leave it open for
    reuse by the next call.

    Errors are reported to the user through ``MessageUtil`` rather than
    raised; failing to open the database at all terminates the program.
    """

    # Currently open sqlite3 connection, or None when disconnected.
    conn = None
    # Title used for every error dialog raised by this class.
    DB_ERROR = "DB Error"

    @staticmethod
    def _exit_fatal():
        """Terminate the process with exit status 1.

        ``sys.exit`` is used instead of the ``exit`` builtin because the
        latter is injected by the ``site`` module for interactive use and is
        not guaranteed to exist in frozen (PyInstaller / Nuitka) builds.
        """
        import sys

        sys.exit(1)

    def connect(self):
        """Return a cursor, opening the database connection on first use.

        The database file is resolved relative to this module: two
        directories above the module's own directory.

        Returns:
            sqlite3.Cursor: a new cursor on the (possibly reused) connection.

        Exits the process after showing an error dialog when the database
        cannot be opened.
        """
        if self.conn:
            return self.conn.cursor()

        try:
            db_path = PurePath(__file__).parent.parent.parent.joinpath('pytranscriber.sqlite')
            self.conn = sqlite3.connect(str(db_path))
            return self.conn.cursor()
        except Exception as ex:
            # Top-level boundary: nothing in the app works without the DB.
            MessageUtil.show_error_message("ConnectDB " + str(ex), self.DB_ERROR)
            self._exit_fatal()

    def close(self):
        """Close the connection if one is open; safe to call repeatedly."""
        if self.conn is None:
            return
        try:
            self.conn.close()
        finally:
            self.conn = None

    def _execute_write(self, query, params, error_label):
        """Run one modifying statement, commit it, and always close the connection.

        Args:
            query: SQL text, using ``?`` placeholders for values.
            params: tuple of values bound to the placeholders.
            error_label: prefix for the error dialog if sqlite reports a failure.

        Returns:
            bool: True when the statement was committed, False when an
            ``sqlite3.Error`` was shown to the user instead.
        """
        cur = self.connect()
        try:
            cur.execute(query, params)
            self.conn.commit()
            return True
        except sqlite3.Error as e:
            MessageUtil.show_error_message(error_label + str(e), self.DB_ERROR)
            return False
        finally:
            # Release the file handle even if an unexpected exception escapes.
            self.close()

    def _load_one_row(self, table_name):
        """Return the first row of ``table_name`` or None (empty table / error).

        ``table_name`` is concatenated into the SQL text because identifiers
        cannot be bound as parameters; pass trusted constants only.
        The connection is left open afterwards.
        """
        cur = self.connect()
        if cur is None:
            self._exit_fatal()

        try:
            cur.execute('SELECT * FROM ' + table_name)
            return cur.fetchone()
        except sqlite3.Error as e:
            MessageUtil.show_error_message("LoadOneRow " + str(e), self.DB_ERROR)
            return None

    def _save_single_column(self, query, value):
        """Execute ``query`` with a single bound ``value`` and commit."""
        self._execute_write(query, (value,), "SaveSingleColumn ")

    def _truncate_table(self, table_name):
        """Delete every row of ``table_name`` (trusted constant names only)."""
        self._execute_write('DELETE FROM ' + table_name, (), "TruncateTable ")

    def load_last_language(self):
        """Return the stored ``Language`` row, or None when nothing is stored."""
        return self._load_one_row('Language')

    def clear_last_language(self):
        """Remove the stored language selection."""
        self._truncate_table('Language')

    def save_last_language(self, language):
        """Insert ``language`` as a new row of the ``Language`` table."""
        self._execute_write('INSERT INTO Language (last_language) VALUES (?)',
                            (language,), "SaveLastLanguage ")

    def load_proxy(self):
        """Return the stored ``Proxy`` row, or None when nothing is stored."""
        return self._load_one_row('Proxy')

    def clear_proxy(self):
        """Remove the stored proxy configuration."""
        self._truncate_table('Proxy')

    def save_proxy(self, proxy):
        """Store ``proxy['https']`` in the ``Proxy`` table and confirm to the user.

        Args:
            proxy: mapping with an ``'https'`` entry holding the proxy address.
        """
        saved = self._execute_write('INSERT INTO Proxy (proxy_address) VALUES (?)',
                                    (proxy['https'],), "SaveProxy ")
        if saved:
            MessageUtil.show_info_message('Proxy address saved successfully', 'Proxy settings saved')
class ThreadExecGeneric(QThread):
    """Worker thread that feeds each selected media file to a transcription engine.

    Subclasses supply the engine by overriding :meth:`_run_engine_for_media`.
    Progress, errors and completion are reported to the GUI through the
    Qt signals declared below.
    """

    signalLockGUI = pyqtSignal()
    signalResetGUIAfterCancel = pyqtSignal()
    signalResetGUIAfterSuccess = pyqtSignal()
    signalProgress = pyqtSignal(str, int)
    signalProgressFileYofN = pyqtSignal(str)
    signalErrorMsg = pyqtSignal(str)

    def __init__(self, obj_transcription_parameters):
        """Store the transcription parameters (langCode, outputFolder, listFiles)."""
        self.obj_transcription_parameters = obj_transcription_parameters
        self.running = True
        QThread.__init__(self)

    def listenerProgress(self, string, percent):
        """Forward an engine progress update to the GUI via ``signalProgress``."""
        self.signalProgress.emit(string, percent)

    def _loopSelectedFiles(self):
        """Process every file in the parameter object's ``listFiles``.

        Locks the GUI, makes sure the output folder exists, then runs the
        engine for each file until done or until the user cancels. Emits
        ``signalErrorMsg`` when the output folder is unusable, otherwise one
        of the two reset signals once the loop ends.
        """
        self.signalLockGUI.emit()

        params = self.obj_transcription_parameters
        langCode = params.langCode
        pathOutputFolder = Path(params.outputFolder)

        # Create the output directory (and any missing parents) if needed.
        # Any failure -- permissions, a plain file already using the name,
        # a creation race -- falls through to the isdir() check below
        # instead of killing the thread with the GUI still locked.
        try:
            os.makedirs(pathOutputFolder, exist_ok=True)
        except OSError:
            pass

        if not os.path.isdir(pathOutputFolder):
            # force the user to select a different output directory
            self.signalErrorMsg.emit("Error! Invalid output folder. Please choose another one.")
            # NOTE(review): no reset signal is emitted on this path, matching
            # the nesting reconstructed from the original -- confirm.
            return

        nFiles = len(params.listFiles)
        for i in range(nFiles):
            # stop as soon as the user has clicked the cancel button
            if CtrEngine.is_operation_canceled():
                break
            self._updateProgressFileYofN(i, nFiles)
            self._run_engine_for_media(i, langCode)

        # a canceled run must not clear the file list, hence two distinct signals
        if CtrEngine.is_operation_canceled():
            self.signalResetGUIAfterCancel.emit()
        else:
            self.signalResetGUIAfterSuccess.emit()

    @abstractmethod
    def _run_engine_for_media(self, index, langCode):
        """Transcribe the file at ``index`` of ``listFiles`` using ``langCode``.

        Hook for subclasses. The class does not use ``ABCMeta``, so the
        ``abstractmethod`` marker is documentation only and instantiation is
        not blocked; this default does nothing.
        """

    def _updateProgressFileYofN(self, currentIndex, countFiles):
        """Emit a "File Y of N" label for the zero-based ``currentIndex``."""
        self.signalProgressFileYofN.emit("File " + str(currentIndex + 1) + " of " + str(countFiles))

    def _generatePathOutputFile(self, sourceFile):
        """Return ``[srt_path, txt_path]`` for ``sourceFile`` inside the output folder.

        Both outputs reuse the source file's base name with its last
        extension replaced by ``.srt`` and ``.txt`` respectively.
        """
        # [0] is the name without extension, [1] would be the extension
        fileName = os.path.splitext(os.path.basename(sourceFile))[0]

        pathOutputFolder = Path(self.obj_transcription_parameters.outputFolder)
        outputFileSRT = pathOutputFolder / (fileName + ".srt")
        outputFileTXT = pathOutputFolder / (fileName + ".txt")
        return [outputFileSRT, outputFileTXT]

    @staticmethod
    def cancel():
        """Ask the engine controller to cancel the running operation."""
        CtrEngine.cancel_operation()
multiprocessing 10 | import os 11 | +from json import JSONDecodeError 12 | import subprocess 13 | import sys 14 | import tempfile 15 | import wave 16 | 17 | +import json 18 | import requests 19 | +try: 20 | + from json.decoder import JSONDecodeError 21 | +except ImportError: 22 | + JSONDecodeError = ValueError 23 | + 24 | from googleapiclient.discovery import build 25 | from progressbar import ProgressBar, Percentage, Bar, ETA 26 | 27 | @@ -61,8 +67,10 @@ 28 | start, end = region 29 | start = max(0, start - self.include_before) 30 | end += self.include_after 31 | - temp = tempfile.NamedTemporaryFile(suffix='.flac') 32 | - command = ["ffmpeg", "-ss", str(start), "-t", str(end - start), 33 | + #delete=False necessary for running on Windows 34 | + temp = tempfile.NamedTemporaryFile(suffix='.flac', delete=False) 35 | + program_ffmpeg = which("ffmpeg") 36 | + command = [str(program_ffmpeg), "-ss", str(start), "-t", str(end - start), 37 | "-y", "-i", self.source_path, 38 | "-loglevel", "error", temp.name] 39 | use_shell = True if os.name == "nt" else False 40 | @@ -102,6 +110,8 @@ 41 | except IndexError: 42 | # no result 43 | continue 44 | + except JSONDecodeError: 45 | + continue 46 | 47 | except KeyboardInterrupt: 48 | return None 49 | @@ -149,17 +159,25 @@ 50 | Checks whether a file is executable. 
51 | """ 52 | return os.path.isfile(file_path) and os.access(file_path, os.X_OK) 53 | - 54 | + #necessary to run on Windows 55 | + if os.name == "nt": 56 | + program += ".exe" 57 | fpath, _ = os.path.split(program) 58 | if fpath: 59 | if is_exe(program): 60 | return program 61 | else: 62 | - for path in os.environ["PATH"].split(os.pathsep): 63 | - path = path.strip('"') 64 | - exe_file = os.path.join(path, program) 65 | - if is_exe(exe_file): 66 | - return exe_file 67 | + #looks for file in the script execution folder before checking on system path 68 | + current_dir = os.getcwd() 69 | + local_program = os.path.join(current_dir, program) 70 | + if is_exe(local_program): 71 | + return local_program 72 | + else: 73 | + for path in os.environ["PATH"].split(os.pathsep): 74 | + path = path.strip('"') 75 | + exe_file = os.path.join(path, program) 76 | + if is_exe(exe_file): 77 | + return exe_file 78 | return None 79 | 80 | 81 | @@ -171,10 +189,11 @@ 82 | if not os.path.isfile(filename): 83 | print("The given file does not exist: {}".format(filename)) 84 | raise Exception("Invalid filepath: {}".format(filename)) 85 | - if not which("ffmpeg"): 86 | + program_ffmpeg = which("ffmpeg") 87 | + if not program_ffmpeg: 88 | print("ffmpeg: Executable not found on machine.") 89 | raise Exception("Dependency not found: ffmpeg") 90 | - command = ["ffmpeg", "-y", "-i", filename, 91 | + command = [str(program_ffmpeg), "-y", "-i", filename, 92 | "-ac", str(channels), "-ar", str(rate), 93 | "-loglevel", "error", temp.name] 94 | use_shell = True if os.name == "nt" else False 95 | @@ -233,6 +252,12 @@ 96 | """ 97 | Given an input audio/video file, generate subtitles in the specified language and format. 
98 | """ 99 | + 100 | + if "Darwin" in os.uname(): 101 | + #the default unix fork method does not work on Mac OS 102 | + #need to use forkserver 103 | + multiprocessing.set_start_method('forkserver') 104 | + 105 | audio_filename, audio_rate = extract_audio(source_path) 106 | 107 | regions = find_speech_regions(audio_filename) 108 | -------------------------------------------------------------------------------- /pytranscriber/gui/繁體中文 - Chinese Traditional.ts: -------------------------------------------------------------------------------- 1 | 2 | 3 | 4 | 5 | window 6 | 7 | 8 | pyTranscriber - v1.8 - 17/08/2022 9 | pyTranscriber -v1.8 - 20/08/2022 10 | 11 | 12 | 13 | Select file(s) 14 | 選擇檔案 15 | 16 | 17 | 18 | Transcribe Audio / Generate Subtitles 19 | 轉譯音訊 / 生成字幕 20 | 21 | 22 | 23 | Open Output Folder 24 | 開啟輸出位置 25 | 26 | 27 | 28 | Output Location 29 | 選取輸出位置 30 | 31 | 32 | 33 | List of files to generate transcribe audio / generate subtitles 34 | &轉譯音訊 / 生成字幕檔案清單 35 | 36 | 37 | 38 | Remove file(s) 39 | 移除檔案 40 | 41 | 42 | 43 | Cancel 44 | 取消 45 | 46 | 47 | 48 | Open output files automatically 49 | 完成後自動開啟輸出資料夾 50 | 51 | 52 | 53 | Audio Language: 54 | 選擇音訊語言 55 | 56 | 57 | 58 | Abo&ut 59 | 關於 60 | 61 | 62 | 63 | &Settings 64 | 設定 65 | 66 | 67 | 68 | &Language 69 | 語言 70 | 71 | 72 | 73 | &License 74 | &License 75 | 76 | 77 | 78 | &Funding at Github Sponsors 79 | 在 Github 上成為贊助者 80 | 81 | 82 | 83 | &More about pyTranscriber 84 | &關於 pyTranscriber 85 | 86 | 87 | 88 | &Proxy 89 | 代理伺服器(Proxy) 90 | 91 | 92 | 93 | Proxy setting 94 | 代理伺服器設定 95 | 96 | 97 | 98 | English 99 | 100 | 101 | 102 | 103 | 繁體中文 - Chinese Traditional 104 | 105 | 106 | 107 | 108 | 简体中文 - Chinese Simplified 109 | 110 | 111 | 112 | 113 | Português 114 | 115 | 116 | 117 | 118 | -------------------------------------------------------------------------------- /.github/workflows/win-nuitka.yml: -------------------------------------------------------------------------------- 1 | on: 2 | push: 3 | 
branches: 4 | - master 5 | - develop 6 | pull_request: 7 | 8 | jobs: 9 | build: 10 | runs-on: windows-latest 11 | 12 | steps: 13 | - name: Checkout repository 14 | uses: actions/checkout@v4 15 | with: 16 | fetch-depth: 0 17 | 18 | - name: Get latest Git tag 19 | id: get_version 20 | run: | 21 | $VERSION = git describe --tags --abbrev=0 2>$null 22 | if (-not $VERSION) { 23 | $VERSION = "v0.1.0" # Default version if no tags are found 24 | } 25 | echo "VERSION=$VERSION" | Out-File -FilePath $env:GITHUB_ENV -Encoding utf8 26 | Write-Host "Resolved version: $VERSION" 27 | shell: pwsh 28 | 29 | - name: Setup FFmpeg 30 | uses: federicocarboni/setup-ffmpeg@v3.1 31 | with: 32 | ffmpeg-version: release # Specify the desired FFmpeg version 33 | architecture: x64 34 | 35 | - name: Add FFmpeg to PATH 36 | run: | 37 | $ffmpegPath = (Get-Command ffmpeg).Source 38 | $env:Path += ";$($ffmpegPath.Substring(0, $ffmpegPath.LastIndexOf('\')))" 39 | $ffmpegPath 40 | shell: pwsh 41 | 42 | - name: Verify FFmpeg installation 43 | run: | 44 | where ffmpeg 45 | ffmpeg -version 46 | 47 | - name: Set up Python 3.8 48 | uses: actions/setup-python@v4 49 | with: 50 | python-version: "3.8" 51 | 52 | - name: Set up Python virtual environment 53 | run: | 54 | python -m venv .venv 55 | 56 | - name: Activate virtual environment 57 | run: | 58 | .\.venv\Scripts\Activate 59 | 60 | - name: Install dependencies 61 | run: | 62 | pip install --upgrade pip 63 | pip install -r requirements.txt 64 | pip install nuitka 65 | 66 | - name: Verificar existência do diretório assets do whisper 67 | run: | 68 | $whisperPath = (python -c "import whisper; print(whisper.__file__)").Trim() 69 | $assetsPath = Join-Path (Split-Path $whisperPath) 'assets' 70 | if (Test-Path $assetsPath) { 71 | Write-Host "O diretório 'assets' existe em: $assetsPath" 72 | } else { 73 | Write-Host "O diretório 'assets' NÃO existe." 
74 | exit 1 75 | } 76 | 77 | - name: Compile with Nuitka 78 | run: | 79 | $whisperPath = (python -c "import whisper; print(whisper.__file__)").Trim() 80 | $assetsPath = Join-Path (Split-Path $whisperPath) 'assets' 81 | $ffmpegPath = (Get-Command ffmpeg).Source 82 | nuitka ` 83 | --assume-yes-for-downloads ` 84 | --enable-plugin=pyqt5 ` 85 | --include-data-files="pytranscriber.sqlite=pytranscriber.sqlite" ` 86 | --include-data-files="$ffmpegPath=ffmpeg.exe" ` 87 | --include-data-files="pytranscriber/gui/*.qm=pytranscriber/gui/" ` 88 | --include-data-files="$assetsPath\*=whisper/assets/" ` 89 | main.py ` 90 | --onefile ` 91 | --output-dir=dist ` 92 | --windows-console-mode=disable 93 | 94 | - name: Rename and zip the .exe bundle with version number 95 | run: | 96 | Set-Location -Path dist 97 | Write-Host "Renaming main.exe to pyTranscriber-$env:VERSION.exe" 98 | Rename-Item -Force main.exe "pyTranscriber-$env:VERSION.exe" 99 | # Write-Host "Creating zip archive: pyTranscriber-$env:VERSION.zip" 100 | # Compress-Archive -Path "pyTranscriber-$env:VERSION.exe" -DestinationPath "pyTranscriber-win-$env:VERSION.zip" 101 | shell: pwsh 102 | 103 | - name: Upload built executable 104 | uses: actions/upload-artifact@v4 105 | with: 106 | name: pyTranscriber-win-${{ env.VERSION }} 107 | path: ./dist/pyTranscriber-${{ env.VERSION }}.exe # Adjust this path if Nuitka outputs elsewhere 108 | 109 | download: 110 | runs-on: windows-latest 111 | needs: build 112 | steps: 113 | - name: Download built executable 114 | uses: actions/download-artifact@v4 115 | with: 116 | path: ./output 117 | 118 | - name: List downloaded files 119 | run: dir ./output 120 | -------------------------------------------------------------------------------- /.github/workflows/win-pyinstaller-dev2.yml: -------------------------------------------------------------------------------- 1 | name: Windows PyInstaller 2 | 3 | on: 4 | push: 5 | branches: 6 | - master 7 | - develop 8 | pull_request: 9 | 10 | jobs: 11 | 
build: 12 | runs-on: windows-latest 13 | strategy: 14 | matrix: 15 | python-version: ["3.8", "3.10", "3.12"] # Paraleliza builds para cada versão do Python 16 | steps: 17 | - name: Checkout repository 18 | uses: actions/checkout@v4 19 | with: 20 | fetch-depth: 0 # Fetch all tags 21 | 22 | - name: Get latest Git tag 23 | id: get_version 24 | run: | 25 | $VERSION = git describe --tags --abbrev=0 2>$null 26 | if (-not $VERSION) { 27 | $VERSION = "v0.1.0" # Default version if no tags are found 28 | } 29 | echo "VERSION=$VERSION" | Out-File -FilePath $env:GITHUB_ENV -Encoding utf8 30 | Write-Host "Resolved version: $VERSION" 31 | shell: pwsh 32 | 33 | - name: Setup FFmpeg 34 | uses: federicocarboni/setup-ffmpeg@v3.1 35 | with: 36 | ffmpeg-version: release 37 | architecture: x64 38 | 39 | - name: Verify FFmpeg installation 40 | run: | 41 | where ffmpeg 42 | ffmpeg -version 43 | 44 | - name: Set up Python ${{ matrix.python-version }} 45 | uses: actions/setup-python@v4 46 | with: 47 | python-version: ${{ matrix.python-version }} 48 | 49 | - name: Create and activate virtual environment for Python ${{ matrix.python-version }} 50 | run: | 51 | python -m venv .venv-${{ matrix.python-version }} 52 | .\.venv-${{ matrix.python-version }}\Scripts\Activate 53 | shell: pwsh 54 | 55 | - name: Install dependencies for Python ${{ matrix.python-version }} 56 | run: | 57 | .\.venv-${{ matrix.python-version }}\Scripts\Activate 58 | python -m ensurepip --upgrade 59 | python -m pip install --upgrade pip 60 | python -m pip install -r requirements.txt 61 | python -m pip install pyinstaller 62 | shell: pwsh 63 | 64 | - name: Verify whisper assets directory for Python ${{ matrix.python-version }} 65 | run: | 66 | .\.venv-${{ matrix.python-version }}\Scripts\Activate 67 | $whisperPath = (python -c "import whisper; print(whisper.__file__)").Trim() 68 | $assetsPath = Join-Path (Split-Path $whisperPath) 'assets' 69 | if (Test-Path $assetsPath) { 70 | Write-Host "O diretório 'assets' existe em: 
$assetsPath" 71 | echo "ASSETS_PATH=$assetsPath" >> $env:GITHUB_ENV 72 | } else { 73 | Write-Host "O diretório 'assets' NÃO existe." 74 | exit 1 75 | } 76 | shell: pwsh 77 | 78 | - name: Compile with PyInstaller for Python ${{ matrix.python-version }} 79 | run: | 80 | .\.venv-${{ matrix.python-version }}\Scripts\Activate 81 | $ffmpegPath = (Get-Command ffmpeg).Source 82 | pyinstaller main.py ` 83 | --onefile ` 84 | --path="$(Get-Location)" ` 85 | --add-binary="$ffmpegPath;." ` 86 | --add-binary="pytranscriber.sqlite;." ` 87 | --add-data="pytranscriber/gui/*.qm;pytranscriber/gui/" ` 88 | --add-data="${env:ASSETS_PATH};whisper/assets" ` 89 | --clean 90 | 91 | shell: pwsh 92 | 93 | - name: Rename and zip the .exe bundle with version number 94 | run: | 95 | Set-Location -Path dist 96 | Write-Host "Renaming main.exe to pyTranscriber-$env:VERSION.exe" 97 | Rename-Item -Force main.exe "pyTranscriber-$env:VERSION.exe" 98 | # Write-Host "Creating zip archive: pyTranscriber-win-$env:VERSION.zip" 99 | # Compress-Archive -Path "pyTranscriber-$env:VERSION.exe" -DestinationPath "pyTranscriber-win-$env:VERSION.zip" 100 | shell: pwsh 101 | 102 | - name: Upload built executable for Python ${{ matrix.python-version }} 103 | uses: actions/upload-artifact@v4 104 | with: 105 | name: pyTranscriber-win-${{ env.VERSION }}-py${{ matrix.python-version }} 106 | path: ./dist/pyTranscriber-${{ env.VERSION }}.exe 107 | 108 | download: 109 | runs-on: windows-latest 110 | needs: build 111 | steps: 112 | - name: Download built executables 113 | uses: actions/download-artifact@v4 114 | with: 115 | path: ./output 116 | 117 | - name: List downloaded files 118 | run: dir ./output 119 | -------------------------------------------------------------------------------- /pytranscriber/gui/Português.ts: -------------------------------------------------------------------------------- 1 | 2 | 3 | 4 | 5 | window 6 | 7 | 8 | pyTranscriber - v1.8 - 17/08/2022 9 | pyTranscriber -v1.8 - 17/08/2022 10 | 11 | 12 | 
13 | Select file(s) 14 | Selecionar arquivo(s) 15 | 16 | 17 | 18 | Transcribe Audio / Generate Subtitles 19 | Transcrever áudio / Gerar Legendas 20 | 21 | 22 | 23 | Open Output Folder 24 | Abrir Pasta de Destino 25 | 26 | 27 | 28 | Output Location 29 | Pasta de Destino 30 | 31 | 32 | 33 | List of files to generate transcribe audio / generate subtitles 34 | Lista de arquivos para gerar legendas/transcrever áudio 35 | 36 | 37 | 38 | Remove file(s) 39 | Remover arquivo(s) 40 | 41 | 42 | 43 | Cancel 44 | Cancelar 45 | 46 | 47 | 48 | Open output files automatically 49 | Abrir arquivos de saída automaticamente 50 | 51 | 52 | 53 | Audio Language: 54 | Idioma do áudio: 55 | 56 | 57 | 58 | Abo&ut 59 | Sob&re 60 | 61 | 62 | 63 | &Settings 64 | &Configurações 65 | 66 | 67 | 68 | &Language 69 | &Idioma 70 | 71 | 72 | 73 | &License 74 | &Licença 75 | 76 | 77 | 78 | &Funding at Github Sponsors 79 | Patrocínio no GitHub Sponsors 80 | 81 | 82 | 83 | &More about pyTranscriber 84 | &Sobre o pyTranscriber 85 | 86 | 87 | 88 | &Proxy 89 | Proxy 90 | 91 | 92 | 93 | Proxy setting 94 | Configurações de Proxy 95 | 96 | 97 | 98 | English 99 | 100 | 101 | 102 | 103 | 繁體中文 - Chinese Traditional 104 | 105 | 106 | 107 | 108 | 简体中文 - Chinese Simplified 109 | 110 | 111 | 112 | 113 | Português 114 | 115 | 116 | 117 | 118 | -------------------------------------------------------------------------------- /pytranscriber/gui/proxy.py: -------------------------------------------------------------------------------- 1 | # -*- coding: utf-8 -*- 2 | 3 | # Form implementation generated from reading ui file '.\proxy.ui' 4 | # 5 | # Created by: PyQt5 UI code generator 5.15.4 6 | # 7 | # WARNING: Any manual changes made to this file will be lost when pyuic5 is 8 | # run again. Do not edit this file unless you know what you are doing. 
from PyQt5 import QtCore, QtGui, QtWidgets


class Ui_Dialog(object):
    """Widget tree and signal wiring of the proxy dialog (pyuic5 output of proxy.ui).

    The class only builds widgets on a dialog object supplied by the caller;
    it holds no state besides the widget references stored on ``self``.
    """

    def setupUi(self, Dialog):
        """Create all child widgets on ``Dialog`` and connect their signals.

        Layout: a vertical layout holding a group box (two radio buttons and a
        one-row grid with label / URL edit / Test button) followed by an
        Ok/Cancel button box.
        """
        Dialog.setObjectName("Dialog")
        Dialog.resize(500, 120)
        Dialog.setAutoFillBackground(False)
        Dialog.setSizeGripEnabled(False)
        self.verticalLayout = QtWidgets.QVBoxLayout(Dialog)
        self.verticalLayout.setObjectName("verticalLayout")
        self.groupBox = QtWidgets.QGroupBox(Dialog)
        self.groupBox.setTitle("")
        self.groupBox.setObjectName("groupBox")
        self.verticalLayout_2 = QtWidgets.QVBoxLayout(self.groupBox)
        self.verticalLayout_2.setObjectName("verticalLayout_2")
        # "None" radio button: selected by default.
        self.radioButtonNone = QtWidgets.QRadioButton(self.groupBox)
        sizePolicy = QtWidgets.QSizePolicy(QtWidgets.QSizePolicy.Minimum, QtWidgets.QSizePolicy.Fixed)
        sizePolicy.setHorizontalStretch(0)
        sizePolicy.setVerticalStretch(0)
        sizePolicy.setHeightForWidth(self.radioButtonNone.sizePolicy().hasHeightForWidth())
        self.radioButtonNone.setSizePolicy(sizePolicy)
        self.radioButtonNone.setChecked(True)
        self.radioButtonNone.setObjectName("radioButtonNone")
        self.verticalLayout_2.addWidget(self.radioButtonNone)
        # "HTTP" radio button.
        self.radioButtonHTTP = QtWidgets.QRadioButton(self.groupBox)
        self.radioButtonHTTP.setEnabled(True)
        sizePolicy = QtWidgets.QSizePolicy(QtWidgets.QSizePolicy.Minimum, QtWidgets.QSizePolicy.Fixed)
        sizePolicy.setHorizontalStretch(0)
        sizePolicy.setVerticalStretch(0)
        sizePolicy.setHeightForWidth(self.radioButtonHTTP.sizePolicy().hasHeightForWidth())
        self.radioButtonHTTP.setSizePolicy(sizePolicy)
        self.radioButtonHTTP.setObjectName("radioButtonHTTP")
        self.verticalLayout_2.addWidget(self.radioButtonHTTP)
        # Grid row 0: label (column 0), URL edit (column 1), Test button (column 2).
        self.gridLayout = QtWidgets.QGridLayout()
        self.gridLayout.setObjectName("gridLayout")
        self.lineEditHttpProxy = QtWidgets.QLineEdit(self.groupBox)
        self.lineEditHttpProxy.setToolTip("")
        self.lineEditHttpProxy.setStatusTip("")
        self.lineEditHttpProxy.setInputMethodHints(QtCore.Qt.ImhUrlCharactersOnly)
        self.lineEditHttpProxy.setObjectName("lineEditHttpProxy")
        self.gridLayout.addWidget(self.lineEditHttpProxy, 0, 1, 1, 1)
        self.label = QtWidgets.QLabel(self.groupBox)
        self.label.setObjectName("label")
        self.gridLayout.addWidget(self.label, 0, 0, 1, 1)
        self.pushButtonTest = QtWidgets.QPushButton(self.groupBox)
        self.pushButtonTest.setEnabled(True)
        self.pushButtonTest.setObjectName("pushButtonTest")
        self.gridLayout.addWidget(self.pushButtonTest, 0, 2, 1, 1)
        self.verticalLayout_2.addLayout(self.gridLayout)
        self.verticalLayout.addWidget(self.groupBox)
        self.buttonBox = QtWidgets.QDialogButtonBox(Dialog)
        self.buttonBox.setOrientation(QtCore.Qt.Horizontal)
        self.buttonBox.setStandardButtons(QtWidgets.QDialogButtonBox.Cancel|QtWidgets.QDialogButtonBox.Ok)
        self.buttonBox.setObjectName("buttonBox")
        self.verticalLayout.addWidget(self.buttonBox)

        self.retranslateUi(Dialog)
        self.buttonBox.accepted.connect(Dialog.accept)
        self.buttonBox.rejected.connect(Dialog.reject)
        # The bool carried by each radio button's clicked signal is forwarded
        # as the argument of the connected setDisabled / setEnabled slot.
        # NOTE(review): radioButtonNone starts checked, yet the URL edit and
        # the Test button start enabled and only change on a click; presumably
        # the controller syncs the initial state -- confirm.
        self.radioButtonNone.clicked['bool'].connect(self.lineEditHttpProxy.setDisabled)
        self.radioButtonNone.clicked['bool'].connect(self.pushButtonTest.setDisabled)
        self.radioButtonHTTP.clicked['bool'].connect(self.pushButtonTest.setEnabled)
        self.radioButtonHTTP.clicked['bool'].connect(self.lineEditHttpProxy.setEnabled)
        QtCore.QMetaObject.connectSlotsByName(Dialog)

    def retranslateUi(self, Dialog):
        """Set every user-visible string through Qt's translation lookup (context "Dialog")."""
        _translate = QtCore.QCoreApplication.translate
        Dialog.setWindowTitle(_translate("Dialog", "Proxy setting"))
        self.radioButtonNone.setText(_translate("Dialog", "None"))
        self.radioButtonHTTP.setText(_translate("Dialog", "HTTP"))
        self.lineEditHttpProxy.setPlaceholderText(_translate("Dialog", "http://127.0.0.1:1080"))
        self.label.setText(_translate("Dialog", "URL:"))
        self.pushButtonTest.setText(_translate("Dialog", "Test"))
# -*- coding: utf-8 -*-

# Form implementation generated from reading ui file 'window_proxy.ui'
#
# Created by: PyQt5 UI code generator 5.15.4
#
# WARNING: Any manual changes made to this file will be lost when pyuic5 is
# run again.  Do not edit this file unless you know what you are doing.


from PyQt5 import QtCore, QtGui, QtWidgets


class Ui_Dialog(object):
    """Widget tree and signal wiring of the proxy window (pyuic5 output of window_proxy.ui).

    The class only builds widgets on a dialog object supplied by the caller;
    it holds no state besides the widget references stored on ``self``.
    """

    def setupUi(self, Dialog):
        """Create all child widgets on ``Dialog`` and connect their signals.

        Layout: a group box with two radio buttons and a grid whose row 0
        holds label / URL edit / Test button and whose row 1 holds the Save
        button. All text-bearing widgets use a 9 pt font.
        """
        Dialog.setObjectName("Dialog")
        Dialog.resize(381, 171)
        Dialog.setAutoFillBackground(False)
        Dialog.setSizeGripEnabled(False)
        self.verticalLayout = QtWidgets.QVBoxLayout(Dialog)
        self.verticalLayout.setObjectName("verticalLayout")
        self.groupBox = QtWidgets.QGroupBox(Dialog)
        self.groupBox.setTitle("")
        self.groupBox.setObjectName("groupBox")
        self.verticalLayout_2 = QtWidgets.QVBoxLayout(self.groupBox)
        self.verticalLayout_2.setObjectName("verticalLayout_2")
        # "Disabled" radio button: selected by default.
        self.radioButtonNone = QtWidgets.QRadioButton(self.groupBox)
        sizePolicy = QtWidgets.QSizePolicy(QtWidgets.QSizePolicy.Minimum, QtWidgets.QSizePolicy.Fixed)
        sizePolicy.setHorizontalStretch(0)
        sizePolicy.setVerticalStretch(0)
        sizePolicy.setHeightForWidth(self.radioButtonNone.sizePolicy().hasHeightForWidth())
        self.radioButtonNone.setSizePolicy(sizePolicy)
        font = QtGui.QFont()
        font.setPointSize(9)
        self.radioButtonNone.setFont(font)
        self.radioButtonNone.setChecked(True)
        self.radioButtonNone.setObjectName("radioButtonNone")
        self.verticalLayout_2.addWidget(self.radioButtonNone)
        # "Enabled" radio button.
        self.radioButtonHTTP = QtWidgets.QRadioButton(self.groupBox)
        self.radioButtonHTTP.setEnabled(True)
        sizePolicy = QtWidgets.QSizePolicy(QtWidgets.QSizePolicy.Minimum, QtWidgets.QSizePolicy.Fixed)
        sizePolicy.setHorizontalStretch(0)
        sizePolicy.setVerticalStretch(0)
        sizePolicy.setHeightForWidth(self.radioButtonHTTP.sizePolicy().hasHeightForWidth())
        self.radioButtonHTTP.setSizePolicy(sizePolicy)
        font = QtGui.QFont()
        font.setPointSize(9)
        self.radioButtonHTTP.setFont(font)
        self.radioButtonHTTP.setObjectName("radioButtonHTTP")
        self.verticalLayout_2.addWidget(self.radioButtonHTTP)
        # Grid row 0: label (column 0), URL edit (column 1), Test button (column 2).
        self.gridLayout = QtWidgets.QGridLayout()
        self.gridLayout.setObjectName("gridLayout")
        self.pushButtonTest = QtWidgets.QPushButton(self.groupBox)
        self.pushButtonTest.setEnabled(True)
        font = QtGui.QFont()
        font.setPointSize(9)
        self.pushButtonTest.setFont(font)
        self.pushButtonTest.setObjectName("pushButtonTest")
        self.gridLayout.addWidget(self.pushButtonTest, 0, 2, 1, 1)
        self.lineEditHttpProxy = QtWidgets.QLineEdit(self.groupBox)
        self.lineEditHttpProxy.setToolTip("")
        self.lineEditHttpProxy.setStatusTip("")
        self.lineEditHttpProxy.setInputMethodHints(QtCore.Qt.ImhUrlCharactersOnly)
        self.lineEditHttpProxy.setObjectName("lineEditHttpProxy")
        self.gridLayout.addWidget(self.lineEditHttpProxy, 0, 1, 1, 1)
        self.label = QtWidgets.QLabel(self.groupBox)
        font = QtGui.QFont()
        font.setPointSize(9)
        self.label.setFont(font)
        self.label.setObjectName("label")
        self.gridLayout.addWidget(self.label, 0, 0, 1, 1)
        # Grid row 1, column 2: Save button (no signal is connected here).
        self.bSave = QtWidgets.QPushButton(self.groupBox)
        font = QtGui.QFont()
        font.setPointSize(9)
        self.bSave.setFont(font)
        self.bSave.setObjectName("bSave")
        self.gridLayout.addWidget(self.bSave, 1, 2, 1, 1)
        self.verticalLayout_2.addLayout(self.gridLayout)
        self.verticalLayout.addWidget(self.groupBox)

        self.retranslateUi(Dialog)
        # The bool carried by each radio button's clicked signal is forwarded
        # as the argument of the connected setDisabled / setEnabled slot.
        # NOTE(review): clicking "Disabled" disables pushButtonTest, but no
        # connection in this form re-enables it when "Enabled" is clicked;
        # verify the controller handles it, or fix it in window_proxy.ui.
        self.radioButtonNone.clicked['bool'].connect(self.lineEditHttpProxy.setDisabled)
        self.radioButtonNone.clicked['bool'].connect(self.pushButtonTest.setDisabled)
        self.radioButtonHTTP.clicked['bool'].connect(self.lineEditHttpProxy.setEnabled)
        QtCore.QMetaObject.connectSlotsByName(Dialog)

    def retranslateUi(self, Dialog):
        """Set every user-visible string through Qt's translation lookup (context "Dialog")."""
        _translate = QtCore.QCoreApplication.translate
        Dialog.setWindowTitle(_translate("Dialog", "Proxy setting"))
        self.radioButtonNone.setText(_translate("Dialog", "Disabled"))
        self.radioButtonHTTP.setText(_translate("Dialog", "Enabled"))
        self.pushButtonTest.setText(_translate("Dialog", "Test"))
        self.lineEditHttpProxy.setPlaceholderText(_translate("Dialog", "http://127.0.0.1:1080"))
        self.label.setText(_translate("Dialog", "URL:"))
        self.bSave.setText(_translate("Dialog", "Save"))
class Whisper:
    """Catalogue of the language codes offered for Whisper transcription."""

    # Lazily built "<code> - <name>" strings; None until first requested.
    supported_languages_list = None
    # Language code -> lowercase English language name.
    supported_languages_dict = {
        "en": "english",
        "zh": "chinese",
        "de": "german",
        "es": "spanish",
        "ru": "russian",
        "ko": "korean",
        "fr": "french",
        "ja": "japanese",
        "pt": "portuguese",
        "tr": "turkish",
        "pl": "polish",
        "ca": "catalan",
        "nl": "dutch",
        "ar": "arabic",
        "sv": "swedish",
        "it": "italian",
        "id": "indonesian",
        "hi": "hindi",
        "fi": "finnish",
        "vi": "vietnamese",
        "he": "hebrew",
        "uk": "ukrainian",
        "el": "greek",
        "ms": "malay",
        "cs": "czech",
        "ro": "romanian",
        "da": "danish",
        "hu": "hungarian",
        "ta": "tamil",
        "no": "norwegian",
        "th": "thai",
        "ur": "urdu",
        "hr": "croatian",
        "bg": "bulgarian",
        "lt": "lithuanian",
        "la": "latin",
        "mi": "maori",
        "ml": "malayalam",
        "cy": "welsh",
        "sk": "slovak",
        "te": "telugu",
        "fa": "persian",
        "lv": "latvian",
        "bn": "bengali",
        "sr": "serbian",
        "az": "azerbaijani",
        "sl": "slovenian",
        "kn": "kannada",
        "et": "estonian",
        "mk": "macedonian",
        "br": "breton",
        "eu": "basque",
        "is": "icelandic",
        "hy": "armenian",
        "ne": "nepali",
        "mn": "mongolian",
        "bs": "bosnian",
        "kk": "kazakh",
        "sq": "albanian",
        "sw": "swahili",
        "gl": "galician",
        "mr": "marathi",
        "pa": "punjabi",
        "si": "sinhala",
        "km": "khmer",
        "sn": "shona",
        "yo": "yoruba",
        "so": "somali",
        "af": "afrikaans",
        "oc": "occitan",
        "ka": "georgian",
        "be": "belarusian",
        "tg": "tajik",
        "sd": "sindhi",
        "gu": "gujarati",
        "am": "amharic",
        "yi": "yiddish",
        "lo": "lao",
        "uz": "uzbek",
        "fo": "faroese",
        "ht": "haitian creole",
        "ps": "pashto",
        "tk": "turkmen",
        "nn": "nynorsk",
        "mt": "maltese",
        "sa": "sanskrit",
        "lb": "luxembourgish",
        "my": "myanmar",
        "bo": "tibetan",
        "tl": "tagalog",
        "mg": "malagasy",
        "as": "assamese",
        "tt": "tatar",
        "haw": "hawaiian",
        "ln": "lingala",
        "ha": "hausa",
        "ba": "bashkir",
        "jw": "javanese",
        "su": "sundanese",
        "yue": "cantonese",
    }

    @staticmethod
    def convert_dict_to_list():
        """Rebuild ``supported_languages_list`` from the dictionary, in its order."""
        Whisper.supported_languages_list = [
            f"{code} - {name}"
            for code, name in Whisper.supported_languages_dict.items()
        ]

    @staticmethod
    def get_supported_languages():
        """Return the cached "<code> - <name>" list, building it on first use."""
        cached = Whisper.supported_languages_list
        if cached is not None:
            return cached
        Whisper.convert_dict_to_list()
        return Whisper.supported_languages_list
from PyQt5.QtCore import pyqtSignal, QObject
from PyQt5.QtWidgets import QMessageBox
import os
import sys
import whisper
import datetime
import shutil
from pytranscriber.control.ctr_engine import CtrEngine


class CtrWhisper(CtrEngine, QObject):
    """Transcription engine that runs Whisper and writes SRT/TXT output files."""

    errorSignal = pyqtSignal(str)  # Define the signal
    # Directory passed to whisper.load_model() as download_root; filled in by
    # initialize(). While it is still None, None is what load_model receives.
    MODEL_DIR = None
    # True once patch_ffmpeg() has succeeded, so that repeated transcriptions
    # neither stack shutil.which wrappers nor keep growing PATH.
    _ffmpeg_patched = False

    @classmethod
    def initialize(cls):
        """Initialize MODEL_DIR before using the class."""
        if cls.MODEL_DIR is None:
            cls.MODEL_DIR = cls.get_whisper_model_dir()

    def __init__(self):
        super().__init__()
        self.errorSignal.connect(self.show_error_message)  # Connect signal to slot

    @staticmethod
    def get_whisper_model_dir():
        """Return ``~/pytranscriber/whisper_models``, creating it if needed."""
        base_path = os.path.expanduser("~/pytranscriber")  # User's home directory

        model_dir = os.path.join(base_path, "whisper_models")
        os.makedirs(model_dir, exist_ok=True)  # Ensure directory exists
        return model_dir

    @staticmethod
    def generate_subtitles(source_path, src_language, outputSRT=None, outputTXT=None, model='base'):
        """Transcribe ``source_path`` and write the subtitle and text files.

        Parameters:
            source_path: media file handed to ``model.transcribe``.
            src_language: language passed to ``model.transcribe``.
            outputSRT: destination of the SRT content.
            outputTXT: destination of the plain-text content.
            model: Whisper model name passed to ``whisper.load_model``.

        Returns:
            ``-1`` when the operation was canceled, otherwise ``outputSRT``.

        Raises:
            FileNotFoundError: if no FFmpeg executable can be located.
        """
        CtrWhisper.patch_ffmpeg()  # Ensure FFmpeg is available

        whisper_model = whisper.load_model(model, download_root=CtrWhisper.MODEL_DIR)
        result = whisper_model.transcribe(source_path, verbose=True, language=src_language)

        # Cancellation is only checked once the transcription has returned.
        if CtrEngine.is_operation_canceled():
            return -1

        content_srt = CtrWhisper.generate_srt_file_content(result["segments"])
        content_txt = CtrWhisper.generate_txt_file_content(result["segments"])

        CtrWhisper.save_output_file(outputSRT, content_srt)
        CtrWhisper.save_output_file(outputTXT, content_txt)

        return outputSRT

    @staticmethod
    def show_error_message(message):
        """Displays the error message in a PyQt5 QMessageBox."""
        msg_box = QMessageBox()
        msg_box.setIcon(QMessageBox.Critical)
        msg_box.setWindowTitle("Error")
        msg_box.setText(message)
        msg_box.exec_()

    @staticmethod
    def _format_srt_timestamp(seconds):
        """Convert seconds to an SRT-compliant timestamp (HH:MM:SS,mmm).

        The value is rounded to whole milliseconds first and split afterwards,
        so a fraction such as 1.9996 s carries into the seconds field
        ("00:00:02,000") instead of producing a four-digit millisecond field.
        Negative inputs are clamped to zero.
        """
        total_millis = max(0, int(round(seconds * 1000)))
        hours, remainder = divmod(total_millis, 3600000)
        minutes, remainder = divmod(remainder, 60000)
        secs, millis = divmod(remainder, 1000)
        return f"{hours:02d}:{minutes:02d}:{secs:02d},{millis:03d}"

    @staticmethod
    def generate_srt_file_content(transcribed_segments):
        """Render the segments as SRT text.

        Each segment must provide ``start`` and ``end`` (seconds) and ``text``;
        entries are numbered from 1 and each is followed by a blank line.
        """
        to_timestamp = CtrWhisper._format_srt_timestamp
        return "".join(
            f"{i}\n{to_timestamp(s['start'])} --> {to_timestamp(s['end'])}\n{s['text'].strip()}\n\n"
            for i, s in enumerate(transcribed_segments, start=1)
        )

    @staticmethod
    def generate_txt_file_content(transcribed_segments):
        """Concatenate the ``text`` of every segment, unmodified and without separators."""
        return "".join(str(s["text"]) for s in transcribed_segments)

    # forces whisper to use the embedded ffmpeg in frozen app
    @staticmethod
    def patch_ffmpeg():
        """Ensure FFmpeg is correctly detected and patched for PyInstaller frozen apps.

        Exports FFMPEG_PATH, makes the FFmpeg directory reachable through PATH
        and wraps ``shutil.which`` so that a lookup of "ffmpeg" returns the
        resolved executable. The patch is applied once per process; later
        calls return immediately.

        Raises:
            FileNotFoundError: if no FFmpeg executable can be located.
        """
        if CtrWhisper._ffmpeg_patched:
            return

        if getattr(sys, "frozen", False):  # Running as a bundled executable
            ffmpeg_path = os.path.join(sys._MEIPASS, "ffmpeg")
        else:
            ffmpeg_path = shutil.which("ffmpeg")  # Use system-wide FFmpeg

        if not ffmpeg_path:
            raise FileNotFoundError("FFmpeg not found!")

        os.environ["FFMPEG_PATH"] = ffmpeg_path
        ffmpeg_dir = os.path.dirname(ffmpeg_path)
        current_path = os.environ.get("PATH", "")
        if ffmpeg_dir not in current_path.split(os.pathsep):
            os.environ["PATH"] = current_path + os.pathsep + ffmpeg_dir

        # Monkey-patch shutil.which to always return the correct FFmpeg path
        original_which = shutil.which

        def patched_which(cmd, *args, **kwargs):
            if cmd == "ffmpeg":
                return ffmpeg_path
            return original_which(cmd, *args, **kwargs)

        shutil.which = patched_which  # Apply the patch
        CtrWhisper._ffmpeg_patched = True
import os
from functools import lru_cache
from subprocess import CalledProcessError, run
from typing import Optional, Union

import numpy as np
import torch
import torch.nn.functional as F

from .utils import exact_div

# hard-coded audio hyperparameters
SAMPLE_RATE = 16000
N_FFT = 400
HOP_LENGTH = 160
CHUNK_LENGTH = 30
N_SAMPLES = CHUNK_LENGTH * SAMPLE_RATE  # 480000 samples in a 30-second chunk
N_FRAMES = exact_div(N_SAMPLES, HOP_LENGTH)  # 3000 frames in a mel spectrogram input

N_SAMPLES_PER_TOKEN = HOP_LENGTH * 2  # the initial convolutions has stride 2
FRAMES_PER_SECOND = exact_div(SAMPLE_RATE, HOP_LENGTH)  # 10ms per audio frame
TOKENS_PER_SECOND = exact_div(SAMPLE_RATE, N_SAMPLES_PER_TOKEN)  # 20ms per audio token


def load_audio(file: str, sr: int = SAMPLE_RATE):
    """
    Open an audio file and read as mono waveform, resampling as necessary

    Parameters
    ----------
    file: str
        The audio file to open

    sr: int
        The sample rate to resample the audio if necessary

    Returns
    -------
    A NumPy array containing the audio waveform, in float32 dtype.

    Raises
    ------
    RuntimeError
        If the ffmpeg process exits with a non-zero status; the message
        carries ffmpeg's decoded stderr output.
    """

    # This launches a subprocess to decode audio while down-mixing
    # and resampling as necessary.  Requires the ffmpeg CLI in PATH.
    # fmt: off
    cmd = [
        "ffmpeg",
        "-nostdin",
        "-threads", "0",
        "-i", file,
        "-f", "s16le",
        "-ac", "1",
        "-acodec", "pcm_s16le",
        "-ar", str(sr),
        "-"
    ]
    # fmt: on
    try:
        out = run(cmd, capture_output=True, check=True).stdout
    except CalledProcessError as e:
        raise RuntimeError(f"Failed to load audio: {e.stderr.decode()}") from e

    # stdout holds raw 16-bit PCM; dividing by 32768 maps it into [-1.0, 1.0).
    return np.frombuffer(out, np.int16).flatten().astype(np.float32) / 32768.0


def pad_or_trim(array, length: int = N_SAMPLES, *, axis: int = -1):
    """
    Pad or trim ``array`` along ``axis`` to exactly ``length`` entries
    (N_SAMPLES by default, as expected by the encoder).

    Longer inputs keep their first ``length`` entries; shorter inputs are
    zero-padded at the end. Torch tensors and NumPy arrays are both accepted
    and the result has the same kind as the input.
    """
    if torch.is_tensor(array):
        if array.shape[axis] > length:
            array = array.index_select(
                dim=axis, index=torch.arange(length, device=array.device)
            )

        if array.shape[axis] < length:
            pad_widths = [(0, 0)] * array.ndim
            pad_widths[axis] = (0, length - array.shape[axis])
            # F.pad takes a flat list ordered from the last dimension to the
            # first, hence the reversal and flattening of the per-axis pairs.
            array = F.pad(array, [pad for sizes in pad_widths[::-1] for pad in sizes])
    else:
        if array.shape[axis] > length:
            array = array.take(indices=range(length), axis=axis)

        if array.shape[axis] < length:
            pad_widths = [(0, 0)] * array.ndim
            pad_widths[axis] = (0, length - array.shape[axis])
            array = np.pad(array, pad_widths)

    return array


@lru_cache(maxsize=None)
def mel_filters(device, n_mels: int) -> torch.Tensor:
    """
    load the mel filterbank matrix for projecting STFT into a Mel spectrogram.
    Allows decoupling librosa dependency; saved using:

        np.savez_compressed(
            "mel_filters.npz",
            mel_80=librosa.filters.mel(sr=16000, n_fft=400, n_mels=80),
            mel_128=librosa.filters.mel(sr=16000, n_fft=400, n_mels=128),
        )

    Results are cached per (device, n_mels) pair by ``lru_cache``; only
    n_mels of 80 or 128 is accepted.
    """
    assert n_mels in {80, 128}, f"Unsupported n_mels: {n_mels}"

    filters_path = os.path.join(os.path.dirname(__file__), "assets", "mel_filters.npz")
    with np.load(filters_path, allow_pickle=False) as f:
        return torch.from_numpy(f[f"mel_{n_mels}"]).to(device)


def log_mel_spectrogram(
    audio: Union[str, np.ndarray, torch.Tensor],
    n_mels: int = 80,
    padding: int = 0,
    device: Optional[Union[str, torch.device]] = None,
):
    """
    Compute the log-Mel spectrogram of an audio waveform

    Parameters
    ----------
    audio: Union[str, np.ndarray, torch.Tensor], shape = (*)
        The path to audio or either a NumPy array or Tensor containing the audio waveform in 16 kHz

    n_mels: int
        The number of Mel-frequency filters, 80 or 128 (the values accepted by mel_filters)

    padding: int
        Number of zero samples to pad to the right

    device: Optional[Union[str, torch.device]]
        If given, the audio tensor is moved to this device before STFT

    Returns
    -------
    torch.Tensor, shape = (n_mels, n_frames)
        A Tensor that contains the Mel spectrogram
    """
    if not torch.is_tensor(audio):
        if isinstance(audio, str):
            audio = load_audio(audio)
        audio = torch.from_numpy(audio)

    if device is not None:
        audio = audio.to(device)
    if padding > 0:
        audio = F.pad(audio, (0, padding))
    window = torch.hann_window(N_FFT).to(audio.device)
    stft = torch.stft(audio, N_FFT, HOP_LENGTH, window=window, return_complex=True)
    # Power spectrum of every STFT frame except the last one.
    magnitudes = stft[..., :-1].abs() ** 2

    filters = mel_filters(audio.device, n_mels)
    mel_spec = filters @ magnitudes

    # log10 with a floor of 1e-10, then limit the range to 8 log10 units below
    # the maximum and apply the fixed (x + 4) / 4 rescaling.
    log_spec = torch.clamp(mel_spec, min=1e-10).log10()
    log_spec = torch.maximum(log_spec, log_spec.max() - 8.0)
    log_spec = (log_spec + 4.0) / 4.0
    return log_spec
from PyQt5.QtCore import QThread
from PyQt5.QtCore import pyqtSignal
from pathlib import Path
from pytranscriber.util.srtparser import SRTParser
from pytranscriber.util.util import MyUtil
from pytranscriber.control.ctr_autosub import Ctr_Autosub
import os
import traceback


class Thread_Exec_Autosub(QThread):
    """Worker thread that runs the autosub pipeline over the selected files.

    Progress, errors and the final GUI state are reported through the Qt
    signals declared below.
    """

    signalLockGUI = pyqtSignal()
    signalResetGUIAfterCancel = pyqtSignal()
    signalResetGUIAfterSuccess = pyqtSignal()
    signalProgress = pyqtSignal(str, int)
    signalProgressFileYofN = pyqtSignal(str)
    signalErrorMsg = pyqtSignal(str)

    def __init__(self, objParamAutosub):
        """Store the transcription parameters.

        ``objParamAutosub`` must provide ``listFiles``, ``outputFolder``,
        ``langCode``, ``proxies`` and ``boolOpenOutputFilesAuto``.
        """
        # Initialise the Qt base class before touching the instance.
        super().__init__()
        self.objParamAutosub = objParamAutosub
        self.running = True

    def __updateProgressFileYofN(self, currentIndex, countFiles):
        """Emit the "File Y of N" label for the zero-based ``currentIndex``."""
        self.signalProgressFileYofN.emit("File " + str(currentIndex+1) + " of " + str(countFiles))

    def listenerProgress(self, string, percent):
        """Forward a progress update (task description, percentage) to the GUI."""
        self.signalProgress.emit(string, percent)

    def __generatePathOutputFile(self, sourceFile):
        """Return ``[srt_path, txt_path]`` for ``sourceFile`` inside the output folder.

        Both outputs reuse the source file name without its extension.
        """
        # [0] is filename, [1] is file extension
        fileName = os.path.splitext(os.path.basename(sourceFile))[0]

        pathOutputFolder = Path(self.objParamAutosub.outputFolder)
        outputFileSRT = pathOutputFolder / (fileName + ".srt")
        outputFileTXT = pathOutputFolder / (fileName + ".txt")
        return [outputFileSRT, outputFileTXT]

    def __runAutosubForMedia(self, index, langCode):
        """Generate the subtitles for the file at ``index`` and report the outcome."""
        sourceFile = self.objParamAutosub.listFiles[index]
        outputFileSRT, outputFileTXT = self.__generatePathOutputFile(sourceFile)

        # run autosub
        try:
            fOutput = Ctr_Autosub.generate_subtitles(source_path=sourceFile,
                                                     output=outputFileSRT,
                                                     src_language=langCode,
                                                     listener_progress=self.listenerProgress,
                                                     proxies=self.objParamAutosub.proxies)
        except Exception:
            error_msg = f"""Error! Unable to generate subtitles: {traceback.format_exc()}"""
            self.signalErrorMsg.emit(error_msg)  # Emit the full traceback
            # Nothing was produced; stop here instead of reading the result
            # that was never assigned.
            return

        # if nothing was returned
        if not fOutput:
            self.signalErrorMsg.emit("Error! Unable to generate subtitles for file " + sourceFile + ".")
        elif fOutput != -1:
            # if the operation was not canceled

            # updated the progress message
            self.listenerProgress("Finished", 100)

            # parses the .srt subtitle file and export text to .txt file
            SRTParser.extractTextFromSRT(str(outputFileSRT))

            if self.objParamAutosub.boolOpenOutputFilesAuto:
                # open both SRT and TXT output files
                MyUtil.open_file(outputFileTXT)
                MyUtil.open_file(outputFileSRT)

    def __loopSelectedFiles(self):
        """Lock the GUI, process every selected file, then emit the final state."""
        self.signalLockGUI.emit()

        langCode = self.objParamAutosub.langCode

        # if output directory does not exist, creates it (parents included)
        pathOutputFolder = Path(self.objParamAutosub.outputFolder)
        try:
            pathOutputFolder.mkdir(parents=True, exist_ok=True)
        except OSError:
            # The path is an existing non-directory or cannot be created;
            # the isdir() check below reports that to the user.
            pass

        # if there the output file is not a directory
        if not os.path.isdir(pathOutputFolder):
            # force the user to select a different output directory
            # NOTE(review): no reset signal is emitted on this path (kept as
            # in the original); presumably the error handler unlocks the GUI.
            self.signalErrorMsg.emit("Error! Invalid output folder. Please choose another one.")
        else:
            # go ahead with autosub process
            nFiles = len(self.objParamAutosub.listFiles)
            for i in range(nFiles):
                # does not continue the loop if user clicked cancel button
                if Ctr_Autosub.is_operation_canceled():
                    break
                self.__updateProgressFileYofN(i, nFiles)
                self.__runAutosubForMedia(i, langCode)

            # if operation is canceled does not clear the file list
            if Ctr_Autosub.is_operation_canceled():
                self.signalResetGUIAfterCancel.emit()
            else:
                self.signalResetGUIAfterSuccess.emit()

    def run(self):
        """Thread entry point: clear the cancel flag and process the files."""
        Ctr_Autosub.init()
        self.__loopSelectedFiles()
        self.running = False

    def cancel(self):
        """Request cancellation of the running autosub operation."""
        Ctr_Autosub.cancel_operation()
from autosub import FLACConverter
from autosub import SpeechRecognizer
from autosub import extract_audio
from autosub import find_speech_regions
from autosub import DEFAULT_CONCURRENCY
from autosub import DEFAULT_SUBTITLE_FORMAT
from autosub import GOOGLE_SPEECH_API_KEY
from autosub.formatters import FORMATTERS

import multiprocessing
import time
import os

from pytranscriber.util.util import MyUtil


class Ctr_Autosub:
    """Static controller of the autosub pipeline, with cooperative cancellation.

    All state lives in class attributes because one thread runs the pipeline
    while another one requests the cancellation.
    """

    cancel = False
    # Stage of the current run: None = idle, 0 = extracting audio,
    # 1 = converting regions to FLAC, 2 = speech recognition.
    step = None
    # Worker pool of the most recently started stage (None before the first).
    pool = None

    @staticmethod
    def init():
        """Clear the cancel flag before a new batch."""
        Ctr_Autosub.cancel = False

    @staticmethod
    def is_operation_canceled():
        """Return True once cancel_operation() has been requested."""
        return Ctr_Autosub.cancel

    @staticmethod
    def output_progress(listener_progress, str_task, progress_percent):
        """Forward a progress update unless cancellation was requested."""
        # only update progress if not requested to cancel
        if not Ctr_Autosub.cancel:
            listener_progress(str_task, progress_percent)

    @staticmethod
    def cancel_operation():
        """Request cancellation and stop the worker pool of the current stage.

        Blocks while the audio extraction (stage 0) is in progress. Safe to
        call when no run is active or no pool has been created yet.
        """
        Ctr_Autosub.cancel = True

        while Ctr_Autosub.step == 0:
            time.sleep(0.1)

        pool = Ctr_Autosub.pool
        if pool is None:
            return

        # the first step involves ffmpeg and cannot be stopped safely
        if Ctr_Autosub.step == 1:
            # close wait for threads to finish their work first
            pool.close()
            pool.join()
        else:
            # terminates the threads immediately
            pool.terminate()
            pool.join()

    @staticmethod
    def generate_subtitles(  # pylint: disable=too-many-locals,too-many-arguments
        source_path,
        src_language,
        listener_progress,
        output=None,
        concurrency=DEFAULT_CONCURRENCY,
        subtitle_file_format=DEFAULT_SUBTITLE_FORMAT,
        proxies=None
    ):
        """
        Given an input audio/video file, generate subtitles in the specified language and format.

        Parameters:
            source_path: media file to transcribe.
            src_language: language passed to the speech recognizer.
            listener_progress: callable(task_description, percent) for progress.
            output: destination file; defaults to ``source_path`` with its
                extension replaced by ``subtitle_file_format``.
            concurrency: number of worker processes per stage.
            subtitle_file_format: key into ``FORMATTERS``.
            proxies: forwarded to the speech recognizer.

        Returns:
            ``-1`` if the operation was canceled, otherwise the destination path.
        """
        # windows not support forkserver... only spawn
        if os.name != "nt" and "Darwin" in os.uname():
            # necessary for running on MacOS
            # method can be set only once, otherwise crash
            # from python 3.8 above the default for macos is spawn and not fork
            if 'spawn' != multiprocessing.get_start_method(allow_none=True):
                multiprocessing.set_start_method('spawn')
        Ctr_Autosub.cancel = False
        Ctr_Autosub.step = 0

        audio_filename = None
        try:
            audio_filename, audio_rate = extract_audio(source_path)

            regions = find_speech_regions(audio_filename)

            converter = FLACConverter(source_path=audio_filename)
            recognizer = SpeechRecognizer(language=src_language, rate=audio_rate,
                                          api_key=GOOGLE_SPEECH_API_KEY, proxies=proxies)
            transcripts = []
            if regions:
                try:
                    if Ctr_Autosub.cancel:
                        return -1

                    str_task_1 = "Step 1 of 2: Converting speech regions to FLAC files "
                    len_regions = len(regions)
                    extracted_regions = []
                    Ctr_Autosub.pool = multiprocessing.Pool(concurrency)
                    for i, extracted_region in enumerate(Ctr_Autosub.pool.imap(converter, regions)):
                        Ctr_Autosub.step = 1
                        extracted_regions.append(extracted_region)
                        progress_percent = MyUtil.percentage(i, len_regions)
                        Ctr_Autosub.output_progress(listener_progress, str_task_1, progress_percent)
                    if Ctr_Autosub.cancel:
                        return -1
                    Ctr_Autosub.pool.close()
                    Ctr_Autosub.pool.join()

                    str_task_2 = "Step 2 of 2: Performing speech recognition "
                    Ctr_Autosub.pool = multiprocessing.Pool(concurrency)
                    for i, transcript in enumerate(Ctr_Autosub.pool.imap(recognizer, extracted_regions)):
                        Ctr_Autosub.step = 2
                        transcripts.append(transcript)
                        progress_percent = MyUtil.percentage(i, len_regions)
                        Ctr_Autosub.output_progress(listener_progress, str_task_2, progress_percent)

                    if Ctr_Autosub.cancel:
                        return -1
                    Ctr_Autosub.pool.close()
                    Ctr_Autosub.pool.join()

                except KeyboardInterrupt:
                    if Ctr_Autosub.pool is not None:
                        Ctr_Autosub.pool.terminate()
                        Ctr_Autosub.pool.join()
                    raise

            timed_subtitles = [(r, t) for r, t in zip(regions, transcripts) if t]
            formatter = FORMATTERS.get(subtitle_file_format)
            formatted_subtitles = formatter(timed_subtitles)

            dest = output

            if not dest:
                base = os.path.splitext(source_path)[0]
                dest = "{base}.{format}".format(base=base, format=subtitle_file_format)

            with open(dest, 'wb') as output_file:
                output_file.write(formatted_subtitles.encode("utf-8"))

            if Ctr_Autosub.cancel:
                return -1

            # Both stage pools were closed and joined above, so there is
            # nothing left to shut down on the success path.
            return dest
        finally:
            # Remove the temporary audio file on every exit path, including
            # cancellation and errors.
            if audio_filename:
                try:
                    os.remove(audio_filename)
                except OSError:
                    # Best-effort cleanup; do not mask the primary outcome.
                    pass
            # Back to idle so a pending cancel_operation() stops waiting.
            Ctr_Autosub.step = None
132 | 133 | 134 | 135 | 136 | buttonBox 137 | accepted() 138 | Dialog 139 | accept() 140 | 141 | 142 | 224 143 | 100 144 | 145 | 146 | 157 147 | 108 148 | 149 | 150 | 151 | 152 | buttonBox 153 | rejected() 154 | Dialog 155 | reject() 156 | 157 | 158 | 255 159 | 102 160 | 161 | 162 | 261 163 | 108 164 | 165 | 166 | 167 | 168 | radioButtonNone 169 | clicked(bool) 170 | lineEditHttpProxy 171 | setDisabled(bool) 172 | 173 | 174 | 130 175 | 19 176 | 177 | 178 | 111 179 | 60 180 | 181 | 182 | 183 | 184 | radioButtonNone 185 | clicked(bool) 186 | pushButtonTest 187 | setDisabled(bool) 188 | 189 | 190 | 130 191 | 19 192 | 193 | 194 | 219 195 | 60 196 | 197 | 198 | 199 | 200 | radioButtonHTTP 201 | clicked(bool) 202 | pushButtonTest 203 | setEnabled(bool) 204 | 205 | 206 | 130 207 | 36 208 | 209 | 210 | 219 211 | 60 212 | 213 | 214 | 215 | 216 | radioButtonHTTP 217 | clicked(bool) 218 | lineEditHttpProxy 219 | setEnabled(bool) 220 | 221 | 222 | 130 223 | 36 224 | 225 | 226 | 111 227 | 60 228 | 229 | 230 | 231 | 232 | 233 | -------------------------------------------------------------------------------- /whisper/__init__.py: -------------------------------------------------------------------------------- 1 | import hashlib 2 | import io 3 | import os 4 | import urllib 5 | import warnings 6 | from typing import List, Optional, Union 7 | 8 | import torch 9 | from tqdm import tqdm 10 | 11 | from .audio import load_audio, log_mel_spectrogram, pad_or_trim 12 | from .decoding import DecodingOptions, DecodingResult, decode, detect_language 13 | from .model import ModelDimensions, Whisper 14 | from .transcribe import transcribe 15 | from .version import __version__ 16 | 17 | _MODELS = { 18 | "tiny.en": "https://openaipublic.azureedge.net/main/whisper/models/d3dd57d32accea0b295c96e26691aa14d8822fac7d9d27d5dc00b4ca2826dd03/tiny.en.pt", 19 | "tiny": "https://openaipublic.azureedge.net/main/whisper/models/65147644a518d12f04e32d6f3b26facc3f8dd46e5390956a9424a650c0ce22b9/tiny.pt", 20 | 
"base.en": "https://openaipublic.azureedge.net/main/whisper/models/25a8566e1d0c1e2231d1c762132cd20e0f96a85d16145c3a00adf5d1ac670ead/base.en.pt", 21 | "base": "https://openaipublic.azureedge.net/main/whisper/models/ed3a0b6b1c0edf879ad9b11b1af5a0e6ab5db9205f891f668f8b0e6c6326e34e/base.pt", 22 | "small.en": "https://openaipublic.azureedge.net/main/whisper/models/f953ad0fd29cacd07d5a9eda5624af0f6bcf2258be67c92b79389873d91e0872/small.en.pt", 23 | "small": "https://openaipublic.azureedge.net/main/whisper/models/9ecf779972d90ba49c06d968637d720dd632c55bbf19d441fb42bf17a411e794/small.pt", 24 | "medium.en": "https://openaipublic.azureedge.net/main/whisper/models/d7440d1dc186f76616474e0ff0b3b6b879abc9d1a4926b7adfa41db2d497ab4f/medium.en.pt", 25 | "medium": "https://openaipublic.azureedge.net/main/whisper/models/345ae4da62f9b3d59415adc60127b97c714f32e89e936602e85993674d08dcb1/medium.pt", 26 | "large-v1": "https://openaipublic.azureedge.net/main/whisper/models/e4b87e7e0bf463eb8e6956e646f1e277e901512310def2c24bf0e11bd3c28e9a/large-v1.pt", 27 | "large-v2": "https://openaipublic.azureedge.net/main/whisper/models/81f7c96c852ee8fc832187b0132e569d6c3065a3252ed18e56effd0b6a73e524/large-v2.pt", 28 | "large-v3": "https://openaipublic.azureedge.net/main/whisper/models/e5b1a55b89c1367dacf97e3e19bfd829a01529dbfdeefa8caeb59b3f1b81dadb/large-v3.pt", 29 | "large": "https://openaipublic.azureedge.net/main/whisper/models/e5b1a55b89c1367dacf97e3e19bfd829a01529dbfdeefa8caeb59b3f1b81dadb/large-v3.pt", 30 | "large-v3-turbo": "https://openaipublic.azureedge.net/main/whisper/models/aff26ae408abcba5fbf8813c21e62b0941638c5f6eebfb145be0c9839262a19a/large-v3-turbo.pt", 31 | "turbo": "https://openaipublic.azureedge.net/main/whisper/models/aff26ae408abcba5fbf8813c21e62b0941638c5f6eebfb145be0c9839262a19a/large-v3-turbo.pt", 32 | } 33 | 34 | # base85-encoded (n_layers, n_heads) boolean arrays indicating the cross-attention heads that are 35 | # highly correlated to the word-level timing, i.e. 
the alignment between audio and text tokens. 36 | _ALIGNMENT_HEADS = { 37 | "tiny.en": b"ABzY8J1N>@0{>%R00Bk>$p{7v037`oCl~+#00", 38 | "tiny": b"ABzY8bu8Lr0{>%RKn9Fp%m@SkK7Kt=7ytkO", 39 | "base.en": b"ABzY8;40c<0{>%RzzG;p*o+Vo09|#PsxSZm00", 40 | "base": b"ABzY8KQ!870{>%RzyTQH3`Q^yNP!>##QT-?_)10{>%RpeA61k&I|OI3I$65C{;;pbCHh0B{qLQ;+}v00", 42 | "small": b"ABzY8DmU6=0{>%Rpa?J`kvJ6qF(V^F86#Xh7JUGMK}P%R7%R7}kK1fFL7w6%<-Pf*t^=N)Qr&0RR9", 45 | "large-v1": b"ABzY8r9j$a0{>%R7#4sLmoOs{s)o3~84-RPdcFk!JR%R7=D0pU<_bnWW*tkYAhobTNnu$jnkEkXqp)j;w1Tzk)UH3X%SZd&fFZ2fC2yj", 47 | "large-v3": b"ABzY8gWO1E0{>%R7(9S+Kn!D~%ngiGaR?*L!iJG9p-nab0JQ=-{D1-g00", 48 | "large": b"ABzY8gWO1E0{>%R7(9S+Kn!D~%ngiGaR?*L!iJG9p-nab0JQ=-{D1-g00", 49 | "large-v3-turbo": b"ABzY8j^C+e0{>%RARaKHP%t(lGR*)0g!tONPyhe`", 50 | "turbo": b"ABzY8j^C+e0{>%RARaKHP%t(lGR*)0g!tONPyhe`", 51 | } 52 | 53 | 54 | def _download(url: str, root: str, in_memory: bool) -> Union[bytes, str]: 55 | os.makedirs(root, exist_ok=True) 56 | 57 | expected_sha256 = url.split("/")[-2] 58 | download_target = os.path.join(root, os.path.basename(url)) 59 | 60 | if os.path.exists(download_target) and not os.path.isfile(download_target): 61 | raise RuntimeError(f"{download_target} exists and is not a regular file") 62 | 63 | if os.path.isfile(download_target): 64 | with open(download_target, "rb") as f: 65 | model_bytes = f.read() 66 | if hashlib.sha256(model_bytes).hexdigest() == expected_sha256: 67 | return model_bytes if in_memory else download_target 68 | else: 69 | warnings.warn( 70 | f"{download_target} exists, but the SHA256 checksum does not match; re-downloading the file" 71 | ) 72 | 73 | with urllib.request.urlopen(url) as source, open(download_target, "wb") as output: 74 | with tqdm( 75 | total=int(source.info().get("Content-Length")), 76 | ncols=80, 77 | unit="iB", 78 | unit_scale=True, 79 | unit_divisor=1024, 80 | ) as loop: 81 | while True: 82 | buffer = source.read(8192) 83 | if not buffer: 84 | break 85 | 86 | output.write(buffer) 
87 | loop.update(len(buffer)) 88 | 89 | model_bytes = open(download_target, "rb").read() 90 | if hashlib.sha256(model_bytes).hexdigest() != expected_sha256: 91 | raise RuntimeError( 92 | "Model has been downloaded but the SHA256 checksum does not not match. Please retry loading the model." 93 | ) 94 | 95 | return model_bytes if in_memory else download_target 96 | 97 | 98 | def available_models() -> List[str]: 99 | """Returns the names of available models""" 100 | return list(_MODELS.keys()) 101 | 102 | 103 | def load_model( 104 | name: str, 105 | device: Optional[Union[str, torch.device]] = None, 106 | download_root: str = None, 107 | in_memory: bool = False, 108 | ) -> Whisper: 109 | """ 110 | Load a Whisper ASR model 111 | 112 | Parameters 113 | ---------- 114 | name : str 115 | one of the official model names listed by `whisper.available_models()`, or 116 | path to a model checkpoint containing the model dimensions and the model state_dict. 117 | device : Union[str, torch.device] 118 | the PyTorch device to put the model into 119 | download_root: str 120 | path to download the model files; by default, it uses "~/.cache/whisper" 121 | in_memory: bool 122 | whether to preload the model weights into host memory 123 | 124 | Returns 125 | ------- 126 | model : Whisper 127 | The Whisper ASR model instance 128 | """ 129 | 130 | if device is None: 131 | device = "cuda" if torch.cuda.is_available() else "cpu" 132 | if download_root is None: 133 | default = os.path.join(os.path.expanduser("~"), ".cache") 134 | download_root = os.path.join(os.getenv("XDG_CACHE_HOME", default), "whisper") 135 | 136 | if name in _MODELS: 137 | checkpoint_file = _download(_MODELS[name], download_root, in_memory) 138 | alignment_heads = _ALIGNMENT_HEADS[name] 139 | elif os.path.isfile(name): 140 | checkpoint_file = open(name, "rb").read() if in_memory else name 141 | alignment_heads = None 142 | else: 143 | raise RuntimeError( 144 | f"Model {name} not found; available models = 
{available_models()}" 145 | ) 146 | 147 | with ( 148 | io.BytesIO(checkpoint_file) if in_memory else open(checkpoint_file, "rb") 149 | ) as fp: 150 | checkpoint = torch.load(fp, map_location=device) 151 | del checkpoint_file 152 | 153 | dims = ModelDimensions(**checkpoint["dims"]) 154 | model = Whisper(dims) 155 | model.load_state_dict(checkpoint["model_state_dict"]) 156 | 157 | if alignment_heads is not None: 158 | model.set_alignment_heads(alignment_heads) 159 | 160 | return model.to(device) 161 | -------------------------------------------------------------------------------- /pytranscriber/model/google_speech.py: -------------------------------------------------------------------------------- 1 | ''' 2 | (C) 2025 Raryel C. Souza 3 | This program is free software: you can redistribute it and/or modify 4 | it under the terms of the GNU General Public License as published by 5 | the Free Software Foundation, either version 3 of the License, or 6 | (at your option) any later version. 7 | This program is distributed in the hope that it will be useful, 8 | but WITHOUT ANY WARRANTY; without even the implied warranty of 9 | MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the 10 | GNU General Public License for more details. 11 | You should have received a copy of the GNU General Public License 12 | along with this program. If not, see . 
class Google_Speech:
    """Catalogue of the languages offered for the Google Speech engine.

    Every entry has the form ``"<language code> - <display name>"``; the part
    before the first ``" - "`` is the language code.
    """

    supported_languages_list = [
        "en-US - English (United States)",
        "cmn-Hans-CN - Chinese (Simplified, China)",
        "cmn-Hant-TW - Chinese (Traditional, Taiwan)",
        "yue-Hant-HK - Cantonese (Traditional, HK)",
        "en-AU - English (Australia)",
        "en-CA - English (Canada)",
        "en-GB - English (United Kingdom)",
        "en-HK - English (Hong Kong)",
        "en-IN - English (India)",
        # was a second "en-GB" entry; Ireland's region subtag is IE
        "en-IE - English (Ireland)",
        "en-NZ - English (New Zealand)",
        "en-PH - English (Philippines)",
        "en-SG - English (Singapore)",
        "af - Afrikaans",
        "ar - Arabic",
        "ar-DZ - Arabic (Algeria)",
        "ar-EG - Arabic (Egypt)",
        "ar-IQ - Arabic (Iraq)",
        # was "ar-IS"; IS is Iceland, Israel's region subtag is IL
        "ar-IL - Arabic (Israel)",
        "ar-JO - Arabic (Jordan)",
        "ar-KW - Arabic (Kuwait)",
        "ar-LB - Arabic (Lebanon)",
        "ar-MA - Arabic (Morocco)",
        "ar-OM - Arabic (Oman)",
        "ar-QA - Arabic (Qatar)",
        "ar-SA - Arabic (Saudi Arabia)",
        "ar-PS - Arabic (State of Palestine)",
        "ar-TN - Arabic (Tunisia)",
        "ar-AE - Arabic (United Arab Emirates)",
        "ar-YE - Arabic (Yemen)",
        "az - Azerbaijani",
        "be - Belarusian",
        "bg - Bulgarian",
        "bn - Bengali",
        "bs - Bosnian",
        "ca - Catalan",
        # separator normalised (was "ceb -Cebuano")
        "ceb - Cebuano",
        "cs - Czech",
        "cy - Welsh",
        "da - Danish",
        "de - German",
        "de-AT - German (Austria)",
        "de-CH - German (Switzerland)",
        "el - Greek",
        "eo - Esperanto",
        "es-ES - Spanish (Spain)",
        "es-AR - Spanish (Argentina)",
        "es-BO - Spanish (Bolivia)",
        "es-CL - Spanish (Chile)",
        "es-CO - Spanish (Colombia)",
        "es-CR - Spanish (Costa Rica)",
        "es-DO - Spanish (Dominican Republic)",
        "es-EC - Spanish (Ecuador)",
        "es-GT - Spanish (Guatemala)",
        "es-HN - Spanish (Honduras)",
        "es-MX - Spanish (Mexico)",
        "es-NI - Spanish (Nicaragua)",
        "es-PA - Spanish (Panama)",
        "es-PE - Spanish (Peru)",
        "es-PR - Spanish (Puerto Rico)",
        "es-PY - Spanish (Paraguay)",
        "es-SV - Spanish (El Salvador)",
        "es-UY - Spanish (Uruguay)",
        "es-US - Spanish (United States)",
        "es-VE - Spanish (Venezuela)",
        "et - Estonian",
        "eu - Basque",
        "fa - Persian",
        "fil-PH - Filipino (Philippines)",
        "fi - Finnish",
        "fr - French",
        "fr-BE - French (Belgium)",
        "fr-CA - French (Canada)",
        "fr-CH - French (Switzerland)",
        "ga - Irish",
        "gl - Galician",
        # separator normalised (was "gu -Gujarati")
        "gu - Gujarati",
        "ha - Hausa",
        "hi - Hindi",
        "hmn - Hmong",
        "hr - Croatian",
        "ht - Haitian Creole",
        "hu - Hungarian",
        "hy - Armenian",
        "id - Indonesian",
        "ig - Igbo",
        "is - Icelandic",
        "it - Italian",
        "it-CH - Italian (Switzerland)",
        "iw - Hebrew",
        "ja - Japanese",
        "jw - Javanese",
        "ka - Georgian",
        "kk - Kazakh",
        "km - Khmer",
        "kn - Kannada",
        "ko - Korean",
        "la - Latin",
        "lo - Lao",
        "lt - Lithuanian",
        "lv - Latvian",
        "mg - Malagasy",
        "mi - Maori",
        "mk - Macedonian",
        "ml - Malayalam",
        "mn - Mongolian",
        "mr - Marathi",
        "ms - Malay",
        "mt - Maltese",
        "my - Myanmar (Burmese)",
        "ne - Nepali",
        "nl - Dutch",
        "no - Norwegian",
        "ny - Chichewa",
        "pa - Punjabi",
        "pl - Polish",
        "pt-BR - Portuguese (Brazil)",
        "pt-PT - Portuguese (Portugal)",
        "ro - Romanian",
        "ru - Russian",
        "si - Sinhala",
        "sk - Slovak",
        "sl - Slovenian",
        "so - Somali",
        "sq - Albanian",
        "sr - Serbian",
        "st - Sesotho",
        # "su" is the code for Sundanese (label previously read "Sudanese")
        "su - Sundanese",
        "sv - Swedish",
        "sw - Swahili",
        "ta - Tamil",
        "ta-IN - Tamil (India)",
        "ta-MY - Tamil (Malaysia)",
        "ta-SG - Tamil (Singapore)",
        "ta-LK - Tamil (Sri Lanka)",
        "te - Telugu",
        "tg - Tajik",
        "th - Thai",
        "tl - Filipino",
        "tr - Turkish",
        "uk - Ukrainian",
        "ur - Urdu",
        "uz - Uzbek",
        "vi - Vietnamese",
        "yi - Yiddish",
        "yo - Yoruba",
        "zu - Zulu",
    ]

    @staticmethod
    def get_supported_languages():
        """Return the shared list of ``"<code> - <name>"`` entries (not a copy)."""
        return Google_Speech.supported_languages_list
def get_start(segments: List[dict]) -> Optional[float]:
    """
    Return the earliest timestamp available for `segments`.

    The start of the first word found (scanning segments in order) wins; when
    no segment carries any word timing, the first segment's own "start" is
    used. Returns None for an empty list. Segments without a "words" key are
    treated as having no word timings instead of raising KeyError.
    """
    return next(
        (w["start"] for s in segments for w in s.get("words") or ()),
        segments[0]["start"] if segments else None,
    )


def get_end(segments: List[dict]) -> Optional[float]:
    """
    Return the latest timestamp available for `segments`.

    The end of the last word found (scanning segments and words backwards)
    wins; when no segment carries any word timing, the last segment's own
    "end" is used. Returns None for an empty list. Segments without a "words"
    key are treated as having no word timings instead of raising KeyError.
    """
    return next(
        (w["end"] for s in reversed(segments) for w in reversed(s.get("words") or ())),
        segments[-1]["end"] if segments else None,
    )
class SubtitlesWriter(ResultWriter):
    """
    Base class for writers that emit timed subtitle cues.

    Subclasses set `always_include_hours` and `decimal_marker`, which control
    how `format_timestamp` renders cue times.
    """

    always_include_hours: bool
    decimal_marker: str

    def iterate_result(
        self,
        result: dict,
        options: Optional[dict] = None,
        *,
        max_line_width: Optional[int] = None,
        max_line_count: Optional[int] = None,
        highlight_words: bool = False,
        max_words_per_line: Optional[int] = None,
    ):
        """
        Yield `(start, end, text)` cues for `result`.

        Keyword arguments take precedence; any that are falsy fall back to the
        same key in `options`. When the first segment has a "words" entry, cues
        are rebuilt from word timings (honouring the line/word limits);
        otherwise one cue per segment is produced from the segment text.

        With `highlight_words`, one cue is emitted per word with that word
        wrapped in `<u>...</u>`, plus an un-highlighted cue for any gap between
        consecutive words.
        """
        options = options or {}
        max_line_width = max_line_width or options.get("max_line_width")
        max_line_count = max_line_count or options.get("max_line_count")
        highlight_words = highlight_words or options.get("highlight_words", False)
        max_words_per_line = max_words_per_line or options.get("max_words_per_line")
        # segment boundaries are kept unless both a line width and a line count are given
        preserve_segments = max_line_count is None or max_line_width is None
        max_line_width = max_line_width or 1000
        max_words_per_line = max_words_per_line or 1000

        def iterate_subtitles():
            line_len = 0
            line_count = 1
            # the next subtitle to yield (a list of word timings with whitespace)
            subtitle: List[dict] = []
            last: float = get_start(result["segments"]) or 0.0
            for segment in result["segments"]:
                chunk_index = 0
                words_count = max_words_per_line
                while chunk_index < len(segment["words"]):
                    remaining_words = len(segment["words"]) - chunk_index
                    if max_words_per_line > len(segment["words"]) - chunk_index:
                        words_count = remaining_words
                    for i, original_timing in enumerate(
                        segment["words"][chunk_index : chunk_index + words_count]
                    ):
                        # copy so the caller's word dicts are never mutated
                        timing = original_timing.copy()
                        long_pause = (
                            not preserve_segments and timing["start"] - last > 3.0
                        )
                        has_room = line_len + len(timing["word"]) <= max_line_width
                        seg_break = i == 0 and len(subtitle) > 0 and preserve_segments
                        if (
                            line_len > 0
                            and has_room
                            and not long_pause
                            and not seg_break
                        ):
                            # line continuation
                            line_len += len(timing["word"])
                        else:
                            # new line
                            timing["word"] = timing["word"].strip()
                            # parentheses only spell out the existing and/or precedence
                            if (
                                len(subtitle) > 0
                                and max_line_count is not None
                                and (long_pause or line_count >= max_line_count)
                            ) or seg_break:
                                # subtitle break
                                yield subtitle
                                subtitle = []
                                line_count = 1
                            elif line_len > 0:
                                # line break
                                line_count += 1
                                timing["word"] = "\n" + timing["word"]
                            line_len = len(timing["word"].strip())
                        subtitle.append(timing)
                        last = timing["start"]
                    chunk_index += max_words_per_line
            if len(subtitle) > 0:
                yield subtitle

        if len(result["segments"]) > 0 and "words" in result["segments"][0]:
            for subtitle in iterate_subtitles():
                subtitle_start = self.format_timestamp(subtitle[0]["start"])
                subtitle_end = self.format_timestamp(subtitle[-1]["end"])
                subtitle_text = "".join([word["word"] for word in subtitle])
                if highlight_words:
                    last = subtitle_start
                    all_words = [timing["word"] for timing in subtitle]
                    for i, this_word in enumerate(subtitle):
                        start = self.format_timestamp(this_word["start"])
                        end = self.format_timestamp(this_word["end"])
                        if last != start:
                            yield last, start, subtitle_text

                        # Underline the current word while keeping its leading
                        # whitespace/newline outside the tag. The previous
                        # replacement r"\1\2" was a no-op, so nothing was
                        # ever highlighted.
                        yield start, end, "".join(
                            [
                                re.sub(r"^(\s*)(.*)$", r"\1<u>\2</u>", word)
                                if j == i
                                else word
                                for j, word in enumerate(all_words)
                            ]
                        )
                        last = end
                else:
                    yield subtitle_start, subtitle_end, subtitle_text
        else:
            for segment in result["segments"]:
                segment_start = self.format_timestamp(segment["start"])
                segment_end = self.format_timestamp(segment["end"])
                # "-->" is the cue-timing separator, so it must not appear in cue text
                segment_text = segment["text"].strip().replace("-->", "->")
                yield segment_start, segment_end, segment_text

    def format_timestamp(self, seconds: float):
        """Render `seconds` using this writer's hour and decimal-marker settings."""
        return format_timestamp(
            seconds=seconds,
            always_include_hours=self.always_include_hours,
            decimal_marker=self.decimal_marker,
        )
def get_writer(
    output_format: str, output_dir: str
) -> Callable[[dict, TextIO, dict], None]:
    """
    Build the writer callable for `output_format`, targeting `output_dir`.

    A single format name yields an instance of the matching writer class.
    The special name "all" yields a function that forwards its arguments to
    one instance of every known writer, in registration order. An unknown
    format name raises KeyError.
    """
    writers = {
        "txt": WriteTXT,
        "vtt": WriteVTT,
        "srt": WriteSRT,
        "tsv": WriteTSV,
        "json": WriteJSON,
    }

    # common case first: exactly one concrete format was requested
    if output_format != "all":
        return writers[output_format](output_dir)

    all_writers = [writer_cls(output_dir) for writer_cls in writers.values()]

    def write_all(
        result: dict, file: TextIO, options: Optional[dict] = None, **kwargs
    ):
        for each_writer in all_writers:
            each_writer(result, file, options, **kwargs)

    return write_all
20 50 | 470 51 | 1021 52 | 23 53 | 54 | 55 | 56 | 0 57 | 58 | 59 | 60 | 61 | 62 | 20 63 | 420 64 | 871 65 | 41 66 | 67 | 68 | 69 | 70 | 71 | 72 | 73 | 74 | 75 | 550 76 | 380 77 | 241 78 | 34 79 | 80 | 81 | 82 | Open Output Folder 83 | 84 | 85 | 86 | 87 | 88 | 10 89 | 180 90 | 141 91 | 34 92 | 93 | 94 | 95 | Output Location 96 | 97 | 98 | 99 | 100 | 101 | 160 102 | 180 103 | 861 104 | 32 105 | 106 | 107 | 108 | 109 | 110 | 111 | true 112 | 113 | 114 | 115 | 116 | 117 | 160 118 | 10 119 | 871 120 | 161 121 | 122 | 123 | 124 | List of files to generate transcribe audio / generate subtitles 125 | 126 | 127 | Qt::AlignLeading|Qt::AlignLeft|Qt::AlignTop 128 | 129 | 130 | false 131 | 132 | 133 | false 134 | 135 | 136 | 137 | 138 | 10 139 | 30 140 | 851 141 | 121 142 | 143 | 144 | 145 | 146 | 147 | 148 | 149 | 10 150 | 50 151 | 141 152 | 34 153 | 154 | 155 | 156 | Remove file(s) 157 | 158 | 159 | 160 | 161 | 162 | 20 163 | 500 164 | 131 165 | 41 166 | 167 | 168 | 169 | 170 | 171 | 172 | 173 | 174 | 175 | 470 176 | 510 177 | 108 178 | 36 179 | 180 | 181 | 182 | Cancel 183 | 184 | 185 | 186 | 187 | 188 | 10 189 | 220 190 | 291 191 | 32 192 | 193 | 194 | 195 | Open output files automatically 196 | 197 | 198 | true 199 | 200 | 201 | 202 | 203 | 204 | 200 205 | 250 206 | 591 207 | 38 208 | 209 | 210 | 211 | 212 | 213 | 214 | Audio Language: 215 | 216 | 217 | 218 | 219 | 220 | 221 | QComboBox::AdjustToContents 222 | 223 | 224 | 225 | 226 | 227 | 228 | 229 | 230 | 200 231 | 290 232 | 591 233 | 41 234 | 235 | 236 | 237 | 238 | 239 | 240 | Engine: 241 | 242 | 243 | 244 | 245 | 246 | 247 | Google Speech (cloud processing) 248 | 249 | 250 | true 251 | 252 | 253 | 254 | 255 | 256 | 257 | true 258 | 259 | 260 | openAI Whisper (local processing) 261 | 262 | 263 | true 264 | 265 | 266 | 267 | 268 | 269 | 270 | 271 | 272 | 200 273 | 330 274 | 611 275 | 31 276 | 277 | 278 | 279 | 280 | 281 | 282 | true 283 | 284 | 285 | Models: 286 | 287 | 288 | 289 | 290 | 291 | 292 | Tiny 293 | 294 | 295 
| true 296 | 297 | 298 | 299 | 300 | 301 | 302 | true 303 | 304 | 305 | Base 306 | 307 | 308 | true 309 | 310 | 311 | 312 | 313 | 314 | 315 | true 316 | 317 | 318 | Small 319 | 320 | 321 | true 322 | 323 | 324 | 325 | 326 | 327 | 328 | true 329 | 330 | 331 | Medium 332 | 333 | 334 | true 335 | 336 | 337 | 338 | 339 | 340 | 341 | true 342 | 343 | 344 | Large 345 | 346 | 347 | true 348 | 349 | 350 | 351 | 352 | 353 | 354 | 355 | 356 | 357 | 0 358 | 0 359 | 1045 360 | 23 361 | 362 | 363 | 364 | 365 | Abo&ut 366 | 367 | 368 | 369 | 370 | 371 | 372 | 373 | &Settings 374 | 375 | 376 | 377 | 378 | 379 | &Language 380 | 381 | 382 | 383 | 384 | 385 | 386 | 387 | 388 | 389 | 390 | 391 | 392 | 393 | &License 394 | 395 | 396 | 397 | 398 | &Funding at Github Sponsors 399 | 400 | 401 | 402 | 403 | &About pyTranscriber 404 | 405 | 406 | 407 | 408 | &Proxy 409 | 410 | 411 | Proxy setting 412 | 413 | 414 | 415 | 416 | English 417 | 418 | 419 | 420 | 421 | 繁體中文 - Chinese Traditional 422 | 423 | 424 | 425 | 426 | 简体中文 - Chinese Simplified 427 | 428 | 429 | 430 | 431 | Português 432 | 433 | 434 | 435 | 436 | 437 | 438 | -------------------------------------------------------------------------------- /whisper/model.py: -------------------------------------------------------------------------------- 1 | import base64 2 | import gzip 3 | from contextlib import contextmanager 4 | from dataclasses import dataclass 5 | from typing import Dict, Iterable, Optional, Tuple 6 | 7 | import numpy as np 8 | import torch 9 | import torch.nn.functional as F 10 | from torch import Tensor, nn 11 | 12 | from .decoding import decode as decode_function 13 | from .decoding import detect_language as detect_language_function 14 | from .transcribe import transcribe as transcribe_function 15 | 16 | try: 17 | from torch.nn.functional import scaled_dot_product_attention 18 | 19 | SDPA_AVAILABLE = True 20 | except (ImportError, RuntimeError, OSError): 21 | scaled_dot_product_attention = None 22 | SDPA_AVAILABLE = 
def sinusoids(length, channels, max_timescale=10000):
    """Build a (length, channels) table of sinusoids for positional embedding.

    The first half of the channels holds sines and the second half cosines of
    the position scaled by geometrically decreasing inverse timescales.
    `channels` must be even.
    """
    assert channels % 2 == 0
    half = channels // 2
    log_timescale_increment = np.log(max_timescale) / (half - 1)
    inv_timescales = torch.exp(-log_timescale_increment * torch.arange(half))
    positions = torch.arange(length)
    # outer product: one row per position, one column per timescale
    scaled_time = positions.unsqueeze(1) * inv_timescales.unsqueeze(0)
    sin_part = torch.sin(scaled_time)
    cos_part = torch.cos(scaled_time)
    return torch.cat([sin_part, cos_part], dim=1)
class ResidualAttentionBlock(nn.Module):
    """
    One residual block: self-attention, optional cross-attention, then an MLP.

    Each sub-layer is applied to a layer-normalised copy of the input and its
    output is added back onto the input (residual connection).
    """

    def __init__(self, n_state: int, n_head: int, cross_attention: bool = False):
        """
        Parameters
        ----------
        n_state : int
            width passed to the attention, layer-norm and MLP layers
        n_head : int
            number of attention heads
        cross_attention : bool
            when True, also create the cross-attention sub-layer and its layer norm
        """
        super().__init__()

        self.attn = MultiHeadAttention(n_state, n_head)
        self.attn_ln = LayerNorm(n_state)

        # both stay None when the block has no cross-attention
        self.cross_attn = (
            MultiHeadAttention(n_state, n_head) if cross_attention else None
        )
        self.cross_attn_ln = LayerNorm(n_state) if cross_attention else None

        # the MLP expands to four times the state width and projects back
        n_mlp = n_state * 4
        self.mlp = nn.Sequential(
            Linear(n_state, n_mlp), nn.GELU(), Linear(n_mlp, n_state)
        )
        self.mlp_ln = LayerNorm(n_state)

    def forward(
        self,
        x: Tensor,
        xa: Optional[Tensor] = None,
        mask: Optional[Tensor] = None,
        kv_cache: Optional[dict] = None,
    ):
        """
        Apply the block to `x` and return the updated tensor.

        `mask` is forwarded to self-attention only; `xa` is forwarded to
        cross-attention only; `kv_cache` is forwarded to both.
        """
        # the attention modules return a pair; [0] keeps the projected output
        x = x + self.attn(self.attn_ln(x), mask=mask, kv_cache=kv_cache)[0]
        if self.cross_attn:
            x = x + self.cross_attn(self.cross_attn_ln(x), xa, kv_cache=kv_cache)[0]
        x = x + self.mlp(self.mlp_ln(x))
        return x
n_state: int, n_head: int, n_layer: int 210 | ): 211 | super().__init__() 212 | 213 | self.token_embedding = nn.Embedding(n_vocab, n_state) 214 | self.positional_embedding = nn.Parameter(torch.empty(n_ctx, n_state)) 215 | 216 | self.blocks: Iterable[ResidualAttentionBlock] = nn.ModuleList( 217 | [ 218 | ResidualAttentionBlock(n_state, n_head, cross_attention=True) 219 | for _ in range(n_layer) 220 | ] 221 | ) 222 | self.ln = LayerNorm(n_state) 223 | 224 | mask = torch.empty(n_ctx, n_ctx).fill_(-np.inf).triu_(1) 225 | self.register_buffer("mask", mask, persistent=False) 226 | 227 | def forward(self, x: Tensor, xa: Tensor, kv_cache: Optional[dict] = None): 228 | """ 229 | x : torch.LongTensor, shape = (batch_size, <= n_ctx) 230 | the text tokens 231 | xa : torch.Tensor, shape = (batch_size, n_audio_ctx, n_audio_state) 232 | the encoded audio features to be attended on 233 | """ 234 | offset = next(iter(kv_cache.values())).shape[1] if kv_cache else 0 235 | x = ( 236 | self.token_embedding(x) 237 | + self.positional_embedding[offset : offset + x.shape[-1]] 238 | ) 239 | x = x.to(xa.dtype) 240 | 241 | for block in self.blocks: 242 | x = block(x, xa, mask=self.mask, kv_cache=kv_cache) 243 | 244 | x = self.ln(x) 245 | logits = ( 246 | x @ torch.transpose(self.token_embedding.weight.to(x.dtype), 0, 1) 247 | ).float() 248 | 249 | return logits 250 | 251 | 252 | class Whisper(nn.Module): 253 | def __init__(self, dims: ModelDimensions): 254 | super().__init__() 255 | self.dims = dims 256 | self.encoder = AudioEncoder( 257 | self.dims.n_mels, 258 | self.dims.n_audio_ctx, 259 | self.dims.n_audio_state, 260 | self.dims.n_audio_head, 261 | self.dims.n_audio_layer, 262 | ) 263 | self.decoder = TextDecoder( 264 | self.dims.n_vocab, 265 | self.dims.n_text_ctx, 266 | self.dims.n_text_state, 267 | self.dims.n_text_head, 268 | self.dims.n_text_layer, 269 | ) 270 | # use the last half among the decoder layers for time alignment by default; 271 | # to use a specific set of heads, see 
`set_alignment_heads()` below. 272 | all_heads = torch.zeros( 273 | self.dims.n_text_layer, self.dims.n_text_head, dtype=torch.bool 274 | ) 275 | all_heads[self.dims.n_text_layer // 2 :] = True 276 | self.register_buffer("alignment_heads", all_heads.to_sparse(), persistent=False) 277 | 278 | def set_alignment_heads(self, dump: bytes): 279 | array = np.frombuffer( 280 | gzip.decompress(base64.b85decode(dump)), dtype=bool 281 | ).copy() 282 | mask = torch.from_numpy(array).reshape( 283 | self.dims.n_text_layer, self.dims.n_text_head 284 | ) 285 | self.register_buffer("alignment_heads", mask.to_sparse(), persistent=False) 286 | 287 | def embed_audio(self, mel: torch.Tensor): 288 | return self.encoder(mel) 289 | 290 | def logits(self, tokens: torch.Tensor, audio_features: torch.Tensor): 291 | return self.decoder(tokens, audio_features) 292 | 293 | def forward( 294 | self, mel: torch.Tensor, tokens: torch.Tensor 295 | ) -> Dict[str, torch.Tensor]: 296 | return self.decoder(tokens, self.encoder(mel)) 297 | 298 | @property 299 | def device(self): 300 | return next(self.parameters()).device 301 | 302 | @property 303 | def is_multilingual(self): 304 | return self.dims.n_vocab >= 51865 305 | 306 | @property 307 | def num_languages(self): 308 | return self.dims.n_vocab - 51765 - int(self.is_multilingual) 309 | 310 | def install_kv_cache_hooks(self, cache: Optional[dict] = None): 311 | """ 312 | The `MultiHeadAttention` module optionally accepts `kv_cache` which stores the key and value 313 | tensors calculated for the previous positions. This method returns a dictionary that stores 314 | all caches, and the necessary hooks for the key and value projection modules that save the 315 | intermediate tensors to be reused during later calculations. 
316 | 317 | Returns 318 | ------- 319 | cache : Dict[nn.Module, torch.Tensor] 320 | A dictionary object mapping the key/value projection modules to its cache 321 | hooks : List[RemovableHandle] 322 | List of PyTorch RemovableHandle objects to stop the hooks to be called 323 | """ 324 | cache = {**cache} if cache is not None else {} 325 | hooks = [] 326 | 327 | def save_to_cache(module, _, output): 328 | if module not in cache or output.shape[1] > self.dims.n_text_ctx: 329 | # save as-is, for the first token or cross attention 330 | cache[module] = output 331 | else: 332 | cache[module] = torch.cat([cache[module], output], dim=1).detach() 333 | return cache[module] 334 | 335 | def install_hooks(layer: nn.Module): 336 | if isinstance(layer, MultiHeadAttention): 337 | hooks.append(layer.key.register_forward_hook(save_to_cache)) 338 | hooks.append(layer.value.register_forward_hook(save_to_cache)) 339 | 340 | self.decoder.apply(install_hooks) 341 | return cache, hooks 342 | 343 | detect_language = detect_language_function 344 | transcribe = transcribe_function 345 | decode = decode_function 346 | -------------------------------------------------------------------------------- /pytranscriber/gui/main/window_main.py: -------------------------------------------------------------------------------- 1 | # -*- coding: utf-8 -*- 2 | 3 | # Form implementation generated from reading ui file 'window_main.ui' 4 | # 5 | # Created by: PyQt5 UI code generator 5.15.4 6 | # 7 | # WARNING: Any manual changes made to this file will be lost when pyuic5 is 8 | # run again. Do not edit this file unless you know what you are doing. 
from PyQt5 import QtCore, QtGui, QtWidgets


class Ui_window(object):
    """Widget tree of the pyTranscriber main window.

    Corresponds to the form 'window_main.ui'; regenerating this module with
    pyuic5 overwrites any manual change made here.
    """

    def setupUi(self, window):
        """Create every widget, menu and action on ``window`` and expose each
        one as an attribute of ``self``."""
        rect = QtCore.QRect

        window.setObjectName("window")
        window.resize(1045, 610)

        self.centralwidget = central = QtWidgets.QWidget(window)
        central.setObjectName("centralwidget")

        # --- file selection, conversion and progress widgets ---------------
        self.bSelectMedia = select_media = QtWidgets.QPushButton(central)
        select_media.setGeometry(rect(10, 10, 141, 34))
        select_media.setObjectName("bSelectMedia")

        self.bConvert = convert = QtWidgets.QPushButton(central)
        convert.setEnabled(False)
        convert.setGeometry(rect(200, 380, 341, 34))
        convert.setObjectName("bConvert")

        self.progressBar = progress = QtWidgets.QProgressBar(central)
        progress.setGeometry(rect(20, 470, 1021, 23))
        progress.setProperty("value", 0)
        progress.setObjectName("progressBar")

        self.labelCurrentOperation = current_op = QtWidgets.QLabel(central)
        current_op.setGeometry(rect(20, 420, 871, 41))
        current_op.setText("")
        current_op.setObjectName("labelCurrentOperation")

        self.bOpenOutputFolder = open_output = QtWidgets.QPushButton(central)
        open_output.setGeometry(rect(550, 380, 241, 34))
        open_output.setObjectName("bOpenOutputFolder")

        self.bSelectOutputFolder = select_output = QtWidgets.QPushButton(central)
        select_output.setGeometry(rect(10, 180, 141, 34))
        select_output.setObjectName("bSelectOutputFolder")

        self.qleOutputFolder = output_path = QtWidgets.QLineEdit(central)
        output_path.setGeometry(rect(160, 180, 861, 32))
        output_path.setText("")
        output_path.setReadOnly(True)
        output_path.setObjectName("qleOutputFolder")

        # --- list of selected files ----------------------------------------
        title_alignment = (
            QtCore.Qt.AlignLeading | QtCore.Qt.AlignLeft | QtCore.Qt.AlignTop
        )
        self.groupBox = files_box = QtWidgets.QGroupBox(central)
        files_box.setGeometry(rect(160, 10, 871, 161))
        files_box.setAlignment(title_alignment)
        files_box.setFlat(False)
        files_box.setCheckable(False)
        files_box.setObjectName("groupBox")

        self.qlwListFilesSelected = files_list = QtWidgets.QListWidget(files_box)
        files_list.setGeometry(rect(10, 30, 851, 121))
        files_list.setObjectName("qlwListFilesSelected")

        self.bRemoveFile = remove_file = QtWidgets.QPushButton(central)
        remove_file.setGeometry(rect(10, 50, 141, 34))
        remove_file.setObjectName("bRemoveFile")

        self.labelProgressFileIndex = file_index = QtWidgets.QLabel(central)
        file_index.setGeometry(rect(20, 500, 131, 41))
        file_index.setText("")
        file_index.setObjectName("labelProgressFileIndex")

        self.bCancel = cancel = QtWidgets.QPushButton(central)
        cancel.setGeometry(rect(470, 510, 108, 36))
        cancel.setObjectName("bCancel")

        self.chbxOpenOutputFilesAuto = auto_open = QtWidgets.QCheckBox(central)
        auto_open.setGeometry(rect(10, 220, 291, 32))
        auto_open.setChecked(True)
        auto_open.setObjectName("chbxOpenOutputFilesAuto")

        def make_row(geometry, holder_name, layout_name):
            # One horizontal strip: a plain container widget carrying a
            # margin-less QHBoxLayout.
            holder = QtWidgets.QWidget(central)
            holder.setGeometry(geometry)
            holder.setObjectName(holder_name)
            row_layout = QtWidgets.QHBoxLayout(holder)
            row_layout.setContentsMargins(0, 0, 0, 0)
            row_layout.setObjectName(layout_name)
            return holder, row_layout

        # --- row 1: audio language -----------------------------------------
        lang_row, lang_layout = make_row(
            rect(200, 250, 591, 38), "horizontalLayoutWidget", "horizontalLayout_5"
        )
        self.horizontalLayoutWidget = lang_row
        self.horizontalLayout_5 = lang_layout

        self.labelSelectLang = lang_label = QtWidgets.QLabel(lang_row)
        lang_label.setObjectName("labelSelectLang")
        lang_layout.addWidget(lang_label)

        self.cbSelectLang = lang_combo = QtWidgets.QComboBox(lang_row)
        lang_combo.setSizeAdjustPolicy(QtWidgets.QComboBox.AdjustToContents)
        lang_combo.setObjectName("cbSelectLang")
        lang_layout.addWidget(lang_combo)

        # --- row 2: engine choice ------------------------------------------
        engine_row, engine_layout = make_row(
            rect(200, 290, 591, 41), "horizontalLayoutWidget_2", "horizontalLayout"
        )
        self.horizontalLayoutWidget_2 = engine_row
        self.horizontalLayout = engine_layout

        self.lEngine = engine_label = QtWidgets.QLabel(engine_row)
        engine_label.setObjectName("lEngine")
        engine_layout.addWidget(engine_label)

        self.rbGoogleEngine = google_radio = QtWidgets.QRadioButton(engine_row)
        google_radio.setChecked(True)
        google_radio.setObjectName("rbGoogleEngine")
        engine_layout.addWidget(google_radio)

        self.rbWhisper = whisper_radio = QtWidgets.QRadioButton(engine_row)
        whisper_radio.setEnabled(True)
        whisper_radio.setCheckable(True)
        whisper_radio.setObjectName("rbWhisper")
        engine_layout.addWidget(whisper_radio)

        # --- row 3: model size ---------------------------------------------
        models_row, models_layout = make_row(
            rect(200, 330, 611, 31), "horizontalLayoutWidget_3", "horizontalLayout_2"
        )
        self.horizontalLayoutWidget_3 = models_row
        self.horizontalLayout_2 = models_layout

        self.lModels = models_label = QtWidgets.QLabel(models_row)
        models_label.setEnabled(True)
        models_label.setObjectName("lModels")
        models_layout.addWidget(models_label)

        self.rbModelTiny = tiny_radio = QtWidgets.QRadioButton(models_row)
        tiny_radio.setChecked(True)
        tiny_radio.setObjectName("rbModelTiny")
        models_layout.addWidget(tiny_radio)

        # The remaining sizes are configured identically; the attribute name
        # doubles as the Qt object name.
        for radio_name in (
            "rbModelBase",
            "rbModelSmall",
            "rbModelMedium",
            "rbModelLarge",
        ):
            size_radio = QtWidgets.QRadioButton(models_row)
            size_radio.setEnabled(True)
            size_radio.setCheckable(True)
            size_radio.setObjectName(radio_name)
            models_layout.addWidget(size_radio)
            setattr(self, radio_name, size_radio)

        window.setCentralWidget(central)

        # --- menu bar, menus and status bar --------------------------------
        self.menubar = menubar = QtWidgets.QMenuBar(window)
        menubar.setGeometry(rect(0, 0, 1045, 23))
        menubar.setObjectName("menubar")

        for menu_name in ("menuAbout", "menuProxy", "menuLanguage"):
            menu = QtWidgets.QMenu(menubar)
            menu.setObjectName(menu_name)
            setattr(self, menu_name, menu)
        window.setMenuBar(menubar)

        self.statusbar = statusbar = QtWidgets.QStatusBar(window)
        statusbar.setObjectName("statusbar")
        window.setStatusBar(statusbar)

        # --- actions ---------------------------------------------------------
        for action_name in (
            "actionLicense",
            "actionDonation",
            "actionAbout_pyTranscriber",
            "actionProxy",
            "actionEnglish",
            "actionChineseTraditional",
            "actionChineseSimplified",
            "actionPortuguese",
        ):
            action = QtWidgets.QAction(window)
            action.setObjectName(action_name)
            setattr(self, action_name, action)

        for entry in (
            self.actionLicense,
            self.actionDonation,
            self.actionAbout_pyTranscriber,
        ):
            self.menuAbout.addAction(entry)

        self.menuProxy.addAction(self.actionProxy)

        for entry in (
            self.actionEnglish,
            self.actionChineseTraditional,
            self.actionChineseSimplified,
            self.actionPortuguese,
        ):
            self.menuLanguage.addAction(entry)

        for menu in (self.menuProxy, self.menuLanguage, self.menuAbout):
            menubar.addAction(menu.menuAction())

        self.retranslateUi(window)
        QtCore.QMetaObject.connectSlotsByName(window)

    def retranslateUi(self, window):
        """Set every user-visible caption through Qt's translation lookup,
        using the context name "window"."""
        _translate = QtCore.QCoreApplication.translate

        window.setWindowTitle(
            _translate("window", "pyTranscriber v2.1 - 13/07/2025")
        )

        # Buttons, group box and check box.
        self.bSelectMedia.setText(_translate("window", "Select file(s)"))
        self.bConvert.setText(
            _translate("window", "Transcribe Audio / Generate Subtitles")
        )
        self.bOpenOutputFolder.setText(_translate("window", "Open Output Folder"))
        self.bSelectOutputFolder.setText(_translate("window", "Output Location"))
        self.groupBox.setTitle(
            _translate(
                "window",
                "List of files to generate transcribe audio / generate subtitles",
            )
        )
        self.bRemoveFile.setText(_translate("window", "Remove file(s)"))
        self.bCancel.setText(_translate("window", "Cancel"))
        self.chbxOpenOutputFilesAuto.setText(
            _translate("window", "Open output files automatically")
        )

        # Language / engine / model rows.
        self.labelSelectLang.setText(_translate("window", "Audio Language:"))
        self.lEngine.setText(_translate("window", "Engine:"))
        self.rbGoogleEngine.setText(
            _translate("window", "Google Speech (cloud processing)")
        )
        self.rbWhisper.setText(
            _translate("window", "openAI Whisper (local processing)")
        )
        self.lModels.setText(_translate("window", "Models:"))
        self.rbModelTiny.setText(_translate("window", "Tiny"))
        self.rbModelBase.setText(_translate("window", "Base"))
        self.rbModelSmall.setText(_translate("window", "Small"))
        self.rbModelMedium.setText(_translate("window", "Medium"))
        self.rbModelLarge.setText(_translate("window", "Large"))

        # Menu titles.
        self.menuAbout.setTitle(_translate("window", "Abo&ut"))
        self.menuProxy.setTitle(_translate("window", "&Settings"))
        self.menuLanguage.setTitle(_translate("window", "&Language"))

        # Menu entries.
        self.actionLicense.setText(_translate("window", "&License"))
        self.actionDonation.setText(
            _translate("window", "&Funding at Github Sponsors")
        )
        self.actionAbout_pyTranscriber.setText(
            _translate("window", "&More about pyTranscriber")
        )
        self.actionProxy.setText(_translate("window", "&Proxy"))
        self.actionProxy.setToolTip(_translate("window", "Proxy setting"))
        self.actionEnglish.setText(_translate("window", "English"))
        self.actionChineseTraditional.setText(
            _translate("window", "繁體中文 - Chinese Traditional")
        )
        self.actionChineseSimplified.setText(
            _translate("window", "简体中文 - Chinese Simplified")
        )
        self.actionPortuguese.setText(_translate("window", "Português"))