├── __init__.py
├── pytranscriber
├── __init__.py
├── control
│ ├── __init__.py
│ ├── thread_cancel_autosub.py
│ ├── ctr_engine.py
│ ├── ctr_main.py
│ ├── ctr_proxy.py
│ ├── thread_exec_whisper.py
│ ├── ctr_db.py
│ ├── thread_exec_generic.py
│ ├── ctr_whisper.py
│ ├── thread_exec_autosub.py
│ └── ctr_autosub.py
├── gui
│ ├── __init__.py
│ ├── proxy
│ │ ├── __init__.py
│ │ ├── view_proxy.py
│ │ ├── window_proxy.py
│ │ └── window_proxy.ui
│ ├── Português.qm
│ ├── 简体中文 - Chinese Simplified.qm
│ ├── 繁體中文 - Chinese Traditional.qm
│ ├── message_util.py
│ ├── 简体中文 - Chinese Simplified.ts
│ ├── 繁體中文 - Chinese Traditional.ts
│ ├── Português.ts
│ ├── proxy.py
│ ├── proxy.ui
│ └── main
│ │ ├── window_main.ui
│ │ └── window_main.py
├── model
│ ├── __init__.py
│ ├── transcription_parameters.py
│ ├── whisper.py
│ └── google_speech.py
└── util
│ ├── __init__.py
│ ├── srtparser.py
│ └── util.py
├── MANIFEST.in
├── whisper
├── version.py
├── __main__.py
├── assets
│ └── mel_filters.npz
├── normalizers
│ ├── __init__.py
│ └── basic.py
├── triton_ops.py
├── audio.py
├── __init__.py
├── utils.py
└── model.py
├── nuitka-win-standalone.bat
├── doc
├── lightning.jpeg
├── pyTranscriber.png
├── screenshot1.png
├── screenshot2.png
├── screenshot3.png
├── entitlements.plist
└── technical_details.md
├── deployment
├── nuitka-win-standalone.bat
├── freeze-nuitka-win.bat
├── freeze-win.sh
├── freeze-linux.sh
├── freeze-linux-nuitka.sh
└── win
│ ├── script-installer-windows.iss
│ └── script-installer-windows-standalone.iss
├── pytranscriber.sqlite
├── patches
├── note.txt
├── autosub-0.3.13.patch
└── autosub-0.4.0.patch
├── .gitignore
├── freeze-mac.sh
├── .github
├── ISSUE_TEMPLATE
│ ├── feature_request.md
│ └── bug_report.md
├── FUNDING.yml
└── workflows
│ ├── mac-pyinstaller.yml
│ ├── linux-pyinstaller.yml
│ ├── linux-nuitka.yml
│ ├── win-nuitka.yml
│ └── win-pyinstaller-dev2.yml
├── requirements.txt
├── Pipfile
├── main.py
├── script-installer-windows-standalone.iss
├── autosub
├── formatters.py
└── constants.py
└── README.md
/__init__.py:
--------------------------------------------------------------------------------
1 |
--------------------------------------------------------------------------------
/pytranscriber/__init__.py:
--------------------------------------------------------------------------------
1 |
--------------------------------------------------------------------------------
/pytranscriber/control/__init__.py:
--------------------------------------------------------------------------------
1 |
--------------------------------------------------------------------------------
/pytranscriber/gui/__init__.py:
--------------------------------------------------------------------------------
1 |
--------------------------------------------------------------------------------
/pytranscriber/model/__init__.py:
--------------------------------------------------------------------------------
1 |
--------------------------------------------------------------------------------
/pytranscriber/util/__init__.py:
--------------------------------------------------------------------------------
1 |
--------------------------------------------------------------------------------
/pytranscriber/gui/proxy/__init__.py:
--------------------------------------------------------------------------------
1 |
--------------------------------------------------------------------------------
/MANIFEST.in:
--------------------------------------------------------------------------------
1 | include README.md
2 | include LICENSE
--------------------------------------------------------------------------------
/whisper/version.py:
--------------------------------------------------------------------------------
1 | __version__ = "20240930"
2 |
--------------------------------------------------------------------------------
/whisper/__main__.py:
--------------------------------------------------------------------------------
1 | from .transcribe import cli
2 |
3 | cli()
4 |
--------------------------------------------------------------------------------
/nuitka-win-standalone.bat:
--------------------------------------------------------------------------------
1 | nuitka --enable-plugin=pyqt5 main.py --disable-console --standalone
--------------------------------------------------------------------------------
/doc/lightning.jpeg:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/raryelcostasouza/pyTranscriber/HEAD/doc/lightning.jpeg
--------------------------------------------------------------------------------
/deployment/nuitka-win-standalone.bat:
--------------------------------------------------------------------------------
1 | nuitka --enable-plugin=pyqt5 main.py --disable-console --standalone
--------------------------------------------------------------------------------
/doc/pyTranscriber.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/raryelcostasouza/pyTranscriber/HEAD/doc/pyTranscriber.png
--------------------------------------------------------------------------------
/doc/screenshot1.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/raryelcostasouza/pyTranscriber/HEAD/doc/screenshot1.png
--------------------------------------------------------------------------------
/doc/screenshot2.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/raryelcostasouza/pyTranscriber/HEAD/doc/screenshot2.png
--------------------------------------------------------------------------------
/doc/screenshot3.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/raryelcostasouza/pyTranscriber/HEAD/doc/screenshot3.png
--------------------------------------------------------------------------------
/pytranscriber.sqlite:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/raryelcostasouza/pyTranscriber/HEAD/pytranscriber.sqlite
--------------------------------------------------------------------------------
/pytranscriber/gui/Português.qm:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/raryelcostasouza/pyTranscriber/HEAD/pytranscriber/gui/Português.qm
--------------------------------------------------------------------------------
/whisper/assets/mel_filters.npz:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/raryelcostasouza/pyTranscriber/HEAD/whisper/assets/mel_filters.npz
--------------------------------------------------------------------------------
/pytranscriber/gui/简体中文 - Chinese Simplified.qm:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/raryelcostasouza/pyTranscriber/HEAD/pytranscriber/gui/简体中文 - Chinese Simplified.qm
--------------------------------------------------------------------------------
/pytranscriber/gui/繁體中文 - Chinese Traditional.qm:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/raryelcostasouza/pyTranscriber/HEAD/pytranscriber/gui/繁體中文 - Chinese Traditional.qm
--------------------------------------------------------------------------------
/whisper/normalizers/__init__.py:
--------------------------------------------------------------------------------
1 | from .basic import BasicTextNormalizer as BasicTextNormalizer
2 | from .english import EnglishTextNormalizer as EnglishTextNormalizer
3 |
--------------------------------------------------------------------------------
/deployment/freeze-nuitka-win.bat:
--------------------------------------------------------------------------------
1 | nuitka --enable-plugin=pyqt5 --include-data-files="ffmpeg.exe"="./" --include-data-files="pytranscriber/gui/*.qm"="pytranscriber/gui/" main.py --onefile --disable-console
--------------------------------------------------------------------------------
/deployment/freeze-win.sh:
--------------------------------------------------------------------------------
#!/bin/bash

# Build the single-file Windows executable with PyInstaller.
# `pipenv run` executes the command inside the project's virtualenv; the former
# `pipenv shell` opened an interactive sub-shell, so the build only ran after that
# shell was closed (and outside the environment).
# `$(pwd)` replaces `$pwd`, which is an unset variable in bash and expanded to nothing.
pipenv run pyinstaller main.py --path="$(pwd)" --add-binary="ffmpeg.exe;." --add-data="pytranscriber/gui/*.qm;pytranscriber/gui/" --onefile --clean
5 |
--------------------------------------------------------------------------------
/deployment/freeze-linux.sh:
--------------------------------------------------------------------------------
1 | #!/bin/bash
2 |
3 | pipenv shell
4 | pyinstaller main.py main.spec --path="$(pwd)" --add-binary="ffmpeg:." --add-data="pytranscriber/gui/*.qm:pytranscriber/gui/" --onefile --clean
5 |
--------------------------------------------------------------------------------
/patches/note.txt:
--------------------------------------------------------------------------------
1 | The autosub version used for pyTranscriber had to be customized a little bit.
2 | The patch in this folder was made comparing the original autosub/__init__.py file from version 0.4.0 to the customized version I made.
3 |
--------------------------------------------------------------------------------
/.gitignore:
--------------------------------------------------------------------------------
1 | lib/
2 | python-libs/
3 | bin/
4 | *.spec
5 | *pyc
6 | *.egg-info
7 | *html
8 | build/
9 | tests/
10 | dist/
11 | .DS_Store
12 | MANIFEST
13 | *#*
14 | ffmpeg*
15 | notes.txt
16 |
17 | Pipfile.lock
18 |
19 | Pipfile
20 |
--------------------------------------------------------------------------------
/deployment/freeze-linux-nuitka.sh:
--------------------------------------------------------------------------------
#!/bin/bash

# Build the single-file Linux executable with Nuitka.
# `pipenv run` executes the command inside the project's virtualenv; `pipenv shell`
# would open an interactive sub-shell and delay the build until it exits.
# The whisper assets are a directory, so they are bundled with --include-data-dir
# using the SOURCE=TARGET form (the previous --include-data-files entry had no target).
pipenv run nuitka3 --enable-plugin=pyqt5 --include-data-files="ffmpeg"="./" \
    --include-data-files="pytranscriber/gui/*.qm"="pytranscriber/gui/" \
    --include-data-dir="venv/lib/python3.8/site-packages/whisper/assets"="whisper/assets" \
    main.py \
    --onefile
--------------------------------------------------------------------------------
/freeze-mac.sh:
--------------------------------------------------------------------------------
1 | #!/bin/bash
2 |
3 | pipenv shell
4 | pyinstaller main.py \
5 | --path="$(pwd)" \
6 | --add-binary="ffmpeg-bin/ffmpeg:." \
7 | --add-binary="pytranscriber.sqlite:." \
8 | --add-data="pytranscriber/gui/*.qm:pytranscriber/gui/" \
9 | --add-data="venv/lib/python3.8/site-packages/whisper/assets:whisper/assets" \
10 | --clean \
11 | --windowed \
12 | --noconfirm
13 |
14 |
--------------------------------------------------------------------------------
/doc/entitlements.plist:
--------------------------------------------------------------------------------
1 |
2 |
3 |
4 |
5 |
6 | com.apple.security.cs.allow-jit
7 |
8 | com.apple.security.cs.allow-unsigned-executable-memory
9 |
10 | com.apple.security.cs.disable-library-validation
11 |
12 |
13 |
--------------------------------------------------------------------------------
/.github/ISSUE_TEMPLATE/feature_request.md:
--------------------------------------------------------------------------------
1 | ---
2 | name: Feature request
3 | about: Suggest an idea for this project
4 | title: ''
5 | labels: ''
6 | assignees: ''
7 |
8 | ---
9 |
10 | **Is your feature request related to a problem? Please describe.**
11 | A clear and concise description of what the problem is. Ex. I'm always frustrated when [...]
12 |
13 | **Describe the solution you'd like**
14 | A clear and concise description of what you want to happen.
15 |
16 | **Describe alternatives you've considered**
17 | A clear and concise description of any alternative solutions or features you've considered.
18 |
19 | **Additional context**
20 | Add any other context or screenshots about the feature request here.
21 |
--------------------------------------------------------------------------------
/.github/FUNDING.yml:
--------------------------------------------------------------------------------
1 | # These are supported funding model platforms
2 |
3 | github: raryelcostasouza # Replace with up to 4 GitHub Sponsors-enabled usernames e.g., [user1, user2]
4 | patreon: # Replace with a single Patreon username
5 | open_collective: # Replace with a single Open Collective username
6 | ko_fi: pytranscriber # Replace with a single Ko-fi username
7 | tidelift: # Replace with a single Tidelift platform-name/package-name e.g., npm/babel
8 | community_bridge: # Replace with a single Community Bridge project-name e.g., cloud-foundry
9 | liberapay: # Replace with a single Liberapay username
10 | issuehunt: # Replace with a single IssueHunt username
11 | otechie: # Replace with a single Otechie username
12 | lfx_crowdfunding: # Replace with a single LFX Crowdfunding project-name e.g., cloud-foundry
13 | custom: # Replace with up to 4 custom sponsorship URLs e.g., ['link1', 'link2']
14 |
--------------------------------------------------------------------------------
/requirements.txt:
--------------------------------------------------------------------------------
1 | # CUDA-enabled PyTorch packages (CUDA 12.6)
2 | torch==2.3.0
3 | torchvision==0.18.0
4 | torchaudio==2.3.0
5 |
6 | # Ensure the extra index for PyTorch CUDA wheels
7 | --extra-index-url https://download.pytorch.org/whl/cu126
8 |
9 | # Other dependencies
10 | cachetools==4.2.4
11 | certifi==2024.7.4
12 | chardet==4.0.0
13 | charset-normalizer==2.0.6
14 | google-api-core==2.1.0
15 | google-api-python-client==2.24.0
16 | google-auth==2.3.0
17 | google-auth-httplib2==0.1.0
18 | google-auth-oauthlib==0.4.6
19 | googleapis-common-protos==1.53.0
20 | httplib2==0.20.1
21 | idna==3.7
22 | oauthlib==3.2.2
23 | progressbar==2.5
24 | protobuf==4.21.6
25 | pyasn1==0.4.8
26 | pyasn1-modules==0.2.8
27 | pyparsing==2.4.7
28 | pyqt5==5.15.10
29 | pyqt5-sip==12.13.0
30 | pysrt==1.1.2
31 | requests==2.32.0
32 | requests-oauthlib==1.3.0
33 | rsa==4.7.2
34 | six==1.16.0
35 | uritemplate==3.0.1
36 | urllib3==2.2.2
37 | openai-whisper
38 | platformdirs
39 |
--------------------------------------------------------------------------------
/.github/ISSUE_TEMPLATE/bug_report.md:
--------------------------------------------------------------------------------
1 | ---
2 | name: Bug report
3 | about: Create a report to help us improve
4 | title: ''
5 | labels: ''
6 | assignees: ''
7 |
8 | ---
9 |
10 | **Describe the bug**
11 | A clear and concise description of what the bug is.
12 |
13 | **To Reproduce**
14 | Steps to reproduce the behavior:
15 | 1. Go to '...'
16 | 2. Click on '....'
17 | 3. Scroll down to '....'
18 | 4. See error
19 |
20 | **Expected behavior**
21 | A clear and concise description of what you expected to happen.
22 |
23 | **Screenshots**
24 | If applicable, add screenshots to help explain your problem.
25 |
26 | **Desktop (please complete the following information):**
27 | - OS: [e.g. iOS]
28 | - Browser [e.g. chrome, safari]
29 | - Version [e.g. 22]
30 |
31 | **Smartphone (please complete the following information):**
32 | - Device: [e.g. iPhone6]
33 | - OS: [e.g. iOS8.1]
34 | - Browser [e.g. stock browser, safari]
35 | - Version [e.g. 22]
36 |
37 | **Additional context**
38 | Add any other context about the problem here.
39 |
--------------------------------------------------------------------------------
/Pipfile:
--------------------------------------------------------------------------------
1 | [[source]]
2 | url = "https://pypi.org/simple"
3 | verify_ssl = true
4 | name = "pypi"
5 |
6 | [packages]
7 | autosub = "*"
8 | pyqt5 = "==5.15.4"
9 | pyinstaller = "*"
10 | macholib = "*"
11 | cachetools = "==4.2.4"
12 | certifi = "==2021.10.8"
13 | chardet = "==4.0.0"
14 | charset-normalizer = "==2.0.6"
15 | google-api-core = "==2.1.0"
16 | google-api-python-client = "==2.24.0"
17 | google-auth = "==2.3.0"
18 | google-auth-httplib2 = "==0.1.0"
19 | google-auth-oauthlib = "==0.4.6"
20 | googleapis-common-protos = "==1.53.0"
21 | httplib2 = "==0.20.1"
22 | idna = "==3.2"
23 | oauthlib = "==3.1.1"
24 | progressbar = "==2.5"
25 | protobuf = "==3.18.1"
26 | pyasn1 = "==0.4.8"
27 | pyasn1-modules = "==0.2.8"
28 | pyparsing = "==2.4.7"
29 | pyqt5-qt5 = "==5.15.2"
30 | pyqt5-sip = "==12.9.0"
31 | pysrt = "==1.1.2"
32 | requests = "==2.26.0"
33 | requests-oauthlib = "==1.3.0"
34 | rsa = "==4.7.2"
35 | six = "==1.16.0"
36 | uritemplate = "==3.0.1"
37 | urllib3 = "==1.26.7"
38 | nuitka = "*"
39 | orderedset = "*"
40 | zstandard = "*"
41 |
42 | [dev-packages]
43 |
44 | [requires]
45 | python_version = "3.8"
46 |
--------------------------------------------------------------------------------
/main.py:
--------------------------------------------------------------------------------
1 | # (C) 2019 Raryel C. Souza
2 | # This program is free software: you can redistribute it and/or modify
3 | # it under the terms of the GNU General Public License as published by
4 | # the Free Software Foundation, either version 3 of the License, or
5 | # (at your option) any later version.
6 | # This program is distributed in the hope that it will be useful,
7 | # but WITHOUT ANY WARRANTY; without even the implied warranty of
8 | # MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
9 | # GNU General Public License for more details.
10 | # You should have received a copy of the GNU General Public License
11 | # along with this program. If not, see https://www.gnu.org/licenses/.
12 |
13 | from pytranscriber.control.ctr_main import Ctr_Main
14 | from pytranscriber.gui.message_util import MessageUtil
15 | import multiprocessing
16 | import sys
17 |
if __name__ == '__main__':
    # Must run first in frozen executables that use multiprocessing
    # (see the multiprocessing.freeze_support documentation).
    multiprocessing.freeze_support()

    try:
        # Constructing the main controller builds and shows the GUI.
        ctrMain = Ctr_Main()
    except Exception as error:
        # Report any start-up failure to the user and exit with an error status.
        MessageUtil.show_error_message(str(error), "Main Error")
        sys.exit(1)
    else:
        sys.exit(0)
27 |
28 |
29 |
--------------------------------------------------------------------------------
/pytranscriber/control/thread_cancel_autosub.py:
--------------------------------------------------------------------------------
1 | '''
2 | (C) 2025 Raryel C. Souza
3 | This program is free software: you can redistribute it and/or modify
4 | it under the terms of the GNU General Public License as published by
5 | the Free Software Foundation, either version 3 of the License, or
6 | (at your option) any later version.
7 | This program is distributed in the hope that it will be useful,
8 | but WITHOUT ANY WARRANTY; without even the implied warranty of
9 | MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
10 | GNU General Public License for more details.
11 | You should have received a copy of the GNU General Public License
12 | along with this program. If not, see https://www.gnu.org/licenses/.
13 | '''
14 |
15 | from PyQt5.QtCore import QThread
16 | from PyQt5.QtCore import pyqtSignal
17 |
18 |
class Thread_Cancel_Autosub(QThread):
    """
    QThread that calls cancel() on a given object and then emits
    signalTerminated.
    """

    # Emitted from run() after the wrapped object's cancel() has returned.
    signalTerminated = pyqtSignal()

    def __init__(self, pObjWT):
        """
        :param pObjWT: object exposing a cancel() method, invoked from run().
        """
        # Keep the target before initialising the QThread base class.
        self.objWT = pObjWT
        super().__init__()

    def run(self):
        """Cancel the wrapped object, then notify listeners."""
        target = self.objWT
        target.cancel()
        self.signalTerminated.emit()
29 |
--------------------------------------------------------------------------------
/pytranscriber/control/ctr_engine.py:
--------------------------------------------------------------------------------
1 | '''
2 | (C) 2025 Raryel C. Souza
3 | This program is free software: you can redistribute it and/or modify
4 | it under the terms of the GNU General Public License as published by
5 | the Free Software Foundation, either version 3 of the License, or
6 | (at your option) any later version.
7 | This program is distributed in the hope that it will be useful,
8 | but WITHOUT ANY WARRANTY; without even the implied warranty of
9 | MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
10 | GNU General Public License for more details.
11 | You should have received a copy of the GNU General Public License
12 | along with this program. If not, see https://www.gnu.org/licenses/.
13 | '''
14 |
class CtrEngine:
    """
    Class-level cancellation flag plus a helper for writing output files.

    All members are static; the flag is stored on the class itself.
    """

    # Default value so is_operation_canceled() is safe to call even when
    # init() has not been invoked yet (previously raised AttributeError).
    cancel = False

    @staticmethod
    def init():
        """Reset the cancellation flag to False."""
        CtrEngine.cancel = False

    @staticmethod
    def is_operation_canceled():
        """Return True if cancel_operation() was called since the last init()."""
        return CtrEngine.cancel

    @staticmethod
    def cancel_operation():
        """Set the cancellation flag to True."""
        CtrEngine.cancel = True

    @staticmethod
    def save_output_file(output_path, file_content):
        """
        Write file_content to output_path as UTF-8 bytes.

        :param output_path: path of the file to create or overwrite.
        :param file_content: text (str) to encode and write.
        """
        # The context manager guarantees the handle is closed even if the
        # encode or write step raises.
        with open(output_path, 'wb') as f:
            f.write(file_content.encode("utf-8"))
33 |
--------------------------------------------------------------------------------
/pytranscriber/model/transcription_parameters.py:
--------------------------------------------------------------------------------
1 | '''
2 | (C) 2019 Raryel C. Souza
3 | This program is free software: you can redistribute it and/or modify
4 | it under the terms of the GNU General Public License as published by
5 | the Free Software Foundation, either version 3 of the License, or
6 | (at your option) any later version.
7 | This program is distributed in the hope that it will be useful,
8 | but WITHOUT ANY WARRANTY; without even the implied warranty of
9 | MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
10 | GNU General Public License for more details.
11 | You should have received a copy of the GNU General Public License
12 | along with this program. If not, see https://www.gnu.org/licenses/.
13 | '''
14 |
class Transcription_Parameters():
    """
    Plain container for the settings of one transcription request.
    """

    def __init__(self, listFiles, outputFolder, langCode,
                 boolOpenOutputFilesAuto, proxies=None):
        """
        Store the given values unchanged as attributes of the same name.

        The whisper model is not part of the constructor; it starts as None
        and is assigned later through set_model_whisper().
        """
        # Optional settings first, then the mandatory ones.
        self.model_whisper = None
        self.proxies = proxies

        self.listFiles = listFiles
        self.outputFolder = outputFolder
        self.langCode = langCode
        self.boolOpenOutputFilesAuto = boolOpenOutputFilesAuto

    def set_model_whisper(self, model):
        """Remember the whisper model to use."""
        self.model_whisper = model

    def get_model_whisper(self):
        """Return the stored whisper model (None if never set)."""
        return self.model_whisper
--------------------------------------------------------------------------------
/pytranscriber/gui/message_util.py:
--------------------------------------------------------------------------------
1 | '''
2 | (C) 2025 Raryel C. Souza
3 | This program is free software: you can redistribute it and/or modify
4 | it under the terms of the GNU General Public License as published by
5 | the Free Software Foundation, either version 3 of the License, or
6 | (at your option) any later version.
7 | This program is distributed in the hope that it will be useful,
8 | but WITHOUT ANY WARRANTY; without even the implied warranty of
9 | MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
10 | GNU General Public License for more details.
11 | You should have received a copy of the GNU General Public License
12 | along with this program. If not, see https://www.gnu.org/licenses/.
13 | '''
14 |
15 | from PyQt5.QtWidgets import QMessageBox
16 |
17 |
class MessageUtil:
    """Static helpers that show a modal QMessageBox to the user."""

    @staticmethod
    def _show(icon, text, title):
        """Build a QMessageBox with the given icon, title and text and run it."""
        box = QMessageBox()
        box.setIcon(icon)

        box.setWindowTitle(title)
        box.setText(text)
        box.exec()

    @staticmethod
    def show_info_message(info_msg, title=""):
        """Show info_msg in a box with the Information icon."""
        MessageUtil._show(QMessageBox.Information, info_msg, title)

    @staticmethod
    def show_error_message(error_msg, title="Error"):
        """Show error_msg in a box with the Critical icon."""
        MessageUtil._show(QMessageBox.Critical, error_msg, title)
37 |
--------------------------------------------------------------------------------
/pytranscriber/control/ctr_main.py:
--------------------------------------------------------------------------------
1 | # (C) 2025 Raryel C. Souza
2 | # This program is free software: you can redistribute it and/or modify
3 | # it under the terms of the GNU General Public License as published by
4 | # the Free Software Foundation, either version 3 of the License, or
5 | # (at your option) any later version.
6 | # This program is distributed in the hope that it will be useful,
7 | # but WITHOUT ANY WARRANTY; without even the implied warranty of
8 | # MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
9 | # GNU General Public License for more details.
10 | # You should have received a copy of the GNU General Public License
11 | # along with this program. If not, see https://www.gnu.org/licenses/.
12 |
13 | from pytranscriber.control.ctr_proxy import Ctr_Proxy
14 | from pytranscriber.control.ctr_db import CtrDB
15 | from pytranscriber.gui.main.view_main import ViewMain
16 |
17 |
class Ctr_Main():
    """
    Main controller: creates the DB and proxy controllers and the main view,
    and keeps track of the last GUI language stored in the database.
    """

    def __init__(self):
        """Create the collaborators, restore the saved language, show the view."""
        # Construction order is kept as-is: the proxy controller and the view
        # both receive this controller instance.
        self.ctrDB = CtrDB()
        self.ctrProxy = Ctr_Proxy(self)

        self.last_language = None

        self.viewMain = ViewMain(self)

        self._load_last_language()
        self.viewMain.show()

    def save_last_language(self, language):
        """Replace the stored last-language entry with the given language."""
        db = self.ctrDB
        db.clear_last_language()
        db.save_last_language(language)

    def _load_last_language(self):
        """Apply the stored language to the view, if the DB returned one."""
        record = self.ctrDB.load_last_language()
        if record is None:
            return

        # The language value is taken from index 1 of the returned record.
        self.last_language = record[1]
        self.viewMain.set_gui_language(self.last_language)
41 |
42 |
43 |
--------------------------------------------------------------------------------
/deployment/win/script-installer-windows.iss:
--------------------------------------------------------------------------------
1 | ; Script generated by the Inno Setup Script Wizard.
2 | ; SEE THE DOCUMENTATION FOR DETAILS ON CREATING INNO SETUP SCRIPT FILES!
3 |
4 | [Setup]
5 | ; NOTE: The value of AppId uniquely identifies this application.
6 | ; Do not use the same AppId value in installers for other applications.
7 | ; (To generate a new GUID, click Tools | Generate GUID inside the IDE.)
8 | AppId={{5240AB76-FC62-4BFA-A1EF-FA49AF701F80}
9 | AppName=pyTranscriber
10 | AppVersion=1.9
11 | AppVerName=pyTranscriber 1.9
12 | AppPublisher=Raryel C. Souza
13 | AppPublisherURL=https://github.com/raryelcostasouza/pyTranscriber
14 | AppSupportURL=https://github.com/raryelcostasouza/pyTranscriber
15 | AppUpdatesURL=https://github.com/raryelcostasouza/pyTranscriber
16 | DefaultDirName={pf}\pyTranscriber
17 | DisableDirPage=yes
18 | DisableProgramGroupPage=yes
19 | LicenseFile=.\LICENSE
20 | OutputBaseFilename=setup
21 | Compression=lzma
22 | SolidCompression=yes
23 |
24 | [Languages]
25 | Name: "english"; MessagesFile: "compiler:Default.isl"
26 |
27 | [Tasks]
28 | Name: "desktopicon"; Description: "{cm:CreateDesktopIcon}"; GroupDescription: "{cm:AdditionalIcons}"; Flags: unchecked
29 |
30 | [Files]
31 | Source: ".\dist\pyTranscriber.exe"; DestDir: "{app}"; Flags: ignoreversion
32 | ; NOTE: Don't use "Flags: ignoreversion" on any shared system files
33 |
34 | [Icons]
35 | Name: "{commonprograms}\pyTranscriber"; Filename: "{app}\pyTranscriber.exe"
36 | Name: "{commondesktop}\pyTranscriber"; Filename: "{app}\pyTranscriber.exe"; Tasks: desktopicon
37 |
38 | [Run]
39 | Filename: "{app}\pyTranscriber.exe"; Description: "{cm:LaunchProgram,pyTranscriber}"; Flags: nowait postinstall skipifsilent
40 |
41 |
--------------------------------------------------------------------------------
/script-installer-windows-standalone.iss:
--------------------------------------------------------------------------------
1 | ; Script generated by the Inno Setup Script Wizard.
2 | ; SEE THE DOCUMENTATION FOR DETAILS ON CREATING INNO SETUP SCRIPT FILES!
3 |
4 | [Setup]
5 | ; NOTE: The value of AppId uniquely identifies this application.
6 | ; Do not use the same AppId value in installers for other applications.
7 | ; (To generate a new GUID, click Tools | Generate GUID inside the IDE.)
8 | AppId={{5240AB76-FC62-4BFA-A1EF-FA49AF701F80}
9 | AppName=pyTranscriber
10 | AppVersion=1.9
11 | AppVerName=pyTranscriber 1.9
12 | AppPublisher=Raryel C. Souza
13 | AppPublisherURL=https://github.com/raryelcostasouza/pyTranscriber
14 | AppSupportURL=https://github.com/raryelcostasouza/pyTranscriber
15 | AppUpdatesURL=https://github.com/raryelcostasouza/pyTranscriber
16 | DefaultDirName={pf}\pyTranscriber
17 | DisableDirPage=yes
18 | DisableProgramGroupPage=yes
19 | LicenseFile=.\LICENSE
20 | OutputBaseFilename=setup
21 | Compression=lzma
22 | SolidCompression=yes
23 |
24 | [Languages]
25 | Name: "english"; MessagesFile: "compiler:Default.isl"
26 |
27 | [Tasks]
28 | Name: "desktopicon"; Description: "{cm:CreateDesktopIcon}"; GroupDescription: "{cm:AdditionalIcons}"; Flags: unchecked
29 |
30 | [Files]
31 | Source: ".\main.dist\*"; DestDir: "{app}"; Flags: ignoreversion recursesubdirs
32 | ; NOTE: Don't use "Flags: ignoreversion" on any shared system files
33 |
34 | [Icons]
35 | Name: "{commonprograms}\pyTranscriber"; Filename: "{app}\pyTranscriber.exe"
36 | Name: "{commondesktop}\pyTranscriber"; Filename: "{app}\pyTranscriber.exe"; Tasks: desktopicon
37 |
38 | [Run]
39 | Filename: "{app}\pyTranscriber.exe"; Description: "{cm:LaunchProgram,pyTranscriber}"; Flags: nowait postinstall skipifsilent
40 |
41 |
--------------------------------------------------------------------------------
/deployment/win/script-installer-windows-standalone.iss:
--------------------------------------------------------------------------------
1 | ; Script generated by the Inno Setup Script Wizard.
2 | ; SEE THE DOCUMENTATION FOR DETAILS ON CREATING INNO SETUP SCRIPT FILES!
3 |
4 | [Setup]
5 | ; NOTE: The value of AppId uniquely identifies this application.
6 | ; Do not use the same AppId value in installers for other applications.
7 | ; (To generate a new GUID, click Tools | Generate GUID inside the IDE.)
8 | AppId={{5240AB76-FC62-4BFA-A1EF-FA49AF701F80}
9 | AppName=pyTranscriber
10 | AppVersion=1.9
11 | AppVerName=pyTranscriber 1.9
12 | AppPublisher=Raryel C. Souza
13 | AppPublisherURL=https://github.com/raryelcostasouza/pyTranscriber
14 | AppSupportURL=https://github.com/raryelcostasouza/pyTranscriber
15 | AppUpdatesURL=https://github.com/raryelcostasouza/pyTranscriber
16 | DefaultDirName={pf}\pyTranscriber
17 | DisableDirPage=yes
18 | DisableProgramGroupPage=yes
19 | LicenseFile=.\LICENSE
20 | OutputBaseFilename=setup
21 | Compression=lzma
22 | SolidCompression=yes
23 |
24 | [Languages]
25 | Name: "english"; MessagesFile: "compiler:Default.isl"
26 |
27 | [Tasks]
28 | Name: "desktopicon"; Description: "{cm:CreateDesktopIcon}"; GroupDescription: "{cm:AdditionalIcons}"; Flags: unchecked
29 |
30 | [Files]
31 | Source: ".\main.dist\*"; DestDir: "{app}"; Flags: ignoreversion recursesubdirs
32 | ; NOTE: Don't use "Flags: ignoreversion" on any shared system files
33 |
34 | [Icons]
35 | Name: "{commonprograms}\pyTranscriber"; Filename: "{app}\pyTranscriber.exe"
36 | Name: "{commondesktop}\pyTranscriber"; Filename: "{app}\pyTranscriber.exe"; Tasks: desktopicon
37 |
38 | [Run]
39 | Filename: "{app}\pyTranscriber.exe"; Description: "{cm:LaunchProgram,pyTranscriber}"; Flags: nowait postinstall skipifsilent
40 |
41 |
--------------------------------------------------------------------------------
/autosub/formatters.py:
--------------------------------------------------------------------------------
1 | """
2 | Defines subtitle formatters used by autosub.
3 | """
4 |
5 | # -*- coding: utf-8 -*-
6 | from __future__ import unicode_literals
7 |
8 | import json
9 |
10 | import pysrt
11 | import six
12 |
13 |
def srt_formatter(subtitles, padding_before=0, padding_after=0):
    """
    Render subtitles as SRT text.

    Each element of subtitles is ((start, end), text). The start time is moved
    earlier by padding_before (never below 0) and the end time later by
    padding_after. Entries are numbered from 1 and joined with newlines.
    """
    srt_file = pysrt.SubRipFile()
    for number, (time_range, caption) in enumerate(subtitles, start=1):
        begin, finish = time_range
        entry = pysrt.SubRipItem()
        entry.index = number
        entry.text = six.text_type(caption)
        entry.start.seconds = max(0, begin - padding_before)
        entry.end.seconds = finish + padding_after
        srt_file.append(entry)
    rendered = [six.text_type(entry) for entry in srt_file]
    return '\n'.join(rendered)
27 |
28 |
def vtt_formatter(subtitles, padding_before=0, padding_after=0):
    """
    Serialize a list of subtitles according to the VTT format, with optional time padding.

    The output is the SRT rendering prefixed with a WEBVTT header, with the
    millisecond separator of each timestamp changed from ',' to '.'.
    Only cue timing lines are rewritten, so commas inside the subtitle text
    are preserved (a blanket replace used to turn them into periods).
    """
    # Local import keeps this block self-contained; the module does not import re.
    import re

    srt_text = srt_formatter(subtitles, padding_before, padding_after)
    converted = []
    for line in srt_text.split('\n'):
        # Timing lines look like "00:00:01,000 --> 00:00:02,500".
        if ' --> ' in line:
            line = re.sub(r'(\d{2}:\d{2}:\d{2}),(\d{3})', r'\1.\2', line)
        converted.append(line)
    return 'WEBVTT\n\n' + '\n'.join(converted)
36 |
37 |
def json_formatter(subtitles):
    """
    Render subtitles as a JSON array of objects.

    Each ((start, end), text) element becomes an object with the keys
    'start', 'end' and 'content', in that order.
    """
    records = []
    for (start, end), text in subtitles:
        records.append({
            'start': start,
            'end': end,
            'content': text,
        })
    return json.dumps(records)
52 |
53 |
def raw_formatter(subtitles):
    """
    Serialize a list of subtitles as plain text.

    Timing information is discarded; the subtitle texts are joined with a
    single space (not a newline) in iteration order.
    """
    pieces = []
    for _timing, caption in subtitles:
        pieces.append(caption)
    return ' '.join(pieces)
59 |
60 |
# Registry of the available output formats: format name -> formatter function.
FORMATTERS = dict((
    ('srt', srt_formatter),
    ('vtt', vtt_formatter),
    ('json', json_formatter),
    ('raw', raw_formatter),
))
67 |
--------------------------------------------------------------------------------
/pytranscriber/util/srtparser.py:
--------------------------------------------------------------------------------
1 | '''
2 | (C) 2019 Raryel C. Souza
3 | This program is free software: you can redistribute it and/or modify
4 | it under the terms of the GNU General Public License as published by
5 | the Free Software Foundation, either version 3 of the License, or
6 | (at your option) any later version.
7 | This program is distributed in the hope that it will be useful,
8 | but WITHOUT ANY WARRANTY; without even the implied warranty of
9 | MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
10 | GNU General Public License for more details.
11 | You should have received a copy of the GNU General Public License
12 | along with this program. If not, see .
13 | '''
14 |
15 | import re, sys
16 |
class SRTParser(object):
    """Helpers that strip SRT bookkeeping lines to obtain a plain-text transcript."""

    @staticmethod
    def extractTextFromSRT(fileSRT):
        """
        Write the text content of an SRT file to a sibling ``.txt`` file.

        The input is read as UTF-8 (undecodable bytes are replaced), filtered
        with ``clean_up`` and written, as UTF-8, to the same path with its
        extension replaced by ``.txt``.

        :param fileSRT: path of the SRT file to read.
        """
        import os  # local import: the module only imports `re` and `sys`

        file_encoding = 'utf-8'

        with open(fileSRT, encoding=file_encoding, errors='replace') as f:
            lines = f.readlines()
        new_lines = SRTParser.clean_up(lines)

        # splitext handles extensions of any length, unlike slicing off the
        # last four characters.
        new_file_name = os.path.splitext(fileSRT)[0] + '.txt'

        with open(new_file_name, 'w', encoding=file_encoding) as f:
            f.writelines(new_lines)

    @staticmethod
    def clean_up(lines):
        """
        Return only the subtitle text lines from the lines of an SRT file.

        The first line is always skipped. After that, a line is dropped when
        it is blank, contains ``-->`` (a timing line) or consists solely of
        digits (a subtitle index). Text lines that merely start with a digit
        are kept.

        :param lines: list of lines, as returned by ``readlines()``.
        :return: list with the remaining lines, unchanged.
        """
        regexSubtitleIndexNumber = re.compile(r"[0-9]+")

        new_lines = []
        for line in lines[1:]:
            stripped = line.strip()
            if not stripped:
                continue
            if "-->" in line:
                continue
            # fullmatch: only a line made up entirely of digits is an index.
            if regexSubtitleIndexNumber.fullmatch(stripped):
                continue
            new_lines.append(line)
        return new_lines
50 |
--------------------------------------------------------------------------------
/whisper/normalizers/basic.py:
--------------------------------------------------------------------------------
1 | import re
2 | import unicodedata
3 |
4 | import regex
5 |
6 | # non-ASCII letters that are not separated by "NFKD" normalization
7 | ADDITIONAL_DIACRITICS = {
8 | "œ": "oe",
9 | "Œ": "OE",
10 | "ø": "o",
11 | "Ø": "O",
12 | "æ": "ae",
13 | "Æ": "AE",
14 | "ß": "ss",
15 | "ẞ": "SS",
16 | "đ": "d",
17 | "Đ": "D",
18 | "ð": "d",
19 | "Ð": "D",
20 | "þ": "th",
21 | "Þ": "th",
22 | "ł": "l",
23 | "Ł": "L",
24 | }
25 |
26 |
def remove_symbols_and_diacritics(s: str, keep=""):
    """
    Strip diacritics and turn markers, symbols and punctuation into spaces.

    The input is NFKD-normalized and then mapped character by character:
    characters found in `keep` are left as they are; characters listed in
    ADDITIONAL_DIACRITICS are replaced by their manual mapping; combining
    marks (category 'Mn') are dropped; any other character whose Unicode
    category starts with M, S or P becomes a single space; everything else
    is kept.
    """

    def _translate(ch):
        if ch in keep:
            return ch
        if ch in ADDITIONAL_DIACRITICS:
            return ADDITIONAL_DIACRITICS[ch]
        category = unicodedata.category(ch)
        if category == "Mn":
            return ""
        if category[0] in "MSP":
            return " "
        return ch

    return "".join(map(_translate, unicodedata.normalize("NFKD", s)))
44 |
45 |
def remove_symbols(s: str):
    """
    Turn markers, symbols and punctuation into spaces, keeping diacritics.

    The input is NFKC-normalized; every character whose Unicode category
    starts with M, S or P is replaced by a single space and all other
    characters are kept.
    """
    pieces = []
    for ch in unicodedata.normalize("NFKC", s):
        if unicodedata.category(ch)[0] in "MSP":
            pieces.append(" ")
        else:
            pieces.append(ch)
    return "".join(pieces)
54 |
55 |
class BasicTextNormalizer:
    """
    Callable text normalizer: lowercases, removes bracketed and parenthesized
    spans, replaces symbols (optionally also stripping diacritics) and
    collapses whitespace.
    """

    def __init__(self, remove_diacritics: bool = False, split_letters: bool = False):
        # Pick the cleaning function once, at construction time.
        if remove_diacritics:
            self.clean = remove_symbols_and_diacritics
        else:
            self.clean = remove_symbols
        self.split_letters = split_letters

    def __call__(self, s: str):
        lowered = s.lower()
        # Drop anything enclosed in <...> or [...].
        without_brackets = re.sub(r"[<\[][^>\]]*[>\]]", "", lowered)
        # Drop anything enclosed in (...).
        without_parens = re.sub(r"\(([^)]+?)\)", "", without_brackets)
        cleaned = self.clean(without_parens).lower()

        if self.split_letters:
            # \X matches one extended grapheme cluster; separate them by spaces.
            cleaned = " ".join(regex.findall(r"\X", cleaned, regex.U))

        # Collapse every run of whitespace into a single space.
        return re.sub(r"\s+", " ", cleaned)
77 |
--------------------------------------------------------------------------------
/pytranscriber/control/ctr_proxy.py:
--------------------------------------------------------------------------------
1 | '''
2 | (C) 2025 Raryel C. Souza
3 | This program is free software: you can redistribute it and/or modify
4 | it under the terms of the GNU General Public License as published by
5 | the Free Software Foundation, either version 3 of the License, or
6 | (at your option) any later version.
7 | This program is distributed in the hope that it will be useful,
8 | but WITHOUT ANY WARRANTY; without even the implied warranty of
9 | MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
10 | GNU General Public License for more details.
11 | You should have received a copy of the GNU General Public License
12 | along with this program. If not, see .
13 | '''
14 |
15 | from pytranscriber.util.util import MyUtil
16 | from pytranscriber.gui.message_util import MessageUtil
17 | from pytranscriber.gui.proxy.view_proxy import ViewProxy
18 |
19 |
class Ctr_Proxy():
    """
    Controller for the proxy settings: keeps the current proxy mapping,
    persists it through ``ctrMain.ctrDB`` and drives the ``ViewProxy`` dialog.
    """

    # Class-level default: no proxy configured for either scheme.
    proxy = dict.fromkeys(('http', 'https'))

    def __init__(self, ctrMain):
        self.ctrMain = ctrMain
        # The dialog wrapper is created lazily by show() / load_data().
        self.viewProxy = None

    def show(self):
        """Create the proxy view on first use and display it."""
        view = self.viewProxy
        if view is None:
            view = self.viewProxy = ViewProxy(self)
        view.show()

    def save(self):
        """
        Replace the stored proxy with the current one.

        The stored entry is always cleared first. If an 'https' address is
        set the mapping is saved; otherwise the user is told that the proxy
        was disabled.
        """
        database = self.ctrMain.ctrDB
        database.clear_proxy()
        if not self.proxy['https']:
            # nothing to store: the proxy has been switched off
            MessageUtil.show_info_message('Proxy disabled successfully', 'Proxy disabled')
            return
        database.save_proxy(self.proxy)

    def load_data(self):
        """Load the stored proxy (if any) and push it to the view."""
        if self.viewProxy is None:
            self.viewProxy = ViewProxy(self)

        stored = self.ctrMain.ctrDB.load_proxy()
        if stored is None:
            return
        # presumably index 1 of the stored record holds the proxy address;
        # verify against ctr_db
        self.set_proxy_setting(stored[1], False)

    def test_proxy_setting(self, proxy_addr):
        """Check connectivity through `proxy_addr` and report the outcome in a message box."""
        candidate = {'http': proxy_addr, 'https': proxy_addr}

        if MyUtil.is_internet_connected(candidate):
            MessageUtil.show_info_message('Successfully connected to Google.', 'Success')
        else:
            MessageUtil.show_error_message('Error connecting to Google.','Error')

    def set_proxy_setting(self, proxy_addr, frontend_request=False):
        """
        Use `proxy_addr` for both http and https.

        When the change comes from the GUI (`frontend_request` is true) it is
        saved; otherwise the view is refreshed to show the address.
        """
        self.proxy = {'http': proxy_addr, 'https': proxy_addr}
        if not frontend_request:
            self.viewProxy.refresh_gui(proxy_addr)
            return
        self.save()

    def get_proxy_setting(self):
        """Return the current proxy mapping (keys 'http' and 'https')."""
        return self.proxy
69 |
--------------------------------------------------------------------------------
/patches/autosub-0.3.13.patch:
--------------------------------------------------------------------------------
1 | --- __init__-old.py 2019-01-27 11:18:19.560918050 +0700
2 | +++ __init__.py 2019-01-24 09:27:17.057865917 +0700
3 | @@ -262,6 +262,14 @@
4 |
5 | return 0
6 |
7 | +def percentage(currentval, maxval):
8 | + return 100 * currentval / float(maxval)
9 | +
10 | +
11 | +def output_progress(listener_progress, str_task, progress_percent):
12 | + if listener_progress != None:
13 | + listener_progress(str_task,progress_percent)
14 | +
15 |
16 | def generate_subtitles(
17 | source_path,
18 | @@ -271,6 +279,7 @@
19 | dst_language=DEFAULT_DST_LANGUAGE,
20 | subtitle_file_format=DEFAULT_SUBTITLE_FORMAT,
21 | api_key=None,
22 | + listener_progress=None,
23 | ):
24 | audio_filename, audio_rate = extract_audio(source_path)
25 |
26 | @@ -284,21 +293,28 @@
27 | transcripts = []
28 | if regions:
29 | try:
30 | - widgets = ["Converting speech regions to FLAC files: ", Percentage(), ' ', Bar(), ' ',
31 | + str_task_1 = "Converting speech regions to FLAC files: "
32 | + widgets = [str_task_1, Percentage(), ' ', Bar(), ' ',
33 | ETA()]
34 | - pbar = ProgressBar(widgets=widgets, maxval=len(regions)).start()
35 | + len_regions = len(regions)
36 | + pbar = ProgressBar(widgets=widgets, maxval=len_regions).start()
37 | extracted_regions = []
38 | for i, extracted_region in enumerate(pool.imap(converter, regions)):
39 | extracted_regions.append(extracted_region)
40 | pbar.update(i)
41 | + progress_percent= percentage(i, len_regions)
42 | + output_progress(listener_progress,str_task_1,progress_percent)
43 | pbar.finish()
44 |
45 | - widgets = ["Performing speech recognition: ", Percentage(), ' ', Bar(), ' ', ETA()]
46 | + str_task_2 = "Performing speech recognition: "
47 | + widgets = [str_task_2, Percentage(), ' ', Bar(), ' ', ETA()]
48 | pbar = ProgressBar(widgets=widgets, maxval=len(regions)).start()
49 |
50 | for i, transcript in enumerate(pool.imap(recognizer, extracted_regions)):
51 | transcripts.append(transcript)
52 | pbar.update(i)
53 | + progress_percent= percentage(i, len_regions)
54 | + output_progress(listener_progress,str_task_2,progress_percent)
55 | pbar.finish()
56 |
57 | if not is_same_language(src_language, dst_language):
58 | @@ -349,4 +365,5 @@
59 |
60 |
61 | if __name__ == '__main__':
62 | + multiprocessing.freeze_support()
63 | sys.exit(main())
64 |
--------------------------------------------------------------------------------
/pytranscriber/gui/proxy/view_proxy.py:
--------------------------------------------------------------------------------
1 | from PyQt5.QtWidgets import QDialog
2 | from pytranscriber.gui.proxy.window_proxy import Ui_Dialog
3 | from pytranscriber.gui.message_util import MessageUtil
4 |
5 |
class ViewProxy:
    """Wrapper around the proxy settings dialog that forwards user actions to the proxy controller."""

    def __init__(self, ctr_proxy):
        self.ctr_proxy = ctr_proxy
        self.proxy_dialog = QDialog()
        ui = Ui_Dialog()
        ui.setupUi(self.proxy_dialog)

        # Keep references to the widgets this view works with.
        self.radioButtonNone = ui.radioButtonNone
        self.radioButtonHTTP = ui.radioButtonHTTP
        self.lineEditHttpProxy = ui.lineEditHttpProxy
        self.pushButtonTest = ui.pushButtonTest
        self.bSave = ui.bSave

        # Wire the widget signals to the listeners below.
        self.radioButtonNone.clicked.connect(self.__listener_rbOnClicked)
        self.lineEditHttpProxy.textChanged.connect(self.__listenerLineEditInput)
        self.pushButtonTest.clicked.connect(self.__listener_test)
        self.bSave.clicked.connect(self.__listener_save)

        # Start in the "no proxy" state.
        self.__clear_proxy_settings()

    def show(self):
        """Ask the controller to load its data, then run the dialog modally."""
        self.ctr_proxy.load_data()
        self.proxy_dialog.exec_()

    def __clear_proxy_settings(self):
        """Select "none" and disable the address field and the test button."""
        self.radioButtonNone.setChecked(True)
        for widget in (self.lineEditHttpProxy, self.pushButtonTest):
            widget.setEnabled(False)

    def refresh_gui(self, proxy_address=None):
        """Show `proxy_address` in the dialog, or reset the dialog when it is empty."""
        if proxy_address:
            self.radioButtonHTTP.setChecked(True)
            self.lineEditHttpProxy.setEnabled(True)
            self.pushButtonTest.setEnabled(True)
            self.lineEditHttpProxy.setText(str(proxy_address))
            return
        self.__clear_proxy_settings()

    def __listener_test(self):
        """Test the typed address, but only when the HTTP option is selected."""
        address = self.lineEditHttpProxy.text()

        if address and self.radioButtonHTTP.isChecked():
            self.ctr_proxy.test_proxy_setting(address)

    def __listener_save(self):
        """Hand the chosen setting to the controller as a front-end request."""
        address = self.lineEditHttpProxy.text()

        if address and self.radioButtonHTTP.isChecked():
            self.ctr_proxy.set_proxy_setting(address, True)
            return
        if self.radioButtonNone.isChecked():
            # an empty address means "proxy disabled"
            self.ctr_proxy.set_proxy_setting('',True)

    def __listener_rbOnClicked(self):
        """Clear the address field when "none" is selected."""
        if self.radioButtonNone.isChecked():
            self.lineEditHttpProxy.setText('')

    def __listenerLineEditInput(self):
        """Enable the test button only while the address field is not empty."""
        self.pushButtonTest.setEnabled(bool(self.lineEditHttpProxy.text()))
67 |
68 |
--------------------------------------------------------------------------------
/pytranscriber/control/thread_exec_whisper.py:
--------------------------------------------------------------------------------
1 | '''
2 | (C) 2025 Raryel C. Souza
3 | This program is free software: you can redistribute it and/or modify
4 | it under the terms of the GNU General Public License as published by
5 | the Free Software Foundation, either version 3 of the License, or
6 | (at your option) any later version.
7 | This program is distributed in the hope that it will be useful,
8 | but WITHOUT ANY WARRANTY; without even the implied warranty of
9 | MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
10 | GNU General Public License for more details.
11 | You should have received a copy of the GNU General Public License
12 | along with this program. If not, see .
13 | '''
14 |
15 | from pytranscriber.control.ctr_whisper import CtrWhisper
16 | from pytranscriber.control.thread_exec_generic import ThreadExecGeneric
17 | from pytranscriber.util.util import MyUtil
18 | import traceback
19 |
20 |
class Thread_Exec_Whisper(ThreadExecGeneric):
    """Worker thread that runs the Whisper engine over the selected files."""

    def run(self):
        """Initialise Whisper, process every selected file, then clear the running flag."""
        CtrWhisper.init()
        super()._loopSelectedFiles()
        self.running = False

    def _run_engine_for_media(self, index, langCode):
        """
        Transcribe the file at position `index` of the file list.

        Errors are reported through `signalErrorMsg`. On success the progress
        listener is told the job finished and, if configured, the TXT and SRT
        outputs are opened.
        """
        sourceFile = self.obj_transcription_parameters.listFiles[index]
        outputPaths = self._generatePathOutputFile(sourceFile)
        srtPath, txtPath = outputPaths[0], outputPaths[1]

        result = None
        try:
            result = CtrWhisper.generate_subtitles(
                source_path=sourceFile,
                outputSRT=srtPath,
                outputTXT=txtPath,
                src_language=langCode,
                model=self.obj_transcription_parameters.get_model_whisper(),
            )
        except Exception:
            # report the full traceback to the listener
            self.signalErrorMsg.emit("Error! Unable to generate subtitles: " + traceback.format_exc())

        if not result:
            # nothing was returned by the engine
            self.signalErrorMsg.emit("Error! Unable to generate subtitles for file " + sourceFile + ".")
            return

        if result != -1:
            # -1 marks a canceled operation; anything else is a completed run
            self.listenerProgress("Finished", 100)

            if self.obj_transcription_parameters.boolOpenOutputFilesAuto:
                # open both output files, TXT first and then SRT
                MyUtil.open_file(txtPath)
                MyUtil.open_file(srtPath)
--------------------------------------------------------------------------------
/doc/technical_details.md:
--------------------------------------------------------------------------------
1 |
For Developers - Technical Details
2 |
3 | This app consists basically of a friendly pyQt5 graphical interface for a customized version of Autosub 0.4.0 that can run on Linux, Windows and MacOS. All the hard work of processing the audio and generating the subtitles is done by Autosub.
4 |
5 |
Dependencies to build
6 |
7 |
8 |
pip3 install pipenv
9 |
pipenv install (install all dependencies from Pipfile)
10 |
Download the static ffmpeg binary and move it to project root folder
11 |
12 | # How to run?
13 | $ pipenv shell
14 | $ python3 main.py
15 |
16 |
17 | # How to edit the GUI?
18 | Install Qt5 Designer and open the file pytranscriber/gui/gui.ui
19 |
20 | # How to convert the .ui file (qt5designer project file) to .py?
21 | $ pyuic5 gui.ui -o gui.py
22 |
23 | # How to generate the python bundled binary package version with ffmpeg included?
24 |
25 | # Linux:
26 | $ pyinstaller main.py --path="$(pwd)" --add-binary="ffmpeg:." --onefile --clean
27 |
28 | # Windows:
29 | $ pyinstaller main.py --path=$pwd --add-binary="ffmpeg.exe;." --onefile --clean
30 |
31 | # Mac:
32 | $ pyinstaller main.py --path="$(pwd)" --add-binary="ffmpeg:." --clean --windowed
33 |
34 |
35 | The output binary will be on subfolder dist/main and has all dependencies included. For more details check pyinstaller documentation
36 |
37 | # On Linux how to generate a statically linked binary so it can run even on systems with older glibc installed?
38 |
39 | As explained in pyInstaller FAQ:
40 | > The executable that PyInstaller builds is not fully static, in that it still depends on the system libc. Under Linux, the ABI of GLIBC is backward compatible, but not forward compatible. So if you link against a newer GLIBC, you can't run the resulting executable on an older system.
41 |
42 | > Solution 1)To compile the Python interpreter with its modules (and also probably bootloader) on the oldest system you have around, so that it gets linked with the oldest version of GLIBC.
43 |
44 | > Solution 2) to use a tool like StaticX to create a fully-static bundled version of your PyInstaller application. StaticX bundles all dependencies, including libc and ld.so. (Python code :arrow_right: PyInstaller :arrow_right: StaticX :arrow_right: Fully-static application)"
45 |
46 | Install staticx and patchelf (dependency)
47 |
48 | $ pip3 install --user patchelf-wrapper
49 |
50 | $ pip3 install --user staticx
51 |
52 | After generating the binary with pyinstaller, open the dist folder and run:
53 |
54 | $ staticx main main-static
55 |
56 | The newly created main-static contains all library dependencies, including glibc, so it should be able to run even on very old systems.
57 |
58 | Note: On my Manjaro system, the first time I ran this command I got an error related to "libmpdec.so.2 => not found". Installing the package mpdecimal with the package manager solved the issue.
59 |
--------------------------------------------------------------------------------
/README.md:
--------------------------------------------------------------------------------
1 | # pyTranscriber
2 |
3 | [](https://www.paypal.com/cgi-bin/webscr?cmd=_donations&business=YHB854YHPJCU8&item_name=Donation+pyTranscriber&currency_code=BRL)
4 | [](https://github.com/raryelcostasouza/pyTranscriber/raw/master/doc/lightning.jpeg)
5 |
6 | [](https://ko-fi.com/A0A6AIR3D)
7 |
8 | MOVED TO NEW WEBSITE - https://pytranscriber.github.io
9 |
10 | More than 640k downloads since first version. Thanks!
11 | Check live statistics at GitHub Release Stats
12 |
13 | # Thanks to the people helping funding
14 | Jixing Zhao, Narsu Narsu, Lucas Thorn, Soonkj Jung, Sergio Moreno, Yooki Adair, Adrien Jarton, YP, JOY_EASY, RodrigoRios, Zhou Mi, Dongmei Chen, Jung Yi Hung, Tah Kwang Tomas Tso
15 |
16 | # UPDATE - v2.1-stable - 13/07/2025
17 | 1. Compiled torch with CUDA support enabled for faster whisper processing for those who have NVidia GPUs
18 |
19 | # UPDATE - v2.0-stable - 07/07/2025
20 | 1. Added binary for Linux (GLIBC 2.35 or newer)
21 |
22 | # UPDATE - v2.0-stable - 22/05/2025
23 | 1. Fixed issue with cantonese language not working using whisper engine
24 | 2. Fixed srt file generation not being compliant with srt syntax
25 |
26 | # UPDATE - v2.0-RC_1 - 04/03/2025
27 | 1. Added support for openAI Whisper with local processing of media files as alternative to Google Speech API (where all media file is uploaded to Google servers for processing)
28 | 2. Added saving/load settings to sqlite local db
29 |
30 |
31 | # UPDATE - v1.9 - 22/12/2022
32 | 1. Windows/Linux version compiled with Nuitka (https://github.com/Nuitka/Nuitka) instead of pyInstaller to improve stability and fix random crashes while transcribing audio. If you still experience issues please report at Issues section.
33 | 2. Support for Ogg/ogv/mkv/webm media files on file selector
34 |
35 | # UPDATE - v1.8 - 17/08/2022
36 | 1. Fixed bug: language codes for Chinese Languages updated accordingly to Speech API. Changed to "cmn-Hans-CN" and "cmn-Hant-TW" instead of "zh / zh-TW").. The output was always mistakenly coming in Cantonese (yue-Hant-HK). Now they come properly in Traditional Chinese and Simplified Chinese. Thanks to "Specter Hi" for reporting!
37 | 2. Added GUI language switch feature
38 | 3. Updated link to funding campaign at GitHub Sponsors
39 |
40 | # UPDATE - v1.7 - 08/08/2022
41 | 1. add proxy setting
42 | 2. change the function 'pytranscriber.util.MyUtil.is_internet_connected'
43 | 3. add requirements.txt
44 | 4. rebuilt using pyInstaller 5.3 - more stability to prevent multithreading crashes on Windows
45 | 5. Added pipfile
46 |
47 | 
48 |
49 |
--------------------------------------------------------------------------------
/pytranscriber/util/util.py:
--------------------------------------------------------------------------------
1 | '''
2 | (C) 2019 Raryel C. Souza
3 | This program is free software: you can redistribute it and/or modify
4 | it under the terms of the GNU General Public License as published by
5 | the Free Software Foundation, either version 3 of the License, or
6 | (at your option) any later version.
7 | This program is distributed in the hope that it will be useful,
8 | but WITHOUT ANY WARRANTY; without even the implied warranty of
9 | MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
10 | GNU General Public License for more details.
11 | You should have received a copy of the GNU General Public License
12 | along with this program. If not, see .
13 | '''
14 |
15 | import platform
16 | import os
17 | import subprocess
18 |
19 | import requests
20 | from requests.adapters import HTTPAdapter, Retry
21 | import time
22 |
23 |
class MyUtil(object):
    """Miscellaneous static helpers: opening files, connectivity checks and progress maths."""

    # Status codes retried by send_request() when the caller passes none.
    _DEFAULT_RETRY_STATUS_CODES = (504, 503, 502, 500, 429, 302, 408, 425)

    @staticmethod
    def open_file(path):
        """Open `path` with the platform's default application."""
        if platform.system() == "Windows":
            os.startfile(path)
        elif platform.system() == "Darwin":
            subprocess.Popen(["open", path])
        else:
            subprocess.Popen(["xdg-open", path])

    @staticmethod
    def is_internet_connected(proxies=None):
        """
        Return True when https://www.google.com answers with HTTP 200.

        :param proxies: optional proxy mapping passed on to the request.
        :return: False on any other status or on any error (errors are printed).
        """
        try:
            # connect to the host -- tells us if the host is actually
            # reachable
            headers = {'User-Agent': 'Mozilla/5.0 (X11; Linux x86_64; rv:91.0) Gecko/20100101 Firefox/91.0'}

            res = MyUtil.send_request('https://www.google.com', proxies=proxies, headers=headers)
            return res == 200
        except Exception as e:
            # best effort: any failure simply means "not connected"
            print("Error Name: ", e.__class__.__name__)
            print("Error Message: ", e)

        return False

    @staticmethod
    def send_request(url,
                     n_retries=0,
                     backoff_factor=0.9,
                     status_codes=None,
                     proxies=None,
                     headers=None):
        """
        Send a GET request with a 5 second timeout and return its status code.

        :param url: address to fetch.
        :param n_retries: number of connection retries.
        :param backoff_factor: back-off factor applied between retries.
        :param status_codes: status codes that force a retry; when None the
            list 504, 503, 502, 500, 429, 302, 408, 425 is used.
        :param proxies: optional proxy mapping for the request.
        :param headers: optional request headers.
        :return: the response status code, or -1 when the request timed out,
            failed, or returned an error status (the reason is printed).
        """
        if status_codes is None:
            status_codes = MyUtil._DEFAULT_RETRY_STATUS_CODES
        retries = Retry(connect=n_retries, backoff_factor=backoff_factor,
                        status_forcelist=list(status_codes))
        # The context manager closes the session (and its pooled connections)
        # on every exit path.
        with requests.Session() as sess:
            sess.mount("https://", HTTPAdapter(max_retries=retries))
            sess.mount("http://", HTTPAdapter(max_retries=retries))
            try:
                response = sess.get(url, timeout=5, proxies=proxies, headers=headers)
                response.raise_for_status()  # Raises an HTTPError for bad responses
                return response.status_code
            except requests.Timeout:
                print("The request timed out")
            except requests.RequestException as e:
                print(f"An error occurred: {e}")
        return -1

    @staticmethod
    def percentage(currentval, maxval):
        """Return `currentval` as a percentage of `maxval` (float division)."""
        return 100 * currentval / float(maxval)
--------------------------------------------------------------------------------
/autosub/constants.py:
--------------------------------------------------------------------------------
1 | """
2 | Defines constants used by autosub.
3 | """
4 |
5 | from __future__ import unicode_literals
6 |
# NOTE(review): the API key is hard-coded in the source; confirm this is intended.
GOOGLE_SPEECH_API_KEY = "AIzaSyBOti4mM-6x9WDnZIjIeyEU21OpBXqWBgw"
# URL template with `lang` and `key` placeholders. HTTPS is used so the key in
# the query string is not sent in clear text.
GOOGLE_SPEECH_API_URL = "https://www.google.com/speech-api/v2/recognize?client=chromium&lang={lang}&key={key}" # pylint: disable=line-too-long
9 |
10 | LANGUAGE_CODES = {
11 | 'af': 'Afrikaans',
12 | 'ar': 'Arabic',
13 | 'az': 'Azerbaijani',
14 | 'be': 'Belarusian',
15 | 'bg': 'Bulgarian',
16 | 'bn': 'Bengali',
17 | 'bs': 'Bosnian',
18 | 'ca': 'Catalan',
19 | 'ceb': 'Cebuano',
20 | 'cs': 'Czech',
21 | 'cy': 'Welsh',
22 | 'da': 'Danish',
23 | 'de': 'German',
24 | 'el': 'Greek',
25 | 'en-AU': 'English (Australia)',
26 | 'en-CA': 'English (Canada)',
27 | 'en-GB': 'English (United Kingdom)',
28 | 'en-IN': 'English (India)',
29 | 'en-IE': 'English (Ireland)',
30 | 'en-NZ': 'English (New Zealand)',
31 | 'en-PH': 'English (Philippines)',
32 | 'en-SG': 'English (Singapore)',
33 | 'en-US': 'English (United States)',
34 | 'eo': 'Esperanto',
35 | 'es-AR': 'Spanish (Argentina)',
36 | 'es-CL': 'Spanish (Chile)',
37 | 'es-ES': 'Spanish (Spain)',
38 | 'es-US': 'Spanish (United States)',
39 | 'es-MX': 'Spanish (Mexico)',
40 | 'es': 'Spanish',
41 | 'et': 'Estonian',
42 | 'eu': 'Basque',
43 | 'fa': 'Persian',
44 | 'fi': 'Finnish',
45 | 'fr': 'French',
46 | 'ga': 'Irish',
47 | 'gl': 'Galician',
48 | 'gu': 'Gujarati',
49 | 'ha': 'Hausa',
50 | 'hi': 'Hindi',
51 | 'hmn': 'Hmong',
52 | 'hr': 'Croatian',
53 | 'ht': 'Haitian Creole',
54 | 'hu': 'Hungarian',
55 | 'hy': 'Armenian',
56 | 'id': 'Indonesian',
57 | 'ig': 'Igbo',
58 | 'is': 'Icelandic',
59 | 'it': 'Italian',
60 | 'iw': 'Hebrew',
61 | 'ja': 'Japanese',
62 | 'jw': 'Javanese',
63 | 'ka': 'Georgian',
64 | 'kk': 'Kazakh',
65 | 'km': 'Khmer',
66 | 'kn': 'Kannada',
67 | 'ko': 'Korean',
68 | 'la': 'Latin',
69 | 'lo': 'Lao',
70 | 'lt': 'Lithuanian',
71 | 'lv': 'Latvian',
72 | 'mg': 'Malagasy',
73 | 'mi': 'Maori',
74 | 'mk': 'Macedonian',
75 | 'ml': 'Malayalam',
76 | 'mn': 'Mongolian',
77 | 'mr': 'Marathi',
78 | 'ms': 'Malay',
79 | 'mt': 'Maltese',
80 | 'my': 'Myanmar (Burmese)',
81 | 'ne': 'Nepali',
82 | 'nl': 'Dutch',
83 | 'no': 'Norwegian',
84 | 'ny': 'Chichewa',
85 | 'pa': 'Punjabi',
86 | 'pl': 'Polish',
87 | 'pt-BR': 'Portuguese (Brazil)',
88 | 'pt-PT': 'Portuguese (Portugal)',
89 | 'ro': 'Romanian',
90 | 'ru': 'Russian',
91 | 'si': 'Sinhala',
92 | 'sk': 'Slovak',
93 | 'sl': 'Slovenian',
94 | 'so': 'Somali',
95 | 'sq': 'Albanian',
96 | 'sr': 'Serbian',
97 | 'st': 'Sesotho',
98 | 'su': 'Sudanese',
99 | 'sv': 'Swedish',
100 | 'sw': 'Swahili',
101 | 'ta': 'Tamil',
102 | 'te': 'Telugu',
103 | 'tg': 'Tajik',
104 | 'th': 'Thai',
105 | 'tl': 'Filipino',
106 | 'tr': 'Turkish',
107 | 'uk': 'Ukrainian',
108 | 'ur': 'Urdu',
109 | 'uz': 'Uzbek',
110 | 'vi': 'Vietnamese',
111 | 'yi': 'Yiddish',
112 | 'yo': 'Yoruba',
113 | 'yue-Hant-HK': 'Cantonese, (Traditional HK)',
114 | 'zh': 'Chinese (Simplified, China)',
115 | 'zh-HK': 'Chinese (Simplified, Hong Kong)',
116 | 'zh-TW': 'Chinese (Traditional, Taiwan)',
117 | 'zu': 'Zulu',
118 | }
119 |
--------------------------------------------------------------------------------
/.github/workflows/mac-pyinstaller.yml:
--------------------------------------------------------------------------------
1 | name: MacOS PyInstaller
2 |
3 | on:
4 | workflow_dispatch:
5 | push:
6 | branches:
7 | - master
8 | - develop
9 |
10 |
11 | jobs:
12 | build:
13 | runs-on: macos-14 # Use macOS ARM64 runner
14 | steps:
15 | - name: Checkout repository
16 | uses: actions/checkout@v4
17 | with:
18 | fetch-depth: 0 # Ensure full history and tags are available
19 |
20 | - name: Get latest Git tag
21 | id: get_version
22 | run: |
23 | VERSION=$(git describe --tags --abbrev=0 2>/dev/null || echo "v0.1.0")
24 | echo "VERSION=$VERSION" >> $GITHUB_ENV
25 | echo "Resolved version: $VERSION"
26 |
27 | - name: Setup FFmpeg
28 | uses: federicocarboni/setup-ffmpeg@v3.1
29 | with:
30 | ffmpeg-version: release
31 | architecture: x64
32 |
33 | - name: Verify FFmpeg installation
34 | run: |
35 | which ffmpeg
36 | ffmpeg -version
37 |
38 | - name: Set up Python 3.8
39 | uses: actions/setup-python@v4
40 | with:
41 | python-version: "3.8"
42 |
43 | - name: Set up Python virtual environment
44 | run: |
45 | python -m venv .venv
46 |
47 | - name: Activate virtual environment and install dependencies
48 | run: |
49 | source .venv/bin/activate
50 | pip install --upgrade pip
51 | pip install -r requirements.txt
52 | pip install pyinstaller
53 |
54 | - name: Verify existence of Whisper assets directory
55 | run: |
56 | source .venv/bin/activate # Activate the virtual environment
57 | ASSETS_PATH=$(python -c "import whisper; import os; print(os.path.join(os.path.dirname(whisper.__file__), 'assets'))")
58 | if [ -d "$ASSETS_PATH" ]; then
59 | echo "The 'assets' directory exists at: $ASSETS_PATH"
60 | echo "ASSETS_PATH=$ASSETS_PATH" >> $GITHUB_ENV
61 | else
62 | echo "The 'assets' directory does NOT exist."
63 | exit 1
64 | fi
65 |
66 | - name: Compile with pyInstaller
67 | run: |
68 | source .venv/bin/activate
69 | FFMPPEG_PATH=$(which ffmpeg)
70 | pyinstaller main.py \
71 | --windowed \
72 | --path="$(pwd)" \
73 | --add-binary="$FFMPPEG_PATH:." \
74 | --add-binary="pytranscriber.sqlite:." \
75 | --add-data="pytranscriber/gui/*.qm:pytranscriber/gui/" \
76 | --add-data="$ASSETS_PATH:whisper/assets"
77 |
78 | - name: Zip the .app bundle with version number
79 | run: |
80 | cd dist
81 | mv main.app "pyTranscriber-${VERSION}.app"
82 | zip -r "pyTranscriber-macos-${VERSION}.zip" "pyTranscriber-${VERSION}.app"
83 |
84 | - name: Upload built executable with version number
85 | uses: actions/upload-artifact@v4
86 | with:
87 | name: pyTranscriber-macos-${{ env.VERSION }}
88 | path: ./dist/pyTranscriber-macos-${{ env.VERSION }}.zip # Path adjusted for macOS
89 |
90 | download:
91 | runs-on: macos-14 # macOS ARM64 runner for downloading
92 | needs: build
93 | steps:
94 | - name: Download built executable
95 | uses: actions/download-artifact@v4
96 | with:
97 | path: ./output
98 |
99 | - name: List downloaded files
100 | run: ls -la ./output
101 |
--------------------------------------------------------------------------------
/pytranscriber/gui/简体中文 - Chinese Simplified.ts:
--------------------------------------------------------------------------------
1 |
2 |
3 |
4 |
5 | window
6 |
7 |
8 | pyTranscriber - v1.7 - 08/08/2020
9 | pyTranscriber -v1.8 - 20/08/2022
10 |
11 |
12 |
13 | Select file(s)
14 | 选择文件
15 |
16 |
17 |
18 | Transcribe Audio / Generate Subtitles
19 | 转译音频 / 生成字幕
20 |
21 |
22 |
23 | Open Output Folder
24 | 打开导出文件夹
25 |
26 |
27 |
28 | Output Location
29 | 导出位置
30 |
31 |
32 |
33 | &List of files to generate transcribe audio / generate subtitles
34 | &转译 / 生成字幕文件列表
35 |
36 |
37 |
38 | Remove file(s)
39 | 移除文件
40 |
41 |
42 |
43 | Cancel
44 | 取消
45 |
46 |
47 |
48 | Open output files automatically
49 | 完成后自动打开文件夹
50 |
51 |
52 |
53 | Audio Language:
54 | 选择音频语言
55 |
56 |
57 |
58 | Abo&ut
59 | Abo&ut
60 |
61 |
62 |
63 | Settings
64 | Settings
65 |
66 |
67 |
68 | &Language
69 | 语
70 |
71 |
72 |
73 | &License
74 | &License
75 |
76 |
77 |
78 | &Funding at Github Sponsors
79 | 资助 GitHub 上的项目
80 |
81 |
82 |
83 | &More about pyTranscriber
84 | &关于 pyTranscriber
85 |
86 |
87 |
88 | Proxy
89 | Proxy
90 |
91 |
92 |
93 | Proxy settings
94 | Proxy settings
95 |
96 |
97 |
98 |
--------------------------------------------------------------------------------
/.github/workflows/linux-pyinstaller.yml:
--------------------------------------------------------------------------------
1 | name: Linux PyInstaller
2 |
3 | on:
4 | push:
5 | branches:
6 | - master
7 | - develop
8 | pull_request:
9 |
10 | jobs:
11 | build:
12 | runs-on: ubuntu-22.04
13 |
14 | steps:
15 | - name: Checkout repository
16 | uses: actions/checkout@v3
17 | with:
18 | fetch-depth: 0 # Fetch all tags
19 |
20 | - name: Get latest Git tag
21 | id: get_version
22 | run: |
23 | VERSION=$(git describe --tags --abbrev=0 2>/dev/null || echo "v0.1.0")
24 | echo "VERSION=$VERSION" >> $GITHUB_ENV
25 | echo "Resolved version: $VERSION"
26 |
27 | - name: Install missing system libraries (XCB, TBB, etc.)
28 | run: |
29 | sudo apt-get update
30 | sudo apt-get install -y \
31 | libxcb1 \
32 | libxcb-keysyms1 \
33 | libxcb-shape0 \
34 | libxcb-xkb1 \
35 | libxcb-render-util0 \
36 | libxcb-image0 \
37 | libxcb-xinerama0 \
38 | libxkbcommon-x11-0 \
39 | libxcb-icccm4 \
40 | libtbb12 \
41 | libsox-dev
42 |
43 | - name: Install FFmpeg
44 | run: sudo apt update && sudo apt install -y ffmpeg
45 |
46 | - name: Verify FFmpeg installation
47 | run: |
48 | which ffmpeg
49 | ffmpeg -version
50 |
51 | - name: Set up Python 3.8
52 | uses: actions/setup-python@v4
53 | with:
54 | python-version: "3.8"
55 |
56 | - name: Set up Python virtual environment
57 | run: |
58 | python -m venv .venv
59 |
60 | - name: Activate virtual environment and install dependencies
61 | run: |
62 | source .venv/bin/activate
63 | pip install --upgrade pip
64 | pip install -r requirements.txt
65 | pip install pyinstaller
66 |
67 | - name: Verify existence of Whisper assets directory
68 | run: |
69 | source .venv/bin/activate # Activate the virtual environment
70 | ASSETS_PATH=$(python -c "import whisper; import os; print(os.path.join(os.path.dirname(whisper.__file__), 'assets'))")
71 | if [ -d "$ASSETS_PATH" ]; then
72 | echo "The 'assets' directory exists at: $ASSETS_PATH"
73 | echo "ASSETS_PATH=$ASSETS_PATH" >> $GITHUB_ENV
74 | else
75 | echo "The 'assets' directory does NOT exist."
76 | exit 1
77 | fi
78 |
79 | - name: Compile with pyInstaller
80 | run: |
81 | source .venv/bin/activate
82 | FFMPPEG_PATH=$(which ffmpeg)
83 | pyinstaller main.py \
84 | --path="$(pwd)" \
85 | --onefile \
86 | --add-binary="$FFMPPEG_PATH:." \
87 | --add-binary="pytranscriber.sqlite:." \
88 | --add-data="pytranscriber/gui/*.qm:pytranscriber/gui/" \
89 | --add-data="$ASSETS_PATH:whisper/assets"
90 |
91 | - name: Rename and zip the binary with version number
92 | run: |
93 | cd dist
94 | mv main "pyTranscriber-${VERSION}"
95 |
96 | - name: Upload built executable
97 | uses: actions/upload-artifact@v4
98 | with:
99 | name: pyTranscriber-linux-pyinstaller-${{ env.VERSION }}
100 | path: ./dist/pyTranscriber-${{ env.VERSION }}
101 |
102 | download:
103 | runs-on: ubuntu-22.04
104 | needs: build
105 | steps:
106 | - name: Download built executable
107 | uses: actions/download-artifact@v4
108 | with:
109 | path: ./output
110 |
111 | - name: List downloaded files
112 | run: ls -la ./output
113 |
--------------------------------------------------------------------------------
/whisper/triton_ops.py:
--------------------------------------------------------------------------------
1 | from functools import lru_cache
2 |
3 | import numpy as np
4 | import torch
5 |
6 | try:
7 | import triton
8 | import triton.language as tl
9 | except ImportError:
10 | raise RuntimeError("triton import failed; try `pip install --pre triton`")
11 |
12 |
# Triton kernel that fills the `cost` and `trace` buffers row by row.
#
# Arguments (all taken from the signature; dtypes/shapes are not visible here):
#   cost, trace  - base pointers of the output buffers, addressed as
#                  `base + row * stride + column`.
#   x            - base pointer of the input, addressed the same way.
#   x_stride, cost_stride, trace_stride - row strides (in elements) of the
#                  three buffers.
#   N, M         - loop bound is N + M; only the first M lanes are active.
#   BLOCK_SIZE   - compile-time lane count; lanes >= M are masked off.
#
# NOTE(review): the `k = i + j` remark suggests the caller lays `x` out along
# anti-diagonals (a skewed matrix) -- confirm against the call site.
@triton.jit
def dtw_kernel(
    cost, trace, x, x_stride, cost_stride, trace_stride, N, M, BLOCK_SIZE: tl.constexpr
):
    offsets = tl.arange(0, BLOCK_SIZE)
    # Lanes at or beyond M neither load nor store anything.
    mask = offsets < M

    for k in range(1, N + M + 1):  # k = i + j
        # Barrier between iterations: each step reads rows written by earlier
        # steps (rows k-1 and k were stored as row k+1 of previous iterations).
        tl.debug_barrier()

        # Three candidate predecessors: row k-1, row k, and row k shifted by
        # one element.
        p0 = cost + (k - 1) * cost_stride
        p1 = cost + k * cost_stride
        p2 = cost + k * cost_stride + 1

        c0 = tl.load(p0 + offsets, mask=mask)
        c1 = tl.load(p1 + offsets, mask=mask)
        c2 = tl.load(p2 + offsets, mask=mask)

        # New cost = local cost from row k-1 of x + cheapest predecessor.
        x_row = tl.load(x + (k - 1) * x_stride + offsets, mask=mask, other=0)
        cost_row = x_row + tl.minimum(tl.minimum(c0, c1), c2)

        # Results go to row k+1, shifted right by one element.
        cost_ptr = cost + (k + 1) * cost_stride + 1
        tl.store(cost_ptr + offsets, cost_row, mask=mask)

        # Record which candidate (0, 1 or 2) supplied the minimum. The masks
        # overlap on ties and the stores are issued in the order 2, 1, 0, so
        # on a tie the lowest index is the one written last.
        trace_ptr = trace + (k + 1) * trace_stride + 1
        tl.store(trace_ptr + offsets, 2, mask=mask & (c2 <= c0) & (c2 <= c1))
        tl.store(trace_ptr + offsets, 1, mask=mask & (c1 <= c0) & (c1 <= c2))
        tl.store(trace_ptr + offsets, 0, mask=mask & (c0 <= c1) & (c0 <= c2))
41 |
42 |
@lru_cache(maxsize=None)
def median_kernel(filter_width: int):
    """Build the Triton median kernel specialised for `filter_width`.

    The kernel is produced by textual substitution on the source of the
    template function below, so the template body must not be reformatted
    or commented: any text inside it becomes part of the generated kernel.
    Three placeholders are replaced:

    * ``LOAD_ALL_ROWS_HERE`` -> one ``row{i} = tl.load(...)`` line per
      ``i in range(filter_width)``.
    * ``BUBBLESORT_HERE`` -> compare-and-swap statements on adjacent
      ``row{j}`` / ``row{j + 1}`` pairs, arranged as ``filter_width // 2 + 1``
      bubble-sort passes. That is enough passes to settle
      ``row{filter_width // 2}``; the sort is deliberately left partial.
    * ``MIDDLE_ROW_HERE`` -> ``row{filter_width // 2}``, the value stored to
      the output.

    Results are memoised per `filter_width` by ``lru_cache``, so each width
    is generated once per process.

    Returns the ``triton.JITFunction`` carrying the rewritten source.
    """

    @triton.jit
    def kernel(
        y, x, x_stride, y_stride, BLOCK_SIZE: tl.constexpr
    ):  # x.shape[-1] == filter_width
        row_idx = tl.program_id(0)
        offsets = tl.arange(0, BLOCK_SIZE)
        mask = offsets < y_stride

        x_ptr = x + row_idx * x_stride  # noqa: F841
        y_ptr = y + row_idx * y_stride

        LOAD_ALL_ROWS_HERE  # noqa: F821

        BUBBLESORT_HERE  # noqa: F821

        tl.store(y_ptr + offsets, MIDDLE_ROW_HERE, mask=mask)  # noqa: F821

    # Re-wrap the plain Python function so the source edited below belongs to
    # a fresh JITFunction object.
    # NOTE(review): presumably this ensures the modified `src` is what gets
    # compiled -- confirm against the Triton version in use.
    kernel = triton.JITFunction(kernel.fn)
    # NOTE(review): the leading whitespace inside the literals below must line
    # up with the indentation of the template body as it appears in
    # `kernel.src`. Whitespace in this copy of the file may have been
    # collapsed -- verify against upstream before relying on it.
    kernel.src = kernel.src.replace(
        " LOAD_ALL_ROWS_HERE",
        "\n".join(
            [
                f" row{i} = tl.load(x_ptr + offsets + {i}, mask=mask)"
                for i in range(filter_width)
            ]
        ),
    )
    # Outer index i is the bubble-sort pass; inner index j walks the adjacent
    # pairs that are still unsorted in that pass.
    kernel.src = kernel.src.replace(
        " BUBBLESORT_HERE",
        "\n\n".join(
            [
                "\n\n".join(
                    [
                        "\n".join(
                            [
                                f" smaller = tl.where(row{j} < row{j + 1}, row{j}, row{j + 1})",
                                f" larger = tl.where(row{j} > row{j + 1}, row{j}, row{j + 1})",
                                f" row{j} = smaller",
                                f" row{j + 1} = larger",
                            ]
                        )
                        for j in range(filter_width - i - 1)
                    ]
                )
                for i in range(filter_width // 2 + 1)
            ]
        ),
    )
    kernel.src = kernel.src.replace("MIDDLE_ROW_HERE", f"row{filter_width // 2}")

    return kernel
96 |
97 |
def median_filter_cuda(x: torch.Tensor, filter_width: int):
    """Median-filter `x` along its last dimension with a window of `filter_width`.

    Sliding windows (step 1) are taken over the last dimension of a contiguous
    copy of `x`; the output has the shape of those windows with the window
    axis dropped. One kernel program is launched per leading-dimension row,
    and the block size is the smallest power of two that is >= the output's
    second-to-last stride. `x` must therefore have at least two dimensions.
    """
    windows = x.contiguous().unfold(-1, filter_width, 1)
    n_programs = np.prod(windows.shape[:-2])

    jit_kernel = median_kernel(filter_width)
    out = torch.empty_like(windows[..., 0])

    out_row_stride = out.stride(-2)
    # Round the row length up to the next power of two.
    block = 1 << (out_row_stride - 1).bit_length()
    jit_kernel[(n_programs,)](
        out, x, x.stride(-2), out_row_stride, BLOCK_SIZE=block
    )

    return out
110 |
--------------------------------------------------------------------------------
/.github/workflows/linux-nuitka.yml:
--------------------------------------------------------------------------------
1 | name: Linux Nuitka Pipeline
2 |
3 | on:
4 | push:
5 | branches:
6 | - master
7 | - develop
8 | pull_request:
9 | branches:
10 | - master
11 | - develop
12 |
13 | jobs:
14 | build:
15 | runs-on: ubuntu-22.04 # Ensure the job runs only on Ubuntu 22.04
16 |
17 | steps:
18 | - name: Checkout repository
19 | uses: actions/checkout@v3
20 | with:
21 | fetch-depth: 0 # Fetch all tags
22 |
23 | - name: Get latest Git tag
24 | id: get_version
25 | run: |
26 | VERSION=$(git describe --tags --abbrev=0 2>/dev/null || echo "v0.1.0")
27 | echo "VERSION=$VERSION" >> $GITHUB_ENV
28 | echo "Resolved version: $VERSION"
29 |
30 | - name: Install missing system libraries (XCB, TBB, etc.)
31 | run: |
32 | sudo apt-get update
33 | sudo apt-get install -y \
34 | libxcb1 \
35 | libxcb-keysyms1 \
36 | libxcb-shape0 \
37 | libxcb-xkb1 \
38 | libxcb-render-util0 \
39 | libxcb-image0 \
40 | libxcb-xinerama0 \
41 | libxkbcommon-x11-0 \
42 | libxcb-icccm4 \
43 | libtbb12 \
44 | ccache \
45 | libsox-dev
46 |
47 |
48 | - name: Install FFmpeg
49 | run: sudo apt update && sudo apt install -y ffmpeg
50 |
51 | - name: Verify FFmpeg installation
52 | run: |
53 | which ffmpeg
54 | ffmpeg -version
55 |
56 | - name: Set up Python 3.8
57 | uses: actions/setup-python@v4
58 | with:
59 | python-version: "3.8"
60 |
61 | - name: Set up Python virtual environment
62 | run: |
63 | python -m venv .venv
64 |
65 | - name: Install dependencies
66 | run: |
67 | source .venv/bin/activate
68 | pip install --upgrade pip
69 | pip install -r requirements.txt
70 | pip install nuitka
71 |
72 | - name: Verify Whisper assets directory
73 | run: |
74 | source .venv/bin/activate
75 | whisperPath=$(python -c "import whisper; print(whisper.__file__)")
76 | assetsPath=$(dirname $whisperPath)/assets
77 | if [ -d "$assetsPath" ]; then
78 | echo "The 'assets' directory exists at: $assetsPath"
79 | else
80 | echo "The 'assets' directory DOES NOT exist."
81 | exit 1
82 | fi
83 |
84 | - name: Compile with Nuitka
85 | run: |
86 | source .venv/bin/activate
87 | ffmpegPath=$(which ffmpeg)
88 | nuitka \
89 | --assume-yes-for-downloads \
90 | --enable-plugin=pyqt5 \
91 | --include-data-files="pytranscriber.sqlite=pytranscriber.sqlite" \
92 | --include-data-files="$ffmpegPath=ffmpeg" \
93 | --include-data-files="pytranscriber/gui/*.qm=pytranscriber/gui/" \
94 | --include-package-data="whisper:assets/*=whisper/assets" \
95 | main.py \
96 | --onefile \
97 | --output-dir=dist
98 |
99 | - name: Zip the binary with version number
100 | run: |
101 | cd dist
102 | mv main.bin "pyTranscriber-${VERSION}"
103 |
104 | - name: Upload built executable
105 | uses: actions/upload-artifact@v4
106 | with:
107 | name: pyTranscriber-linux-nuitka-${{ env.VERSION }}
108 | path: ./dist/pyTranscriber-${{ env.VERSION }} # Adjust this path if Nuitka outputs elsewhere
109 |
110 | download:
111 | runs-on: ubuntu-22.04
112 | needs: build
113 | steps:
114 | - name: Download built executable
115 | uses: actions/download-artifact@v4
116 | with:
117 | path: ./output
118 |
119 | - name: List downloaded files
120 | run: dir ./output
121 |
--------------------------------------------------------------------------------
/pytranscriber/control/ctr_db.py:
--------------------------------------------------------------------------------
1 | '''
2 | (C) 2025 Raryel C. Souza
3 | This program is free software: you can redistribute it and/or modify
4 | it under the terms of the GNU General Public License as published by
5 | the Free Software Foundation, either version 3 of the License, or
6 | (at your option) any later version.
7 | This program is distributed in the hope that it will be useful,
8 | but WITHOUT ANY WARRANTY; without even the implied warranty of
9 | MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
10 | GNU General Public License for more details.
11 | You should have received a copy of the GNU General Public License
12 | along with this program. If not, see <https://www.gnu.org/licenses/>.
13 | '''
14 |
15 | from pathlib import PurePath
16 |
17 | from pytranscriber.gui.message_util import MessageUtil
18 | import sqlite3
19 |
20 |
class CtrDB:
    """Access layer for the application's SQLite settings database.

    The connection is opened lazily by :meth:`connect` and released again
    after every read or write, so each public operation leaves the object
    disconnected (``conn is None``). Failures are reported to the user through
    ``MessageUtil`` dialogs instead of being raised to the caller.
    """

    # Open sqlite3 connection, or None while disconnected.
    conn = None
    # Title used for every error dialog shown by this class.
    DB_ERROR = "DB Error"

    def connect(self):
        """Return a cursor, opening the connection first when necessary.

        The database file is ``pytranscriber.sqlite``, resolved relative to
        this module via ``PurePath(__file__).parent.parent.parent``.
        If the connection cannot be established an error dialog is shown and
        the process terminates with exit status 1.
        """
        # sys.exit is used instead of the bare exit() helper: the latter is
        # injected by the `site` module and may be missing in frozen builds.
        import sys

        if self.conn:
            return self.conn.cursor()

        try:
            db_path = PurePath(__file__).parent.parent.parent.joinpath('pytranscriber.sqlite')
            self.conn = sqlite3.connect(str(db_path))
            return self.conn.cursor()
        except Exception as ex:
            MessageUtil.show_error_message("ConnectDB " + str(ex), self.DB_ERROR)
            sys.exit(1)

    def close(self):
        """Close the connection if one is open; a no-op when already closed."""
        try:
            if self.conn is not None:
                self.conn.close()
        finally:
            # Always forget the handle so the next connect() reopens cleanly.
            self.conn = None

    def _execute_write(self, query, params, error_label):
        """Run one modifying statement, commit it and release the connection.

        :param query: SQL text, using ``?`` placeholders for values.
        :param params: sequence of values bound to the placeholders.
        :param error_label: prefix for the error dialog text on failure.
        :return: True when the statement was committed, False on sqlite error.
        """
        cur = self.connect()
        try:
            cur.execute(query, params)
            self.conn.commit()
            return True
        except sqlite3.Error as e:
            MessageUtil.show_error_message(error_label + str(e), self.DB_ERROR)
            return False
        finally:
            self.close()

    def _load_one_row(self, table_name):
        """Return the first row of `table_name` as a tuple, or None.

        None is returned both for an empty table and after a reported sqlite
        error. `table_name` is concatenated into the SQL text, so it must be
        an internal constant, never user input.
        """
        cur = self.connect()
        try:
            cur.execute('SELECT * FROM ' + table_name)
            return cur.fetchone()
        except sqlite3.Error as e:
            MessageUtil.show_error_message("LoadOneRow " + str(e), self.DB_ERROR)
            return None
        finally:
            self.close()

    def _save_single_column(self, query, value):
        """Execute `query` with `value` bound to its single placeholder."""
        self._execute_write(query, (value,), "SaveSingleColumn ")

    def _truncate_table(self, table_name):
        """Delete every row of `table_name` (internal constant, not user input)."""
        self._execute_write('DELETE FROM ' + table_name, (), "TruncateTable ")

    def load_last_language(self):
        """Return the stored row of the ``Language`` table, or None."""
        return self._load_one_row('Language')

    def clear_last_language(self):
        """Remove all rows from the ``Language`` table."""
        self._truncate_table('Language')

    def save_last_language(self, language):
        """Insert `language` as a new ``last_language`` row."""
        self._execute_write('INSERT INTO Language (last_language) VALUES (?)',
                            (language,), "SaveLastLanguage ")

    def load_proxy(self):
        """Return the stored row of the ``Proxy`` table, or None."""
        return self._load_one_row('Proxy')

    def clear_proxy(self):
        """Remove all rows from the ``Proxy`` table."""
        self._truncate_table('Proxy')

    def save_proxy(self, proxy):
        """Insert ``proxy['https']`` as a new ``proxy_address`` row.

        A confirmation dialog is shown only when the row was committed.
        """
        saved = self._execute_write('INSERT INTO Proxy (proxy_address) VALUES (?)',
                                    (proxy['https'],), "SaveProxy ")
        if saved:
            MessageUtil.show_info_message('Proxy address saved successfully', 'Proxy settings saved')
109 |
--------------------------------------------------------------------------------
/pytranscriber/control/thread_exec_generic.py:
--------------------------------------------------------------------------------
1 | '''
2 | (C) 2025 Raryel C. Souza
3 | This program is free software: you can redistribute it and/or modify
4 | it under the terms of the GNU General Public License as published by
5 | the Free Software Foundation, either version 3 of the License, or
6 | (at your option) any later version.
7 | This program is distributed in the hope that it will be useful,
8 | but WITHOUT ANY WARRANTY; without even the implied warranty of
9 | MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
10 | GNU General Public License for more details.
11 | You should have received a copy of the GNU General Public License
12 | along with this program. If not, see <https://www.gnu.org/licenses/>.
13 | '''
14 |
15 | from abc import ABC, abstractmethod
16 | from PyQt5.QtCore import QThread
17 | from PyQt5.QtCore import pyqtSignal
18 | from pathlib import Path
19 | from pytranscriber.control.ctr_engine import CtrEngine
20 | import os
21 |
class ThreadExecGeneric(QThread):
    """Base worker thread that runs a transcription engine over a file list.

    Subclasses supply the engine by overriding :meth:`_run_engine_for_media`.
    Progress and state changes are reported to the GUI only through the Qt
    signals declared below.
    """

    # Emitted once when processing starts.
    signalLockGUI = pyqtSignal()
    # Emitted at the end when the user cancelled the operation.
    signalResetGUIAfterCancel = pyqtSignal()
    # Emitted at the end when all files were processed without cancellation.
    signalResetGUIAfterSuccess = pyqtSignal()
    # (message, percent) progress updates forwarded by listenerProgress.
    signalProgress = pyqtSignal(str, int)
    # "File Y of N" text for the file currently being processed.
    signalProgressFileYofN = pyqtSignal(str)
    # Human-readable error text.
    signalErrorMsg = pyqtSignal(str)

    def __init__(self, obj_transcription_parameters):
        """Store the transcription parameters used by the processing loop.

        :param obj_transcription_parameters: object exposing ``langCode``,
            ``outputFolder`` and ``listFiles``.
        """
        # Initialise the Qt base object before attaching Python attributes.
        QThread.__init__(self)
        self.obj_transcription_parameters = obj_transcription_parameters
        self.running = True

    def listenerProgress(self, string, percent):
        """Forward an engine progress callback as ``signalProgress``."""
        self.signalProgress.emit(string, percent)

    def _loopSelectedFiles(self):
        """Process every selected file in order, stopping early on cancel.

        The output folder is created (including missing parents) when absent.
        If it cannot be used as a directory, ``signalErrorMsg`` is emitted and
        nothing is processed. Otherwise one of the two reset signals is
        emitted at the end, depending on whether the run was cancelled.
        """
        self.signalLockGUI.emit()

        langCode = self.obj_transcription_parameters.langCode
        pathOutputFolder = Path(self.obj_transcription_parameters.outputFolder)

        try:
            pathOutputFolder.mkdir(parents=True, exist_ok=True)
        except OSError:
            # Not fatal here: the directory check below reports the problem
            # (path is a regular file, permission denied, ...).
            pass

        if not pathOutputFolder.is_dir():
            # force the user to select a different output directory
            self.signalErrorMsg.emit("Error! Invalid output folder. Please choose another one.")
            return

        nFiles = len(self.obj_transcription_parameters.listFiles)
        for i in range(nFiles):
            # stop as soon as the user clicked the cancel button
            if CtrEngine.is_operation_canceled():
                break
            self._updateProgressFileYofN(i, nFiles)
            self._run_engine_for_media(i, langCode)

        # a cancelled run keeps the file list; a successful one resets it
        if CtrEngine.is_operation_canceled():
            self.signalResetGUIAfterCancel.emit()
        else:
            self.signalResetGUIAfterSuccess.emit()

    @abstractmethod
    def _run_engine_for_media(self, index, langCode):
        """Transcribe the file at position `index` using language `langCode`.

        Must be overridden. The class does not use ``ABCMeta``, so the
        decorator alone does not enforce this; the explicit raise does.

        :raises NotImplementedError: always, in this base implementation.
        """
        raise NotImplementedError(
            type(self).__name__ + " must implement _run_engine_for_media"
        )

    def _updateProgressFileYofN(self, currentIndex, countFiles):
        """Emit the 1-based "File Y of N" label for `currentIndex`."""
        self.signalProgressFileYofN.emit("File " + str(currentIndex + 1) + " of " + str(countFiles))

    def _generatePathOutputFile(self, sourceFile):
        """Return ``[srt_path, txt_path]`` for `sourceFile`.

        Both paths live in the configured output folder and reuse the source
        file's base name with its last extension replaced.
        """
        # file name without directory and without its final extension
        fileName = os.path.splitext(os.path.basename(sourceFile))[0]

        pathOutputFolder = Path(self.obj_transcription_parameters.outputFolder)
        outputFileSRT = pathOutputFolder / (fileName + ".srt")
        outputFileTXT = pathOutputFolder / (fileName + ".txt")
        return [outputFileSRT, outputFileTXT]

    @staticmethod
    def cancel():
        """Request cancellation of the running engine operation."""
        CtrEngine.cancel_operation()
92 |
--------------------------------------------------------------------------------
/patches/autosub-0.4.0.patch:
--------------------------------------------------------------------------------
1 | --- __init__-0.4.0.py 2019-02-09 21:21:16.335586891 +0700
2 | +++ __init__.py 2019-02-10 21:25:41.864964164 +0700
3 | @@ -8,16 +8,22 @@
4 |
5 | import argparse
6 | import audioop
7 | -import json
8 | import math
9 | import multiprocessing
10 | import os
11 | +from json import JSONDecodeError
12 | import subprocess
13 | import sys
14 | import tempfile
15 | import wave
16 |
17 | +import json
18 | import requests
19 | +try:
20 | + from json.decoder import JSONDecodeError
21 | +except ImportError:
22 | + JSONDecodeError = ValueError
23 | +
24 | from googleapiclient.discovery import build
25 | from progressbar import ProgressBar, Percentage, Bar, ETA
26 |
27 | @@ -61,8 +67,10 @@
28 | start, end = region
29 | start = max(0, start - self.include_before)
30 | end += self.include_after
31 | - temp = tempfile.NamedTemporaryFile(suffix='.flac')
32 | - command = ["ffmpeg", "-ss", str(start), "-t", str(end - start),
33 | + #delete=False necessary for running on Windows
34 | + temp = tempfile.NamedTemporaryFile(suffix='.flac', delete=False)
35 | + program_ffmpeg = which("ffmpeg")
36 | + command = [str(program_ffmpeg), "-ss", str(start), "-t", str(end - start),
37 | "-y", "-i", self.source_path,
38 | "-loglevel", "error", temp.name]
39 | use_shell = True if os.name == "nt" else False
40 | @@ -102,6 +110,8 @@
41 | except IndexError:
42 | # no result
43 | continue
44 | + except JSONDecodeError:
45 | + continue
46 |
47 | except KeyboardInterrupt:
48 | return None
49 | @@ -149,17 +159,25 @@
50 | Checks whether a file is executable.
51 | """
52 | return os.path.isfile(file_path) and os.access(file_path, os.X_OK)
53 | -
54 | + #necessary to run on Windows
55 | + if os.name == "nt":
56 | + program += ".exe"
57 | fpath, _ = os.path.split(program)
58 | if fpath:
59 | if is_exe(program):
60 | return program
61 | else:
62 | - for path in os.environ["PATH"].split(os.pathsep):
63 | - path = path.strip('"')
64 | - exe_file = os.path.join(path, program)
65 | - if is_exe(exe_file):
66 | - return exe_file
67 | + #looks for file in the script execution folder before checking on system path
68 | + current_dir = os.getcwd()
69 | + local_program = os.path.join(current_dir, program)
70 | + if is_exe(local_program):
71 | + return local_program
72 | + else:
73 | + for path in os.environ["PATH"].split(os.pathsep):
74 | + path = path.strip('"')
75 | + exe_file = os.path.join(path, program)
76 | + if is_exe(exe_file):
77 | + return exe_file
78 | return None
79 |
80 |
81 | @@ -171,10 +189,11 @@
82 | if not os.path.isfile(filename):
83 | print("The given file does not exist: {}".format(filename))
84 | raise Exception("Invalid filepath: {}".format(filename))
85 | - if not which("ffmpeg"):
86 | + program_ffmpeg = which("ffmpeg")
87 | + if not program_ffmpeg:
88 | print("ffmpeg: Executable not found on machine.")
89 | raise Exception("Dependency not found: ffmpeg")
90 | - command = ["ffmpeg", "-y", "-i", filename,
91 | + command = [str(program_ffmpeg), "-y", "-i", filename,
92 | "-ac", str(channels), "-ar", str(rate),
93 | "-loglevel", "error", temp.name]
94 | use_shell = True if os.name == "nt" else False
95 | @@ -233,6 +252,12 @@
96 | """
97 | Given an input audio/video file, generate subtitles in the specified language and format.
98 | """
99 | +
100 | + if "Darwin" in os.uname():
101 | + #the default unix fork method does not work on Mac OS
102 | + #need to use forkserver
103 | + multiprocessing.set_start_method('forkserver')
104 | +
105 | audio_filename, audio_rate = extract_audio(source_path)
106 |
107 | regions = find_speech_regions(audio_filename)
108 |
--------------------------------------------------------------------------------
/pytranscriber/gui/繁體中文 - Chinese Traditional.ts:
--------------------------------------------------------------------------------
1 |
2 |
3 |
4 |
5 | window
6 |
7 |
8 | pyTranscriber - v1.8 - 17/08/2022
9 | pyTranscriber -v1.8 - 20/08/2022
10 |
11 |
12 |
13 | Select file(s)
14 | 選擇檔案
15 |
16 |
17 |
18 | Transcribe Audio / Generate Subtitles
19 | 轉譯音訊 / 生成字幕
20 |
21 |
22 |
23 | Open Output Folder
24 | 開啟輸出位置
25 |
26 |
27 |
28 | Output Location
29 | 選取輸出位置
30 |
31 |
32 |
33 | List of files to generate transcribe audio / generate subtitles
34 | &轉譯音訊 / 生成字幕檔案清單
35 |
36 |
37 |
38 | Remove file(s)
39 | 移除檔案
40 |
41 |
42 |
43 | Cancel
44 | 取消
45 |
46 |
47 |
48 | Open output files automatically
49 | 完成後自動開啟輸出資料夾
50 |
51 |
52 |
53 | Audio Language:
54 | 選擇音訊語言
55 |
56 |
57 |
58 | Abo&ut
59 | 關於
60 |
61 |
62 |
63 | &Settings
64 | 設定
65 |
66 |
67 |
68 | &Language
69 | 語言
70 |
71 |
72 |
73 | &License
74 | &License
75 |
76 |
77 |
78 | &Funding at Github Sponsors
79 | 在 Github 上成為贊助者
80 |
81 |
82 |
83 | &More about pyTranscriber
84 | &關於 pyTranscriber
85 |
86 |
87 |
88 | &Proxy
89 | 代理伺服器(Proxy)
90 |
91 |
92 |
93 | Proxy setting
94 | 代理伺服器設定
95 |
96 |
97 |
98 | English
99 |
100 |
101 |
102 |
103 | 繁體中文 - Chinese Traditional
104 |
105 |
106 |
107 |
108 | 简体中文 - Chinese Simplified
109 |
110 |
111 |
112 |
113 | Português
114 |
115 |
116 |
117 |
118 |
--------------------------------------------------------------------------------
/.github/workflows/win-nuitka.yml:
--------------------------------------------------------------------------------
1 | on:
2 | push:
3 | branches:
4 | - master
5 | - develop
6 | pull_request:
7 |
8 | jobs:
9 | build:
10 | runs-on: windows-latest
11 |
12 | steps:
13 | - name: Checkout repository
14 | uses: actions/checkout@v4
15 | with:
16 | fetch-depth: 0
17 |
18 | - name: Get latest Git tag
19 | id: get_version
20 | run: |
21 | $VERSION = git describe --tags --abbrev=0 2>$null
22 | if (-not $VERSION) {
23 | $VERSION = "v0.1.0" # Default version if no tags are found
24 | }
25 | echo "VERSION=$VERSION" | Out-File -FilePath $env:GITHUB_ENV -Encoding utf8
26 | Write-Host "Resolved version: $VERSION"
27 | shell: pwsh
28 |
29 | - name: Setup FFmpeg
30 | uses: federicocarboni/setup-ffmpeg@v3.1
31 | with:
32 | ffmpeg-version: release # Specify the desired FFmpeg version
33 | architecture: x64
34 |
35 | - name: Add FFmpeg to PATH
36 | run: |
37 | $ffmpegPath = (Get-Command ffmpeg).Source
38 | $env:Path += ";$($ffmpegPath.Substring(0, $ffmpegPath.LastIndexOf('\')))"
39 | $ffmpegPath
40 | shell: pwsh
41 |
42 | - name: Verify FFmpeg installation
43 | run: |
44 | where ffmpeg
45 | ffmpeg -version
46 |
47 | - name: Set up Python 3.8
48 | uses: actions/setup-python@v4
49 | with:
50 | python-version: "3.8"
51 |
52 | - name: Set up Python virtual environment
53 | run: |
54 | python -m venv .venv
55 |
56 | - name: Activate virtual environment
57 | run: |
58 | .\.venv\Scripts\Activate
59 |
60 | - name: Install dependencies
61 | run: |
62 | pip install --upgrade pip
63 | pip install -r requirements.txt
64 | pip install nuitka
65 |
66 | - name: Verificar existência do diretório assets do whisper
67 | run: |
68 | $whisperPath = (python -c "import whisper; print(whisper.__file__)").Trim()
69 | $assetsPath = Join-Path (Split-Path $whisperPath) 'assets'
70 | if (Test-Path $assetsPath) {
71 | Write-Host "O diretório 'assets' existe em: $assetsPath"
72 | } else {
73 | Write-Host "O diretório 'assets' NÃO existe."
74 | exit 1
75 | }
76 |
77 | - name: Compile with Nuitka
78 | run: |
79 | $whisperPath = (python -c "import whisper; print(whisper.__file__)").Trim()
80 | $assetsPath = Join-Path (Split-Path $whisperPath) 'assets'
81 | $ffmpegPath = (Get-Command ffmpeg).Source
82 | nuitka `
83 | --assume-yes-for-downloads `
84 | --enable-plugin=pyqt5 `
85 | --include-data-files="pytranscriber.sqlite=pytranscriber.sqlite" `
86 | --include-data-files="$ffmpegPath=ffmpeg.exe" `
87 | --include-data-files="pytranscriber/gui/*.qm=pytranscriber/gui/" `
88 | --include-data-files="$assetsPath\*=whisper/assets/" `
89 | main.py `
90 | --onefile `
91 | --output-dir=dist `
92 | --windows-console-mode=disable
93 |
94 | - name: Rename and zip the .exe bundle with version number
95 | run: |
96 | Set-Location -Path dist
97 | Write-Host "Renaming main.exe to pyTranscriber-$env:VERSION.exe"
98 | Rename-Item -Force main.exe "pyTranscriber-$env:VERSION.exe"
99 | # Write-Host "Creating zip archive: pyTranscriber-$env:VERSION.zip"
100 | # Compress-Archive -Path "pyTranscriber-$env:VERSION.exe" -DestinationPath "pyTranscriber-win-$env:VERSION.zip"
101 | shell: pwsh
102 |
103 | - name: Upload built executable
104 | uses: actions/upload-artifact@v4
105 | with:
106 | name: pyTranscriber-win-${{ env.VERSION }}
107 | path: ./dist/pyTranscriber-${{ env.VERSION }}.exe # Adjust this path if Nuitka outputs elsewhere
108 |
109 | download:
110 | runs-on: windows-latest
111 | needs: build
112 | steps:
113 | - name: Download built executable
114 | uses: actions/download-artifact@v4
115 | with:
116 | path: ./output
117 |
118 | - name: List downloaded files
119 | run: dir ./output
120 |
--------------------------------------------------------------------------------
/.github/workflows/win-pyinstaller-dev2.yml:
--------------------------------------------------------------------------------
1 | name: Windows PyInstaller
2 |
3 | on:
4 | push:
5 | branches:
6 | - master
7 | - develop
8 | pull_request:
9 |
10 | jobs:
11 | build:
12 | runs-on: windows-latest
13 | strategy:
14 | matrix:
15 | python-version: ["3.8", "3.10", "3.12"] # Run the build in parallel for each Python version
16 | steps:
17 | - name: Checkout repository
18 | uses: actions/checkout@v4
19 | with:
20 | fetch-depth: 0 # Fetch all tags
21 |
22 | - name: Get latest Git tag
23 | id: get_version
24 | run: |
25 | $VERSION = git describe --tags --abbrev=0 2>$null
26 | if (-not $VERSION) {
27 | $VERSION = "v0.1.0" # Default version if no tags are found
28 | }
29 | echo "VERSION=$VERSION" | Out-File -FilePath $env:GITHUB_ENV -Encoding utf8
30 | Write-Host "Resolved version: $VERSION"
31 | shell: pwsh
32 |
33 | - name: Setup FFmpeg
34 | uses: federicocarboni/setup-ffmpeg@v3.1
35 | with:
36 | ffmpeg-version: release
37 | architecture: x64
38 |
39 | - name: Verify FFmpeg installation
40 | run: |
41 | where ffmpeg
42 | ffmpeg -version
43 |
44 | - name: Set up Python ${{ matrix.python-version }}
45 | uses: actions/setup-python@v4
46 | with:
47 | python-version: ${{ matrix.python-version }}
48 |
49 | - name: Create and activate virtual environment for Python ${{ matrix.python-version }}
50 | run: |
51 | python -m venv .venv-${{ matrix.python-version }}
52 | .\.venv-${{ matrix.python-version }}\Scripts\Activate
53 | shell: pwsh
54 |
55 | - name: Install dependencies for Python ${{ matrix.python-version }}
56 | run: |
57 | .\.venv-${{ matrix.python-version }}\Scripts\Activate
58 | python -m ensurepip --upgrade
59 | python -m pip install --upgrade pip
60 | python -m pip install -r requirements.txt
61 | python -m pip install pyinstaller
62 | shell: pwsh
63 |
64 | - name: Verify whisper assets directory for Python ${{ matrix.python-version }}
65 | run: |
66 | .\.venv-${{ matrix.python-version }}\Scripts\Activate
67 | $whisperPath = (python -c "import whisper; print(whisper.__file__)").Trim()
68 | $assetsPath = Join-Path (Split-Path $whisperPath) 'assets'
69 | if (Test-Path $assetsPath) {
70 | Write-Host "O diretório 'assets' existe em: $assetsPath"
71 | echo "ASSETS_PATH=$assetsPath" >> $env:GITHUB_ENV
72 | } else {
73 | Write-Host "O diretório 'assets' NÃO existe."
74 | exit 1
75 | }
76 | shell: pwsh
77 |
78 | - name: Compile with PyInstaller for Python ${{ matrix.python-version }}
79 | run: |
80 | .\.venv-${{ matrix.python-version }}\Scripts\Activate
81 | $ffmpegPath = (Get-Command ffmpeg).Source
82 | pyinstaller main.py `
83 | --onefile `
84 | --path="$(Get-Location)" `
85 | --add-binary="$ffmpegPath;." `
86 | --add-binary="pytranscriber.sqlite;." `
87 | --add-data="pytranscriber/gui/*.qm;pytranscriber/gui/" `
88 | --add-data="${env:ASSETS_PATH};whisper/assets" `
89 | --clean
90 |
91 | shell: pwsh
92 |
93 | - name: Rename and zip the .exe bundle with version number
94 | run: |
95 | Set-Location -Path dist
96 | Write-Host "Renaming main.exe to pyTranscriber-$env:VERSION.exe"
97 | Rename-Item -Force main.exe "pyTranscriber-$env:VERSION.exe"
98 | # Write-Host "Creating zip archive: pyTranscriber-win-$env:VERSION.zip"
99 | # Compress-Archive -Path "pyTranscriber-$env:VERSION.exe" -DestinationPath "pyTranscriber-win-$env:VERSION.zip"
100 | shell: pwsh
101 |
102 | - name: Upload built executable for Python ${{ matrix.python-version }}
103 | uses: actions/upload-artifact@v4
104 | with:
105 | name: pyTranscriber-win-${{ env.VERSION }}-py${{ matrix.python-version }}
106 | path: ./dist/pyTranscriber-${{ env.VERSION }}.exe
107 |
108 | download:
109 | runs-on: windows-latest
110 | needs: build
111 | steps:
112 | - name: Download built executables
113 | uses: actions/download-artifact@v4
114 | with:
115 | path: ./output
116 |
117 | - name: List downloaded files
118 | run: dir ./output
119 |
--------------------------------------------------------------------------------
/pytranscriber/gui/Português.ts:
--------------------------------------------------------------------------------
1 |
2 |
3 |
4 |
5 | window
6 |
7 |
8 | pyTranscriber - v1.8 - 17/08/2022
9 | pyTranscriber -v1.8 - 17/08/2022
10 |
11 |
12 |
13 | Select file(s)
14 | Selecionar arquivo(s)
15 |
16 |
17 |
18 | Transcribe Audio / Generate Subtitles
19 | Transcrever áudio / Gerar Legendas
20 |
21 |
22 |
23 | Open Output Folder
24 | Abrir Pasta de Destino
25 |
26 |
27 |
28 | Output Location
29 | Pasta de Destino
30 |
31 |
32 |
33 | List of files to generate transcribe audio / generate subtitles
34 | Lista de arquivos para gerar legendas/transcrever áudio
35 |
36 |
37 |
38 | Remove file(s)
39 | Remover arquivo(s)
40 |
41 |
42 |
43 | Cancel
44 | Cancelar
45 |
46 |
47 |
48 | Open output files automatically
49 | Abrir arquivos de saída automaticamente
50 |
51 |
52 |
53 | Audio Language:
54 | Idioma do áudio:
55 |
56 |
57 |
58 | Abo&ut
59 | Sob&re
60 |
61 |
62 |
63 | &Settings
64 | &Configurações
65 |
66 |
67 |
68 | &Language
69 | &Idioma
70 |
71 |
72 |
73 | &License
74 | &Licença
75 |
76 |
77 |
78 | &Funding at Github Sponsors
79 | Patrocínio no GitHub Sponsors
80 |
81 |
82 |
83 | &More about pyTranscriber
84 | &Sobre o pyTranscriber
85 |
86 |
87 |
88 | &Proxy
89 | Proxy
90 |
91 |
92 |
93 | Proxy setting
94 | Configurações de Proxy
95 |
96 |
97 |
98 | English
99 |
100 |
101 |
102 |
103 | 繁體中文 - Chinese Traditional
104 |
105 |
106 |
107 |
108 | 简体中文 - Chinese Simplified
109 |
110 |
111 |
112 |
113 | Português
114 |
115 |
116 |
117 |
118 |
--------------------------------------------------------------------------------
/pytranscriber/gui/proxy.py:
--------------------------------------------------------------------------------
1 | # -*- coding: utf-8 -*-
2 |
3 | # Form implementation generated from reading ui file '.\proxy.ui'
4 | #
5 | # Created by: PyQt5 UI code generator 5.15.4
6 | #
7 | # WARNING: Any manual changes made to this file will be lost when pyuic5 is
8 | # run again. Do not edit this file unless you know what you are doing.
9 |
10 |
11 | from PyQt5 import QtCore, QtGui, QtWidgets
12 |
13 |
class Ui_Dialog(object):
    """Form class for the proxy settings dialog (pyuic5-style layout)."""

    def setupUi(self, Dialog):
        """Build the widgets and layouts on Dialog and connect their signals."""

        def fixed_height_policy(widget):
            # Minimum (horizontal) / Fixed (vertical) policy with zero stretch,
            # carrying over the widget's current height-for-width flag.
            policy = QtWidgets.QSizePolicy(QtWidgets.QSizePolicy.Minimum, QtWidgets.QSizePolicy.Fixed)
            policy.setHorizontalStretch(0)
            policy.setVerticalStretch(0)
            policy.setHeightForWidth(widget.sizePolicy().hasHeightForWidth())
            widget.setSizePolicy(policy)

        # Top-level dialog and its outer layout.
        Dialog.setObjectName("Dialog")
        Dialog.resize(500, 120)
        Dialog.setAutoFillBackground(False)
        Dialog.setSizeGripEnabled(False)
        self.verticalLayout = QtWidgets.QVBoxLayout(Dialog)
        self.verticalLayout.setObjectName("verticalLayout")

        # Untitled group box that holds the radio buttons and the URL row.
        self.groupBox = QtWidgets.QGroupBox(Dialog)
        self.groupBox.setTitle("")
        self.groupBox.setObjectName("groupBox")
        self.verticalLayout_2 = QtWidgets.QVBoxLayout(self.groupBox)
        self.verticalLayout_2.setObjectName("verticalLayout_2")

        # "None" choice, checked when the dialog is built.
        self.radioButtonNone = QtWidgets.QRadioButton(self.groupBox)
        fixed_height_policy(self.radioButtonNone)
        self.radioButtonNone.setChecked(True)
        self.radioButtonNone.setObjectName("radioButtonNone")
        self.verticalLayout_2.addWidget(self.radioButtonNone)

        # "HTTP" choice.
        self.radioButtonHTTP = QtWidgets.QRadioButton(self.groupBox)
        self.radioButtonHTTP.setEnabled(True)
        fixed_height_policy(self.radioButtonHTTP)
        self.radioButtonHTTP.setObjectName("radioButtonHTTP")
        self.verticalLayout_2.addWidget(self.radioButtonHTTP)

        # One grid row: label (column 0), URL field (column 1), Test button (column 2).
        self.gridLayout = QtWidgets.QGridLayout()
        self.gridLayout.setObjectName("gridLayout")

        self.lineEditHttpProxy = QtWidgets.QLineEdit(self.groupBox)
        self.lineEditHttpProxy.setToolTip("")
        self.lineEditHttpProxy.setStatusTip("")
        self.lineEditHttpProxy.setInputMethodHints(QtCore.Qt.ImhUrlCharactersOnly)
        self.lineEditHttpProxy.setObjectName("lineEditHttpProxy")
        self.gridLayout.addWidget(self.lineEditHttpProxy, 0, 1, 1, 1)

        self.label = QtWidgets.QLabel(self.groupBox)
        self.label.setObjectName("label")
        self.gridLayout.addWidget(self.label, 0, 0, 1, 1)

        self.pushButtonTest = QtWidgets.QPushButton(self.groupBox)
        self.pushButtonTest.setEnabled(True)
        self.pushButtonTest.setObjectName("pushButtonTest")
        self.gridLayout.addWidget(self.pushButtonTest, 0, 2, 1, 1)

        self.verticalLayout_2.addLayout(self.gridLayout)
        self.verticalLayout.addWidget(self.groupBox)

        # Ok / Cancel buttons below the group box.
        self.buttonBox = QtWidgets.QDialogButtonBox(Dialog)
        self.buttonBox.setOrientation(QtCore.Qt.Horizontal)
        self.buttonBox.setStandardButtons(QtWidgets.QDialogButtonBox.Cancel|QtWidgets.QDialogButtonBox.Ok)
        self.buttonBox.setObjectName("buttonBox")
        self.verticalLayout.addWidget(self.buttonBox)

        self.retranslateUi(Dialog)

        # Signal -> slot pairs, connected in this order.
        connections = (
            (self.buttonBox.accepted, Dialog.accept),
            (self.buttonBox.rejected, Dialog.reject),
            (self.radioButtonNone.clicked['bool'], self.lineEditHttpProxy.setDisabled),
            (self.radioButtonNone.clicked['bool'], self.pushButtonTest.setDisabled),
            (self.radioButtonHTTP.clicked['bool'], self.pushButtonTest.setEnabled),
            (self.radioButtonHTTP.clicked['bool'], self.lineEditHttpProxy.setEnabled),
        )
        for signal, slot in connections:
            signal.connect(slot)
        QtCore.QMetaObject.connectSlotsByName(Dialog)

    def retranslateUi(self, Dialog):
        """Apply the translated captions to the dialog and its widgets."""
        tr = QtCore.QCoreApplication.translate
        captions = (
            (Dialog.setWindowTitle, "Proxy setting"),
            (self.radioButtonNone.setText, "None"),
            (self.radioButtonHTTP.setText, "HTTP"),
            (self.lineEditHttpProxy.setPlaceholderText, "http://127.0.0.1:1080"),
            (self.label.setText, "URL:"),
            (self.pushButtonTest.setText, "Test"),
        )
        for setter, source_text in captions:
            setter(tr("Dialog", source_text))
85 |
--------------------------------------------------------------------------------
/pytranscriber/gui/proxy/window_proxy.py:
--------------------------------------------------------------------------------
1 | # -*- coding: utf-8 -*-
2 |
3 | # Form implementation generated from reading ui file 'window_proxy.ui'
4 | #
5 | # Created by: PyQt5 UI code generator 5.15.4
6 | #
7 | # WARNING: Any manual changes made to this file will be lost when pyuic5 is
8 | # run again. Do not edit this file unless you know what you are doing.
9 |
10 |
11 | from PyQt5 import QtCore, QtGui, QtWidgets
12 |
13 |
class Ui_Dialog(object):
    """Form class for the proxy settings window (pyuic5-style layout).

    NOTE: this file is generated from window_proxy.ui. The extra
    radioButtonHTTP -> pushButtonTest connection added in setupUi should also
    be added to the .ui file, otherwise regenerating this module drops it.
    """

    def setupUi(self, Dialog):
        """Build the widgets and layouts on Dialog and connect their signals."""
        Dialog.setObjectName("Dialog")
        Dialog.resize(381, 171)
        Dialog.setAutoFillBackground(False)
        Dialog.setSizeGripEnabled(False)
        self.verticalLayout = QtWidgets.QVBoxLayout(Dialog)
        self.verticalLayout.setObjectName("verticalLayout")

        # Untitled group box that holds every control of the dialog.
        self.groupBox = QtWidgets.QGroupBox(Dialog)
        self.groupBox.setTitle("")
        self.groupBox.setObjectName("groupBox")
        self.verticalLayout_2 = QtWidgets.QVBoxLayout(self.groupBox)
        self.verticalLayout_2.setObjectName("verticalLayout_2")

        # "Disabled" choice, checked when the dialog is built.
        self.radioButtonNone = QtWidgets.QRadioButton(self.groupBox)
        sizePolicy = QtWidgets.QSizePolicy(QtWidgets.QSizePolicy.Minimum, QtWidgets.QSizePolicy.Fixed)
        sizePolicy.setHorizontalStretch(0)
        sizePolicy.setVerticalStretch(0)
        sizePolicy.setHeightForWidth(self.radioButtonNone.sizePolicy().hasHeightForWidth())
        self.radioButtonNone.setSizePolicy(sizePolicy)
        font = QtGui.QFont()
        font.setPointSize(9)
        self.radioButtonNone.setFont(font)
        self.radioButtonNone.setChecked(True)
        self.radioButtonNone.setObjectName("radioButtonNone")
        self.verticalLayout_2.addWidget(self.radioButtonNone)

        # "Enabled" choice.
        self.radioButtonHTTP = QtWidgets.QRadioButton(self.groupBox)
        self.radioButtonHTTP.setEnabled(True)
        sizePolicy = QtWidgets.QSizePolicy(QtWidgets.QSizePolicy.Minimum, QtWidgets.QSizePolicy.Fixed)
        sizePolicy.setHorizontalStretch(0)
        sizePolicy.setVerticalStretch(0)
        sizePolicy.setHeightForWidth(self.radioButtonHTTP.sizePolicy().hasHeightForWidth())
        self.radioButtonHTTP.setSizePolicy(sizePolicy)
        font = QtGui.QFont()
        font.setPointSize(9)
        self.radioButtonHTTP.setFont(font)
        self.radioButtonHTTP.setObjectName("radioButtonHTTP")
        self.verticalLayout_2.addWidget(self.radioButtonHTTP)

        # Grid: row 0 = label / URL field / Test button, row 1 = Save button.
        self.gridLayout = QtWidgets.QGridLayout()
        self.gridLayout.setObjectName("gridLayout")
        self.pushButtonTest = QtWidgets.QPushButton(self.groupBox)
        self.pushButtonTest.setEnabled(True)
        font = QtGui.QFont()
        font.setPointSize(9)
        self.pushButtonTest.setFont(font)
        self.pushButtonTest.setObjectName("pushButtonTest")
        self.gridLayout.addWidget(self.pushButtonTest, 0, 2, 1, 1)
        self.lineEditHttpProxy = QtWidgets.QLineEdit(self.groupBox)
        self.lineEditHttpProxy.setToolTip("")
        self.lineEditHttpProxy.setStatusTip("")
        self.lineEditHttpProxy.setInputMethodHints(QtCore.Qt.ImhUrlCharactersOnly)
        self.lineEditHttpProxy.setObjectName("lineEditHttpProxy")
        self.gridLayout.addWidget(self.lineEditHttpProxy, 0, 1, 1, 1)
        self.label = QtWidgets.QLabel(self.groupBox)
        font = QtGui.QFont()
        font.setPointSize(9)
        self.label.setFont(font)
        self.label.setObjectName("label")
        self.gridLayout.addWidget(self.label, 0, 0, 1, 1)
        self.bSave = QtWidgets.QPushButton(self.groupBox)
        font = QtGui.QFont()
        font.setPointSize(9)
        self.bSave.setFont(font)
        self.bSave.setObjectName("bSave")
        self.gridLayout.addWidget(self.bSave, 1, 2, 1, 1)
        self.verticalLayout_2.addLayout(self.gridLayout)
        self.verticalLayout.addWidget(self.groupBox)

        self.retranslateUi(Dialog)
        # Choosing "Disabled" greys out the URL field and the Test button.
        self.radioButtonNone.clicked['bool'].connect(self.lineEditHttpProxy.setDisabled)
        self.radioButtonNone.clicked['bool'].connect(self.pushButtonTest.setDisabled)
        # Choosing "Enabled" must bring BOTH controls back. Without the second
        # connection the Test button stayed disabled for the rest of the
        # dialog's lifetime once "Disabled" had been clicked.
        self.radioButtonHTTP.clicked['bool'].connect(self.lineEditHttpProxy.setEnabled)
        self.radioButtonHTTP.clicked['bool'].connect(self.pushButtonTest.setEnabled)
        QtCore.QMetaObject.connectSlotsByName(Dialog)

    def retranslateUi(self, Dialog):
        """Apply the translated captions to the dialog and its widgets."""
        _translate = QtCore.QCoreApplication.translate
        Dialog.setWindowTitle(_translate("Dialog", "Proxy setting"))
        self.radioButtonNone.setText(_translate("Dialog", "Disabled"))
        self.radioButtonHTTP.setText(_translate("Dialog", "Enabled"))
        self.pushButtonTest.setText(_translate("Dialog", "Test"))
        self.lineEditHttpProxy.setPlaceholderText(_translate("Dialog", "http://127.0.0.1:1080"))
        self.label.setText(_translate("Dialog", "URL:"))
        self.bSave.setText(_translate("Dialog", "Save"))
96 |
--------------------------------------------------------------------------------
/pytranscriber/model/whisper.py:
--------------------------------------------------------------------------------
1 | '''
2 | (C) 2025 Raryel C. Souza
3 | This program is free software: you can redistribute it and/or modify
4 | it under the terms of the GNU General Public License as published by
5 | the Free Software Foundation, either version 3 of the License, or
6 | (at your option) any later version.
7 | This program is distributed in the hope that it will be useful,
8 | but WITHOUT ANY WARRANTY; without even the implied warranty of
9 | MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
10 | GNU General Public License for more details.
11 | You should have received a copy of the GNU General Public License
12 | along with this program. If not, see <https://www.gnu.org/licenses/>.
13 | '''
14 |
class Whisper:
    """Catalogue of the language codes offered for Whisper transcription."""

    # Cache of "<code> - <name>" strings; stays None until first requested.
    supported_languages_list = None
    # Language code -> lowercase English language name.
    supported_languages_dict = {
        "en": "english",
        "zh": "chinese",
        "de": "german",
        "es": "spanish",
        "ru": "russian",
        "ko": "korean",
        "fr": "french",
        "ja": "japanese",
        "pt": "portuguese",
        "tr": "turkish",
        "pl": "polish",
        "ca": "catalan",
        "nl": "dutch",
        "ar": "arabic",
        "sv": "swedish",
        "it": "italian",
        "id": "indonesian",
        "hi": "hindi",
        "fi": "finnish",
        "vi": "vietnamese",
        "he": "hebrew",
        "uk": "ukrainian",
        "el": "greek",
        "ms": "malay",
        "cs": "czech",
        "ro": "romanian",
        "da": "danish",
        "hu": "hungarian",
        "ta": "tamil",
        "no": "norwegian",
        "th": "thai",
        "ur": "urdu",
        "hr": "croatian",
        "bg": "bulgarian",
        "lt": "lithuanian",
        "la": "latin",
        "mi": "maori",
        "ml": "malayalam",
        "cy": "welsh",
        "sk": "slovak",
        "te": "telugu",
        "fa": "persian",
        "lv": "latvian",
        "bn": "bengali",
        "sr": "serbian",
        "az": "azerbaijani",
        "sl": "slovenian",
        "kn": "kannada",
        "et": "estonian",
        "mk": "macedonian",
        "br": "breton",
        "eu": "basque",
        "is": "icelandic",
        "hy": "armenian",
        "ne": "nepali",
        "mn": "mongolian",
        "bs": "bosnian",
        "kk": "kazakh",
        "sq": "albanian",
        "sw": "swahili",
        "gl": "galician",
        "mr": "marathi",
        "pa": "punjabi",
        "si": "sinhala",
        "km": "khmer",
        "sn": "shona",
        "yo": "yoruba",
        "so": "somali",
        "af": "afrikaans",
        "oc": "occitan",
        "ka": "georgian",
        "be": "belarusian",
        "tg": "tajik",
        "sd": "sindhi",
        "gu": "gujarati",
        "am": "amharic",
        "yi": "yiddish",
        "lo": "lao",
        "uz": "uzbek",
        "fo": "faroese",
        "ht": "haitian creole",
        "ps": "pashto",
        "tk": "turkmen",
        "nn": "nynorsk",
        "mt": "maltese",
        "sa": "sanskrit",
        "lb": "luxembourgish",
        "my": "myanmar",
        "bo": "tibetan",
        "tl": "tagalog",
        "mg": "malagasy",
        "as": "assamese",
        "tt": "tatar",
        "haw": "hawaiian",
        "ln": "lingala",
        "ha": "hausa",
        "ba": "bashkir",
        "jw": "javanese",
        "su": "sundanese",
        "yue": "cantonese",
    }

    @staticmethod
    def convert_dict_to_list():
        """Rebuild supported_languages_list from supported_languages_dict."""
        Whisper.supported_languages_list = [
            f"{code} - {name}"
            for code, name in Whisper.supported_languages_dict.items()
        ]

    @staticmethod
    def get_supported_languages():
        """Return the "<code> - <name>" entries, building them on first use."""
        if Whisper.supported_languages_list is not None:
            return Whisper.supported_languages_list
        Whisper.convert_dict_to_list()
        return Whisper.supported_languages_list
--------------------------------------------------------------------------------
/pytranscriber/control/ctr_whisper.py:
--------------------------------------------------------------------------------
1 | '''
2 | (C) 2025 Raryel C. Souza
3 | This program is free software: you can redistribute it and/or modify
4 | it under the terms of the GNU General Public License as published by
5 | the Free Software Foundation, either version 3 of the License, or
6 | (at your option) any later version.
7 | This program is distributed in the hope that it will be useful,
8 | but WITHOUT ANY WARRANTY; without even the implied warranty of
9 | MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
10 | GNU General Public License for more details.
11 | You should have received a copy of the GNU General Public License
12 | along with this program. If not, see <https://www.gnu.org/licenses/>.
13 | '''
14 |
15 | from PyQt5.QtCore import pyqtSignal, QObject
16 | from PyQt5.QtWidgets import QMessageBox
17 | import os
18 | import sys
19 | import whisper
20 | import datetime
21 | import shutil
22 | from pytranscriber.control.ctr_engine import CtrEngine
23 |
24 |
class CtrWhisper(CtrEngine, QObject):
    """Controller that transcribes media with Whisper and writes SRT/TXT output."""

    errorSignal = pyqtSignal(str)  # message text, delivered to show_error_message
    MODEL_DIR = None  # download folder for models; filled in by initialize()

    @classmethod
    def initialize(cls):
        """Initialize MODEL_DIR before using the class."""
        if cls.MODEL_DIR is None:
            cls.MODEL_DIR = cls.get_whisper_model_dir()

    def __init__(self):
        super().__init__()
        self.errorSignal.connect(self.show_error_message)  # Connect signal to slot

    @staticmethod
    def get_whisper_model_dir():
        """Return ~/pytranscriber/whisper_models, creating it when missing."""
        base_path = os.path.expanduser("~/pytranscriber")  # User's home directory
        model_dir = os.path.join(base_path, "whisper_models")
        os.makedirs(model_dir, exist_ok=True)  # Ensure directory exists
        return model_dir

    @staticmethod
    def generate_subtitles(source_path, src_language, outputSRT=None, outputTXT=None, model='base'):
        """Transcribe source_path and save the result as SRT and TXT files.

        Parameters:
            source_path: media file handed to the model's transcribe().
            src_language: language passed to transcribe().
            outputSRT, outputTXT: destinations passed to save_output_file().
            model: Whisper model name passed to whisper.load_model().

        Returns:
            outputSRT on success, or -1 when the operation was canceled.

        Raises:
            FileNotFoundError: if patch_ffmpeg() cannot locate FFmpeg.
        """
        CtrWhisper.patch_ffmpeg()  # Ensure FFmpeg is available

        loaded_model = whisper.load_model(model, download_root=CtrWhisper.MODEL_DIR)
        result = loaded_model.transcribe(source_path, verbose=True, language=src_language)

        if CtrEngine.is_operation_canceled():
            return -1

        segments = result["segments"]
        CtrWhisper.save_output_file(outputSRT, CtrWhisper.generate_srt_file_content(segments))
        CtrWhisper.save_output_file(outputTXT, CtrWhisper.generate_txt_file_content(segments))

        return outputSRT

    @staticmethod
    def show_error_message(message):
        """Displays the error message in a PyQt5 QMessageBox."""
        msg_box = QMessageBox()
        msg_box.setIcon(QMessageBox.Critical)
        msg_box.setWindowTitle("Error")
        msg_box.setText(message)
        msg_box.exec_()

    @staticmethod
    def generate_srt_file_content(transcribed_segments):
        """Render segments (dicts with "start", "end", "text") as SRT text."""

        def format_timestamp(seconds):
            """Convert seconds to SRT-compliant timestamp (HH:MM:SS,mmm)."""
            # Round to whole milliseconds FIRST and split afterwards. Rounding
            # only the fractional part could yield 1000 ms (e.g. 0.9996 s used
            # to render as "00:00:00,1000"), which is not a valid timestamp.
            total_millis = int(round(seconds * 1000))
            hours, remainder = divmod(total_millis, 3600000)
            minutes, remainder = divmod(remainder, 60000)
            secs, millis = divmod(remainder, 1000)
            return f"{hours:02d}:{minutes:02d}:{secs:02d},{millis:03d}"

        cues = []
        for i, s in enumerate(transcribed_segments, start=1):
            start_time = format_timestamp(s["start"])
            end_time = format_timestamp(s["end"])
            cues.append(f"{i}\n{start_time} --> {end_time}\n{s['text'].strip()}\n\n")

        return "".join(cues)

    @staticmethod
    def generate_txt_file_content(transcribed_segments):
        """Concatenate the "text" of every segment, in order, with no separator."""
        return "".join(str(s["text"]) for s in transcribed_segments)

    # forces whisper to use the embedded ffmpeg in frozen app
    @staticmethod
    def patch_ffmpeg():
        """Ensure FFmpeg is correctly detected and patched for PyInstaller frozen apps.

        Safe to call repeatedly: PATH gains the FFmpeg folder at most once and
        shutil.which is wrapped only on the first call.

        Raises:
            FileNotFoundError: if no FFmpeg executable can be located.
        """
        if getattr(sys, "frozen", False):  # Running as a bundled executable
            ffmpeg_path = os.path.join(sys._MEIPASS, "ffmpeg")
        else:
            ffmpeg_path = shutil.which("ffmpeg")  # Use system-wide FFmpeg

        if not ffmpeg_path:
            raise FileNotFoundError("FFmpeg not found!")

        os.environ["FFMPEG_PATH"] = ffmpeg_path

        # Append the folder only when it is not listed yet; this method runs
        # once per transcription and used to grow PATH on every call.
        ffmpeg_dir = os.path.dirname(ffmpeg_path)
        search_path = os.environ.get("PATH", "")
        if ffmpeg_dir not in search_path.split(os.pathsep):
            os.environ["PATH"] = search_path + os.pathsep + ffmpeg_dir if search_path else ffmpeg_dir

        # Already patched by an earlier call: do not stack another wrapper.
        if getattr(shutil.which, "_pytranscriber_ffmpeg_patch", False):
            return

        # Monkey-patch shutil.which to always return the correct FFmpeg path
        original_which = shutil.which

        def patched_which(cmd, *args, **kwargs):
            if cmd == "ffmpeg":
                return ffmpeg_path
            return original_which(cmd, *args, **kwargs)

        patched_which._pytranscriber_ffmpeg_patch = True
        shutil.which = patched_which  # Apply the patch
--------------------------------------------------------------------------------
/whisper/audio.py:
--------------------------------------------------------------------------------
1 | import os
2 | from functools import lru_cache
3 | from subprocess import CalledProcessError, run
4 | from typing import Optional, Union
5 |
6 | import numpy as np
7 | import torch
8 | import torch.nn.functional as F
9 |
10 | from .utils import exact_div
11 |
# hard-coded audio hyperparameters
SAMPLE_RATE = 16000  # default sample rate load_audio() asks ffmpeg to resample to
N_FFT = 400  # FFT size used for the STFT in log_mel_spectrogram()
HOP_LENGTH = 160  # hop between successive STFT frames
CHUNK_LENGTH = 30  # length of one chunk, in seconds
N_SAMPLES = CHUNK_LENGTH * SAMPLE_RATE  # 480000 samples in a 30-second chunk
N_FRAMES = exact_div(N_SAMPLES, HOP_LENGTH)  # 3000 frames in a mel spectrogram input

N_SAMPLES_PER_TOKEN = HOP_LENGTH * 2  # the initial convolutions have stride 2
FRAMES_PER_SECOND = exact_div(SAMPLE_RATE, HOP_LENGTH)  # 10ms per audio frame
TOKENS_PER_SECOND = exact_div(SAMPLE_RATE, N_SAMPLES_PER_TOKEN)  # 20ms per audio token
23 |
24 |
def load_audio(file: str, sr: int = SAMPLE_RATE):
    """
    Open an audio file and read as mono waveform, resampling as necessary

    Parameters
    ----------
    file: str
        The audio file to open

    sr: int
        The sample rate to resample the audio if necessary

    Returns
    -------
    A NumPy array containing the audio waveform, in float32 dtype.

    Raises
    ------
    RuntimeError
        If ffmpeg exits with a non-zero status; the message carries its stderr.
    """

    # This launches a subprocess to decode audio while down-mixing
    # and resampling as necessary.  Requires the ffmpeg CLI in PATH.
    # fmt: off
    cmd = [
        "ffmpeg",
        "-nostdin",
        "-threads", "0",
        "-i", file,
        "-f", "s16le",
        "-ac", "1",
        "-acodec", "pcm_s16le",
        "-ar", str(sr),
        "-"
    ]
    # fmt: on
    try:
        out = run(cmd, capture_output=True, check=True).stdout
    except CalledProcessError as e:
        # stderr is not guaranteed to be valid UTF-8; a strict decode could
        # raise UnicodeDecodeError here and hide the actual ffmpeg failure.
        detail = e.stderr.decode(errors="replace")
        raise RuntimeError(f"Failed to load audio: {detail}") from e

    # 16-bit signed PCM -> float32 scaled by 1/32768
    return np.frombuffer(out, np.int16).flatten().astype(np.float32) / 32768.0
63 |
64 |
def pad_or_trim(array, length: int = N_SAMPLES, *, axis: int = -1):
    """
    Cut or right-pad `array` along `axis` so it holds exactly `length` entries
    there (N_SAMPLES by default, as expected by the encoder). Accepts a torch
    Tensor or a NumPy array; an input that already has the right size is
    returned unchanged.
    """
    current = array.shape[axis]
    is_tensor = torch.is_tensor(array)

    if current > length:
        # too long: keep the first `length` entries along the axis
        if is_tensor:
            keep = torch.arange(length, device=array.device)
            return array.index_select(dim=axis, index=keep)
        return array.take(indices=range(length), axis=axis)

    if current < length:
        # too short: pad only at the end of the chosen axis
        widths = [(0, 0)] * array.ndim
        widths[axis] = (0, length - current)
        if is_tensor:
            # F.pad wants a flat list ordered from the last dimension backwards
            flat_widths = [w for pair in reversed(widths) for w in pair]
            return F.pad(array, flat_widths)
        return np.pad(array, widths)

    return array
89 |
90 |
@lru_cache(maxsize=None)
def mel_filters(device, n_mels: int) -> torch.Tensor:
    """
    Load the mel filterbank matrix used to project an STFT onto a Mel
    spectrogram and move it to `device`. Results are memoized per
    (device, n_mels) pair.

    Shipping the matrix as an asset avoids a librosa dependency; it was saved with:

        np.savez_compressed(
            "mel_filters.npz",
            mel_80=librosa.filters.mel(sr=16000, n_fft=400, n_mels=80),
            mel_128=librosa.filters.mel(sr=16000, n_fft=400, n_mels=128),
        )
    """
    assert n_mels in {80, 128}, f"Unsupported n_mels: {n_mels}"

    assets_dir = os.path.join(os.path.dirname(__file__), "assets")
    archive_path = os.path.join(assets_dir, "mel_filters.npz")
    with np.load(archive_path, allow_pickle=False) as archive:
        bank = archive[f"mel_{n_mels}"]
        return torch.from_numpy(bank).to(device)
108 |
109 |
def log_mel_spectrogram(
    audio: Union[str, np.ndarray, torch.Tensor],
    n_mels: int = 80,
    padding: int = 0,
    device: Optional[Union[str, torch.device]] = None,
):
    """
    Compute the log-Mel spectrogram of an audio waveform.

    Parameters
    ----------
    audio: Union[str, np.ndarray, torch.Tensor], shape = (*)
        The path to an audio file (decoded through load_audio), or a NumPy
        array or Tensor containing the audio waveform in 16 kHz

    n_mels: int
        The number of Mel-frequency filters; forwarded to mel_filters(),
        which accepts 80 or 128

    padding: int
        Number of zero samples to pad to the right

    device: Optional[Union[str, torch.device]]
        If given, the audio tensor is moved to this device before STFT

    Returns
    -------
    torch.Tensor, shape = (n_mels, n_frames)
        A Tensor that contains the Mel spectrogram
    """
    # Normalize the input to a tensor: path -> ndarray -> Tensor.
    if isinstance(audio, str):
        audio = load_audio(audio)
    if not torch.is_tensor(audio):
        audio = torch.from_numpy(audio)

    if device is not None:
        audio = audio.to(device)
    if padding > 0:
        audio = F.pad(audio, (0, padding))

    hann = torch.hann_window(N_FFT).to(audio.device)
    spectrum = torch.stft(audio, N_FFT, HOP_LENGTH, window=hann, return_complex=True)
    # drop the last frame and take the squared magnitude
    power = spectrum[..., :-1].abs() ** 2

    mel_spec = mel_filters(audio.device, n_mels) @ power

    # log10 with a floor, limit the range to 8.0 below the peak, then rescale
    log_spec = torch.clamp(mel_spec, min=1e-10).log10()
    floor = log_spec.max() - 8.0
    log_spec = torch.maximum(log_spec, floor)
    return (log_spec + 4.0) / 4.0
158 |
--------------------------------------------------------------------------------
/pytranscriber/gui/proxy/window_proxy.ui:
--------------------------------------------------------------------------------
1 |
2 |
3 | Dialog
4 |
5 |
6 |
7 | 0
8 | 0
9 | 381
10 | 171
11 |
12 |
13 |
14 | Proxy setting
15 |
16 |
17 | false
18 |
19 |
20 | false
21 |
22 |
23 |
24 |
25 |
26 |
27 |
28 |
29 |
30 |
31 |
32 |
33 | 0
34 | 0
35 |
36 |
37 |
38 |
39 | 9
40 |
41 |
42 |
43 | Disabled
44 |
45 |
46 | true
47 |
48 |
49 |
50 |
51 |
52 |
53 | true
54 |
55 |
56 |
57 | 0
58 | 0
59 |
60 |
61 |
62 |
63 | 9
64 |
65 |
66 |
67 | Enabled
68 |
69 |
70 |
71 |
72 |
73 |
74 |
75 |
76 | true
77 |
78 |
79 |
80 | 9
81 |
82 |
83 |
84 | Test
85 |
86 |
87 |
88 |
89 |
90 |
91 |
92 |
93 |
94 |
95 |
96 |
97 | Qt::ImhUrlCharactersOnly
98 |
99 |
100 | http://127.0.0.1:1080
101 |
102 |
103 |
104 |
105 |
106 |
107 |
108 | 9
109 |
110 |
111 |
112 | URL:
113 |
114 |
115 |
116 |
117 |
118 |
119 |
120 | 9
121 |
122 |
123 |
124 | Save
125 |
126 |
127 |
128 |
129 |
130 |
131 |
132 |
133 |
134 |
135 |
136 |
137 |
138 | radioButtonNone
139 | clicked(bool)
140 | lineEditHttpProxy
141 | setDisabled(bool)
142 |
143 |
144 | 130
145 | 19
146 |
147 |
148 | 111
149 | 60
150 |
151 |
152 |
153 |
154 | radioButtonNone
155 | clicked(bool)
156 | pushButtonTest
157 | setDisabled(bool)
158 |
159 |
160 | 130
161 | 19
162 |
163 |
164 | 219
165 | 60
166 |
167 |
168 |
169 |
170 | radioButtonHTTP
171 | clicked(bool)
172 | lineEditHttpProxy
173 | setEnabled(bool)
174 |
175 |
176 | 130
177 | 36
178 |
179 |
180 | 111
181 | 60
182 |
183 |
184 |
185 |
186 |
187 |
--------------------------------------------------------------------------------
/pytranscriber/control/thread_exec_autosub.py:
--------------------------------------------------------------------------------
1 | '''
2 | (C) 2025 Raryel C. Souza
3 | This program is free software: you can redistribute it and/or modify
4 | it under the terms of the GNU General Public License as published by
5 | the Free Software Foundation, either version 3 of the License, or
6 | (at your option) any later version.
7 | This program is distributed in the hope that it will be useful,
8 | but WITHOUT ANY WARRANTY; without even the implied warranty of
9 | MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
10 | GNU General Public License for more details.
11 | You should have received a copy of the GNU General Public License
12 | along with this program. If not, see <https://www.gnu.org/licenses/>.
13 | '''
14 |
15 | from PyQt5.QtCore import QThread
16 | from PyQt5.QtCore import pyqtSignal
17 | from pathlib import Path
18 | from pytranscriber.util.srtparser import SRTParser
19 | from pytranscriber.util.util import MyUtil
20 | from pytranscriber.control.ctr_autosub import Ctr_Autosub
21 | import os
22 | import traceback
23 |
24 |
class Thread_Exec_Autosub(QThread):
    """Worker thread that runs autosub over every selected media file.

    Progress, errors and GUI state changes are reported only through the
    signals below.
    """

    signalLockGUI = pyqtSignal()                # emitted before processing starts
    signalResetGUIAfterCancel = pyqtSignal()    # emitted when the run ended canceled
    signalResetGUIAfterSuccess = pyqtSignal()   # emitted when the run ended normally
    signalProgress = pyqtSignal(str, int)       # (task description, percent)
    signalProgressFileYofN = pyqtSignal(str)    # "File Y of N" text
    signalErrorMsg = pyqtSignal(str)            # error text

    def __init__(self, objParamAutosub):
        """Store the parameters object (files, output folder, language, proxies)."""
        QThread.__init__(self)
        self.objParamAutosub = objParamAutosub
        self.running = True

    def __updateProgressFileYofN(self, currentIndex, countFiles):
        """Emit the 1-based "File Y of N" progress text."""
        self.signalProgressFileYofN.emit("File " + str(currentIndex+1) + " of " + str(countFiles))

    def listenerProgress(self, string, percent):
        """Forward a progress update to signalProgress."""
        self.signalProgress.emit(string, percent)

    def __generatePathOutputFile(self, sourceFile):
        """Return [srt_path, txt_path] inside the output folder for sourceFile."""
        # extract the filename without extension from the path
        base = os.path.basename(sourceFile)
        # [0] is filename, [1] is file extension
        fileName = os.path.splitext(base)[0]

        # the output files have the same name as the input file, located in
        # the output folder, with extensions .srt and .txt
        pathOutputFolder = Path(self.objParamAutosub.outputFolder)
        outputFileSRT = pathOutputFolder / (fileName + ".srt")
        outputFileTXT = pathOutputFolder / (fileName + ".txt")
        return [outputFileSRT, outputFileTXT]

    def __runAutosubForMedia(self, index, langCode):
        """Generate subtitles for listFiles[index] and post-process the result."""
        sourceFile = self.objParamAutosub.listFiles[index]
        outputFileSRT, outputFileTXT = self.__generatePathOutputFile(sourceFile)

        # run autosub
        try:
            fOutput = Ctr_Autosub.generate_subtitles(source_path=sourceFile,
                                                     output=outputFileSRT,
                                                     src_language=langCode,
                                                     listener_progress=self.listenerProgress,
                                                     proxies=self.objParamAutosub.proxies)
        except Exception:
            # Thread boundary: report the full traceback and stop working on
            # this file. Falling through used to hit an unbound fOutput and
            # kill the thread with UnboundLocalError.
            error_msg = f"""Error! Unable to generate subtitles: {traceback.format_exc()}"""
            self.signalErrorMsg.emit(error_msg)
            return

        # if nothing was returned
        if not fOutput:
            self.signalErrorMsg.emit("Error! Unable to generate subtitles for file " + sourceFile + ".")
        elif fOutput != -1:
            # -1 means the operation was canceled; anything else is a success

            # update the progress message
            self.listenerProgress("Finished", 100)

            # parse the .srt subtitle file and export its text to a .txt file
            SRTParser.extractTextFromSRT(str(outputFileSRT))

            if self.objParamAutosub.boolOpenOutputFilesAuto:
                # open both SRT and TXT output files
                MyUtil.open_file(outputFileTXT)
                MyUtil.open_file(outputFileSRT)

    def __loopSelectedFiles(self):
        """Process every selected file unless the user cancels."""
        self.signalLockGUI.emit()

        langCode = self.objParamAutosub.langCode

        # if the output directory does not exist, create it
        pathOutputFolder = Path(self.objParamAutosub.outputFolder)
        try:
            os.makedirs(pathOutputFolder, exist_ok=True)
        except OSError:
            # Not swallowed: the isdir() check below reports the problem.
            pass

        # if the output path is not a directory
        if not os.path.isdir(pathOutputFolder):
            # force the user to select a different output directory
            self.signalErrorMsg.emit("Error! Invalid output folder. Please choose another one.")
        else:
            # go ahead with autosub process
            nFiles = len(self.objParamAutosub.listFiles)
            for i in range(nFiles):
                # do not continue the loop if the user clicked the cancel button
                if Ctr_Autosub.is_operation_canceled():
                    break
                self.__updateProgressFileYofN(i, nFiles)
                self.__runAutosubForMedia(i, langCode)

            # if the operation was canceled the file list is not cleared
            if Ctr_Autosub.is_operation_canceled():
                self.signalResetGUIAfterCancel.emit()
            else:
                self.signalResetGUIAfterSuccess.emit()

    def run(self):
        """Thread entry point: reset the cancel flag and process the files."""
        Ctr_Autosub.init()
        self.__loopSelectedFiles()
        self.running = False

    def cancel(self):
        """Request cancellation of the running autosub operation."""
        Ctr_Autosub.cancel_operation()
--------------------------------------------------------------------------------
/pytranscriber/control/ctr_autosub.py:
--------------------------------------------------------------------------------
1 | '''
2 | (C) 2025 Raryel C. Souza
3 | This program is free software: you can redistribute it and/or modify
4 | it under the terms of the GNU General Public License as published by
5 | the Free Software Foundation, either version 3 of the License, or
6 | (at your option) any later version.
7 | This program is distributed in the hope that it will be useful,
8 | but WITHOUT ANY WARRANTY; without even the implied warranty of
9 | MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
10 | GNU General Public License for more details.
11 | You should have received a copy of the GNU General Public License
12 | along with this program. If not, see <https://www.gnu.org/licenses/>.
13 | '''
14 |
15 | from autosub import FLACConverter
16 | from autosub import SpeechRecognizer
17 | from autosub import extract_audio
18 | from autosub import find_speech_regions
19 | from autosub import DEFAULT_CONCURRENCY
20 | from autosub import DEFAULT_SUBTITLE_FORMAT
21 | from autosub import GOOGLE_SPEECH_API_KEY
22 | from autosub.formatters import FORMATTERS
23 |
24 | import multiprocessing
25 | import time
26 | import os
27 |
28 | from pytranscriber.util.util import MyUtil
29 |
30 |
class Ctr_Autosub:
    """Runs the autosub pipeline and lets another thread cancel it.

    All state is kept in class attributes, so only one run is tracked at a time.
    """

    cancel = False    # set by cancel_operation(), cleared by init()/generate_subtitles()
    step = 0          # 0 = before the first converted region, 1 = FLAC conversion, 2 = recognition
    pool = None       # multiprocessing.Pool of the current step, if one was created
    _running = False  # True only while generate_subtitles() is executing

    @staticmethod
    def init():
        """Clear the cancel flag."""
        Ctr_Autosub.cancel = False

    @staticmethod
    def is_operation_canceled():
        """Return True when cancellation has been requested."""
        return Ctr_Autosub.cancel

    @staticmethod
    def output_progress(listener_progress, str_task, progress_percent):
        """Forward progress to listener_progress unless a cancel was requested."""
        # only update progress if not requested to cancel
        if not Ctr_Autosub.cancel:
            listener_progress(str_task, progress_percent)

    @staticmethod
    def cancel_operation():
        """Request cancellation and stop the worker pool of the current step."""
        Ctr_Autosub.cancel = True

        # Step 0 involves ffmpeg and cannot be stopped safely, so wait for it.
        # The wait is tied to an active run: when generate_subtitles() is not
        # executing (never started, already failed, or finished without
        # creating a pool) this loop used to spin forever.
        while Ctr_Autosub._running and Ctr_Autosub.step == 0:
            time.sleep(0.1)

        pool = Ctr_Autosub.pool
        if pool is None:
            # no pool was created for this run: nothing to stop
            return

        if Ctr_Autosub.step == 1:
            # let the workers finish their current conversion work first
            pool.close()
        else:
            # terminate the workers immediately
            pool.terminate()
        pool.join()

    @staticmethod
    def generate_subtitles(  # pylint: disable=too-many-locals,too-many-arguments
            source_path,
            src_language,
            listener_progress,
            output=None,
            concurrency=DEFAULT_CONCURRENCY,
            subtitle_file_format=DEFAULT_SUBTITLE_FORMAT,
            proxies=None
    ):
        """
        Given an input audio/video file, generate subtitles in the specified language and format.

        listener_progress is called with (task description, percent) while
        the two pool steps run. output is the destination file; when empty,
        the source path with the subtitle format as extension is used.

        Returns the destination path, or -1 when the operation was canceled.
        """
        # windows not support forkserver... only spawn
        if os.name != "nt" and "Darwin" in os.uname():
            # necessary for running on MacOS
            # method can be set only once, otherwise crash
            # from python 3.8 above the default for macos is spawn and not fork
            if 'spawn' != multiprocessing.get_start_method(allow_none=True):
                multiprocessing.set_start_method('spawn')

        Ctr_Autosub.cancel = False
        Ctr_Autosub.step = 0
        Ctr_Autosub.pool = None  # never reuse the pool of a previous run
        Ctr_Autosub._running = True
        try:
            audio_filename, audio_rate = extract_audio(source_path)

            regions = find_speech_regions(audio_filename)

            converter = FLACConverter(source_path=audio_filename)
            recognizer = SpeechRecognizer(language=src_language, rate=audio_rate,
                                          api_key=GOOGLE_SPEECH_API_KEY, proxies=proxies)
            transcripts = []
            if regions:
                try:
                    # NOTE(review): the early "return -1" exits below leave
                    # audio_filename on disk; pool workers may still be reading it.
                    if Ctr_Autosub.cancel:
                        return -1

                    str_task_1 = "Step 1 of 2: Converting speech regions to FLAC files "
                    len_regions = len(regions)
                    extracted_regions = []
                    Ctr_Autosub.pool = multiprocessing.Pool(concurrency)
                    for i, extracted_region in enumerate(Ctr_Autosub.pool.imap(converter, regions)):
                        Ctr_Autosub.step = 1
                        extracted_regions.append(extracted_region)
                        progress_percent = MyUtil.percentage(i, len_regions)
                        Ctr_Autosub.output_progress(listener_progress, str_task_1, progress_percent)
                    if Ctr_Autosub.cancel:
                        return -1
                    Ctr_Autosub.pool.close()
                    Ctr_Autosub.pool.join()

                    str_task_2 = "Step 2 of 2: Performing speech recognition "
                    Ctr_Autosub.pool = multiprocessing.Pool(concurrency)
                    for i, transcript in enumerate(Ctr_Autosub.pool.imap(recognizer, extracted_regions)):
                        Ctr_Autosub.step = 2
                        transcripts.append(transcript)
                        progress_percent = MyUtil.percentage(i, len_regions)
                        Ctr_Autosub.output_progress(listener_progress, str_task_2, progress_percent)

                    if Ctr_Autosub.cancel:
                        return -1
                    Ctr_Autosub.pool.close()
                    Ctr_Autosub.pool.join()

                except KeyboardInterrupt:
                    # (the former pbar.finish() call referenced an attribute
                    # that is never defined and raised AttributeError here)
                    if Ctr_Autosub.pool is not None:
                        Ctr_Autosub.pool.terminate()
                        Ctr_Autosub.pool.join()
                    raise

            # keep only the regions that produced a transcript
            timed_subtitles = [(r, t) for r, t in zip(regions, transcripts) if t]
            formatter = FORMATTERS.get(subtitle_file_format)
            formatted_subtitles = formatter(timed_subtitles)

            dest = output

            if not dest:
                base = os.path.splitext(source_path)[0]
                dest = "{base}.{format}".format(base=base, format=subtitle_file_format)

            with open(dest, 'wb') as output_file:
                output_file.write(formatted_subtitles.encode("utf-8"))

            os.remove(audio_filename)

            if Ctr_Autosub.cancel:
                return -1

            # No pool exists when no speech regions were found.
            if Ctr_Autosub.pool is not None:
                Ctr_Autosub.pool.close()
                Ctr_Autosub.pool.join()

            return dest
        finally:
            # lets cancel_operation() stop waiting once this run is over
            Ctr_Autosub._running = False
161 |
--------------------------------------------------------------------------------
/pytranscriber/gui/proxy.ui:
--------------------------------------------------------------------------------
1 |
2 |
3 | Dialog
4 |
5 |
6 |
7 | 0
8 | 0
9 | 381
10 | 115
11 |
12 |
13 |
14 | Proxy setting
15 |
16 |
17 | false
18 |
19 |
20 | false
21 |
22 |
23 |
24 |
25 |
26 |
27 |
28 |
29 |
30 |
31 |
32 |
33 | 0
34 | 0
35 |
36 |
37 |
38 |
39 | 9
40 |
41 |
42 |
43 | None
44 |
45 |
46 | true
47 |
48 |
49 |
50 |
51 |
52 |
53 | true
54 |
55 |
56 |
57 | 0
58 | 0
59 |
60 |
61 |
62 |
63 | 9
64 |
65 |
66 |
67 | HTTP
68 |
69 |
70 |
71 |
72 |
73 |
74 |
75 |
76 |
77 |
78 |
79 |
80 |
81 |
82 | Qt::ImhUrlCharactersOnly
83 |
84 |
85 | http://127.0.0.1:1080
86 |
87 |
88 |
89 |
90 |
91 |
92 |
93 | 9
94 |
95 |
96 |
97 | URL:
98 |
99 |
100 |
101 |
102 |
103 |
104 | true
105 |
106 |
107 |
108 | 9
109 |
110 |
111 |
112 | Test
113 |
114 |
115 |
116 |
117 |
118 |
119 |
120 |
121 |
122 |
123 |
124 | Qt::Horizontal
125 |
126 |
127 | QDialogButtonBox::Cancel|QDialogButtonBox::Ok
128 |
129 |
130 |
131 |
132 |
133 |
134 |
135 |
136 | buttonBox
137 | accepted()
138 | Dialog
139 | accept()
140 |
141 |
142 | 224
143 | 100
144 |
145 |
146 | 157
147 | 108
148 |
149 |
150 |
151 |
152 | buttonBox
153 | rejected()
154 | Dialog
155 | reject()
156 |
157 |
158 | 255
159 | 102
160 |
161 |
162 | 261
163 | 108
164 |
165 |
166 |
167 |
168 | radioButtonNone
169 | clicked(bool)
170 | lineEditHttpProxy
171 | setDisabled(bool)
172 |
173 |
174 | 130
175 | 19
176 |
177 |
178 | 111
179 | 60
180 |
181 |
182 |
183 |
184 | radioButtonNone
185 | clicked(bool)
186 | pushButtonTest
187 | setDisabled(bool)
188 |
189 |
190 | 130
191 | 19
192 |
193 |
194 | 219
195 | 60
196 |
197 |
198 |
199 |
200 | radioButtonHTTP
201 | clicked(bool)
202 | pushButtonTest
203 | setEnabled(bool)
204 |
205 |
206 | 130
207 | 36
208 |
209 |
210 | 219
211 | 60
212 |
213 |
214 |
215 |
216 | radioButtonHTTP
217 | clicked(bool)
218 | lineEditHttpProxy
219 | setEnabled(bool)
220 |
221 |
222 | 130
223 | 36
224 |
225 |
226 | 111
227 | 60
228 |
229 |
230 |
231 |
232 |
233 |
--------------------------------------------------------------------------------
/whisper/__init__.py:
--------------------------------------------------------------------------------
1 | import hashlib
2 | import io
3 | import os
4 | import urllib
5 | import warnings
6 | from typing import List, Optional, Union
7 |
8 | import torch
9 | from tqdm import tqdm
10 |
11 | from .audio import load_audio, log_mel_spectrogram, pad_or_trim
12 | from .decoding import DecodingOptions, DecodingResult, decode, detect_language
13 | from .model import ModelDimensions, Whisper
14 | from .transcribe import transcribe
15 | from .version import __version__
16 |
17 | _MODELS = {
18 | "tiny.en": "https://openaipublic.azureedge.net/main/whisper/models/d3dd57d32accea0b295c96e26691aa14d8822fac7d9d27d5dc00b4ca2826dd03/tiny.en.pt",
19 | "tiny": "https://openaipublic.azureedge.net/main/whisper/models/65147644a518d12f04e32d6f3b26facc3f8dd46e5390956a9424a650c0ce22b9/tiny.pt",
20 | "base.en": "https://openaipublic.azureedge.net/main/whisper/models/25a8566e1d0c1e2231d1c762132cd20e0f96a85d16145c3a00adf5d1ac670ead/base.en.pt",
21 | "base": "https://openaipublic.azureedge.net/main/whisper/models/ed3a0b6b1c0edf879ad9b11b1af5a0e6ab5db9205f891f668f8b0e6c6326e34e/base.pt",
22 | "small.en": "https://openaipublic.azureedge.net/main/whisper/models/f953ad0fd29cacd07d5a9eda5624af0f6bcf2258be67c92b79389873d91e0872/small.en.pt",
23 | "small": "https://openaipublic.azureedge.net/main/whisper/models/9ecf779972d90ba49c06d968637d720dd632c55bbf19d441fb42bf17a411e794/small.pt",
24 | "medium.en": "https://openaipublic.azureedge.net/main/whisper/models/d7440d1dc186f76616474e0ff0b3b6b879abc9d1a4926b7adfa41db2d497ab4f/medium.en.pt",
25 | "medium": "https://openaipublic.azureedge.net/main/whisper/models/345ae4da62f9b3d59415adc60127b97c714f32e89e936602e85993674d08dcb1/medium.pt",
26 | "large-v1": "https://openaipublic.azureedge.net/main/whisper/models/e4b87e7e0bf463eb8e6956e646f1e277e901512310def2c24bf0e11bd3c28e9a/large-v1.pt",
27 | "large-v2": "https://openaipublic.azureedge.net/main/whisper/models/81f7c96c852ee8fc832187b0132e569d6c3065a3252ed18e56effd0b6a73e524/large-v2.pt",
28 | "large-v3": "https://openaipublic.azureedge.net/main/whisper/models/e5b1a55b89c1367dacf97e3e19bfd829a01529dbfdeefa8caeb59b3f1b81dadb/large-v3.pt",
29 | "large": "https://openaipublic.azureedge.net/main/whisper/models/e5b1a55b89c1367dacf97e3e19bfd829a01529dbfdeefa8caeb59b3f1b81dadb/large-v3.pt",
30 | "large-v3-turbo": "https://openaipublic.azureedge.net/main/whisper/models/aff26ae408abcba5fbf8813c21e62b0941638c5f6eebfb145be0c9839262a19a/large-v3-turbo.pt",
31 | "turbo": "https://openaipublic.azureedge.net/main/whisper/models/aff26ae408abcba5fbf8813c21e62b0941638c5f6eebfb145be0c9839262a19a/large-v3-turbo.pt",
32 | }
33 |
34 | # base85-encoded (n_layers, n_heads) boolean arrays indicating the cross-attention heads that are
35 | # highly correlated to the word-level timing, i.e. the alignment between audio and text tokens.
36 | _ALIGNMENT_HEADS = {
37 | "tiny.en": b"ABzY8J1N>@0{>%R00Bk>$p{7v037`oCl~+#00",
38 | "tiny": b"ABzY8bu8Lr0{>%RKn9Fp%m@SkK7Kt=7ytkO",
39 | "base.en": b"ABzY8;40c<0{>%RzzG;p*o+Vo09|#PsxSZm00",
40 | "base": b"ABzY8KQ!870{>%RzyTQH3`Q^yNP!>##QT-?_)10{>%RpeA61k&I|OI3I$65C{;;pbCHh0B{qLQ;+}v00",
42 | "small": b"ABzY8DmU6=0{>%Rpa?J`kvJ6qF(V^F86#Xh7JUGMK}P%R7%R7}kK1fFL7w6%<-Pf*t^=N)Qr&0RR9",
45 | "large-v1": b"ABzY8r9j$a0{>%R7#4sLmoOs{s)o3~84-RPdcFk!JR%R7=D0pU<_bnWW*tkYAhobTNnu$jnkEkXqp)j;w1Tzk)UH3X%SZd&fFZ2fC2yj",
47 | "large-v3": b"ABzY8gWO1E0{>%R7(9S+Kn!D~%ngiGaR?*L!iJG9p-nab0JQ=-{D1-g00",
48 | "large": b"ABzY8gWO1E0{>%R7(9S+Kn!D~%ngiGaR?*L!iJG9p-nab0JQ=-{D1-g00",
49 | "large-v3-turbo": b"ABzY8j^C+e0{>%RARaKHP%t(lGR*)0g!tONPyhe`",
50 | "turbo": b"ABzY8j^C+e0{>%RARaKHP%t(lGR*)0g!tONPyhe`",
51 | }
52 |
53 |
def _download(url: str, root: str, in_memory: bool) -> Union[bytes, str]:
    """
    Fetch a model checkpoint into `root`, reusing a valid cached copy.

    The expected SHA256 digest is taken from the second-to-last path
    component of `url`; the file name is the last component.

    Parameters
    ----------
    url : str
        download location of the checkpoint
    root : str
        directory holding the cached checkpoints (created when missing)
    in_memory : bool
        return the checkpoint bytes instead of the path of the cached file

    Returns
    -------
    Union[bytes, str]
        the checkpoint content when `in_memory` is true, otherwise its path

    Raises
    ------
    RuntimeError
        if the target path exists but is not a regular file, or if the
        downloaded data does not match the expected checksum
    """
    # `import urllib` alone does not guarantee the `request` submodule is loaded
    import urllib.request

    os.makedirs(root, exist_ok=True)

    expected_sha256 = url.split("/")[-2]
    download_target = os.path.join(root, os.path.basename(url))

    if os.path.exists(download_target) and not os.path.isfile(download_target):
        raise RuntimeError(f"{download_target} exists and is not a regular file")

    if os.path.isfile(download_target):
        with open(download_target, "rb") as f:
            model_bytes = f.read()
        if hashlib.sha256(model_bytes).hexdigest() == expected_sha256:
            return model_bytes if in_memory else download_target
        else:
            warnings.warn(
                f"{download_target} exists, but the SHA256 checksum does not match; re-downloading the file"
            )

    with urllib.request.urlopen(url) as source, open(download_target, "wb") as output:
        # the header may be absent (e.g. chunked transfer); tqdm accepts total=None
        content_length = source.info().get("Content-Length")
        with tqdm(
            total=int(content_length) if content_length is not None else None,
            ncols=80,
            unit="iB",
            unit_scale=True,
            unit_divisor=1024,
        ) as loop:
            while True:
                buffer = source.read(8192)
                if not buffer:
                    break

                output.write(buffer)
                loop.update(len(buffer))

    # re-read through a context manager so the handle is closed deterministically
    with open(download_target, "rb") as f:
        model_bytes = f.read()
    if hashlib.sha256(model_bytes).hexdigest() != expected_sha256:
        raise RuntimeError(
            "Model has been downloaded but the SHA256 checksum does not match. Please retry loading the model."
        )

    return model_bytes if in_memory else download_target
96 |
97 |
def available_models() -> List[str]:
    """List the official model names known to this package."""
    return [model_name for model_name in _MODELS]
101 |
102 |
def load_model(
    name: str,
    device: Optional[Union[str, torch.device]] = None,
    download_root: Optional[str] = None,
    in_memory: bool = False,
) -> Whisper:
    """
    Load a Whisper ASR model

    Parameters
    ----------
    name : str
        one of the official model names listed by `whisper.available_models()`, or
        path to a model checkpoint containing the model dimensions and the model state_dict.
    device : Union[str, torch.device]
        the PyTorch device to put the model into; defaults to "cuda" when
        available, otherwise "cpu"
    download_root: str
        path to download the model files; by default, it uses "~/.cache/whisper"
        (or "$XDG_CACHE_HOME/whisper" when that variable is set)
    in_memory: bool
        whether to preload the model weights into host memory

    Returns
    -------
    model : Whisper
        The Whisper ASR model instance

    Raises
    ------
    RuntimeError
        if `name` is neither an official model name nor an existing file
    """

    if device is None:
        device = "cuda" if torch.cuda.is_available() else "cpu"
    if download_root is None:
        default = os.path.join(os.path.expanduser("~"), ".cache")
        download_root = os.path.join(os.getenv("XDG_CACHE_HOME", default), "whisper")

    if name in _MODELS:
        checkpoint_file = _download(_MODELS[name], download_root, in_memory)
        alignment_heads = _ALIGNMENT_HEADS[name]
    elif os.path.isfile(name):
        if in_memory:
            # read through a context manager so the handle is not leaked
            with open(name, "rb") as f:
                checkpoint_file = f.read()
        else:
            checkpoint_file = name
        # no alignment-head table is known for a user-supplied checkpoint
        alignment_heads = None
    else:
        raise RuntimeError(
            f"Model {name} not found; available models = {available_models()}"
        )

    with (
        io.BytesIO(checkpoint_file) if in_memory else open(checkpoint_file, "rb")
    ) as fp:
        # NOTE(review): torch.load unpickles the file; a checkpoint path from an
        # untrusted source can execute arbitrary code -- only load trusted files.
        checkpoint = torch.load(fp, map_location=device)
    # drop the (possibly large) in-memory copy of the checkpoint
    del checkpoint_file

    dims = ModelDimensions(**checkpoint["dims"])
    model = Whisper(dims)
    model.load_state_dict(checkpoint["model_state_dict"])

    if alignment_heads is not None:
        model.set_alignment_heads(alignment_heads)

    return model.to(device)
161 |
--------------------------------------------------------------------------------
/pytranscriber/model/google_speech.py:
--------------------------------------------------------------------------------
1 | '''
2 | (C) 2025 Raryel C. Souza
3 | This program is free software: you can redistribute it and/or modify
4 | it under the terms of the GNU General Public License as published by
5 | the Free Software Foundation, either version 3 of the License, or
6 | (at your option) any later version.
7 | This program is distributed in the hope that it will be useful,
8 | but WITHOUT ANY WARRANTY; without even the implied warranty of
9 | MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
10 | GNU General Public License for more details.
11 | You should have received a copy of the GNU General Public License
along with this program. If not, see <https://www.gnu.org/licenses/>.
13 | '''
14 |
class Google_Speech:
    """Catalogue of the audio languages offered for the Google Speech engine."""

    # Every entry has the form "<language code> - <display name>".
    # Fixed relative to the previous list: Ireland used the United Kingdom
    # code (en-GB), Israel used "ar-IS" (IS is Iceland), "su" was labelled
    # "Sudanese", and two entries lacked the space after the dash.
    supported_languages_list = [
        "en-US - English (United States)",
        "cmn-Hans-CN - Chinese (Simplified, China)",
        "cmn-Hant-TW - Chinese (Traditional, Taiwan)",
        "yue-Hant-HK - Cantonese (Traditional, HK)",
        "en-AU - English (Australia)",
        "en-CA - English (Canada)",
        "en-GB - English (United Kingdom)",
        "en-HK - English (Hong Kong)",
        "en-IN - English (India)",
        "en-IE - English (Ireland)",
        "en-NZ - English (New Zealand)",
        "en-PH - English (Philippines)",
        "en-SG - English (Singapore)",
        "af - Afrikaans",
        "ar - Arabic",
        "ar-DZ - Arabic (Algeria)",
        "ar-EG - Arabic (Egypt)",
        "ar-IQ - Arabic (Iraq)",
        "ar-IL - Arabic (Israel)",
        "ar-JO - Arabic (Jordan)",
        "ar-KW - Arabic (Kuwait)",
        "ar-LB - Arabic (Lebanon)",
        "ar-MA - Arabic (Morocco)",
        "ar-OM - Arabic (Oman)",
        "ar-QA - Arabic (Qatar)",
        "ar-SA - Arabic (Saudi Arabia)",
        "ar-PS - Arabic (State of Palestine)",
        "ar-TN - Arabic (Tunisia)",
        "ar-AE - Arabic (United Arab Emirates)",
        "ar-YE - Arabic (Yemen)",
        "az - Azerbaijani",
        "be - Belarusian",
        "bg - Bulgarian",
        "bn - Bengali",
        "bs - Bosnian",
        "ca - Catalan",
        "ceb - Cebuano",
        "cs - Czech",
        "cy - Welsh",
        "da - Danish",
        "de - German",
        "de-AT - German (Austria)",
        "de-CH - German (Switzerland)",
        "el - Greek",
        "eo - Esperanto",
        "es-ES - Spanish (Spain)",
        "es-AR - Spanish (Argentina)",
        "es-BO - Spanish (Bolivia)",
        "es-CL - Spanish (Chile)",
        "es-CO - Spanish (Colombia)",
        "es-CR - Spanish (Costa Rica)",
        "es-DO - Spanish (Dominican Republic)",
        "es-EC - Spanish (Ecuador)",
        "es-GT - Spanish (Guatemala)",
        "es-HN - Spanish (Honduras)",
        "es-MX - Spanish (Mexico)",
        "es-NI - Spanish (Nicaragua)",
        "es-PA - Spanish (Panama)",
        "es-PE - Spanish (Peru)",
        "es-PR - Spanish (Puerto Rico)",
        "es-PY - Spanish (Paraguay)",
        "es-SV - Spanish (El Salvador)",
        "es-UY - Spanish (Uruguay)",
        "es-US - Spanish (United States)",
        "es-VE - Spanish (Venezuela)",
        "et - Estonian",
        "eu - Basque",
        "fa - Persian",
        "fil-PH - Filipino (Philippines)",
        "fi - Finnish",
        "fr - French",
        "fr-BE - French (Belgium)",
        "fr-CA - French (Canada)",
        "fr-CH - French (Switzerland)",
        "ga - Irish",
        "gl - Galician",
        "gu - Gujarati",
        "ha - Hausa",
        "hi - Hindi",
        "hmn - Hmong",
        "hr - Croatian",
        "ht - Haitian Creole",
        "hu - Hungarian",
        "hy - Armenian",
        "id - Indonesian",
        "ig - Igbo",
        "is - Icelandic",
        "it - Italian",
        "it-CH - Italian (Switzerland)",
        "iw - Hebrew",
        "ja - Japanese",
        "jw - Javanese",
        "ka - Georgian",
        "kk - Kazakh",
        "km - Khmer",
        "kn - Kannada",
        "ko - Korean",
        "la - Latin",
        "lo - Lao",
        "lt - Lithuanian",
        "lv - Latvian",
        "mg - Malagasy",
        "mi - Maori",
        "mk - Macedonian",
        "ml - Malayalam",
        "mn - Mongolian",
        "mr - Marathi",
        "ms - Malay",
        "mt - Maltese",
        "my - Myanmar (Burmese)",
        "ne - Nepali",
        "nl - Dutch",
        "no - Norwegian",
        "ny - Chichewa",
        "pa - Punjabi",
        "pl - Polish",
        "pt-BR - Portuguese (Brazil)",
        "pt-PT - Portuguese (Portugal)",
        "ro - Romanian",
        "ru - Russian",
        "si - Sinhala",
        "sk - Slovak",
        "sl - Slovenian",
        "so - Somali",
        "sq - Albanian",
        "sr - Serbian",
        "st - Sesotho",
        "su - Sundanese",
        "sv - Swedish",
        "sw - Swahili",
        "ta - Tamil",
        "ta-IN - Tamil (India)",
        "ta-MY - Tamil (Malaysia)",
        "ta-SG - Tamil (Singapore)",
        "ta-LK - Tamil (Sri Lanka)",
        "te - Telugu",
        "tg - Tajik",
        "th - Thai",
        "tl - Filipino",
        "tr - Turkish",
        "uk - Ukrainian",
        "ur - Urdu",
        "uz - Uzbek",
        "vi - Vietnamese",
        "yi - Yiddish",
        "yo - Yoruba",
        "zu - Zulu",
    ]

    @staticmethod
    def get_supported_languages():
        """Return the shared list of "<code> - <name>" language entries."""
        return Google_Speech.supported_languages_list
--------------------------------------------------------------------------------
/whisper/utils.py:
--------------------------------------------------------------------------------
1 | import json
2 | import os
3 | import re
4 | import sys
5 | import zlib
6 | from typing import Callable, List, Optional, TextIO
7 |
system_encoding = sys.getdefaultencoding()

if system_encoding == "utf-8":

    def make_safe(string):
        """Return `string` as is: UTF-8 can encode any Unicode code point."""
        return string

else:

    def make_safe(string):
        """
        Replace every character the system default encoding cannot represent
        with '?', avoiding UnicodeEncodeError
        (https://github.com/openai/whisper/discussions/729).
        """
        encoded = string.encode(system_encoding, errors="replace")
        return encoded.decode(system_encoding)
22 |
23 |
def exact_div(x, y):
    """Return `x // y`, asserting that `y` divides `x` without remainder."""
    quotient, remainder = divmod(x, y)
    assert remainder == 0
    return quotient
27 |
28 |
def str2bool(string):
    """
    Convert the literal "True" / "False" into the matching bool.

    Raises ValueError for any other input.
    """
    str2val = {"True": True, "False": False}
    if string not in str2val:
        raise ValueError(f"Expected one of {set(str2val.keys())}, got {string}")
    return str2val[string]
35 |
36 |
def optional_int(string):
    """Parse `string` as an int, mapping the literal "None" to None."""
    if string == "None":
        return None
    return int(string)
39 |
40 |
def optional_float(string):
    """Parse `string` as a float, mapping the literal "None" to None."""
    if string == "None":
        return None
    return float(string)
43 |
44 |
def compression_ratio(text) -> float:
    """Return the UTF-8 byte length of `text` divided by its zlib-compressed length."""
    raw = text.encode("utf-8")
    compressed = zlib.compress(raw)
    return len(raw) / len(compressed)
48 |
49 |
def format_timestamp(
    seconds: float, always_include_hours: bool = False, decimal_marker: str = "."
):
    """
    Render a non-negative duration in seconds as `[HH:]MM:SS<marker>mmm`.

    The hours field is emitted when `always_include_hours` is set or when the
    duration reaches one hour; `decimal_marker` separates seconds from
    milliseconds.
    """
    assert seconds >= 0, "non-negative timestamp expected"
    total_ms = round(seconds * 1000.0)

    # peel off hours, minutes and seconds; what remains are the milliseconds
    hours, total_ms = divmod(total_ms, 3_600_000)
    minutes, total_ms = divmod(total_ms, 60_000)
    whole_seconds, millis = divmod(total_ms, 1_000)

    if always_include_hours or hours > 0:
        hours_marker = f"{hours:02d}:"
    else:
        hours_marker = ""
    return f"{hours_marker}{minutes:02d}:{whole_seconds:02d}{decimal_marker}{millis:03d}"
69 |
70 |
def get_start(segments: List[dict]) -> Optional[float]:
    """
    Return the start time of the first word found in `segments`.

    When no segment holds any word, fall back to the first segment's own
    start, or None for an empty list.
    """
    # the fallback is computed up front, exactly as an eager `next()` default
    fallback = segments[0]["start"] if segments else None
    for segment in segments:
        for word in segment["words"]:
            return word["start"]
    return fallback
76 |
77 |
def get_end(segments: List[dict]) -> Optional[float]:
    """
    Return the end time of the last word found in `segments`.

    When no segment holds any word, fall back to the last segment's own end,
    or None for an empty list.
    """
    # the fallback is computed up front, exactly as an eager `next()` default
    fallback = segments[-1]["end"] if segments else None
    for segment in reversed(segments):
        for word in reversed(segment["words"]):
            return word["end"]
    return fallback
83 |
84 |
class ResultWriter:
    """
    Base class for transcript writers.

    Calling an instance opens `<output_dir>/<audio file stem>.<extension>`
    as UTF-8 text and delegates the serialization to `write_result`, which
    subclasses must implement.
    """

    extension: str

    def __init__(self, output_dir: str):
        self.output_dir = output_dir

    def __call__(
        self, result: dict, audio_path: str, options: Optional[dict] = None, **kwargs
    ):
        # output name: the audio file's name with its last extension swapped
        stem, _ = os.path.splitext(os.path.basename(audio_path))
        target = os.path.join(self.output_dir, stem + "." + self.extension)

        with open(target, "w", encoding="utf-8") as handle:
            self.write_result(result, file=handle, options=options, **kwargs)

    def write_result(
        self, result: dict, file: TextIO, options: Optional[dict] = None, **kwargs
    ):
        """Serialize `result` into the open text file `file` (abstract)."""
        raise NotImplementedError
107 |
108 |
class WriteTXT(ResultWriter):
    """Writer producing plain text: one stripped segment text per line."""

    extension: str = "txt"

    def write_result(
        self, result: dict, file: TextIO, options: Optional[dict] = None, **kwargs
    ):
        for entry in result["segments"]:
            line = entry["text"].strip()
            print(line, file=file, flush=True)
117 |
118 |
class SubtitlesWriter(ResultWriter):
    """
    Shared logic of the subtitle writers.

    Subclasses set `always_include_hours` and `decimal_marker`, which control
    how `format_timestamp` renders cue times.
    """

    always_include_hours: bool
    decimal_marker: str

    def iterate_result(
        self,
        result: dict,
        options: Optional[dict] = None,
        *,
        max_line_width: Optional[int] = None,
        max_line_count: Optional[int] = None,
        highlight_words: bool = False,
        max_words_per_line: Optional[int] = None,
    ):
        """
        Yield `(start, end, text)` cues with already formatted timestamps.

        Each keyword argument falls back to the entry of the same name in
        `options` when it is not given (or falsy).

        When the first segment carries a "words" list, cues are assembled
        from the word timings, wrapped by `max_line_width` /
        `max_words_per_line` and split by `max_line_count`; original segment
        boundaries are preserved unless both `max_line_count` and
        `max_line_width` are set. With `highlight_words`, one cue is emitted
        per word with that word wrapped in `<u>...</u>`, plus a plain cue for
        every gap between words. Without word timings, one cue per segment is
        produced, with "-->" in the text replaced by "->".
        """
        options = options or {}
        max_line_width = max_line_width or options.get("max_line_width")
        max_line_count = max_line_count or options.get("max_line_count")
        highlight_words = highlight_words or options.get("highlight_words", False)
        max_words_per_line = max_words_per_line or options.get("max_words_per_line")
        preserve_segments = max_line_count is None or max_line_width is None
        # 1000 acts as "effectively unlimited" when no limit was requested
        max_line_width = max_line_width or 1000
        max_words_per_line = max_words_per_line or 1000

        def iterate_subtitles():
            line_len = 0
            line_count = 1
            # the next subtitle to yield (a list of word timings with whitespace)
            subtitle: List[dict] = []
            last: float = get_start(result["segments"]) or 0.0
            for segment in result["segments"]:
                chunk_index = 0
                words_count = max_words_per_line
                while chunk_index < len(segment["words"]):
                    remaining_words = len(segment["words"]) - chunk_index
                    if max_words_per_line > len(segment["words"]) - chunk_index:
                        words_count = remaining_words
                    for i, original_timing in enumerate(
                        segment["words"][chunk_index : chunk_index + words_count]
                    ):
                        # work on a copy so the caller's result is not modified
                        timing = original_timing.copy()
                        long_pause = (
                            not preserve_segments and timing["start"] - last > 3.0
                        )
                        has_room = line_len + len(timing["word"]) <= max_line_width
                        seg_break = i == 0 and len(subtitle) > 0 and preserve_segments
                        if (
                            line_len > 0
                            and has_room
                            and not long_pause
                            and not seg_break
                        ):
                            # line continuation
                            line_len += len(timing["word"])
                        else:
                            # new line
                            timing["word"] = timing["word"].strip()
                            if (
                                len(subtitle) > 0
                                and max_line_count is not None
                                and (long_pause or line_count >= max_line_count)
                                or seg_break
                            ):
                                # subtitle break
                                yield subtitle
                                subtitle = []
                                line_count = 1
                            elif line_len > 0:
                                # line break
                                line_count += 1
                                timing["word"] = "\n" + timing["word"]
                            line_len = len(timing["word"].strip())
                        subtitle.append(timing)
                        last = timing["start"]
                    chunk_index += max_words_per_line
            if len(subtitle) > 0:
                yield subtitle

        if len(result["segments"]) > 0 and "words" in result["segments"][0]:
            for subtitle in iterate_subtitles():
                subtitle_start = self.format_timestamp(subtitle[0]["start"])
                subtitle_end = self.format_timestamp(subtitle[-1]["end"])
                subtitle_text = "".join([word["word"] for word in subtitle])
                if highlight_words:
                    last = subtitle_start
                    all_words = [timing["word"] for timing in subtitle]
                    for i, this_word in enumerate(subtitle):
                        start = self.format_timestamp(this_word["start"])
                        end = self.format_timestamp(this_word["end"])
                        if last != start:
                            # gap before this word: show the cue without highlight
                            yield last, start, subtitle_text

                        # underline the active word, keeping its leading
                        # whitespace/newline outside of the tag
                        yield start, end, "".join(
                            [
                                re.sub(r"^(\s*)(.*)$", r"\1<u>\2</u>", word)
                                if j == i
                                else word
                                for j, word in enumerate(all_words)
                            ]
                        )
                        last = end
                else:
                    yield subtitle_start, subtitle_end, subtitle_text
        else:
            for segment in result["segments"]:
                segment_start = self.format_timestamp(segment["start"])
                segment_end = self.format_timestamp(segment["end"])
                # "-->" is the cue timing separator, so it must not occur in the text
                segment_text = segment["text"].strip().replace("-->", "->")
                yield segment_start, segment_end, segment_text

    def format_timestamp(self, seconds: float):
        """Format `seconds` with this writer's hour and decimal-marker settings."""
        return format_timestamp(
            seconds=seconds,
            always_include_hours=self.always_include_hours,
            decimal_marker=self.decimal_marker,
        )
234 |
235 |
class WriteVTT(SubtitlesWriter):
    """Writer for WebVTT: a "WEBVTT" header followed by one block per cue."""

    extension: str = "vtt"
    always_include_hours: bool = False
    decimal_marker: str = "."

    def write_result(
        self, result: dict, file: TextIO, options: Optional[dict] = None, **kwargs
    ):
        print("WEBVTT\n", file=file)
        for cue_start, cue_end, cue_text in self.iterate_result(
            result, options, **kwargs
        ):
            block = f"{cue_start} --> {cue_end}\n{cue_text}\n"
            print(block, file=file, flush=True)
247 |
248 |
class WriteSRT(SubtitlesWriter):
    """Writer for SubRip (.srt): numbered cues with comma decimal markers."""

    extension: str = "srt"
    always_include_hours: bool = True
    decimal_marker: str = ","

    def write_result(
        self, result: dict, file: TextIO, options: Optional[dict] = None, **kwargs
    ):
        # cue numbers are 1-based
        cue_number = 0
        for cue_start, cue_end, cue_text in self.iterate_result(
            result, options, **kwargs
        ):
            cue_number += 1
            block = f"{cue_number}\n{cue_start} --> {cue_end}\n{cue_text}\n"
            print(block, file=file, flush=True)
261 |
262 |
class WriteTSV(ResultWriter):
    """
    Write a transcript to a file in TSV (tab-separated values) format containing lines like:
    <start time in integer milliseconds>\t<end time in integer milliseconds>\t<transcript text>

    Using integer milliseconds as start and end times means there's no chance of interference from
    an environment setting a language encoding that causes the decimal in a floating point number
    to appear as a comma; also is faster and more efficient to parse & store, e.g., in C++.
    """

    extension: str = "tsv"

    def write_result(
        self, result: dict, file: TextIO, options: Optional[dict] = None, **kwargs
    ):
        # header row
        print("start", "end", "text", sep="\t", file=file)
        for row in result["segments"]:
            print(round(1000 * row["start"]), end="\t", file=file)
            print(round(1000 * row["end"]), end="\t", file=file)
            # tabs inside the text would break the column layout
            print(row["text"].strip().replace("\t", " "), file=file, flush=True)
283 |
284 |
class WriteJSON(ResultWriter):
    """Writer dumping the whole result dictionary as JSON."""

    extension: str = "json"

    def write_result(
        self, result: dict, file: TextIO, options: Optional[dict] = None, **kwargs
    ):
        json.dump(result, fp=file)
292 |
293 |
def get_writer(
    output_format: str, output_dir: str
) -> Callable[[dict, TextIO, dict], None]:
    """
    Return a writer callable for `output_format` that writes into `output_dir`.

    `output_format` is one of "txt", "vtt", "srt", "tsv", "json", or "all";
    "all" yields a function that runs every writer in turn. Any other value
    raises KeyError.
    """
    writers = {
        "txt": WriteTXT,
        "vtt": WriteVTT,
        "srt": WriteSRT,
        "tsv": WriteTSV,
        "json": WriteJSON,
    }

    if output_format != "all":
        return writers[output_format](output_dir)

    all_writers = [writer_cls(output_dir) for writer_cls in writers.values()]

    def write_all(
        result: dict, file: TextIO, options: Optional[dict] = None, **kwargs
    ):
        # arguments are forwarded positionally to each ResultWriter.__call__
        for single_writer in all_writers:
            single_writer(result, file, options, **kwargs)

    return write_all
317 |
--------------------------------------------------------------------------------
/pytranscriber/gui/main/window_main.ui:
--------------------------------------------------------------------------------
1 |
2 |
3 | window
4 |
5 |
6 |
7 | 0
8 | 0
9 | 1045
10 | 610
11 |
12 |
13 |
14 | pyTranscriber v2.1 - 13/07/2025
15 |
16 |
17 |
18 |
19 |
20 | 10
21 | 10
22 | 141
23 | 34
24 |
25 |
26 |
27 | Select file(s)
28 |
29 |
30 |
31 |
32 | false
33 |
34 |
35 |
36 | 200
37 | 380
38 | 341
39 | 34
40 |
41 |
42 |
43 | Transcribe Audio / Generate Subtitles
44 |
45 |
46 |
47 |
48 |
49 | 20
50 | 470
51 | 1021
52 | 23
53 |
54 |
55 |
56 | 0
57 |
58 |
59 |
60 |
61 |
62 | 20
63 | 420
64 | 871
65 | 41
66 |
67 |
68 |
69 |
70 |
71 |
72 |
73 |
74 |
75 | 550
76 | 380
77 | 241
78 | 34
79 |
80 |
81 |
82 | Open Output Folder
83 |
84 |
85 |
86 |
87 |
88 | 10
89 | 180
90 | 141
91 | 34
92 |
93 |
94 |
95 | Output Location
96 |
97 |
98 |
99 |
100 |
101 | 160
102 | 180
103 | 861
104 | 32
105 |
106 |
107 |
108 |
109 |
110 |
111 | true
112 |
113 |
114 |
115 |
116 |
117 | 160
118 | 10
119 | 871
120 | 161
121 |
122 |
123 |
124 | List of files to generate transcribe audio / generate subtitles
125 |
126 |
127 | Qt::AlignLeading|Qt::AlignLeft|Qt::AlignTop
128 |
129 |
130 | false
131 |
132 |
133 | false
134 |
135 |
136 |
137 |
138 | 10
139 | 30
140 | 851
141 | 121
142 |
143 |
144 |
145 |
146 |
147 |
148 |
149 | 10
150 | 50
151 | 141
152 | 34
153 |
154 |
155 |
156 | Remove file(s)
157 |
158 |
159 |
160 |
161 |
162 | 20
163 | 500
164 | 131
165 | 41
166 |
167 |
168 |
169 |
170 |
171 |
172 |
173 |
174 |
175 | 470
176 | 510
177 | 108
178 | 36
179 |
180 |
181 |
182 | Cancel
183 |
184 |
185 |
186 |
187 |
188 | 10
189 | 220
190 | 291
191 | 32
192 |
193 |
194 |
195 | Open output files automatically
196 |
197 |
198 | true
199 |
200 |
201 |
202 |
203 |
204 | 200
205 | 250
206 | 591
207 | 38
208 |
209 |
210 |
211 |
212 |
213 |
214 | Audio Language:
215 |
216 |
217 |
218 |
219 |
220 |
221 | QComboBox::AdjustToContents
222 |
223 |
224 |
225 |
226 |
227 |
228 |
229 |
230 | 200
231 | 290
232 | 591
233 | 41
234 |
235 |
236 |
237 |
238 |
239 |
240 | Engine:
241 |
242 |
243 |
244 |
245 |
246 |
247 | Google Speech (cloud processing)
248 |
249 |
250 | true
251 |
252 |
253 |
254 |
255 |
256 |
257 | true
258 |
259 |
260 | openAI Whisper (local processing)
261 |
262 |
263 | true
264 |
265 |
266 |
267 |
268 |
269 |
270 |
271 |
272 | 200
273 | 330
274 | 611
275 | 31
276 |
277 |
278 |
279 |
280 |
281 |
282 | true
283 |
284 |
285 | Models:
286 |
287 |
288 |
289 |
290 |
291 |
292 | Tiny
293 |
294 |
295 | true
296 |
297 |
298 |
299 |
300 |
301 |
302 | true
303 |
304 |
305 | Base
306 |
307 |
308 | true
309 |
310 |
311 |
312 |
313 |
314 |
315 | true
316 |
317 |
318 | Small
319 |
320 |
321 | true
322 |
323 |
324 |
325 |
326 |
327 |
328 | true
329 |
330 |
331 | Medium
332 |
333 |
334 | true
335 |
336 |
337 |
338 |
339 |
340 |
341 | true
342 |
343 |
344 | Large
345 |
346 |
347 | true
348 |
349 |
350 |
351 |
352 |
353 |
354 |
355 |
356 |
357 | 0
358 | 0
359 | 1045
360 | 23
361 |
362 |
363 |
364 |
365 | Abo&ut
366 |
367 |
368 |
369 |
370 |
371 |
372 |
373 | &Settings
374 |
375 |
376 |
377 |
378 |
379 | &Language
380 |
381 |
382 |
383 |
384 |
385 |
386 |
387 |
388 |
389 |
390 |
391 |
392 |
393 | &License
394 |
395 |
396 |
397 |
398 | &Funding at Github Sponsors
399 |
400 |
401 |
402 |
403 | &About pyTranscriber
404 |
405 |
406 |
407 |
408 | &Proxy
409 |
410 |
411 | Proxy setting
412 |
413 |
414 |
415 |
416 | English
417 |
418 |
419 |
420 |
421 | 繁體中文 - Chinese Traditional
422 |
423 |
424 |
425 |
426 | 简体中文 - Chinese Simplified
427 |
428 |
429 |
430 |
431 | Português
432 |
433 |
434 |
435 |
436 |
437 |
438 |
--------------------------------------------------------------------------------
/whisper/model.py:
--------------------------------------------------------------------------------
1 | import base64
2 | import gzip
3 | from contextlib import contextmanager
4 | from dataclasses import dataclass
5 | from typing import Dict, Iterable, Optional, Tuple
6 |
7 | import numpy as np
8 | import torch
9 | import torch.nn.functional as F
10 | from torch import Tensor, nn
11 |
12 | from .decoding import decode as decode_function
13 | from .decoding import detect_language as detect_language_function
14 | from .transcribe import transcribe as transcribe_function
15 |
# Probe for torch's fused attention kernel; when the import fails the name is
# bound to None and SDPA_AVAILABLE records that it cannot be used.
try:
    from torch.nn.functional import scaled_dot_product_attention
except (ImportError, RuntimeError, OSError):
    scaled_dot_product_attention = None
    SDPA_AVAILABLE = False
else:
    SDPA_AVAILABLE = True
23 |
24 |
@dataclass
class ModelDimensions:
    """
    Size hyperparameters of a Whisper model.

    `load_model` builds an instance from the `dims` mapping stored in a
    checkpoint and passes it to the `Whisper` constructor.
    """

    # audio-side sizes -- presumably consumed by the audio encoder; TODO confirm
    n_mels: int
    n_audio_ctx: int
    n_audio_state: int
    n_audio_head: int
    n_audio_layer: int
    # text-side sizes -- presumably consumed by the text decoder; TODO confirm
    n_vocab: int
    n_text_ctx: int
    n_text_state: int
    n_text_head: int
    n_text_layer: int
37 |
38 |
class LayerNorm(nn.LayerNorm):
    """Layer normalization computed in float32 and cast back to the input dtype."""

    def forward(self, x: Tensor) -> Tensor:
        input_dtype = x.dtype
        normalized = super().forward(x.float())
        return normalized.type(input_dtype)
42 |
43 |
class Linear(nn.Linear):
    """Linear layer that casts its weight and bias to the input dtype per call."""

    def forward(self, x: Tensor) -> Tensor:
        weight = self.weight.to(x.dtype)
        bias = self.bias
        if bias is not None:
            bias = bias.to(x.dtype)
        return F.linear(x, weight, bias)
51 |
52 |
class Conv1d(nn.Conv1d):
    """1-D convolution whose weight and bias are cast to the input's dtype before use."""

    def _conv_forward(
        self, x: Tensor, weight: Tensor, bias: Optional[Tensor]
    ) -> Tensor:
        """Delegate to the parent convolution with parameters converted to ``x.dtype``."""
        target_dtype = x.dtype
        cast_bias = bias.to(target_dtype) if bias is not None else None
        return super()._conv_forward(x, weight.to(target_dtype), cast_bias)
60 |
61 |
def sinusoids(length, channels, max_timescale=10000):
    """Returns sinusoids for positional embedding

    Parameters
    ----------
    length : int
        number of positions (rows of the returned table)
    channels : int
        embedding width; must be even, the first half holds sines and the second cosines
    max_timescale : number
        largest wavelength factor; frequencies are spaced geometrically from 1 down to
        ``1 / max_timescale``

    Returns
    -------
    torch.Tensor, shape = (length, channels)
    """
    assert channels % 2 == 0, "channels must be even"
    n_freqs = channels // 2
    # With a single frequency (channels == 2) the spacing denominator `n_freqs - 1` is
    # zero, which made the increment infinite and the whole table NaN. Clamping it to 1
    # leaves every wider table unchanged and yields plain sin(t)/cos(t) for that case.
    log_timescale_increment = np.log(max_timescale) / max(n_freqs - 1, 1)
    inv_timescales = torch.exp(-log_timescale_increment * torch.arange(n_freqs))
    scaled_time = torch.arange(length)[:, np.newaxis] * inv_timescales[np.newaxis, :]
    return torch.cat([torch.sin(scaled_time), torch.cos(scaled_time)], dim=1)
69 |
70 |
@contextmanager
def disable_sdpa():
    """Temporarily set ``MultiHeadAttention.use_sdpa`` to False.

    The flag's previous value is put back when the ``with`` body finishes,
    including when it exits through an exception.
    """
    saved_flag = MultiHeadAttention.use_sdpa
    MultiHeadAttention.use_sdpa = False
    try:
        yield
    finally:
        MultiHeadAttention.use_sdpa = saved_flag
79 |
80 |
class MultiHeadAttention(nn.Module):
    """Multi-head attention used for both self-attention and cross-attention.

    Self-attention is selected by leaving ``xa`` as None (keys/values come from ``x``);
    cross-attention is selected by passing ``xa`` (keys/values come from ``xa``).
    """

    # Class-wide switch shared by every instance; `disable_sdpa()` flips it to False
    # to force the manual softmax branch of `qkv_attention`.
    use_sdpa = True

    def __init__(self, n_state: int, n_head: int):
        """Create the four ``n_state -> n_state`` projections for ``n_head`` heads."""
        super().__init__()
        self.n_head = n_head
        self.query = Linear(n_state, n_state)
        # the key projection is the only one created without a bias term
        self.key = Linear(n_state, n_state, bias=False)
        self.value = Linear(n_state, n_state)
        self.out = Linear(n_state, n_state)

    def forward(
        self,
        x: Tensor,
        xa: Optional[Tensor] = None,
        mask: Optional[Tensor] = None,
        kv_cache: Optional[dict] = None,
    ):
        """Attend from ``x`` to itself, or to ``xa`` when it is given.

        Parameters
        ----------
        x : Tensor
            query source; also the key/value source when ``xa`` is None
        xa : Optional[Tensor]
            key/value source for cross-attention
        mask : Optional[Tensor]
            additive attention mask, forwarded to `qkv_attention`
        kv_cache : Optional[dict]
            dictionary keyed by the key/value projection modules; when it already holds
            an entry for ``self.key`` and ``xa`` is given, the stored tensors are reused

        Returns
        -------
        (Tensor, Optional[Tensor])
            the output-projected attention result and the second value returned by
            `qkv_attention` (None on the SDPA branch)
        """
        q = self.query(x)

        if kv_cache is None or xa is None or self.key not in kv_cache:
            # hooks, if installed (i.e. kv_cache is not None), will prepend the cached kv tensors;
            # otherwise, perform key/value projections for self- or cross-attention as usual.
            k = self.key(x if xa is None else xa)
            v = self.value(x if xa is None else xa)
        else:
            # for cross-attention, calculate keys and values once and reuse in subsequent calls.
            k = kv_cache[self.key]
            v = kv_cache[self.value]

        wv, qk = self.qkv_attention(q, k, v, mask)
        return self.out(wv), qk

    def qkv_attention(
        self, q: Tensor, k: Tensor, v: Tensor, mask: Optional[Tensor] = None
    ) -> Tuple[torch.Tensor, Optional[torch.Tensor]]:
        """Split ``q``/``k``/``v`` into heads and compute attention.

        Returns the per-head results merged back into the last dimension, plus the
        detached float pre-softmax scores (manual branch) or None (SDPA branch).
        """
        n_batch, n_ctx, n_state = q.shape
        # applied to q and k separately in the manual branch, so their product is
        # scaled by head_dim ** -0.5 overall
        scale = (n_state // self.n_head) ** -0.25
        # (batch, ctx, state) -> (batch, head, ctx, head_dim)
        q = q.view(*q.shape[:2], self.n_head, -1).permute(0, 2, 1, 3)
        k = k.view(*k.shape[:2], self.n_head, -1).permute(0, 2, 1, 3)
        v = v.view(*v.shape[:2], self.n_head, -1).permute(0, 2, 1, 3)

        if SDPA_AVAILABLE and MultiHeadAttention.use_sdpa:
            # the mask tensor itself is not handed over; its presence (together with
            # more than one query position) only switches on causal masking
            a = scaled_dot_product_attention(
                q, k, v, is_causal=mask is not None and n_ctx > 1
            )
            out = a.permute(0, 2, 1, 3).flatten(start_dim=2)
            qk = None
        else:
            qk = (q * scale) @ (k * scale).transpose(-1, -2)
            if mask is not None:
                qk = qk + mask[:n_ctx, :n_ctx]
            # softmax is evaluated in float32, then cast back to the working dtype
            qk = qk.float()

            w = F.softmax(qk, dim=-1).to(q.dtype)
            out = (w @ v).permute(0, 2, 1, 3).flatten(start_dim=2)
            qk = qk.detach()

        return out, qk
140 |
141 |
class ResidualAttentionBlock(nn.Module):
    """Pre-norm residual block: self-attention, optional cross-attention, then an MLP."""

    def __init__(self, n_state: int, n_head: int, cross_attention: bool = False):
        """Build the sub-layers; cross-attention parts exist only if ``cross_attention``."""
        super().__init__()

        self.attn = MultiHeadAttention(n_state, n_head)
        self.attn_ln = LayerNorm(n_state)

        if cross_attention:
            self.cross_attn = MultiHeadAttention(n_state, n_head)
            self.cross_attn_ln = LayerNorm(n_state)
        else:
            self.cross_attn = None
            self.cross_attn_ln = None

        hidden_width = n_state * 4
        self.mlp = nn.Sequential(
            Linear(n_state, hidden_width),
            nn.GELU(),
            Linear(hidden_width, n_state),
        )
        self.mlp_ln = LayerNorm(n_state)

    def forward(
        self,
        x: Tensor,
        xa: Optional[Tensor] = None,
        mask: Optional[Tensor] = None,
        kv_cache: Optional[dict] = None,
    ):
        """Apply each sub-layer to a normalised copy of ``x`` and add the result back.

        ``mask`` is given to the self-attention only; ``xa`` is given to the
        cross-attention only; ``kv_cache`` is shared by both.
        """
        self_attended, _ = self.attn(self.attn_ln(x), mask=mask, kv_cache=kv_cache)
        x = x + self_attended

        if self.cross_attn is not None:
            cross_attended, _ = self.cross_attn(
                self.cross_attn_ln(x), xa, kv_cache=kv_cache
            )
            x = x + cross_attended

        return x + self.mlp(self.mlp_ln(x))
172 |
173 |
class AudioEncoder(nn.Module):
    """Two GELU-activated convolutions followed by a stack of attention blocks."""

    def __init__(
        self, n_mels: int, n_ctx: int, n_state: int, n_head: int, n_layer: int
    ):
        """Create the convolutional stem, fixed positional table and ``n_layer`` blocks."""
        super().__init__()
        self.conv1 = Conv1d(n_mels, n_state, kernel_size=3, padding=1)
        self.conv2 = Conv1d(n_state, n_state, kernel_size=3, stride=2, padding=1)
        # non-trainable sinusoidal table, stored as a buffer rather than a parameter
        self.register_buffer("positional_embedding", sinusoids(n_ctx, n_state))

        layers = [ResidualAttentionBlock(n_state, n_head) for _ in range(n_layer)]
        self.blocks: Iterable[ResidualAttentionBlock] = nn.ModuleList(layers)
        self.ln_post = LayerNorm(n_state)

    def forward(self, x: Tensor):
        """
        x : torch.Tensor, shape = (batch_size, n_mels, n_ctx)
            the mel spectrogram of the audio

        Returns the layer-normalised output of the last block.
        """
        for conv in (self.conv1, self.conv2):
            x = F.gelu(conv(x))
        # move channels last so positions sit on dim 1
        x = x.permute(0, 2, 1)

        assert x.shape[1:] == self.positional_embedding.shape, "incorrect audio shape"
        working_dtype = x.dtype
        x = (x + self.positional_embedding).to(working_dtype)

        for layer in self.blocks:
            x = layer(x)

        return self.ln_post(x)
205 |
206 |
class TextDecoder(nn.Module):
    """Token decoder: embeddings, cross-attending blocks, and tied output projection."""

    def __init__(
        self, n_vocab: int, n_ctx: int, n_state: int, n_head: int, n_layer: int
    ):
        """Create embeddings, ``n_layer`` cross-attention blocks and the causal mask."""
        super().__init__()

        self.token_embedding = nn.Embedding(n_vocab, n_state)
        # allocated uninitialised here; values are expected to come from elsewhere
        self.positional_embedding = nn.Parameter(torch.empty(n_ctx, n_state))

        layers = [
            ResidualAttentionBlock(n_state, n_head, cross_attention=True)
            for _ in range(n_layer)
        ]
        self.blocks: Iterable[ResidualAttentionBlock] = nn.ModuleList(layers)
        self.ln = LayerNorm(n_state)

        # -inf strictly above the diagonal, zero elsewhere; kept out of the state dict
        causal_mask = torch.empty(n_ctx, n_ctx).fill_(-np.inf).triu_(1)
        self.register_buffer("mask", causal_mask, persistent=False)

    def forward(self, x: Tensor, xa: Tensor, kv_cache: Optional[dict] = None):
        """
        x : torch.LongTensor, shape = (batch_size, <= n_ctx)
            the text tokens
        xa : torch.Tensor, shape = (batch_size, n_audio_ctx, n_audio_state)
            the encoded audio features to be attended on
        kv_cache : Optional[dict]
            when non-empty, dim 1 of its first stored tensor is used as the
            position offset of the incoming tokens

        Returns float32 logits computed against the token embedding matrix.
        """
        if kv_cache:
            offset = next(iter(kv_cache.values())).shape[1]
        else:
            offset = 0

        n_tokens = x.shape[-1]
        embedded = self.token_embedding(x)
        positions = self.positional_embedding[offset : offset + n_tokens]
        x = (embedded + positions).to(xa.dtype)

        for layer in self.blocks:
            x = layer(x, xa, mask=self.mask, kv_cache=kv_cache)

        x = self.ln(x)
        # the output projection reuses the token embedding weights
        output_weights = self.token_embedding.weight.to(x.dtype)
        return (x @ torch.transpose(output_weights, 0, 1)).float()
250 |
251 |
class Whisper(nn.Module):
    """Encoder/decoder model assembled from an `AudioEncoder` and a `TextDecoder`."""

    def __init__(self, dims: ModelDimensions):
        """Build both halves from ``dims`` and register the default alignment heads."""
        super().__init__()
        self.dims = dims
        self.encoder = AudioEncoder(
            self.dims.n_mels,
            self.dims.n_audio_ctx,
            self.dims.n_audio_state,
            self.dims.n_audio_head,
            self.dims.n_audio_layer,
        )
        self.decoder = TextDecoder(
            self.dims.n_vocab,
            self.dims.n_text_ctx,
            self.dims.n_text_state,
            self.dims.n_text_head,
            self.dims.n_text_layer,
        )
        # use the last half among the decoder layers for time alignment by default;
        # to use a specific set of heads, see `set_alignment_heads()` below.
        all_heads = torch.zeros(
            self.dims.n_text_layer, self.dims.n_text_head, dtype=torch.bool
        )
        all_heads[self.dims.n_text_layer // 2 :] = True
        # stored sparse and non-persistent, i.e. excluded from the state dict
        self.register_buffer("alignment_heads", all_heads.to_sparse(), persistent=False)

    def set_alignment_heads(self, dump: bytes):
        """Replace the ``alignment_heads`` buffer with a mask decoded from ``dump``.

        ``dump`` is base85-decoded, gzip-decompressed and read as a flat bool array,
        which is then reshaped to (n_text_layer, n_text_head).
        """
        # .copy() because np.frombuffer returns a read-only view of the bytes
        array = np.frombuffer(
            gzip.decompress(base64.b85decode(dump)), dtype=bool
        ).copy()
        mask = torch.from_numpy(array).reshape(
            self.dims.n_text_layer, self.dims.n_text_head
        )
        self.register_buffer("alignment_heads", mask.to_sparse(), persistent=False)

    def embed_audio(self, mel: torch.Tensor):
        """Run the encoder on ``mel`` and return its output."""
        return self.encoder(mel)

    def logits(self, tokens: torch.Tensor, audio_features: torch.Tensor):
        """Run the decoder (without a kv cache) on already-encoded audio features."""
        return self.decoder(tokens, audio_features)

    def forward(
        self, mel: torch.Tensor, tokens: torch.Tensor
    ) -> torch.Tensor:
        """Encode ``mel`` and decode ``tokens`` against it; returns the decoder's logits."""
        return self.decoder(tokens, self.encoder(mel))

    @property
    def device(self):
        """Device of the first parameter returned by ``self.parameters()``."""
        return next(self.parameters()).device

    @property
    def is_multilingual(self):
        """True when the vocabulary has at least 51865 entries."""
        return self.dims.n_vocab >= 51865

    @property
    def num_languages(self):
        """``n_vocab - 51765``, minus one more when the model is multilingual."""
        return self.dims.n_vocab - 51765 - int(self.is_multilingual)

    def install_kv_cache_hooks(self, cache: Optional[dict] = None):
        """
        The `MultiHeadAttention` module optionally accepts `kv_cache` which stores the key and value
        tensors calculated for the previous positions. This method returns a dictionary that stores
        all caches, and the necessary hooks for the key and value projection modules that save the
        intermediate tensors to be reused during later calculations.

        Returns
        -------
        cache : Dict[nn.Module, torch.Tensor]
            A dictionary object mapping the key/value projection modules to its cache
        hooks : List[RemovableHandle]
            List of PyTorch RemovableHandle objects to stop the hooks to be called
        """
        # shallow copy so the caller's dictionary is never mutated by the hooks
        cache = {**cache} if cache is not None else {}
        hooks = []

        def save_to_cache(module, _, output):
            # forward hook: its return value replaces the projection's output
            if module not in cache or output.shape[1] > self.dims.n_text_ctx:
                # save as-is, for the first token or cross attention
                cache[module] = output
            else:
                # append the new positions along dim 1 to what was stored before
                cache[module] = torch.cat([cache[module], output], dim=1).detach()
            return cache[module]

        def install_hooks(layer: nn.Module):
            # only attention layers own key/value projections to hook
            if isinstance(layer, MultiHeadAttention):
                hooks.append(layer.key.register_forward_hook(save_to_cache))
                hooks.append(layer.value.register_forward_hook(save_to_cache))

        # hooks are installed on the decoder's modules only
        self.decoder.apply(install_hooks)
        return cache, hooks

    # module-level functions imported at the top of the file, bound here as methods
    detect_language = detect_language_function
    transcribe = transcribe_function
    decode = decode_function
346 |
--------------------------------------------------------------------------------
/pytranscriber/gui/main/window_main.py:
--------------------------------------------------------------------------------
1 | # -*- coding: utf-8 -*-
2 |
3 | # Form implementation generated from reading ui file 'window_main.ui'
4 | #
5 | # Created by: PyQt5 UI code generator 5.15.4
6 | #
7 | # WARNING: Any manual changes made to this file will be lost when pyuic5 is
8 | # run again. Do not edit this file unless you know what you are doing.
9 |
10 |
11 | from PyQt5 import QtCore, QtGui, QtWidgets
12 |
13 |
class Ui_window(object):
    """Widget tree of the main window, generated by pyuic5 from 'window_main.ui'."""

    def setupUi(self, window):
        """Create every widget, menu and action on ``window`` and set their captions.

        All widgets use fixed geometries; the created objects are stored as
        attributes of this instance under the same name as their Qt object name.
        """
        window.setObjectName("window")
        window.resize(1045, 610)
        self.centralwidget = QtWidgets.QWidget(window)
        self.centralwidget.setObjectName("centralwidget")
        # --- file selection, conversion button and progress display ---
        self.bSelectMedia = QtWidgets.QPushButton(self.centralwidget)
        self.bSelectMedia.setGeometry(QtCore.QRect(10, 10, 141, 34))
        self.bSelectMedia.setObjectName("bSelectMedia")
        self.bConvert = QtWidgets.QPushButton(self.centralwidget)
        # starts disabled
        self.bConvert.setEnabled(False)
        self.bConvert.setGeometry(QtCore.QRect(200, 380, 341, 34))
        self.bConvert.setObjectName("bConvert")
        self.progressBar = QtWidgets.QProgressBar(self.centralwidget)
        self.progressBar.setGeometry(QtCore.QRect(20, 470, 1021, 23))
        self.progressBar.setProperty("value", 0)
        self.progressBar.setObjectName("progressBar")
        self.labelCurrentOperation = QtWidgets.QLabel(self.centralwidget)
        self.labelCurrentOperation.setGeometry(QtCore.QRect(20, 420, 871, 41))
        self.labelCurrentOperation.setText("")
        self.labelCurrentOperation.setObjectName("labelCurrentOperation")
        # --- output folder controls ---
        self.bOpenOutputFolder = QtWidgets.QPushButton(self.centralwidget)
        self.bOpenOutputFolder.setGeometry(QtCore.QRect(550, 380, 241, 34))
        self.bOpenOutputFolder.setObjectName("bOpenOutputFolder")
        self.bSelectOutputFolder = QtWidgets.QPushButton(self.centralwidget)
        self.bSelectOutputFolder.setGeometry(QtCore.QRect(10, 180, 141, 34))
        self.bSelectOutputFolder.setObjectName("bSelectOutputFolder")
        self.qleOutputFolder = QtWidgets.QLineEdit(self.centralwidget)
        self.qleOutputFolder.setGeometry(QtCore.QRect(160, 180, 861, 32))
        self.qleOutputFolder.setText("")
        # read-only line edit: the text cannot be typed into directly
        self.qleOutputFolder.setReadOnly(True)
        self.qleOutputFolder.setObjectName("qleOutputFolder")
        # --- group box holding the list of selected files ---
        self.groupBox = QtWidgets.QGroupBox(self.centralwidget)
        self.groupBox.setGeometry(QtCore.QRect(160, 10, 871, 161))
        self.groupBox.setAlignment(QtCore.Qt.AlignLeading|QtCore.Qt.AlignLeft|QtCore.Qt.AlignTop)
        self.groupBox.setFlat(False)
        self.groupBox.setCheckable(False)
        self.groupBox.setObjectName("groupBox")
        self.qlwListFilesSelected = QtWidgets.QListWidget(self.groupBox)
        self.qlwListFilesSelected.setGeometry(QtCore.QRect(10, 30, 851, 121))
        self.qlwListFilesSelected.setObjectName("qlwListFilesSelected")
        self.bRemoveFile = QtWidgets.QPushButton(self.centralwidget)
        self.bRemoveFile.setGeometry(QtCore.QRect(10, 50, 141, 34))
        self.bRemoveFile.setObjectName("bRemoveFile")
        self.labelProgressFileIndex = QtWidgets.QLabel(self.centralwidget)
        self.labelProgressFileIndex.setGeometry(QtCore.QRect(20, 500, 131, 41))
        self.labelProgressFileIndex.setText("")
        self.labelProgressFileIndex.setObjectName("labelProgressFileIndex")
        self.bCancel = QtWidgets.QPushButton(self.centralwidget)
        self.bCancel.setGeometry(QtCore.QRect(470, 510, 108, 36))
        self.bCancel.setObjectName("bCancel")
        self.chbxOpenOutputFilesAuto = QtWidgets.QCheckBox(self.centralwidget)
        self.chbxOpenOutputFilesAuto.setGeometry(QtCore.QRect(10, 220, 291, 32))
        # checked by default
        self.chbxOpenOutputFilesAuto.setChecked(True)
        self.chbxOpenOutputFilesAuto.setObjectName("chbxOpenOutputFilesAuto")
        # --- row 1: audio language label + combo box ---
        self.horizontalLayoutWidget = QtWidgets.QWidget(self.centralwidget)
        self.horizontalLayoutWidget.setGeometry(QtCore.QRect(200, 250, 591, 38))
        self.horizontalLayoutWidget.setObjectName("horizontalLayoutWidget")
        self.horizontalLayout_5 = QtWidgets.QHBoxLayout(self.horizontalLayoutWidget)
        self.horizontalLayout_5.setContentsMargins(0, 0, 0, 0)
        self.horizontalLayout_5.setObjectName("horizontalLayout_5")
        self.labelSelectLang = QtWidgets.QLabel(self.horizontalLayoutWidget)
        self.labelSelectLang.setObjectName("labelSelectLang")
        self.horizontalLayout_5.addWidget(self.labelSelectLang)
        self.cbSelectLang = QtWidgets.QComboBox(self.horizontalLayoutWidget)
        self.cbSelectLang.setSizeAdjustPolicy(QtWidgets.QComboBox.AdjustToContents)
        self.cbSelectLang.setObjectName("cbSelectLang")
        self.horizontalLayout_5.addWidget(self.cbSelectLang)
        # --- row 2: engine radio buttons (Google Speech is pre-selected) ---
        self.horizontalLayoutWidget_2 = QtWidgets.QWidget(self.centralwidget)
        self.horizontalLayoutWidget_2.setGeometry(QtCore.QRect(200, 290, 591, 41))
        self.horizontalLayoutWidget_2.setObjectName("horizontalLayoutWidget_2")
        self.horizontalLayout = QtWidgets.QHBoxLayout(self.horizontalLayoutWidget_2)
        self.horizontalLayout.setContentsMargins(0, 0, 0, 0)
        self.horizontalLayout.setObjectName("horizontalLayout")
        self.lEngine = QtWidgets.QLabel(self.horizontalLayoutWidget_2)
        self.lEngine.setObjectName("lEngine")
        self.horizontalLayout.addWidget(self.lEngine)
        self.rbGoogleEngine = QtWidgets.QRadioButton(self.horizontalLayoutWidget_2)
        self.rbGoogleEngine.setChecked(True)
        self.rbGoogleEngine.setObjectName("rbGoogleEngine")
        self.horizontalLayout.addWidget(self.rbGoogleEngine)
        self.rbWhisper = QtWidgets.QRadioButton(self.horizontalLayoutWidget_2)
        self.rbWhisper.setEnabled(True)
        self.rbWhisper.setCheckable(True)
        self.rbWhisper.setObjectName("rbWhisper")
        self.horizontalLayout.addWidget(self.rbWhisper)
        # --- row 3: model size radio buttons (Tiny is pre-selected) ---
        self.horizontalLayoutWidget_3 = QtWidgets.QWidget(self.centralwidget)
        self.horizontalLayoutWidget_3.setGeometry(QtCore.QRect(200, 330, 611, 31))
        self.horizontalLayoutWidget_3.setObjectName("horizontalLayoutWidget_3")
        self.horizontalLayout_2 = QtWidgets.QHBoxLayout(self.horizontalLayoutWidget_3)
        self.horizontalLayout_2.setContentsMargins(0, 0, 0, 0)
        self.horizontalLayout_2.setObjectName("horizontalLayout_2")
        self.lModels = QtWidgets.QLabel(self.horizontalLayoutWidget_3)
        self.lModels.setEnabled(True)
        self.lModels.setObjectName("lModels")
        self.horizontalLayout_2.addWidget(self.lModels)
        self.rbModelTiny = QtWidgets.QRadioButton(self.horizontalLayoutWidget_3)
        self.rbModelTiny.setChecked(True)
        self.rbModelTiny.setObjectName("rbModelTiny")
        self.horizontalLayout_2.addWidget(self.rbModelTiny)
        self.rbModelBase = QtWidgets.QRadioButton(self.horizontalLayoutWidget_3)
        self.rbModelBase.setEnabled(True)
        self.rbModelBase.setCheckable(True)
        self.rbModelBase.setObjectName("rbModelBase")
        self.horizontalLayout_2.addWidget(self.rbModelBase)
        self.rbModelSmall = QtWidgets.QRadioButton(self.horizontalLayoutWidget_3)
        self.rbModelSmall.setEnabled(True)
        self.rbModelSmall.setCheckable(True)
        self.rbModelSmall.setObjectName("rbModelSmall")
        self.horizontalLayout_2.addWidget(self.rbModelSmall)
        self.rbModelMedium = QtWidgets.QRadioButton(self.horizontalLayoutWidget_3)
        self.rbModelMedium.setEnabled(True)
        self.rbModelMedium.setCheckable(True)
        self.rbModelMedium.setObjectName("rbModelMedium")
        self.horizontalLayout_2.addWidget(self.rbModelMedium)
        self.rbModelLarge = QtWidgets.QRadioButton(self.horizontalLayoutWidget_3)
        self.rbModelLarge.setEnabled(True)
        self.rbModelLarge.setCheckable(True)
        self.rbModelLarge.setObjectName("rbModelLarge")
        self.horizontalLayout_2.addWidget(self.rbModelLarge)
        window.setCentralWidget(self.centralwidget)
        # --- menu bar, menus and status bar ---
        self.menubar = QtWidgets.QMenuBar(window)
        self.menubar.setGeometry(QtCore.QRect(0, 0, 1045, 23))
        self.menubar.setObjectName("menubar")
        self.menuAbout = QtWidgets.QMenu(self.menubar)
        self.menuAbout.setObjectName("menuAbout")
        # NOTE: this menu is named "menuProxy" but is titled "&Settings" in retranslateUi
        self.menuProxy = QtWidgets.QMenu(self.menubar)
        self.menuProxy.setObjectName("menuProxy")
        self.menuLanguage = QtWidgets.QMenu(self.menubar)
        self.menuLanguage.setObjectName("menuLanguage")
        window.setMenuBar(self.menubar)
        self.statusbar = QtWidgets.QStatusBar(window)
        self.statusbar.setObjectName("statusbar")
        window.setStatusBar(self.statusbar)
        # --- actions ---
        self.actionLicense = QtWidgets.QAction(window)
        self.actionLicense.setObjectName("actionLicense")
        self.actionDonation = QtWidgets.QAction(window)
        self.actionDonation.setObjectName("actionDonation")
        self.actionAbout_pyTranscriber = QtWidgets.QAction(window)
        self.actionAbout_pyTranscriber.setObjectName("actionAbout_pyTranscriber")
        self.actionProxy = QtWidgets.QAction(window)
        self.actionProxy.setObjectName("actionProxy")
        self.actionEnglish = QtWidgets.QAction(window)
        self.actionEnglish.setObjectName("actionEnglish")
        self.actionChineseTraditional = QtWidgets.QAction(window)
        self.actionChineseTraditional.setObjectName("actionChineseTraditional")
        self.actionChineseSimplified = QtWidgets.QAction(window)
        self.actionChineseSimplified.setObjectName("actionChineseSimplified")
        self.actionPortuguese = QtWidgets.QAction(window)
        self.actionPortuguese.setObjectName("actionPortuguese")
        # --- attach actions to their menus ---
        self.menuAbout.addAction(self.actionLicense)
        self.menuAbout.addAction(self.actionDonation)
        self.menuAbout.addAction(self.actionAbout_pyTranscriber)
        self.menuProxy.addAction(self.actionProxy)
        self.menuLanguage.addAction(self.actionEnglish)
        self.menuLanguage.addAction(self.actionChineseTraditional)
        self.menuLanguage.addAction(self.actionChineseSimplified)
        self.menuLanguage.addAction(self.actionPortuguese)
        # menus are added to the bar in this order: menuProxy, menuLanguage, menuAbout
        self.menubar.addAction(self.menuProxy.menuAction())
        self.menubar.addAction(self.menuLanguage.menuAction())
        self.menubar.addAction(self.menuAbout.menuAction())

        self.retranslateUi(window)
        # standard pyuic5 epilogue: lets Qt wire up slots by object name
        QtCore.QMetaObject.connectSlotsByName(window)

    def retranslateUi(self, window):
        """Set every user-visible caption, passing each through Qt's translate call.

        All strings are looked up in the "window" translation context.
        """
        _translate = QtCore.QCoreApplication.translate
        window.setWindowTitle(_translate("window", "pyTranscriber v2.1 - 13/07/2025"))
        self.bSelectMedia.setText(_translate("window", "Select file(s)"))
        self.bConvert.setText(_translate("window", "Transcribe Audio / Generate Subtitles"))
        self.bOpenOutputFolder.setText(_translate("window", "Open Output Folder"))
        self.bSelectOutputFolder.setText(_translate("window", "Output Location"))
        self.groupBox.setTitle(_translate("window", "List of files to generate transcribe audio / generate subtitles"))
        self.bRemoveFile.setText(_translate("window", "Remove file(s)"))
        self.bCancel.setText(_translate("window", "Cancel"))
        self.chbxOpenOutputFilesAuto.setText(_translate("window", "Open output files automatically"))
        self.labelSelectLang.setText(_translate("window", "Audio Language:"))
        self.lEngine.setText(_translate("window", "Engine:"))
        self.rbGoogleEngine.setText(_translate("window", "Google Speech (cloud processing)"))
        self.rbWhisper.setText(_translate("window", "openAI Whisper (local processing)"))
        self.lModels.setText(_translate("window", "Models:"))
        self.rbModelTiny.setText(_translate("window", "Tiny"))
        self.rbModelBase.setText(_translate("window", "Base"))
        self.rbModelSmall.setText(_translate("window", "Small"))
        self.rbModelMedium.setText(_translate("window", "Medium"))
        self.rbModelLarge.setText(_translate("window", "Large"))
        # menu and action captions; "&" marks the keyboard mnemonic character
        self.menuAbout.setTitle(_translate("window", "Abo&ut"))
        self.menuProxy.setTitle(_translate("window", "&Settings"))
        self.menuLanguage.setTitle(_translate("window", "&Language"))
        self.actionLicense.setText(_translate("window", "&License"))
        self.actionDonation.setText(_translate("window", "&Funding at Github Sponsors"))
        self.actionAbout_pyTranscriber.setText(_translate("window", "&More about pyTranscriber"))
        self.actionProxy.setText(_translate("window", "&Proxy"))
        self.actionProxy.setToolTip(_translate("window", "Proxy setting"))
        self.actionEnglish.setText(_translate("window", "English"))
        self.actionChineseTraditional.setText(_translate("window", "繁體中文 - Chinese Traditional"))
        self.actionChineseSimplified.setText(_translate("window", "简体中文 - Chinese Simplified"))
        self.actionPortuguese.setText(_translate("window", "Português"))
212 |
--------------------------------------------------------------------------------