├── __init__.py
├── pytranscriber
    ├── __init__.py
    ├── control
    │   ├── __init__.py
    │   ├── thread_cancel_autosub.py
    │   ├── ctr_engine.py
    │   ├── ctr_main.py
    │   ├── ctr_proxy.py
    │   ├── thread_exec_whisper.py
    │   ├── ctr_db.py
    │   ├── thread_exec_generic.py
    │   ├── ctr_whisper.py
    │   ├── thread_exec_autosub.py
    │   └── ctr_autosub.py
    ├── gui
    │   ├── __init__.py
    │   ├── proxy
    │   │   ├── __init__.py
    │   │   ├── view_proxy.py
    │   │   ├── window_proxy.py
    │   │   └── window_proxy.ui
    │   ├── Português.qm
    │   ├── 简体中文 - Chinese Simplified.qm
    │   ├── 繁體中文 - Chinese Traditional.qm
    │   ├── message_util.py
    │   ├── 简体中文 - Chinese Simplified.ts
    │   ├── 繁體中文 - Chinese Traditional.ts
    │   ├── Português.ts
    │   ├── proxy.py
    │   ├── proxy.ui
    │   └── main
    │   │   ├── window_main.ui
    │   │   └── window_main.py
    ├── model
    │   ├── __init__.py
    │   ├── transcription_parameters.py
    │   ├── whisper.py
    │   └── google_speech.py
    └── util
    │   ├── __init__.py
    │   ├── srtparser.py
    │   └── util.py
├── MANIFEST.in
├── whisper
    ├── version.py
    ├── __main__.py
    ├── assets
    │   └── mel_filters.npz
    ├── normalizers
    │   ├── __init__.py
    │   └── basic.py
    ├── triton_ops.py
    ├── audio.py
    ├── __init__.py
    ├── utils.py
    └── model.py
├── nuitka-win-standalone.bat
├── doc
    ├── lightning.jpeg
    ├── pyTranscriber.png
    ├── screenshot1.png
    ├── screenshot2.png
    ├── screenshot3.png
    ├── entitlements.plist
    └── technical_details.md
├── deployment
    ├── nuitka-win-standalone.bat
    ├── freeze-nuitka-win.bat
    ├── freeze-win.sh
    ├── freeze-linux.sh
    ├── freeze-linux-nuitka.sh
    └── win
    │   ├── script-installer-windows.iss
    │   └── script-installer-windows-standalone.iss
├── pytranscriber.sqlite
├── patches
    ├── note.txt
    ├── autosub-0.3.13.patch
    └── autosub-0.4.0.patch
├── .gitignore
├── freeze-mac.sh
├── .github
    ├── ISSUE_TEMPLATE
    │   ├── feature_request.md
    │   └── bug_report.md
    ├── FUNDING.yml
    └── workflows
    │   ├── mac-pyinstaller.yml
    │   ├── linux-pyinstaller.yml
    │   ├── linux-nuitka.yml
    │   ├── win-nuitka.yml
    │   └── win-pyinstaller-dev2.yml
├── requirements.txt
├── Pipfile
├── main.py
├── script-installer-windows-standalone.iss
├── autosub
    ├── formatters.py
    └── constants.py
└── README.md


/__init__.py:
--------------------------------------------------------------------------------
1 | 


--------------------------------------------------------------------------------
/pytranscriber/__init__.py:
--------------------------------------------------------------------------------
1 | 


--------------------------------------------------------------------------------
/pytranscriber/control/__init__.py:
--------------------------------------------------------------------------------
1 | 


--------------------------------------------------------------------------------
/pytranscriber/gui/__init__.py:
--------------------------------------------------------------------------------
1 | 


--------------------------------------------------------------------------------
/pytranscriber/model/__init__.py:
--------------------------------------------------------------------------------
1 | 


--------------------------------------------------------------------------------
/pytranscriber/util/__init__.py:
--------------------------------------------------------------------------------
1 | 


--------------------------------------------------------------------------------
/pytranscriber/gui/proxy/__init__.py:
--------------------------------------------------------------------------------
1 | 


--------------------------------------------------------------------------------
/MANIFEST.in:
--------------------------------------------------------------------------------
1 | include README.md
2 | include LICENSE


--------------------------------------------------------------------------------
/whisper/version.py:
--------------------------------------------------------------------------------
1 | __version__ = "20240930"
2 | 


--------------------------------------------------------------------------------
/whisper/__main__.py:
--------------------------------------------------------------------------------
1 | from .transcribe import cli
2 | 
3 | cli()
4 | 


--------------------------------------------------------------------------------
/nuitka-win-standalone.bat:
--------------------------------------------------------------------------------
1 | nuitka --enable-plugin=pyqt5  main.py --disable-console --standalone


--------------------------------------------------------------------------------
/doc/lightning.jpeg:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/raryelcostasouza/pyTranscriber/HEAD/doc/lightning.jpeg


--------------------------------------------------------------------------------
/deployment/nuitka-win-standalone.bat:
--------------------------------------------------------------------------------
1 | nuitka --enable-plugin=pyqt5  main.py --disable-console --standalone


--------------------------------------------------------------------------------
/doc/pyTranscriber.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/raryelcostasouza/pyTranscriber/HEAD/doc/pyTranscriber.png


--------------------------------------------------------------------------------
/doc/screenshot1.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/raryelcostasouza/pyTranscriber/HEAD/doc/screenshot1.png


--------------------------------------------------------------------------------
/doc/screenshot2.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/raryelcostasouza/pyTranscriber/HEAD/doc/screenshot2.png


--------------------------------------------------------------------------------
/doc/screenshot3.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/raryelcostasouza/pyTranscriber/HEAD/doc/screenshot3.png


--------------------------------------------------------------------------------
/pytranscriber.sqlite:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/raryelcostasouza/pyTranscriber/HEAD/pytranscriber.sqlite


--------------------------------------------------------------------------------
/pytranscriber/gui/Português.qm:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/raryelcostasouza/pyTranscriber/HEAD/pytranscriber/gui/Português.qm


--------------------------------------------------------------------------------
/whisper/assets/mel_filters.npz:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/raryelcostasouza/pyTranscriber/HEAD/whisper/assets/mel_filters.npz


--------------------------------------------------------------------------------
/pytranscriber/gui/简体中文 - Chinese Simplified.qm:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/raryelcostasouza/pyTranscriber/HEAD/pytranscriber/gui/简体中文 - Chinese Simplified.qm


--------------------------------------------------------------------------------
/pytranscriber/gui/繁體中文 - Chinese Traditional.qm:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/raryelcostasouza/pyTranscriber/HEAD/pytranscriber/gui/繁體中文 - Chinese Traditional.qm


--------------------------------------------------------------------------------
/whisper/normalizers/__init__.py:
--------------------------------------------------------------------------------
1 | from .basic import BasicTextNormalizer as BasicTextNormalizer
2 | from .english import EnglishTextNormalizer as EnglishTextNormalizer
3 | 


--------------------------------------------------------------------------------
/deployment/freeze-nuitka-win.bat:
--------------------------------------------------------------------------------
1 | nuitka --enable-plugin=pyqt5 --include-data-files="ffmpeg.exe"="./" --include-data-files="pytranscriber/gui/*.qm"="pytranscriber/gui/"  main.py --onefile --disable-console


--------------------------------------------------------------------------------
/deployment/freeze-win.sh:
--------------------------------------------------------------------------------
#!/bin/bash

# Build a single-file Windows executable with PyInstaller.
#
# `pipenv run` executes the command inside the project's virtualenv.
# `pipenv shell` would instead open an interactive sub-shell, and the build
# would only start (outside the virtualenv) once that shell exits.
#
# --path uses "$(pwd)": the previous "$pwd" is an unset variable and expanded
# to an empty string.
pipenv run pyinstaller main.py --path="$(pwd)" --add-binary="ffmpeg.exe;." --add-data="pytranscriber/gui/*.qm;pytranscriber/gui/"  --onefile --clean
5 | 


--------------------------------------------------------------------------------
/deployment/freeze-linux.sh:
--------------------------------------------------------------------------------
#!/bin/bash

# Build a single-file Linux executable with PyInstaller.
#
# `pipenv run` executes the command inside the project's virtualenv.
# `pipenv shell` would instead open an interactive sub-shell, and the build
# would only start (outside the virtualenv) once that shell exits.
#
# NOTE(review): main.spec is passed as a second positional argument next to
# main.py -- confirm this is intended.
pipenv run pyinstaller main.py main.spec --path="$(pwd)" --add-binary="ffmpeg:." --add-data="pytranscriber/gui/*.qm:pytranscriber/gui/" --onefile --clean
5 | 


--------------------------------------------------------------------------------
/patches/note.txt:
--------------------------------------------------------------------------------
1 | The autosub version used for pyTranscriber had to be customized a little bit.
2 | The patch in this folder was made comparing the original autosub/__init__.py file from version 0.4.0 to the customized version I made.
3 | 


--------------------------------------------------------------------------------
/.gitignore:
--------------------------------------------------------------------------------
 1 | lib/
 2 | python-libs/
 3 | bin/
 4 | *.spec
 5 | *pyc
 6 | *.egg-info
 7 | *html
 8 | build/
 9 | tests/
10 | dist/
11 | .DS_Store
12 | MANIFEST
13 | *#*
14 | ffmpeg*
15 | notes.txt
16 | 
17 | Pipfile.lock
18 | 
19 | Pipfile
20 | 


--------------------------------------------------------------------------------
/deployment/freeze-linux-nuitka.sh:
--------------------------------------------------------------------------------
#!/bin/bash

# Build a single-file Linux executable with Nuitka.
#
# `pipenv run` executes the command inside the project's virtualenv.
# `pipenv shell` would instead open an interactive sub-shell, and the build
# would only start (outside the virtualenv) once that shell exits.
#
# The whisper assets are a directory, so they are bundled with
# --include-data-dir=SRC=DEST; the previous --include-data-files option had
# no "=DEST" part.
pipenv run nuitka3 --enable-plugin=pyqt5 --include-data-files="ffmpeg"="./" \
--include-data-files="pytranscriber/gui/*.qm"="pytranscriber/gui/" \
--include-data-dir="venv/lib/python3.8/site-packages/whisper/assets"="whisper/assets" \
main.py \
--onefile


--------------------------------------------------------------------------------
/freeze-mac.sh:
--------------------------------------------------------------------------------
 1 | #!/bin/bash
 2 | 
 3 | pipenv shell
 4 | pyinstaller main.py \
 5 | --path="$(pwd)" \
 6 | --add-binary="ffmpeg-bin/ffmpeg:." \
 7 | --add-binary="pytranscriber.sqlite:." \
 8 | --add-data="pytranscriber/gui/*.qm:pytranscriber/gui/" \
 9 | --add-data="venv/lib/python3.8/site-packages/whisper/assets:whisper/assets" \
10 | --clean  \
11 | --windowed \
12 | --noconfirm 
13 | 
14 | 


--------------------------------------------------------------------------------
/doc/entitlements.plist:
--------------------------------------------------------------------------------
 1 | <?xml version="1.0" encoding="UTF-8"?>
 2 | <!DOCTYPE plist PUBLIC "-//Apple//DTD PLIST 1.0//EN" "http://www.apple.com/DTDs/PropertyList-1.0.dtd">
 3 | <plist version="1.0">
 4 | <dict>
 5 | 	<!-- These are required for binaries built by PyInstaller -->
 6 | 	<key>com.apple.security.cs.allow-jit</key>
 7 | 	<true/>
 8 | 	<key>com.apple.security.cs.allow-unsigned-executable-memory</key>
 9 | 	<true/>
10 | 	<key>com.apple.security.cs.disable-library-validation</key><true/>
11 | </dict>
12 | </plist>
13 | 


--------------------------------------------------------------------------------
/.github/ISSUE_TEMPLATE/feature_request.md:
--------------------------------------------------------------------------------
 1 | ---
 2 | name: Feature request
 3 | about: Suggest an idea for this project
 4 | title: ''
 5 | labels: ''
 6 | assignees: ''
 7 | 
 8 | ---
 9 | 
10 | **Is your feature request related to a problem? Please describe.**
11 | A clear and concise description of what the problem is. Ex. I'm always frustrated when [...]
12 | 
13 | **Describe the solution you'd like**
14 | A clear and concise description of what you want to happen.
15 | 
16 | **Describe alternatives you've considered**
17 | A clear and concise description of any alternative solutions or features you've considered.
18 | 
19 | **Additional context**
20 | Add any other context or screenshots about the feature request here.
21 | 


--------------------------------------------------------------------------------
/.github/FUNDING.yml:
--------------------------------------------------------------------------------
 1 | # These are supported funding model platforms
 2 | 
 3 | github: raryelcostasouza # Replace with up to 4 GitHub Sponsors-enabled usernames e.g., [user1, user2]
 4 | patreon: # Replace with a single Patreon username
 5 | open_collective: # Replace with a single Open Collective username
 6 | ko_fi: pytranscriber # Replace with a single Ko-fi username
 7 | tidelift: # Replace with a single Tidelift platform-name/package-name e.g., npm/babel
 8 | community_bridge: # Replace with a single Community Bridge project-name e.g., cloud-foundry
 9 | liberapay: # Replace with a single Liberapay username
10 | issuehunt: # Replace with a single IssueHunt username
11 | otechie: # Replace with a single Otechie username
12 | lfx_crowdfunding: # Replace with a single LFX Crowdfunding project-name e.g., cloud-foundry
13 | custom: # Replace with up to 4 custom sponsorship URLs e.g., ['link1', 'link2']
14 | 


--------------------------------------------------------------------------------
/requirements.txt:
--------------------------------------------------------------------------------
 1 | # CUDA-enabled PyTorch packages (CUDA 12.6)
 2 | torch==2.3.0
 3 | torchvision==0.18.0
 4 | torchaudio==2.3.0
 5 | 
 6 | # Ensure the extra index for PyTorch CUDA wheels
 7 | --extra-index-url https://download.pytorch.org/whl/cu126
 8 | 
 9 | # Other dependencies
10 | cachetools==4.2.4
11 | certifi==2024.7.4
12 | chardet==4.0.0
13 | charset-normalizer==2.0.6
14 | google-api-core==2.1.0
15 | google-api-python-client==2.24.0
16 | google-auth==2.3.0
17 | google-auth-httplib2==0.1.0
18 | google-auth-oauthlib==0.4.6
19 | googleapis-common-protos==1.53.0
20 | httplib2==0.20.1
21 | idna==3.7
22 | oauthlib==3.2.2
23 | progressbar==2.5
24 | protobuf==4.21.6
25 | pyasn1==0.4.8
26 | pyasn1-modules==0.2.8
27 | pyparsing==2.4.7
28 | pyqt5==5.15.10
29 | pyqt5-sip==12.13.0
30 | pysrt==1.1.2
31 | requests==2.32.0
32 | requests-oauthlib==1.3.0
33 | rsa==4.7.2
34 | six==1.16.0
35 | uritemplate==3.0.1
36 | urllib3==2.2.2
37 | openai-whisper
38 | platformdirs
39 | 


--------------------------------------------------------------------------------
/.github/ISSUE_TEMPLATE/bug_report.md:
--------------------------------------------------------------------------------
 1 | ---
 2 | name: Bug report
 3 | about: Create a report to help us improve
 4 | title: ''
 5 | labels: ''
 6 | assignees: ''
 7 | 
 8 | ---
 9 | 
10 | **Describe the bug**
11 | A clear and concise description of what the bug is.
12 | 
13 | **To Reproduce**
14 | Steps to reproduce the behavior:
15 | 1. Go to '...'
16 | 2. Click on '....'
17 | 3. Scroll down to '....'
18 | 4. See error
19 | 
20 | **Expected behavior**
21 | A clear and concise description of what you expected to happen.
22 | 
23 | **Screenshots**
24 | If applicable, add screenshots to help explain your problem.
25 | 
26 | **Desktop (please complete the following information):**
27 |  - OS: [e.g. iOS]
28 |  - Browser [e.g. chrome, safari]
29 |  - Version [e.g. 22]
30 | 
31 | **Smartphone (please complete the following information):**
32 |  - Device: [e.g. iPhone6]
33 |  - OS: [e.g. iOS8.1]
34 |  - Browser [e.g. stock browser, safari]
35 |  - Version [e.g. 22]
36 | 
37 | **Additional context**
38 | Add any other context about the problem here.
39 | 


--------------------------------------------------------------------------------
/Pipfile:
--------------------------------------------------------------------------------
 1 | [[source]]
 2 | url = "https://pypi.org/simple"
 3 | verify_ssl = true
 4 | name = "pypi"
 5 | 
 6 | [packages]
 7 | autosub = "*"
 8 | pyqt5 = "==5.15.4"
 9 | pyinstaller = "*"
10 | macholib = "*"
11 | cachetools = "==4.2.4"
12 | certifi = "==2021.10.8"
13 | chardet = "==4.0.0"
14 | charset-normalizer = "==2.0.6"
15 | google-api-core = "==2.1.0"
16 | google-api-python-client = "==2.24.0"
17 | google-auth = "==2.3.0"
18 | google-auth-httplib2 = "==0.1.0"
19 | google-auth-oauthlib = "==0.4.6"
20 | googleapis-common-protos = "==1.53.0"
21 | httplib2 = "==0.20.1"
22 | idna = "==3.2"
23 | oauthlib = "==3.1.1"
24 | progressbar = "==2.5"
25 | protobuf = "==3.18.1"
26 | pyasn1 = "==0.4.8"
27 | pyasn1-modules = "==0.2.8"
28 | pyparsing = "==2.4.7"
29 | pyqt5-qt5 = "==5.15.2"
30 | pyqt5-sip = "==12.9.0"
31 | pysrt = "==1.1.2"
32 | requests = "==2.26.0"
33 | requests-oauthlib = "==1.3.0"
34 | rsa = "==4.7.2"
35 | six = "==1.16.0"
36 | uritemplate = "==3.0.1"
37 | urllib3 = "==1.26.7"
38 | nuitka = "*"
39 | orderedset = "*"
40 | zstandard = "*"
41 | 
42 | [dev-packages]
43 | 
44 | [requires]
45 | python_version = "3.8"
46 | 


--------------------------------------------------------------------------------
/main.py:
--------------------------------------------------------------------------------
 1 | # (C) 2019 Raryel C. Souza
 2 | # This program is free software: you can redistribute it and/or modify
 3 | # it under the terms of the GNU General Public License as published by
 4 | # the Free Software Foundation, either version 3 of the License, or
 5 | # (at your option) any later version.
 6 | # This program is distributed in the hope that it will be useful,
 7 | # but WITHOUT ANY WARRANTY; without even the implied warranty of
 8 | # MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
 9 | # GNU General Public License for more details.
10 | # You should have received a copy of the GNU General Public License
11 | # along with this program. If not, see <https://www.gnu.org/licenses/>.
12 | 
13 | from pytranscriber.control.ctr_main import Ctr_Main
14 | from pytranscriber.gui.message_util import MessageUtil
15 | import multiprocessing
16 | import sys
17 | 
if __name__ == '__main__':
    # Required by multiprocessing when the program runs as a frozen
    # Windows executable; harmless otherwise.
    multiprocessing.freeze_support()

    # Build the main controller; any failure is reported in an error dialog
    # and turned into a non-zero exit status.
    try:
        ctrMain = Ctr_Main()
    except Exception as ex:
        MessageUtil.show_error_message(str(ex), "Main Error")
        exit_code = 1
    else:
        exit_code = 0

    sys.exit(exit_code)
27 | 
28 | 
29 | 


--------------------------------------------------------------------------------
/pytranscriber/control/thread_cancel_autosub.py:
--------------------------------------------------------------------------------
 1 | '''
 2 |    (C) 2025 Raryel C. Souza
 3 |     This program is free software: you can redistribute it and/or modify
 4 |     it under the terms of the GNU General Public License as published by
 5 |     the Free Software Foundation, either version 3 of the License, or
 6 |     (at your option) any later version.
 7 |     This program is distributed in the hope that it will be useful,
 8 |     but WITHOUT ANY WARRANTY; without even the implied warranty of
 9 |     MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
10 |     GNU General Public License for more details.
11 |     You should have received a copy of the GNU General Public License
12 |     along with this program.  If not, see <https://www.gnu.org/licenses/>.
13 | '''
14 | 
15 | from PyQt5.QtCore import QThread
16 | from PyQt5.QtCore import pyqtSignal
17 | 
18 | 
class Thread_Cancel_Autosub(QThread):
    """Thread that cancels a wrapped object and then announces it is done.

    The wrapped object only needs to expose a ``cancel()`` method.
    ``signalTerminated`` is emitted once that call has returned.
    """

    signalTerminated = pyqtSignal()

    def __init__(self, pObjWT):
        """Store ``pObjWT``, the object whose ``cancel()`` will be called."""
        super().__init__()
        self.objWT = pObjWT

    def run(self):
        """Call ``cancel()`` on the wrapped object, then emit the signal."""
        target = self.objWT
        target.cancel()
        self.signalTerminated.emit()
29 | 


--------------------------------------------------------------------------------
/pytranscriber/control/ctr_engine.py:
--------------------------------------------------------------------------------
 1 | '''
 2 |    (C) 2025 Raryel C. Souza
 3 |     This program is free software: you can redistribute it and/or modify
 4 |     it under the terms of the GNU General Public License as published by
 5 |     the Free Software Foundation, either version 3 of the License, or
 6 |     (at your option) any later version.
 7 |     This program is distributed in the hope that it will be useful,
 8 |     but WITHOUT ANY WARRANTY; without even the implied warranty of
 9 |     MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
10 |     GNU General Public License for more details.
11 |     You should have received a copy of the GNU General Public License
12 |     along with this program.  If not, see <https://www.gnu.org/licenses/>.
13 | '''
14 | 
class CtrEngine:
    """Class-level cancellation flag plus a helper for writing output files.

    All state lives on the class itself (``CtrEngine.cancel``), so every
    caller observes the same flag.
    """

    # Default value so is_operation_canceled() is safe to call even when
    # init() has not been invoked yet (previously an AttributeError).
    cancel = False

    @staticmethod
    def init():
        """Reset the cancellation flag to ``False``."""
        CtrEngine.cancel = False

    @staticmethod
    def is_operation_canceled():
        """Return ``True`` once cancel_operation() has been called."""
        return CtrEngine.cancel

    @staticmethod
    def cancel_operation():
        """Set the cancellation flag to ``True``."""
        CtrEngine.cancel = True

    @staticmethod
    def save_output_file(output_path, file_content):
        """Write ``file_content`` (a ``str``) to ``output_path`` as UTF-8.

        The file is opened in binary mode, so the text is written exactly
        as encoded, with no newline translation. Any existing file at
        ``output_path`` is overwritten.
        """
        # The context manager closes the handle even if write() raises.
        with open(output_path, 'wb') as f:
            f.write(file_content.encode("utf-8"))
33 | 


--------------------------------------------------------------------------------
/pytranscriber/model/transcription_parameters.py:
--------------------------------------------------------------------------------
 1 | '''
 2 |    (C) 2019 Raryel C. Souza
 3 |     This program is free software: you can redistribute it and/or modify
 4 |     it under the terms of the GNU General Public License as published by
 5 |     the Free Software Foundation, either version 3 of the License, or
 6 |     (at your option) any later version.
 7 |     This program is distributed in the hope that it will be useful,
 8 |     but WITHOUT ANY WARRANTY; without even the implied warranty of
 9 |     MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
10 |     GNU General Public License for more details.
11 |     You should have received a copy of the GNU General Public License
12 |     along with this program.  If not, see <https://www.gnu.org/licenses/>.
13 | '''
14 | 
class Transcription_Parameters:
    """Plain container holding the settings passed to its constructor.

    Each constructor argument is stored unchanged on the attribute of the
    same name. ``model_whisper`` starts as ``None`` until
    set_model_whisper() is called.
    """

    def __init__(self, listFiles, outputFolder, langCode,
                 boolOpenOutputFilesAuto, proxies=None):
        """Store the given values; no validation is performed."""
        self.listFiles, self.outputFolder = listFiles, outputFolder
        self.langCode = langCode
        self.boolOpenOutputFilesAuto = boolOpenOutputFilesAuto
        self.proxies, self.model_whisper = proxies, None

    def set_model_whisper(self, model):
        """Record ``model`` as the whisper model."""
        self.model_whisper = model

    def get_model_whisper(self):
        """Return the stored whisper model, or ``None`` if none was set."""
        return self.model_whisper


--------------------------------------------------------------------------------
/pytranscriber/gui/message_util.py:
--------------------------------------------------------------------------------
 1 | '''
 2 |    (C) 2025 Raryel C. Souza
 3 |     This program is free software: you can redistribute it and/or modify
 4 |     it under the terms of the GNU General Public License as published by
 5 |     the Free Software Foundation, either version 3 of the License, or
 6 |     (at your option) any later version.
 7 |     This program is distributed in the hope that it will be useful,
 8 |     but WITHOUT ANY WARRANTY; without even the implied warranty of
 9 |     MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
10 |     GNU General Public License for more details.
11 |     You should have received a copy of the GNU General Public License
12 |     along with this program.  If not, see <https://www.gnu.org/licenses/>.
13 | '''
14 | 
15 | from PyQt5.QtWidgets import QMessageBox
16 | 
17 | 
class MessageUtil:
    """Helpers that show a modal QMessageBox with a given icon and text."""

    @staticmethod
    def _show_message(icon, title, text):
        """Build a QMessageBox with ``icon``, ``title`` and ``text`` and run it."""
        box = QMessageBox()
        box.setIcon(icon)
        box.setWindowTitle(title)
        box.setText(text)
        box.exec()

    @staticmethod
    def show_info_message(info_msg, title=""):
        """Show ``info_msg`` in a box with the Information icon."""
        MessageUtil._show_message(QMessageBox.Information, title, info_msg)

    @staticmethod
    def show_error_message(error_msg, title="Error"):
        """Show ``error_msg`` in a box with the Critical icon."""
        MessageUtil._show_message(QMessageBox.Critical, title, error_msg)
37 | 


--------------------------------------------------------------------------------
/pytranscriber/control/ctr_main.py:
--------------------------------------------------------------------------------
 1 | # (C) 2025 Raryel C. Souza
 2 | # This program is free software: you can redistribute it and/or modify
 3 | # it under the terms of the GNU General Public License as published by
 4 | # the Free Software Foundation, either version 3 of the License, or
 5 | # (at your option) any later version.
 6 | # This program is distributed in the hope that it will be useful,
 7 | # but WITHOUT ANY WARRANTY; without even the implied warranty of
 8 | # MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
 9 | # GNU General Public License for more details.
10 | # You should have received a copy of the GNU General Public License
11 | # along with this program. If not, see <https://www.gnu.org/licenses/>.
12 | 
13 | from pytranscriber.control.ctr_proxy import Ctr_Proxy
14 | from pytranscriber.control.ctr_db import CtrDB
15 | from pytranscriber.gui.main.view_main import ViewMain
16 | 
17 | 
class Ctr_Main:
    """Top-level controller: owns the DB and proxy controllers and the main view."""

    def __init__(self):
        """Create the collaborators, restore the saved language, show the view."""
        self.ctrDB = CtrDB()
        self.ctrProxy = Ctr_Proxy(self)

        # Stays None unless a stored language is found below.
        self.last_language = None

        self.viewMain = ViewMain(self)

        self._load_last_language()
        self.viewMain.show()

    def save_last_language(self, language):
        """Replace the stored last-used language with ``language``."""
        db = self.ctrDB
        # Clear first, then save the new value.
        db.clear_last_language()
        db.save_last_language(language)

    def _load_last_language(self):
        """Apply the stored language to the view, if the DB returns one."""
        stored = self.ctrDB.load_last_language()
        if stored is None:
            return

        # The language value is read from index 1 of the returned record.
        self.last_language = stored[1]
        self.viewMain.set_gui_language(self.last_language)
41 | 
42 | 
43 | 


--------------------------------------------------------------------------------
/deployment/win/script-installer-windows.iss:
--------------------------------------------------------------------------------
 1 | ; Script generated by the Inno Setup Script Wizard.
 2 | ; SEE THE DOCUMENTATION FOR DETAILS ON CREATING INNO SETUP SCRIPT FILES!
 3 | 
 4 | [Setup]
 5 | ; NOTE: The value of AppId uniquely identifies this application.
 6 | ; Do not use the same AppId value in installers for other applications.
 7 | ; (To generate a new GUID, click Tools | Generate GUID inside the IDE.)
 8 | AppId={{5240AB76-FC62-4BFA-A1EF-FA49AF701F80}
 9 | AppName=pyTranscriber
10 | AppVersion=1.9
11 | AppVerName=pyTranscriber 1.9
12 | AppPublisher=Raryel C. Souza
13 | AppPublisherURL=https://github.com/raryelcostasouza/pyTranscriber
14 | AppSupportURL=https://github.com/raryelcostasouza/pyTranscriber
15 | AppUpdatesURL=https://github.com/raryelcostasouza/pyTranscriber
16 | DefaultDirName={pf}\pyTranscriber
17 | DisableDirPage=yes
18 | DisableProgramGroupPage=yes
19 | LicenseFile=.\LICENSE
20 | OutputBaseFilename=setup
21 | Compression=lzma
22 | SolidCompression=yes
23 | 
24 | [Languages]
25 | Name: "english"; MessagesFile: "compiler:Default.isl"
26 | 
27 | [Tasks]
28 | Name: "desktopicon"; Description: "{cm:CreateDesktopIcon}"; GroupDescription: "{cm:AdditionalIcons}"; Flags: unchecked
29 | 
30 | [Files]
31 | Source: ".\dist\pyTranscriber.exe"; DestDir: "{app}"; Flags: ignoreversion
32 | ; NOTE: Don't use "Flags: ignoreversion" on any shared system files
33 | 
34 | [Icons]
35 | Name: "{commonprograms}\pyTranscriber"; Filename: "{app}\pyTranscriber.exe"
36 | Name: "{commondesktop}\pyTranscriber"; Filename: "{app}\pyTranscriber.exe"; Tasks: desktopicon
37 | 
38 | [Run]
39 | Filename: "{app}\pyTranscriber.exe"; Description: "{cm:LaunchProgram,pyTranscriber}"; Flags: nowait postinstall skipifsilent
40 | 
41 | 


--------------------------------------------------------------------------------
/script-installer-windows-standalone.iss:
--------------------------------------------------------------------------------
 1 | ; Script generated by the Inno Setup Script Wizard.
 2 | ; SEE THE DOCUMENTATION FOR DETAILS ON CREATING INNO SETUP SCRIPT FILES!
 3 | 
 4 | [Setup]
 5 | ; NOTE: The value of AppId uniquely identifies this application.
 6 | ; Do not use the same AppId value in installers for other applications.
 7 | ; (To generate a new GUID, click Tools | Generate GUID inside the IDE.)
 8 | AppId={{5240AB76-FC62-4BFA-A1EF-FA49AF701F80}
 9 | AppName=pyTranscriber
10 | AppVersion=1.9
11 | AppVerName=pyTranscriber 1.9
12 | AppPublisher=Raryel C. Souza
13 | AppPublisherURL=https://github.com/raryelcostasouza/pyTranscriber
14 | AppSupportURL=https://github.com/raryelcostasouza/pyTranscriber
15 | AppUpdatesURL=https://github.com/raryelcostasouza/pyTranscriber
16 | DefaultDirName={pf}\pyTranscriber
17 | DisableDirPage=yes
18 | DisableProgramGroupPage=yes
19 | LicenseFile=.\LICENSE
20 | OutputBaseFilename=setup
21 | Compression=lzma
22 | SolidCompression=yes
23 | 
24 | [Languages]
25 | Name: "english"; MessagesFile: "compiler:Default.isl"
26 | 
27 | [Tasks]
28 | Name: "desktopicon"; Description: "{cm:CreateDesktopIcon}"; GroupDescription: "{cm:AdditionalIcons}"; Flags: unchecked
29 | 
30 | [Files]
31 | Source: ".\main.dist\*"; DestDir: "{app}"; Flags: ignoreversion recursesubdirs
32 | ; NOTE: Don't use "Flags: ignoreversion" on any shared system files
33 | 
34 | [Icons]
35 | Name: "{commonprograms}\pyTranscriber"; Filename: "{app}\pyTranscriber.exe"
36 | Name: "{commondesktop}\pyTranscriber"; Filename: "{app}\pyTranscriber.exe"; Tasks: desktopicon
37 | 
38 | [Run]
39 | Filename: "{app}\pyTranscriber.exe"; Description: "{cm:LaunchProgram,pyTranscriber}"; Flags: nowait postinstall skipifsilent
40 | 
41 | 


--------------------------------------------------------------------------------
/deployment/win/script-installer-windows-standalone.iss:
--------------------------------------------------------------------------------
 1 | ; Script generated by the Inno Setup Script Wizard.
 2 | ; SEE THE DOCUMENTATION FOR DETAILS ON CREATING INNO SETUP SCRIPT FILES!
 3 | 
 4 | [Setup]
 5 | ; NOTE: The value of AppId uniquely identifies this application.
 6 | ; Do not use the same AppId value in installers for other applications.
 7 | ; (To generate a new GUID, click Tools | Generate GUID inside the IDE.)
 8 | AppId={{5240AB76-FC62-4BFA-A1EF-FA49AF701F80}
 9 | AppName=pyTranscriber
10 | AppVersion=1.9
11 | AppVerName=pyTranscriber 1.9
12 | AppPublisher=Raryel C. Souza
13 | AppPublisherURL=https://github.com/raryelcostasouza/pyTranscriber
14 | AppSupportURL=https://github.com/raryelcostasouza/pyTranscriber
15 | AppUpdatesURL=https://github.com/raryelcostasouza/pyTranscriber
16 | DefaultDirName={pf}\pyTranscriber
17 | DisableDirPage=yes
18 | DisableProgramGroupPage=yes
19 | LicenseFile=.\LICENSE
20 | OutputBaseFilename=setup
21 | Compression=lzma
22 | SolidCompression=yes
23 | 
24 | [Languages]
25 | Name: "english"; MessagesFile: "compiler:Default.isl"
26 | 
27 | [Tasks]
28 | Name: "desktopicon"; Description: "{cm:CreateDesktopIcon}"; GroupDescription: "{cm:AdditionalIcons}"; Flags: unchecked
29 | 
30 | [Files]
31 | Source: ".\main.dist\*"; DestDir: "{app}"; Flags: ignoreversion recursesubdirs
32 | ; NOTE: Don't use "Flags: ignoreversion" on any shared system files
33 | 
34 | [Icons]
35 | Name: "{commonprograms}\pyTranscriber"; Filename: "{app}\pyTranscriber.exe"
36 | Name: "{commondesktop}\pyTranscriber"; Filename: "{app}\pyTranscriber.exe"; Tasks: desktopicon
37 | 
38 | [Run]
39 | Filename: "{app}\pyTranscriber.exe"; Description: "{cm:LaunchProgram,pyTranscriber}"; Flags: nowait postinstall skipifsilent
40 | 
41 | 


--------------------------------------------------------------------------------
/autosub/formatters.py:
--------------------------------------------------------------------------------
 1 | """
 2 | Defines subtitle formatters used by autosub.
 3 | """
 4 | 
 5 | # -*- coding: utf-8 -*-
 6 | from __future__ import unicode_literals
 7 | 
 8 | import json
 9 | 
10 | import pysrt
11 | import six
12 | 
13 | 
def srt_formatter(subtitles, padding_before=0, padding_after=0):
    """
    Render subtitles as SRT text.

    `subtitles` is an iterable of ``((start, end), text)`` entries. Each entry
    becomes one pysrt item, numbered from 1 in iteration order. The start time
    is moved earlier by `padding_before` (never below zero) and the end time
    is moved later by `padding_after`. The rendered items are joined with a
    newline and returned as a single string.
    """
    cues = pysrt.SubRipFile()
    position = 0
    for span, caption in subtitles:
        position += 1
        begin, finish = span
        cue = pysrt.SubRipItem()
        cue.index = position
        cue.text = six.text_type(caption)
        # Clamp so that padding cannot push the start before time zero.
        cue.start.seconds = max(0, begin - padding_before)
        cue.end.seconds = finish + padding_after
        cues.append(cue)
    rendered = [six.text_type(cue) for cue in cues]
    return '\n'.join(rendered)
27 | 
28 | 
def vtt_formatter(subtitles, padding_before=0, padding_after=0):
    """
    Serialize a list of subtitles according to the VTT format, with optional time padding.

    The cues are first rendered by srt_formatter (same arguments), then
    prefixed with the ``WEBVTT`` header. SRT writes timestamps with a decimal
    comma while WebVTT uses a decimal point, so the comma is rewritten, but
    only inside timestamps on timing lines (lines containing ``-->``).
    Commas that belong to the subtitle text are left untouched.
    """
    import re  # local import: this module's header does not import re

    timestamp = re.compile(r'(\d+:\d{2}:\d{2}),(\d{3})')
    converted = []
    srt_text = srt_formatter(subtitles, padding_before, padding_after)
    for line in srt_text.split('\n'):
        if '-->' in line:
            # Timing line: switch "HH:MM:SS,mmm" to "HH:MM:SS.mmm".
            line = timestamp.sub(r'\1.\2', line)
        converted.append(line)
    return 'WEBVTT\n\n' + '\n'.join(converted)
36 | 
37 | 
def json_formatter(subtitles):
    """
    Render subtitles as a JSON array.

    Every ``((start, end), text)`` entry becomes an object with the keys
    ``start``, ``end`` and ``content``; the list of objects is returned as a
    JSON string.
    """
    records = []
    for span, caption in subtitles:
        begin, finish = span
        records.append({'start': begin, 'end': finish, 'content': caption})
    return json.dumps(records)
52 | 
53 | 
def raw_formatter(subtitles):
    """
    Render subtitles as plain text.

    The time range of every ``(range, text)`` entry is discarded and the texts
    are joined with a single space into one string.
    """
    captions = [caption for _span, caption in subtitles]
    return ' '.join(captions)
59 | 
60 | 
# Registry of the available subtitle serializers, keyed by output format name.
# Note that the 'srt' and 'vtt' formatters accept optional padding arguments
# while the 'json' and 'raw' formatters take only the subtitle list.
FORMATTERS = {
    'srt': srt_formatter,
    'vtt': vtt_formatter,
    'json': json_formatter,
    'raw': raw_formatter,
}
67 | 


--------------------------------------------------------------------------------
/pytranscriber/util/srtparser.py:
--------------------------------------------------------------------------------
 1 | '''
 2 |    (C) 2019 Raryel C. Souza
 3 |     This program is free software: you can redistribute it and/or modify
 4 |     it under the terms of the GNU General Public License as published by
 5 |     the Free Software Foundation, either version 3 of the License, or
 6 |     (at your option) any later version.
 7 |     This program is distributed in the hope that it will be useful,
 8 |     but WITHOUT ANY WARRANTY; without even the implied warranty of
 9 |     MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
10 |     GNU General Public License for more details.
11 |     You should have received a copy of the GNU General Public License
12 |     along with this program.  If not, see <https://www.gnu.org/licenses/>.
13 | '''
14 | 
15 | import re, sys
16 | 
class SRTParser(object):
    """Helpers that turn an SRT subtitle file into a plain-text transcript."""

    @staticmethod
    def extractTextFromSRT(fileSRT):
        """
        Write the subtitle text of `fileSRT` to a sibling ``.txt`` file.

        The SRT file is read as UTF-8 (undecodable bytes are replaced), the
        index and timing lines are removed by clean_up, and the remaining
        lines are written, also as UTF-8, to a file with the same path but a
        ``.txt`` extension.
        """
        import os  # local import: the module header only imports re and sys

        file_encoding = 'utf-8'

        with open(fileSRT, encoding=file_encoding, errors='replace') as f:
            new_lines = SRTParser.clean_up(f.readlines())

        # Swap the real extension instead of assuming it is 4 characters long.
        new_file_name = os.path.splitext(fileSRT)[0] + '.txt'
        with open(new_file_name, 'w', encoding=file_encoding) as f:
            f.writelines(new_lines)

    @staticmethod
    def clean_up(lines):
        """
        Return only the subtitle text lines from the lines of an SRT file.

        Dropped lines:
          * empty / whitespace-only lines,
          * timing lines (anything containing ``-->``),
          * subtitle index lines: a line made only of digits (optionally
            preceded by a byte-order mark) that is directly followed by a
            timing line.

        A text line that merely starts with a digit, or a digits-only line
        that is not followed by a timing line, is kept as subtitle text.
        """
        timing_marker = "-->"
        digits_only = re.compile(r"[0-9]+")

        new_lines = []
        total = len(lines)
        for position, line in enumerate(lines):
            if not line or not line.strip():
                continue
            if timing_marker in line:
                continue

            # A leading BOM can survive decoding on the very first line.
            candidate = line.lstrip("\ufeff").strip()
            if digits_only.fullmatch(candidate):
                following = lines[position + 1] if position + 1 < total else ""
                if timing_marker in following:
                    continue

            new_lines.append(line)
        return new_lines
50 | 


--------------------------------------------------------------------------------
/whisper/normalizers/basic.py:
--------------------------------------------------------------------------------
 1 | import re
 2 | import unicodedata
 3 | 
 4 | import regex
 5 | 
# Non-ASCII letters that "NFKD" normalization does not split into a base
# letter plus combining mark, mapped by hand to an ASCII replacement.
# Used by remove_symbols_and_diacritics below.
# NOTE(review): every uppercase key maps to an uppercase replacement except
# "Þ", which maps to lowercase "th" — confirm whether that is intentional.
ADDITIONAL_DIACRITICS = {
    "œ": "oe",
    "Œ": "OE",
    "ø": "o",
    "Ø": "O",
    "æ": "ae",
    "Æ": "AE",
    "ß": "ss",
    "ẞ": "SS",
    "đ": "d",
    "Đ": "D",
    "ð": "d",
    "Ð": "D",
    "þ": "th",
    "Þ": "th",
    "ł": "l",
    "Ł": "L",
}
25 | 
26 | 
def remove_symbols_and_diacritics(s: str, keep=""):
    """
    Strip diacritics and turn markers, symbols and punctuation into spaces.

    The input is NFKD-normalized and each resulting character is handled by
    the first rule that applies:
      1. characters contained in `keep` are passed through unchanged;
      2. characters listed in ADDITIONAL_DIACRITICS are replaced by their
         manual mapping;
      3. characters of Unicode category "Mn" are dropped;
      4. characters whose category starts with "M", "S" or "P" become a space;
      5. everything else is passed through unchanged.
    """
    pieces = []
    for ch in unicodedata.normalize("NFKD", s):
        if ch in keep:
            pieces.append(ch)
            continue
        if ch in ADDITIONAL_DIACRITICS:
            pieces.append(ADDITIONAL_DIACRITICS[ch])
            continue

        category = unicodedata.category(ch)
        if category == "Mn":
            # Combining mark left over from decomposition: drop it.
            continue
        pieces.append(" " if category[0] in "MSP" else ch)
    return "".join(pieces)
44 | 
45 | 
def remove_symbols(s: str):
    """
    Turn markers, symbols and punctuation into spaces while keeping diacritics.

    The input is NFKC-normalized; every character whose Unicode category
    starts with "M", "S" or "P" is replaced by a space and all other
    characters are kept as they are.
    """
    pieces = []
    for ch in unicodedata.normalize("NFKC", s):
        major_class = unicodedata.category(ch)[0]
        pieces.append(" " if major_class in "MSP" else ch)
    return "".join(pieces)
54 | 
55 | 
class BasicTextNormalizer:
    """
    Callable text normalizer.

    Calling an instance lowercases the text, removes bracketed and
    parenthesized spans, replaces symbols/punctuation with spaces (optionally
    stripping diacritics as well), optionally separates the text into
    space-delimited grapheme clusters, and collapses whitespace runs.
    """

    def __init__(self, remove_diacritics: bool = False, split_letters: bool = False):
        """
        remove_diacritics: use remove_symbols_and_diacritics as the cleaning
            step instead of remove_symbols.
        split_letters: insert a space between grapheme clusters.
        """
        if remove_diacritics:
            self.clean = remove_symbols_and_diacritics
        else:
            self.clean = remove_symbols
        self.split_letters = split_letters

    def __call__(self, s: str):
        """Return the normalized form of `s`."""
        text = s.lower()
        # Drop anything enclosed in <...> or [...].
        text = re.sub(r"[<\[][^>\]]*[>\]]", "", text)
        # Drop anything enclosed in (...).
        text = re.sub(r"\(([^)]+?)\)", "", text)
        text = self.clean(text).lower()

        if self.split_letters:
            graphemes = regex.findall(r"\X", text, regex.U)
            text = " ".join(graphemes)

        # Collapse every run of whitespace into a single space.
        return re.sub(r"\s+", " ", text)
77 | 


--------------------------------------------------------------------------------
/pytranscriber/control/ctr_proxy.py:
--------------------------------------------------------------------------------
 1 | '''
 2 |    (C) 2025 Raryel C. Souza
 3 |     This program is free software: you can redistribute it and/or modify
 4 |     it under the terms of the GNU General Public License as published by
 5 |     the Free Software Foundation, either version 3 of the License, or
 6 |     (at your option) any later version.
 7 |     This program is distributed in the hope that it will be useful,
 8 |     but WITHOUT ANY WARRANTY; without even the implied warranty of
 9 |     MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
10 |     GNU General Public License for more details.
11 |     You should have received a copy of the GNU General Public License
12 |     along with this program.  If not, see <https://www.gnu.org/licenses/>.
13 | '''
14 | 
15 | from pytranscriber.util.util import MyUtil
16 | from pytranscriber.gui.message_util import MessageUtil
17 | from pytranscriber.gui.proxy.view_proxy import ViewProxy
18 | 
19 | 
class Ctr_Proxy():
    """
    Controller for the proxy settings dialog.

    Keeps the current proxy mapping, persists it through the main
    controller's ``ctrDB`` object and drives the ViewProxy dialog.
    """

    # Default proxy mapping (no proxy); set_proxy_setting replaces it with a
    # per-instance mapping.
    proxy = {
        'http': None,
        'https': None
    }

    def __init__(self, ctrMain):
        """Store the main controller; the dialog view is created lazily."""
        self.viewProxy = None
        self.ctrMain = ctrMain

    def show(self):
        """Create the dialog view on first use and display it."""
        if self.viewProxy is None:
            self.viewProxy = ViewProxy(self)
        self.viewProxy.show()

    def save(self):
        """
        Persist the current proxy mapping.

        Any stored proxy is cleared first. When an 'https' address is set the
        mapping is saved; otherwise the user is told the proxy was disabled.
        """
        self.ctrMain.ctrDB.clear_proxy()
        if not self.proxy['https']:
            # nothing to store: the proxy has been switched off
            MessageUtil.show_info_message('Proxy disabled successfully', 'Proxy disabled')
            return
        self.ctrMain.ctrDB.save_proxy(self.proxy)

    def load_data(self):
        """
        Load the stored proxy (if any) and push it into the dialog view.

        Creates the view first when it does not exist yet.
        """
        if self.viewProxy is None:
            self.viewProxy = ViewProxy(self)

        stored = self.ctrMain.ctrDB.load_proxy()
        if stored is None:
            return
        # presumably element 1 of the stored record is the proxy address —
        # verify against ctrDB.load_proxy
        self.set_proxy_setting(stored[1], False)

    def test_proxy_setting(self, proxy_addr):
        """Check connectivity through `proxy_addr` and report the result to the user."""
        candidate = {'http': proxy_addr, 'https': proxy_addr}

        if MyUtil.is_internet_connected(candidate):
            MessageUtil.show_info_message('Successfully connected to Google.', 'Success')
        else:
            MessageUtil.show_error_message('Error connecting to Google.','Error')

    def set_proxy_setting(self, proxy_addr, frontend_request=False):
        """
        Use `proxy_addr` for both 'http' and 'https'.

        When the change comes from the dialog (`frontend_request` is true) the
        setting is saved; otherwise the dialog is refreshed to show it.
        """
        self.proxy = {'http': proxy_addr, 'https': proxy_addr}
        if not frontend_request:
            self.viewProxy.refresh_gui(proxy_addr)
            return
        self.save()

    def get_proxy_setting(self):
        """Return the current proxy mapping."""
        return self.proxy
69 | 


--------------------------------------------------------------------------------
/patches/autosub-0.3.13.patch:
--------------------------------------------------------------------------------
 1 | --- __init__-old.py	2019-01-27 11:18:19.560918050 +0700
 2 | +++ __init__.py	2019-01-24 09:27:17.057865917 +0700
 3 | @@ -262,6 +262,14 @@
 4 |  
 5 |      return 0
 6 |  
 7 | +def percentage(currentval, maxval):
 8 | +    return 100 * currentval / float(maxval)
 9 | +
10 | +
11 | +def output_progress(listener_progress, str_task, progress_percent):
12 | +    if listener_progress != None:
13 | +        listener_progress(str_task,progress_percent)
14 | +
15 |  
16 |  def generate_subtitles(
17 |      source_path,
18 | @@ -271,6 +279,7 @@
19 |      dst_language=DEFAULT_DST_LANGUAGE,
20 |      subtitle_file_format=DEFAULT_SUBTITLE_FORMAT,
21 |      api_key=None,
22 | +    listener_progress=None,
23 |  ):
24 |      audio_filename, audio_rate = extract_audio(source_path)
25 |  
26 | @@ -284,21 +293,28 @@
27 |      transcripts = []
28 |      if regions:
29 |          try:
30 | -            widgets = ["Converting speech regions to FLAC files: ", Percentage(), ' ', Bar(), ' ',
31 | +            str_task_1 = "Converting speech regions to FLAC files: "
32 | +            widgets = [str_task_1, Percentage(), ' ', Bar(), ' ',
33 |                         ETA()]
34 | -            pbar = ProgressBar(widgets=widgets, maxval=len(regions)).start()
35 | +            len_regions = len(regions)
36 | +            pbar = ProgressBar(widgets=widgets, maxval=len_regions).start()
37 |              extracted_regions = []
38 |              for i, extracted_region in enumerate(pool.imap(converter, regions)):
39 |                  extracted_regions.append(extracted_region)
40 |                  pbar.update(i)
41 | +                progress_percent= percentage(i, len_regions)
42 | +                output_progress(listener_progress,str_task_1,progress_percent)
43 |              pbar.finish()
44 |  
45 | -            widgets = ["Performing speech recognition: ", Percentage(), ' ', Bar(), ' ', ETA()]
46 | +            str_task_2 = "Performing speech recognition: "
47 | +            widgets = [str_task_2, Percentage(), ' ', Bar(), ' ', ETA()]
48 |              pbar = ProgressBar(widgets=widgets, maxval=len(regions)).start()
49 |  
50 |              for i, transcript in enumerate(pool.imap(recognizer, extracted_regions)):
51 |                  transcripts.append(transcript)
52 |                  pbar.update(i)
53 | +                progress_percent= percentage(i, len_regions)
54 | +                output_progress(listener_progress,str_task_2,progress_percent)
55 |              pbar.finish()
56 |  
57 |              if not is_same_language(src_language, dst_language):
58 | @@ -349,4 +365,5 @@
59 |  
60 |  
61 |  if __name__ == '__main__':
62 | +    multiprocessing.freeze_support()
63 |      sys.exit(main())
64 | 


--------------------------------------------------------------------------------
/pytranscriber/gui/proxy/view_proxy.py:
--------------------------------------------------------------------------------
 1 | from PyQt5.QtWidgets import QDialog
 2 | from pytranscriber.gui.proxy.window_proxy import Ui_Dialog
 3 | from pytranscriber.gui.message_util import MessageUtil
 4 | 
 5 | 
 6 | class ViewProxy:
 7 | 
 8 |     def __init__(self, ctr_proxy):
 9 |         self.ctr_proxy = ctr_proxy
10 |         self.proxy_dialog = QDialog()
11 |         loaded_proxy_dialog = Ui_Dialog()
12 |         loaded_proxy_dialog.setupUi(self.proxy_dialog)
13 | 
14 |         self.radioButtonNone = loaded_proxy_dialog.radioButtonNone
15 |         self.radioButtonHTTP = loaded_proxy_dialog.radioButtonHTTP
16 |         self.radioButtonNone.clicked.connect(self.__listener_rbOnClicked)
17 |         self.lineEditHttpProxy = loaded_proxy_dialog.lineEditHttpProxy
18 |         self.lineEditHttpProxy.textChanged.connect(self.__listenerLineEditInput)
19 |         self.pushButtonTest = loaded_proxy_dialog.pushButtonTest
20 |         self.bSave = loaded_proxy_dialog.bSave
21 | 
22 |         self.pushButtonTest.clicked.connect(self.__listener_test)
23 |         self.bSave.clicked.connect(self.__listener_save)
24 |         self.__clear_proxy_settings()
25 | 
26 |     def show(self):
27 |         self.ctr_proxy.load_data()
28 |         self.proxy_dialog.exec_()
29 | 
30 |     def __clear_proxy_settings(self):
31 |         self.radioButtonNone.setChecked(True)
32 |         self.lineEditHttpProxy.setEnabled(False)
33 |         self.pushButtonTest.setEnabled(False)
34 | 
35 |     def refresh_gui(self, proxy_address=None):
36 |         if not proxy_address:
37 |             self.__clear_proxy_settings()
38 |         else:
39 |             self.radioButtonHTTP.setChecked(True)
40 |             self.lineEditHttpProxy.setEnabled(True)
41 |             self.pushButtonTest.setEnabled(True)
42 |             self.lineEditHttpProxy.setText(str(proxy_address))
43 | 
44 |     def __listener_test(self):
45 |         proxy_input = self.lineEditHttpProxy.text()
46 | 
47 |         if proxy_input and self.radioButtonHTTP.isChecked():
48 |             self.ctr_proxy.test_proxy_setting(proxy_input)
49 | 
50 |     def __listener_save(self):
51 |         proxy_input = self.lineEditHttpProxy.text()
52 | 
53 |         if proxy_input and self.radioButtonHTTP.isChecked():
54 |             self.ctr_proxy.set_proxy_setting(proxy_input, True)
55 |         elif self.radioButtonNone.isChecked():
56 |             self.ctr_proxy.set_proxy_setting('',True)
57 | 
58 |     def __listener_rbOnClicked(self):
59 |         if self.radioButtonNone.isChecked():
60 |             self.lineEditHttpProxy.setText('')
61 | 
62 |     def __listenerLineEditInput(self):
63 |         if self.lineEditHttpProxy.text():
64 |             self.pushButtonTest.setEnabled(True)
65 |         else:
66 |             self.pushButtonTest.setEnabled(False)
67 | 
68 | 


--------------------------------------------------------------------------------
/pytranscriber/control/thread_exec_whisper.py:
--------------------------------------------------------------------------------
 1 | '''
 2 |    (C) 2025 Raryel C. Souza
 3 |     This program is free software: you can redistribute it and/or modify
 4 |     it under the terms of the GNU General Public License as published by
 5 |     the Free Software Foundation, either version 3 of the License, or
 6 |     (at your option) any later version.
 7 |     This program is distributed in the hope that it will be useful,
 8 |     but WITHOUT ANY WARRANTY; without even the implied warranty of
 9 |     MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
10 |     GNU General Public License for more details.
11 |     You should have received a copy of the GNU General Public License
12 |     along with this program.  If not, see <https://www.gnu.org/licenses/>.
13 | '''
14 | 
15 | from pytranscriber.control.ctr_whisper import CtrWhisper
16 | from pytranscriber.control.thread_exec_generic import ThreadExecGeneric
17 | from pytranscriber.util.util import MyUtil
18 | import traceback
19 | 
20 | 
class Thread_Exec_Whisper(ThreadExecGeneric):
    """Worker thread that transcribes the selected media files with Whisper."""

    def run(self):
        """Initialize Whisper, process every selected file, then mark the thread as stopped."""
        CtrWhisper.init()
        super()._loopSelectedFiles()
        self.running = False

    def _run_engine_for_media(self, index, langCode):
        """
        Generate the SRT/TXT output for the file at `index` in the file list.

        index: position of the source file in obj_transcription_parameters.listFiles.
        langCode: language code passed to Whisper as the source language.

        Errors are reported through signalErrorMsg. A failure is reported
        exactly once: with the traceback when generate_subtitles raised, or
        with the file name when it returned nothing.
        """
        sourceFile = self.obj_transcription_parameters.listFiles[index]
        outputFiles = self._generatePathOutputFile(sourceFile)
        outputFileSRT = outputFiles[0]
        outputFileTXT = outputFiles[1]

        try:
            fOutput = CtrWhisper.generate_subtitles(source_path=sourceFile,
                                                    outputSRT=outputFileSRT,
                                                    outputTXT=outputFileTXT,
                                                    src_language=langCode,
                                                    model=self.obj_transcription_parameters.get_model_whisper())
        except Exception:
            error_msg = f"""Error! Unable to generate subtitles: {traceback.format_exc()}"""
            self.signalErrorMsg.emit(error_msg)  # Emit the full traceback
            # The failure has been reported; do not emit a second, generic error.
            return

        # if nothing was returned
        if not fOutput:
            self.signalErrorMsg.emit("Error! Unable to generate subtitles for file " + sourceFile + ".")
        elif fOutput != -1:
            # the operation was not canceled

            # update the progress message
            self.listenerProgress("Finished", 100)

            if self.obj_transcription_parameters.boolOpenOutputFilesAuto:
                # open both SRT and TXT output files
                MyUtil.open_file(outputFileTXT)
                MyUtil.open_file(outputFileSRT)


--------------------------------------------------------------------------------
/doc/technical_details.md:
--------------------------------------------------------------------------------
 1 | <h1>For Developers - Technical Details</h1>
 2 | 
 3 | This app consists basically of a friendly pyQt5 graphical interface for a customized version of <a href="https://github.com/agermanidis/autosub">Autosub 0.4.0</a> that can run on Linux, Windows and MacOS. All the hard work of processing the audio and generating the subtitles is done by Autosub.
 4 | 
 5 | <h2>Dependencies to build</h2>
 6 | 
 7 | <ol>
 8 | <li>pip3 install pipenv</li>
 9 | <li>pipenv install (install all dependencies from Pipfile)</li>
10 | <li>Download the <a href="https://johnvansickle.com/ffmpeg/">static ffmpeg binary</a> and move it to the project root folder</li></ol>
11 | 
12 | # How to run?
13 | $ pipenv shell
14 | $ python3 main.py
15 | 
16 | 
17 | # How to edit the GUI?
18 | Install Qt5 Designer and open the file pytranscriber/gui/main/window_main.ui (main window) or pytranscriber/gui/proxy/window_proxy.ui (proxy dialog)
19 | 
20 | # How to convert the .ui file (qt5designer project file) to .py?
21 | $ pyuic5 window_main.ui -o window_main.py
22 | 
23 | # How to generate the python bundled binary package version with ffmpeg included?
24 | 
25 | # Linux:
26 | $ pyinstaller main.py --path="$(pwd)" --add-binary="ffmpeg:." --onefile --clean
27 | 
28 | # Windows:
29 | $ pyinstaller main.py --path=$pwd --add-binary="ffmpeg.exe;." --onefile --clean
30 | 
31 | # Mac:
32 | $ pyinstaller main.py --path="$(pwd)" --add-binary="ffmpeg:." --clean --windowed
33 | 
34 | 
35 | The output binary will be in the subfolder dist/main and has all dependencies included. For more details, check the PyInstaller documentation.
36 | 
37 | # On Linux how to generate a statically linked binary so it can run even on systems with older glibc installed?
38 | 
39 | As explained in the <a href="https://github.com/pyinstaller/pyinstaller/wiki/FAQ">PyInstaller FAQ</a>:
40 | > The executable that PyInstaller builds is not fully static, in that it still depends on the system libc. Under Linux, the ABI of GLIBC is backward compatible, but not forward compatible. So if you link against a newer GLIBC, you can't run the resulting executable on an older system.
41 | 
42 | > <b>Solution 1)</b> Compile the Python interpreter with its modules (and probably also the bootloader) on the oldest system you have around, so that it gets linked with the oldest version of GLIBC.
43 | 
44 | > <b>Solution 2)</b> Use a tool like StaticX to create a fully-static bundled version of your PyInstaller application. StaticX bundles all dependencies, including libc and ld.so. (Python code :arrow_right: PyInstaller :arrow_right: StaticX :arrow_right: Fully-static application)
45 | 
46 | <b>Install staticx and patchelf (dependency)</b>
47 | 
48 | $ pip3 install --user patchelf-wrapper
49 |  
50 | $ pip3 install --user staticx
51 | 
52 | <b>After generating the binary with pyinstaller, open the dist folder and run: </b>
53 | 
54 | $ staticx main main-static
55 | 
56 | The newly created main-static contains all library dependencies, including glibc, so it should be able to run even on very old systems.
57 | 
58 | Note: On my Manjaro system, the first time I ran this command I got an error related to "libmpdec.so.2 => not found". Installing the package <b>mpdecimal</b> with the package manager solved the issue.
59 | 


--------------------------------------------------------------------------------
/README.md:
--------------------------------------------------------------------------------
 1 | # pyTranscriber
 2 | 
 3 | [![Tip Me via PayPal](https://img.shields.io/badge/PayPal-tip%20me-1462ab.svg?logo=paypal)](https://www.paypal.com/cgi-bin/webscr?cmd=_donations&business=YHB854YHPJCU8&item_name=Donation+pyTranscriber&currency_code=BRL)
 4 | [![Tip Me via Bitcoin Lightning](https://img.shields.io/badge/Bitcoin%20Lightning-tip%20me-f7931a.svg?logo=lightning)](https://github.com/raryelcostasouza/pyTranscriber/raw/master/doc/lightning.jpeg)
 5 | 
 6 | [![ko-fi](https://ko-fi.com/img/githubbutton_sm.svg)](https://ko-fi.com/A0A6AIR3D)
 7 | 
 8 | MOVED TO NEW WEBSITE - <a href="https://pytranscriber.github.io">https://pytranscriber.github.io</a>
 9 | 
10 | More than 640k downloads since first version. Thanks!
11 | Check live statistics at <a href="https://somsubhra.github.io/github-release-stats/?username=raryelcostasouza&repository=pyTranscriber&page=1&per_page=30">GitHub Release Stats</a>
12 | 
13 | # Thanks to the people helping with funding
14 | Jixing Zhao, Narsu Narsu, Lucas Thorn, Soonkj Jung, Sergio Moreno, Yooki Adair, Adrien Jarton, YP, JOY_EASY, RodrigoRios, Zhou Mi, Dongmei Chen, Jung Yi Hung, Tah Kwang Tomas Tso
15 | 
16 | # UPDATE - v2.1-stable - 13/07/2025
17 | 1. Compiled torch with CUDA support enabled for faster whisper processing for those who have NVidia GPUs
18 | 
19 | # UPDATE - v2.0-stable - 07/07/2025
20 | 1. Added binary for Linux (GLIBC 2.35 or newer)
21 | 
22 | # UPDATE - v2.0-stable - 22/05/2025
23 | 1. Fixed issue with the Cantonese language not working with the Whisper engine
24 | 2. Fixed SRT file generation not being compliant with the SRT syntax
25 | 
26 | # UPDATE - v2.0-RC_1 - 04/03/2025
27 | 1. Added support for <a href="https://openai.com/index/whisper/">OpenAI Whisper</a> with local processing of media files as an alternative to the Google Speech API (where the entire media file is uploaded to Google servers for processing)
28 | 2. Added saving/load settings to sqlite local db
29 | 
30 | 
31 | # UPDATE - v1.9 - 22/12/2022
32 | 1. Windows/Linux version compiled with Nuitka (https://github.com/Nuitka/Nuitka) instead of pyInstaller to improve stability and fix random crashes while transcribing audio. If you still experience issues please report at Issues section.
33 | 2. Support for Ogg/ogv/mkv/webm media files on file selector
34 | 
35 | # UPDATE - v1.8 - 17/08/2022
36 | 1. Fixed bug: language codes for Chinese languages updated according to the Speech API. Changed to "cmn-Hans-CN" and "cmn-Hant-TW" instead of "zh" / "zh-TW". The output was always mistakenly coming out in Cantonese (yue-Hant-HK). Now it comes out properly in Traditional Chinese and Simplified Chinese. Thanks to "Specter Hi" for reporting!
37 | 2. Added GUI language switch feature
38 | 3. Updated link to funding campaign at GitHub Sponsors
39 | 
40 | # UPDATE - v1.7 - 08/08/2022
41 | 1. add proxy setting
42 | 2. change the function 'pytranscriber.util.MyUtil.is_internet_connected'
43 | 3. add requirements.txt
44 | 4. rebuilt using pyInstaller 5.3 - more stability to prevent multithreading crashes on Windows
45 | 5. Added pipfile
46 | 
47 | ![image](https://user-images.githubusercontent.com/23170065/143678535-750ac415-2be7-41ce-b5c2-f1d319d3e204.png)
48 | 
49 | 


--------------------------------------------------------------------------------
/pytranscriber/util/util.py:
--------------------------------------------------------------------------------
 1 | '''
 2 |    (C) 2019 Raryel C. Souza
 3 |     This program is free software: you can redistribute it and/or modify
 4 |     it under the terms of the GNU General Public License as published by
 5 |     the Free Software Foundation, either version 3 of the License, or
 6 |     (at your option) any later version.
 7 |     This program is distributed in the hope that it will be useful,
 8 |     but WITHOUT ANY WARRANTY; without even the implied warranty of
 9 |     MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
10 |     GNU General Public License for more details.
11 |     You should have received a copy of the GNU General Public License
12 |     along with this program.  If not, see <https://www.gnu.org/licenses/>.
13 | '''
14 | 
15 | import platform
16 | import os
17 | import subprocess
18 | 
19 | import requests
20 | from requests.adapters import HTTPAdapter, Retry
21 | import time
22 | 
23 | 
class MyUtil(object):
    """Small platform, network and math helpers used across the application."""

    @staticmethod
    def open_file(path):
        """Open `path` with the operating system's default application."""
        system = platform.system()
        if system == "Windows":
            os.startfile(path)
        elif system == "Darwin":
            subprocess.Popen(["open", path])
        else:
            subprocess.Popen(["xdg-open", path])

    @staticmethod
    def is_internet_connected(proxies=None):
        """
        Return True when https://www.google.com answers with HTTP 200.

        proxies: optional proxy mapping forwarded to the request.
        Any failure (including an unexpected exception, which is printed)
        yields False.
        """
        headers = {'User-Agent': 'Mozilla/5.0 (X11; Linux x86_64; rv:91.0) Gecko/20100101 Firefox/91.0'}
        try:
            # connect to the host -- tells us if the host is actually
            # reachable
            status = MyUtil.send_request('https://www.google.com', proxies=proxies, headers=headers)
        except Exception as e:
            print("Error Name: ", e.__class__.__name__)
            print("Error Message: ", e)
            return False
        return status == 200

    @staticmethod
    def send_request(url,
                     n_retries=0,
                     backoff_factor=0.9,
                     status_codes=(504, 503, 502, 500, 429, 302, 408, 425),
                     proxies=None,
                     headers=None):
        """
        GET `url` and return the response status code, or -1 on failure.

        n_retries: number of connection retries.
        backoff_factor: backoff factor applied between retries.
        status_codes: status codes passed to the retry policy as its force list.
        proxies / headers: forwarded to the request.

        Timeouts and other request errors (including HTTP error statuses
        raised by raise_for_status) are printed and reported as -1. The
        request uses a 5 second timeout.
        """
        retries = Retry(connect=n_retries, backoff_factor=backoff_factor,
                        status_forcelist=list(status_codes))
        # The context manager closes the session (and its pooled
        # connections) on every path.
        with requests.Session() as sess:
            sess.mount("https://", HTTPAdapter(max_retries=retries))
            sess.mount("http://", HTTPAdapter(max_retries=retries))
            try:
                response = sess.get(url, timeout=5, proxies=proxies, headers=headers)
                response.raise_for_status()  # Raises an HTTPError for bad responses
                return response.status_code
            except requests.Timeout:
                print("The request timed out")
            except requests.RequestException as e:
                print(f"An error occurred: {e}")
        return -1

    @staticmethod
    def percentage(currentval, maxval):
        """
        Return `currentval` as a percentage of `maxval`.

        Returns 0.0 when `maxval` is zero (or otherwise falsy) instead of
        raising ZeroDivisionError.
        """
        if not maxval:
            return 0.0
        return 100 * currentval / float(maxval)


--------------------------------------------------------------------------------
/autosub/constants.py:
--------------------------------------------------------------------------------
  1 | """
  2 | Defines constants used by autosub.
  3 | """
  4 | 
  5 | from __future__ import unicode_literals
  6 | 
# API key substituted into the {key} placeholder of the URL template below.
# NOTE(review): the key is hard-coded in source — confirm this is intended.
GOOGLE_SPEECH_API_KEY = "AIzaSyBOti4mM-6x9WDnZIjIeyEU21OpBXqWBgw"
# URL template of the speech recognition endpoint; {lang} and {key} are
# placeholders to be filled in by the caller.
# NOTE(review): the template uses plain http, not https — confirm.
GOOGLE_SPEECH_API_URL = "http://www.google.com/speech-api/v2/recognize?client=chromium&lang={lang}&key={key}" # pylint: disable=line-too-long
  9 | 
# Mapping of language code -> human-readable language name.
# NOTE(review): 'su' is labelled 'Sudanese'; the ISO 639-1 code "su" denotes
# Sundanese — confirm before changing, since the name may be shown or matched
# elsewhere.
# NOTE(review): 'zh-HK' is labelled "Simplified" — verify against the speech
# API's language list.
LANGUAGE_CODES = {
    'af': 'Afrikaans',
    'ar': 'Arabic',
    'az': 'Azerbaijani',
    'be': 'Belarusian',
    'bg': 'Bulgarian',
    'bn': 'Bengali',
    'bs': 'Bosnian',
    'ca': 'Catalan',
    'ceb': 'Cebuano',
    'cs': 'Czech',
    'cy': 'Welsh',
    'da': 'Danish',
    'de': 'German',
    'el': 'Greek',
    'en-AU': 'English (Australia)',
    'en-CA': 'English (Canada)',
    'en-GB': 'English (United Kingdom)',
    'en-IN': 'English (India)',
    'en-IE': 'English (Ireland)',
    'en-NZ': 'English (New Zealand)',
    'en-PH': 'English (Philippines)',
    'en-SG': 'English (Singapore)',
    'en-US': 'English (United States)',
    'eo': 'Esperanto',
    'es-AR': 'Spanish (Argentina)',
    'es-CL': 'Spanish (Chile)',
    'es-ES': 'Spanish (Spain)',
    'es-US': 'Spanish (United States)',
    'es-MX': 'Spanish (Mexico)',
    'es': 'Spanish',
    'et': 'Estonian',
    'eu': 'Basque',
    'fa': 'Persian',
    'fi': 'Finnish',
    'fr': 'French',
    'ga': 'Irish',
    'gl': 'Galician',
    'gu': 'Gujarati',
    'ha': 'Hausa',
    'hi': 'Hindi',
    'hmn': 'Hmong',
    'hr': 'Croatian',
    'ht': 'Haitian Creole',
    'hu': 'Hungarian',
    'hy': 'Armenian',
    'id': 'Indonesian',
    'ig': 'Igbo',
    'is': 'Icelandic',
    'it': 'Italian',
    'iw': 'Hebrew',
    'ja': 'Japanese',
    'jw': 'Javanese',
    'ka': 'Georgian',
    'kk': 'Kazakh',
    'km': 'Khmer',
    'kn': 'Kannada',
    'ko': 'Korean',
    'la': 'Latin',
    'lo': 'Lao',
    'lt': 'Lithuanian',
    'lv': 'Latvian',
    'mg': 'Malagasy',
    'mi': 'Maori',
    'mk': 'Macedonian',
    'ml': 'Malayalam',
    'mn': 'Mongolian',
    'mr': 'Marathi',
    'ms': 'Malay',
    'mt': 'Maltese',
    'my': 'Myanmar (Burmese)',
    'ne': 'Nepali',
    'nl': 'Dutch',
    'no': 'Norwegian',
    'ny': 'Chichewa',
    'pa': 'Punjabi',
    'pl': 'Polish',
    'pt-BR': 'Portuguese (Brazil)',
    'pt-PT': 'Portuguese (Portugal)',
    'ro': 'Romanian',
    'ru': 'Russian',
    'si': 'Sinhala',
    'sk': 'Slovak',
    'sl': 'Slovenian',
    'so': 'Somali',
    'sq': 'Albanian',
    'sr': 'Serbian',
    'st': 'Sesotho',
    'su': 'Sudanese',
    'sv': 'Swedish',
    'sw': 'Swahili',
    'ta': 'Tamil',
    'te': 'Telugu',
    'tg': 'Tajik',
    'th': 'Thai',
    'tl': 'Filipino',
    'tr': 'Turkish',
    'uk': 'Ukrainian',
    'ur': 'Urdu',
    'uz': 'Uzbek',
    'vi': 'Vietnamese',
    'yi': 'Yiddish',
    'yo': 'Yoruba',
    'yue-Hant-HK': 'Cantonese, (Traditional HK)',
    'zh': 'Chinese (Simplified, China)',
    'zh-HK': 'Chinese (Simplified, Hong Kong)',
    'zh-TW': 'Chinese (Traditional, Taiwan)',
    'zu': 'Zulu',
}
119 | 


--------------------------------------------------------------------------------
/.github/workflows/mac-pyinstaller.yml:
--------------------------------------------------------------------------------
  1 | name: MacOS PyInstaller
  2 | 
  3 | on:
  4 |   workflow_dispatch:
  5 |   push:
  6 |     branches:
  7 |       - master
  8 |       - develop
  9 |  
 10 | 
 11 | jobs:
 12 |   build:
 13 |     runs-on: macos-14  # Use macOS ARM64 runner
 14 |     steps:
 15 |       - name: Checkout repository
 16 |         uses: actions/checkout@v4
 17 |         with:
 18 |           fetch-depth: 0  # Ensure full history and tags are available
 19 | 
 20 |       - name: Get latest Git tag
 21 |         id: get_version
 22 |         run: |
 23 |           VERSION=$(git describe --tags --abbrev=0 2>/dev/null || echo "v0.1.0")
 24 |           echo "VERSION=$VERSION" >> $GITHUB_ENV
 25 |           echo "Resolved version: $VERSION"
 26 | 
 27 |       - name: Setup FFmpeg
 28 |         uses: federicocarboni/setup-ffmpeg@v3.1
 29 |         with:
 30 |           ffmpeg-version: release
 31 |           architecture: x64
 32 | 
 33 |       - name: Verify FFmpeg installation
 34 |         run: |
 35 |           which ffmpeg
 36 |           ffmpeg -version 
 37 | 
 38 |       - name: Set up Python 3.8
 39 |         uses: actions/setup-python@v4
 40 |         with:
 41 |           python-version: "3.8"
 42 | 
 43 |       - name: Set up Python virtual environment
 44 |         run: |
 45 |           python -m venv .venv
 46 | 
 47 |       - name: Activate virtual environment and install dependencies
 48 |         run: |
 49 |           source .venv/bin/activate
 50 |           pip install --upgrade pip
 51 |           pip install -r requirements.txt
 52 |           pip install pyinstaller
 53 | 
 54 |       - name: Verify existence of Whisper assets directory
 55 |         run: |
 56 |           source .venv/bin/activate  # Activate the virtual environment
 57 |           ASSETS_PATH=$(python -c "import whisper; import os; print(os.path.join(os.path.dirname(whisper.__file__), 'assets'))")
 58 |           if [ -d "$ASSETS_PATH" ]; then
 59 |             echo "The 'assets' directory exists at: $ASSETS_PATH"
 60 |             echo "ASSETS_PATH=$ASSETS_PATH" >> $GITHUB_ENV
 61 |           else
 62 |             echo "The 'assets' directory does NOT exist."
 63 |             exit 1
 64 |           fi
 65 | 
 66 |       - name: Compile with pyInstaller
 67 |         run: |
 68 |           source .venv/bin/activate
 69 |           FFMPPEG_PATH=$(which ffmpeg)
 70 |           pyinstaller main.py \
 71 |             --windowed \
 72 |             --path="$(pwd)" \
 73 |             --add-binary="$FFMPPEG_PATH:." \
 74 |             --add-binary="pytranscriber.sqlite:." \
 75 |             --add-data="pytranscriber/gui/*.qm:pytranscriber/gui/" \
 76 |             --add-data="$ASSETS_PATH:whisper/assets"
 77 | 
 78 |       - name: Zip the .app bundle with version number
 79 |         run: |
 80 |           cd dist
 81 |           mv main.app "pyTranscriber-${VERSION}.app"
 82 |           zip -r "pyTranscriber-macos-${VERSION}.zip" "pyTranscriber-${VERSION}.app"
 83 | 
 84 |       - name: Upload built executable with version number
 85 |         uses: actions/upload-artifact@v4
 86 |         with:
 87 |           name: pyTranscriber-macos-${{ env.VERSION }}
 88 |           path: ./dist/pyTranscriber-macos-${{ env.VERSION }}.zip # Path adjusted for macOS
 89 |       
 90 |   download:
 91 |     runs-on: macos-14  # macOS ARM64 runner for downloading
 92 |     needs: build
 93 |     steps:
 94 |       - name: Download built executable
 95 |         uses: actions/download-artifact@v4
 96 |         with:
 97 |           path: ./output
 98 | 
 99 |       - name: List downloaded files
100 |         run: ls -la ./output
101 | 


--------------------------------------------------------------------------------
/pytranscriber/gui/简体中文 - Chinese Simplified.ts:
--------------------------------------------------------------------------------
 1 | <?xml version="1.0" encoding="utf-8"?>
 2 | <!DOCTYPE TS>
 3 | <TS version="2.1" language="zh" sourcelanguage="en">
 4 | <context>
 5 |     <name>window</name>
 6 |     <message>
 7 |         <location filename="gui.py" line="114"/>
 8 |         <source>pyTranscriber - v1.7 - 08/08/2020</source>
 9 |         <translation>pyTranscriber -v1.8 - 20/08/2022</translation>
10 |     </message>
11 |     <message>
12 |         <location filename="gui.py" line="115"/>
13 |         <source>Select file(s)</source>
14 |         <translation>选择文件</translation>
15 |     </message>
16 |     <message>
17 |         <location filename="gui.py" line="116"/>
18 |         <source>Transcribe Audio / Generate Subtitles</source>
19 |         <translation>转译音频 / 生成字幕</translation>
20 |     </message>
21 |     <message>
22 |         <location filename="gui.py" line="117"/>
23 |         <source>Open Output Folder</source>
24 |         <translation>打开导出文件夹</translation>
25 |     </message>
26 |     <message>
27 |         <location filename="gui.py" line="118"/>
28 |         <source>Output Location</source>
29 |         <translation>导出位置</translation>
30 |     </message>
31 |     <message>
32 |         <location filename="gui.py" line="119"/>
33 |         <source>&amp;List of files to generate transcribe audio / generate subtitles</source>
34 |         <translation>&amp;转译 / 生成字幕文件列表</translation>
35 |     </message>
36 |     <message>
37 |         <location filename="gui.py" line="120"/>
38 |         <source>Remove file(s)</source>
39 |         <translation>移除文件</translation>
40 |     </message>
41 |     <message>
42 |         <location filename="gui.py" line="121"/>
43 |         <source>Cancel</source>
44 |         <translation>取消</translation>
45 |     </message>
46 |     <message>
47 |         <location filename="gui.py" line="122"/>
48 |         <source>Open output files automatically</source>
49 |         <translation>完成后自动打开输出文件</translation>
50 |     </message>
51 |     <message>
52 |         <location filename="gui.py" line="123"/>
53 |         <source>Audio Language:</source>
54 |         <translation>选择音频语言</translation>
55 |     </message>
56 |     <message>
57 |         <location filename="gui.py" line="124"/>
58 |         <source>Abo&amp;ut</source>
59 |         <translation>Abo&amp;ut</translation>
60 |     </message>
61 |     <message>
62 |         <location filename="gui.py" line="125"/>
63 |         <source>Settings</source>
64 |         <translation>Settings</translation>
65 |     </message>
66 |     <message>
67 |         <location filename="gui.py" line="141"/>
68 |         <source>&amp;Language</source>
69 |         <translation>语言</translation>
70 |     </message>
71 |     <message>
72 |         <location filename="gui.py" line="126"/>
73 |         <source>&amp;License</source>
74 |         <translation>&amp;License</translation>
75 |     </message>
76 |     <message>
77 |         <location filename="gui.py" line="127"/>
78 |         <source>&amp;Funding at Github Sponsors</source>
79 |         <translation>资助 GitHub 上的项目</translation>
80 |     </message>
81 |     <message>
82 |         <location filename="gui.py" line="128"/>
83 |         <source>&amp;More about pyTranscriber</source>
84 |         <translation>&amp;关于 pyTranscriber</translation>
85 |     </message>
86 |     <message>
87 |         <location filename="gui.py" line="129"/>
88 |         <source>Proxy</source>
89 |         <translation>Proxy</translation>
90 |     </message>
91 |     <message>
92 |         <location filename="gui.py" line="130"/>
93 |         <source>Proxy settings</source>
94 |         <translation>Proxy settings</translation>
95 |     </message>
96 | </context>
97 | </TS>
98 | 


--------------------------------------------------------------------------------
/.github/workflows/linux-pyinstaller.yml:
--------------------------------------------------------------------------------
  1 | name: Linux PyInstaller
  2 | 
  3 | on:
  4 |   push:
  5 |     branches:
  6 |       - master
  7 |       - develop
  8 |   pull_request:
  9 | 
 10 | jobs:
 11 |   build:
 12 |     runs-on: ubuntu-22.04
 13 | 
 14 |     steps:
 15 |       - name: Checkout repository
 16 |         uses: actions/checkout@v3
 17 |         with:
 18 |           fetch-depth: 0  # Fetch all tags
 19 |       
 20 |       - name: Get latest Git tag
 21 |         id: get_version
 22 |         run: |
 23 |           VERSION=$(git describe --tags --abbrev=0 2>/dev/null || echo "v0.1.0")
 24 |           echo "VERSION=$VERSION" >> $GITHUB_ENV
 25 |           echo "Resolved version: $VERSION"
 26 |      
 27 |       - name: Install missing system libraries (XCB, TBB, etc.)
 28 |         run: |
 29 |           sudo apt-get update
 30 |           sudo apt-get install -y \
 31 |             libxcb1 \
 32 |             libxcb-keysyms1 \
 33 |             libxcb-shape0 \
 34 |             libxcb-xkb1 \
 35 |             libxcb-render-util0 \
 36 |             libxcb-image0 \
 37 |             libxcb-xinerama0 \
 38 |             libxkbcommon-x11-0 \
 39 |             libxcb-icccm4 \
 40 |             libtbb12 \
 41 |             libsox-dev
 42 | 
 43 |       - name: Install FFmpeg
 44 |         run: sudo apt update && sudo apt install -y ffmpeg
 45 |       
 46 |       - name: Verify FFmpeg installation
 47 |         run: |
 48 |           which ffmpeg
 49 |           ffmpeg -version 
 50 |       
 51 |       - name: Set up Python 3.8
 52 |         uses: actions/setup-python@v4
 53 |         with:
 54 |           python-version: "3.8"
 55 | 
 56 |       - name: Set up Python virtual environment
 57 |         run: |
 58 |           python -m venv .venv
 59 | 
 60 |       - name: Activate virtual environment and install dependencies
 61 |         run: |
 62 |           source .venv/bin/activate
 63 |           pip install --upgrade pip
 64 |           pip install -r requirements.txt
 65 |           pip install pyinstaller
 66 |      
 67 |       - name: Verify existence of Whisper assets directory
 68 |         run: |
 69 |           source .venv/bin/activate  # Activate the virtual environment
 70 |           ASSETS_PATH=$(python -c "import whisper; import os; print(os.path.join(os.path.dirname(whisper.__file__), 'assets'))")
 71 |           if [ -d "$ASSETS_PATH" ]; then
 72 |             echo "The 'assets' directory exists at: $ASSETS_PATH"
 73 |             echo "ASSETS_PATH=$ASSETS_PATH" >> $GITHUB_ENV
 74 |           else
 75 |             echo "The 'assets' directory does NOT exist."
 76 |             exit 1
 77 |           fi
 78 |       
 79 |       - name: Compile with pyInstaller
 80 |         run: |
 81 |           source .venv/bin/activate
 82 |           FFMPPEG_PATH=$(which ffmpeg)
 83 |           pyinstaller main.py \
 84 |             --path="$(pwd)" \
 85 |             --onefile \
 86 |             --add-binary="$FFMPPEG_PATH:." \
 87 |             --add-binary="pytranscriber.sqlite:." \
 88 |             --add-data="pytranscriber/gui/*.qm:pytranscriber/gui/" \
 89 |             --add-data="$ASSETS_PATH:whisper/assets"
 90 |      
 91 |       - name: Rename the binary with version number
 92 |         run: |
 93 |           cd dist
 94 |           mv main "pyTranscriber-${VERSION}"
 95 |       
 96 |       - name: Upload built executable
 97 |         uses: actions/upload-artifact@v4
 98 |         with:
 99 |           name: pyTranscriber-linux-pyinstaller-${{ env.VERSION }}
100 |           path: ./dist/pyTranscriber-${{ env.VERSION }}
101 | 
102 |   download:
103 |     runs-on: ubuntu-22.04
104 |     needs: build
105 |     steps:
106 |       - name: Download built executable
107 |         uses: actions/download-artifact@v4
108 |         with:
109 |           path: ./output
110 | 
111 |       - name: List downloaded files
112 |         run: ls -la ./output
113 | 


--------------------------------------------------------------------------------
/whisper/triton_ops.py:
--------------------------------------------------------------------------------
  1 | from functools import lru_cache
  2 | 
  3 | import numpy as np
  4 | import torch
  5 | 
  6 | try:
  7 |     import triton
  8 |     import triton.language as tl
  9 | except ImportError:
 10 |     raise RuntimeError("triton import failed; try `pip install --pre triton`")
 11 | 
 12 | 
# Triton kernel that fills the `cost` and `trace` buffers one row per loop
# iteration. Row k + 1 of `cost` is computed from rows k - 1 and k of `cost`
# and row k - 1 of `x`.
# NOTE(review): the row index is annotated `k = i + j`, which suggests the
# caller passes the matrices in a skewed (anti-diagonal) layout -- confirm
# against the launching code.
@triton.jit
def dtw_kernel(
    cost, trace, x, x_stride, cost_stride, trace_stride, N, M, BLOCK_SIZE: tl.constexpr
):
    # One lane per column; only the first M lanes take part in loads/stores.
    offsets = tl.arange(0, BLOCK_SIZE)
    mask = offsets < M

    for k in range(1, N + M + 1):  # k = i + j
        # Barrier at the top of every iteration (presumably so the row stored
        # by the previous iteration is visible before it is read back below).
        tl.debug_barrier()

        # Three candidate predecessors: row k - 1, row k, and row k shifted
        # by one element.
        p0 = cost + (k - 1) * cost_stride
        p1 = cost + k * cost_stride
        p2 = cost + k * cost_stride + 1

        c0 = tl.load(p0 + offsets, mask=mask)
        c1 = tl.load(p1 + offsets, mask=mask)
        c2 = tl.load(p2 + offsets, mask=mask)

        # Local cost for this row; masked-out lanes read 0.
        x_row = tl.load(x + (k - 1) * x_stride + offsets, mask=mask, other=0)
        cost_row = x_row + tl.minimum(tl.minimum(c0, c1), c2)

        # Result goes to row k + 1, shifted by one element.
        cost_ptr = cost + (k + 1) * cost_stride + 1
        tl.store(cost_ptr + offsets, cost_row, mask=mask)

        # Record which candidate was the minimum (0 -> c0, 1 -> c1, 2 -> c2).
        # The comparisons are non-strict, so on ties several masks are true;
        # the stores are issued in the order 2, 1, 0, so the lowest index is
        # the value written last.
        trace_ptr = trace + (k + 1) * trace_stride + 1
        tl.store(trace_ptr + offsets, 2, mask=mask & (c2 <= c0) & (c2 <= c1))
        tl.store(trace_ptr + offsets, 1, mask=mask & (c1 <= c0) & (c1 <= c2))
        tl.store(trace_ptr + offsets, 0, mask=mask & (c0 <= c1) & (c0 <= c2))
 41 | 
 42 | 
@lru_cache(maxsize=None)
def median_kernel(filter_width: int):
    """Build (and cache) a Triton kernel specialised for ``filter_width``.

    The inner ``kernel`` is only a template: its body contains three
    placeholder names that are replaced textually in the kernel source
    before it is returned:

    * the load placeholder becomes ``filter_width`` loads ``row0 .. row{n-1}``,
      where ``row{i}`` reads ``x_ptr + offsets + i``;
    * the sort placeholder becomes unrolled compare-and-swap steps on
      adjacent ``row{j}`` / ``row{j + 1}`` pairs (a partial bubble sort of
      ``filter_width // 2 + 1`` passes, which is enough to settle position
      ``filter_width // 2``);
    * the result placeholder becomes ``row{filter_width // 2}``.

    Because of ``lru_cache(maxsize=None)``, one kernel object is kept per
    distinct ``filter_width`` for the life of the process.

    NOTE(review): the replacements match on exact text (including the
    four-space indent of the placeholder lines), so the template body must
    not be reformatted.
    """
    @triton.jit
    def kernel(
        y, x, x_stride, y_stride, BLOCK_SIZE: tl.constexpr
    ):  # x.shape[-1] == filter_width
        row_idx = tl.program_id(0)
        offsets = tl.arange(0, BLOCK_SIZE)
        mask = offsets < y_stride

        x_ptr = x + row_idx * x_stride  # noqa: F841
        y_ptr = y + row_idx * y_stride

        LOAD_ALL_ROWS_HERE  # noqa: F821

        BUBBLESORT_HERE  # noqa: F821

        tl.store(y_ptr + offsets, MIDDLE_ROW_HERE, mask=mask)  # noqa: F821

    # Wrap the plain Python function in a fresh JITFunction before editing
    # its `src` (presumably so the decorated template itself is left alone).
    kernel = triton.JITFunction(kernel.fn)
    # Expand the load placeholder: one masked load per filter tap.
    kernel.src = kernel.src.replace(
        "    LOAD_ALL_ROWS_HERE",
        "\n".join(
            [
                f"    row{i} = tl.load(x_ptr + offsets + {i}, mask=mask)"
                for i in range(filter_width)
            ]
        ),
    )
    # Expand the sort placeholder: pass i compares adjacent pairs
    # 0 .. filter_width - i - 2, moving the larger value towards the end.
    kernel.src = kernel.src.replace(
        "    BUBBLESORT_HERE",
        "\n\n".join(
            [
                "\n\n".join(
                    [
                        "\n".join(
                            [
                                f"    smaller = tl.where(row{j} < row{j + 1}, row{j}, row{j + 1})",
                                f"    larger = tl.where(row{j} > row{j + 1}, row{j}, row{j + 1})",
                                f"    row{j} = smaller",
                                f"    row{j + 1} = larger",
                            ]
                        )
                        for j in range(filter_width - i - 1)
                    ]
                )
                for i in range(filter_width // 2 + 1)
            ]
        ),
    )
    # The stored value is the row at the middle index.
    kernel.src = kernel.src.replace("MIDDLE_ROW_HERE", f"row{filter_width // 2}")

    return kernel
 96 | 
 97 | 
 98 | def median_filter_cuda(x: torch.Tensor, filter_width: int):
 99 |     """Apply a median filter of given width along the last dimension of x"""
100 |     slices = x.contiguous().unfold(-1, filter_width, 1)
101 |     grid = np.prod(slices.shape[:-2])
102 | 
103 |     kernel = median_kernel(filter_width)
104 |     y = torch.empty_like(slices[..., 0])
105 | 
106 |     BLOCK_SIZE = 1 << (y.stride(-2) - 1).bit_length()
107 |     kernel[(grid,)](y, x, x.stride(-2), y.stride(-2), BLOCK_SIZE=BLOCK_SIZE)
108 | 
109 |     return y
110 | 


--------------------------------------------------------------------------------
/.github/workflows/linux-nuitka.yml:
--------------------------------------------------------------------------------
  1 | name: Linux Nuitka Pipeline
  2 | 
  3 | on:
  4 |   push:
  5 |     branches:
  6 |       - master
  7 |       - develop
  8 |   pull_request:
  9 |     branches:
 10 |       - master
 11 |       - develop
 12 | 
 13 | jobs:
 14 |   build:
 15 |     runs-on: ubuntu-22.04  # Ensure the job runs only on Ubuntu 22.04
 16 | 
 17 |     steps:
 18 |       - name: Checkout repository
 19 |         uses: actions/checkout@v3
 20 |         with:
 21 |           fetch-depth: 0  # Fetch all tags
 22 |         
 23 |       - name: Get latest Git tag
 24 |         id: get_version
 25 |         run: |
 26 |           VERSION=$(git describe --tags --abbrev=0 2>/dev/null || echo "v0.1.0")
 27 |           echo "VERSION=$VERSION" >> $GITHUB_ENV
 28 |           echo "Resolved version: $VERSION"
 29 |       
 30 |       - name: Install missing system libraries (XCB, TBB, etc.)
 31 |         run: |
 32 |           sudo apt-get update
 33 |           sudo apt-get install -y \
 34 |             libxcb1 \
 35 |             libxcb-keysyms1 \
 36 |             libxcb-shape0 \
 37 |             libxcb-xkb1 \
 38 |             libxcb-render-util0 \
 39 |             libxcb-image0 \
 40 |             libxcb-xinerama0 \
 41 |             libxkbcommon-x11-0 \
 42 |             libxcb-icccm4 \
 43 |             libtbb12 \
 44 |             ccache \
 45 |             libsox-dev
 46 |             
 47 | 
 48 |       - name: Install FFmpeg
 49 |         run: sudo apt update && sudo apt install -y ffmpeg
 50 | 
 51 |       - name: Verify FFmpeg installation
 52 |         run: |
 53 |           which ffmpeg
 54 |           ffmpeg -version 
 55 |           
 56 |       - name: Set up Python 3.8
 57 |         uses: actions/setup-python@v4
 58 |         with:
 59 |           python-version: "3.8"
 60 | 
 61 |       - name: Set up Python virtual environment
 62 |         run: |
 63 |           python -m venv .venv
 64 | 
 65 |       - name: Install dependencies
 66 |         run: |
 67 |           source .venv/bin/activate
 68 |           pip install --upgrade pip
 69 |           pip install -r requirements.txt
 70 |           pip install nuitka
 71 | 
 72 |       - name: Verify Whisper assets directory
 73 |         run: |
 74 |           source .venv/bin/activate
 75 |           whisperPath=$(python -c "import whisper; print(whisper.__file__)")
 76 |           assetsPath=$(dirname $whisperPath)/assets
 77 |           if [ -d "$assetsPath" ]; then
 78 |             echo "The 'assets' directory exists at: $assetsPath"
 79 |           else
 80 |             echo "The 'assets' directory DOES NOT exist."
 81 |             exit 1
 82 |           fi
 83 | 
 84 |       - name: Compile with Nuitka
 85 |         run: |
 86 |           source .venv/bin/activate
 87 |           ffmpegPath=$(which ffmpeg)
 88 |           nuitka \
 89 |             --assume-yes-for-downloads \
 90 |             --enable-plugin=pyqt5 \
 91 |             --include-data-files="pytranscriber.sqlite=pytranscriber.sqlite" \
 92 |             --include-data-files="$ffmpegPath=ffmpeg" \
 93 |             --include-data-files="pytranscriber/gui/*.qm=pytranscriber/gui/" \
 94 |             --include-package-data="whisper:assets/*=whisper/assets" \
 95 |             main.py \
 96 |             --onefile \
 97 |             --output-dir=dist 
 98 |             
 99 |       - name: Rename the binary with version number
100 |         run: |
101 |           cd dist
102 |           mv main.bin "pyTranscriber-${VERSION}"
103 |       
104 |       - name: Upload built executable
105 |         uses: actions/upload-artifact@v4
106 |         with:
107 |           name: pyTranscriber-linux-nuitka-${{ env.VERSION }}
108 |           path: ./dist/pyTranscriber-${{ env.VERSION }} # Adjust this path if Nuitka outputs elsewhere
109 | 
110 |   download:
111 |     runs-on: ubuntu-22.04
112 |     needs: build
113 |     steps:
114 |       - name: Download built executable
115 |         uses: actions/download-artifact@v4
116 |         with:
117 |           path: ./output
118 | 
119 |       - name: List downloaded files
120 |         run: dir ./output
121 | 


--------------------------------------------------------------------------------
/pytranscriber/control/ctr_db.py:
--------------------------------------------------------------------------------
  1 | '''
  2 |    (C) 2025 Raryel C. Souza
  3 |     This program is free software: you can redistribute it and/or modify
  4 |     it under the terms of the GNU General Public License as published by
  5 |     the Free Software Foundation, either version 3 of the License, or
  6 |     (at your option) any later version.
  7 |     This program is distributed in the hope that it will be useful,
  8 |     but WITHOUT ANY WARRANTY; without even the implied warranty of
  9 |     MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
 10 |     GNU General Public License for more details.
 11 |     You should have received a copy of the GNU General Public License
 12 |     along with this program.  If not, see <https://www.gnu.org/licenses/>.
 13 | '''
 14 | 
 15 | from pathlib import PurePath
 16 | 
 17 | from pytranscriber.gui.message_util import MessageUtil
 18 | import sqlite3
 19 | 
 20 | 
 21 | class CtrDB:
 22 |     conn = None
 23 |     DB_ERROR = "DB Error"
 24 | 
 25 |     def connect(self):
 26 |         if self.conn:
 27 |             return self.conn.cursor()
 28 |         else:
 29 |             try:
 30 |                 local_program_path = PurePath(__file__).parent.parent.parent.joinpath('pytranscriber.sqlite')
 31 |                 str_local_program_path = str(local_program_path)
 32 | 
 33 | 
 34 | 
 35 |                 self.conn = sqlite3.connect(str_local_program_path)
 36 |                 cur = self.conn.cursor()
 37 | 
 38 |                 return cur
 39 |             except Exception as ex:
 40 |                 MessageUtil.show_error_message("ConnectDB" + str(ex), self.DB_ERROR)
 41 |                 exit(1)
 42 | 
 43 |     def close(self):
 44 |         self.conn.close()
 45 |         self.conn = None
 46 | 
 47 |     def _load_one_row(self, table_name):
 48 |         cur = self.connect()
 49 |         if cur is None:
 50 |             exit(1)
 51 | 
 52 |         try:
 53 |             cur.execute('SELECT * FROM ' + table_name)
 54 |             return cur.fetchone()
 55 |         except sqlite3.Error as e:
 56 |             MessageUtil.show_error_message("LoadOneRow " + str(e), self.DB_ERROR)
 57 |             return None
 58 | 
 59 |     def _save_single_column(self, query, value):
 60 |         cur = self.connect()
 61 |         try:
 62 |             cur.execute(query,(value,))
 63 |             self.conn.commit()
 64 |         except sqlite3.Error as e:
 65 |             MessageUtil.show_error_message("SaveSingleColumn " + str(e), self.DB_ERROR)
 66 |         self.close()
 67 | 
 68 |     def _truncate_table(self, table_name):
 69 |         cur = self.connect()
 70 |         try:
 71 |             cur.execute('DELETE FROM ' + table_name)
 72 |             self.conn.commit()
 73 |         except sqlite3.Error as e:
 74 |             MessageUtil.show_error_message("TruncateTable " + str(e), self.DB_ERROR)
 75 |         self.close()
 76 | 
 77 |     def load_last_language(self):
 78 |         return self._load_one_row('Language')
 79 | 
 80 |     def clear_last_language(self):
 81 |         self._truncate_table('Language')
 82 | 
 83 |     def save_last_language(self, language):
 84 |         cur = self.connect()
 85 |         try:
 86 |             cur.execute('INSERT INTO Language (last_language) VALUES (?)',
 87 |                         (language,))
 88 |             self.conn.commit()
 89 |         except sqlite3.Error as e:
 90 |             MessageUtil.show_error_message("SaveLastLanguage " + str(e), self.DB_ERROR)
 91 |         self.close()
 92 | 
 93 |     def load_proxy(self):
 94 |         return self._load_one_row('Proxy')
 95 | 
 96 |     def clear_proxy(self):
 97 |         self._truncate_table('Proxy')
 98 | 
 99 |     def save_proxy(self, proxy):
100 |         cur = self.connect()
101 |         try:
102 |             cur.execute('INSERT INTO Proxy (proxy_address) VALUES (?)',
103 |                         (proxy['https'],))
104 |             self.conn.commit()
105 |             MessageUtil.show_info_message('Proxy address saved successfully', 'Proxy settings saved')
106 |         except sqlite3.Error as e:
107 |             MessageUtil.show_error_message("SaveProxy " + str(e), self.DB_ERROR)
108 |         self.close()
109 | 


--------------------------------------------------------------------------------
/pytranscriber/control/thread_exec_generic.py:
--------------------------------------------------------------------------------
 1 | '''
 2 |    (C) 2025 Raryel C. Souza
 3 |     This program is free software: you can redistribute it and/or modify
 4 |     it under the terms of the GNU General Public License as published by
 5 |     the Free Software Foundation, either version 3 of the License, or
 6 |     (at your option) any later version.
 7 |     This program is distributed in the hope that it will be useful,
 8 |     but WITHOUT ANY WARRANTY; without even the implied warranty of
 9 |     MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
10 |     GNU General Public License for more details.
11 |     You should have received a copy of the GNU General Public License
12 |     along with this program.  If not, see <https://www.gnu.org/licenses/>.
13 | '''
14 | 
15 | from abc import ABC, abstractmethod
16 | from PyQt5.QtCore import QThread
17 | from PyQt5.QtCore import pyqtSignal
18 | from pathlib import Path
19 | from pytranscriber.control.ctr_engine import CtrEngine
20 | import os
21 | 
class ThreadExecGeneric(QThread):
    """Base worker thread that runs a transcription engine over a file list.

    Subclasses provide ``_run_engine_for_media``; this class handles the
    output folder, iterates over the selected files, honours cancellation
    (as reported by ``CtrEngine``) and talks to the GUI through signals.
    """

    signalLockGUI = pyqtSignal()
    signalResetGUIAfterCancel = pyqtSignal()
    signalResetGUIAfterSuccess = pyqtSignal()
    signalProgress = pyqtSignal(str, int)
    signalProgressFileYofN = pyqtSignal(str)
    signalErrorMsg = pyqtSignal(str)

    def __init__(self, obj_transcription_parameters):
        """Store the transcription parameters and mark the thread as running."""
        QThread.__init__(self)
        self.obj_transcription_parameters = obj_transcription_parameters
        self.running = True

    def listenerProgress(self, string, percent):
        """Forward a progress update (text, percentage) to the GUI."""
        self.signalProgress.emit(string, percent)

    def _loopSelectedFiles(self):
        """Process every selected file unless the operation is canceled.

        Emits ``signalLockGUI`` first. If the output folder cannot be used,
        ``signalErrorMsg`` is emitted and nothing is processed. Otherwise
        one of the two reset signals is emitted at the end, depending on
        whether the operation was canceled.
        """
        self.signalLockGUI.emit()

        langCode = self.obj_transcription_parameters.langCode

        # Create the output folder, including missing parents. Any failure
        # (path is a file, permission denied, ...) is deliberately not
        # raised here: the is_dir() check below reports it to the user
        # instead of crashing the worker thread.
        pathOutputFolder = Path(self.obj_transcription_parameters.outputFolder)
        try:
            pathOutputFolder.mkdir(parents=True, exist_ok=True)
        except OSError:
            pass

        if not pathOutputFolder.is_dir():
            # force the user to select a different output directory
            self.signalErrorMsg.emit("Error! Invalid output folder. Please choose another one.")
            return

        nFiles = len(self.obj_transcription_parameters.listFiles)
        for i in range(nFiles):
            # stop at once if the user clicked the cancel button
            if CtrEngine.is_operation_canceled():
                break
            self._updateProgressFileYofN(i, nFiles)
            self._run_engine_for_media(i, langCode)

        # if the operation was canceled the file list is not cleared
        if CtrEngine.is_operation_canceled():
            self.signalResetGUIAfterCancel.emit()
        else:
            self.signalResetGUIAfterSuccess.emit()

    # NOTE: this class derives only from QThread, not from ABC, so the
    # decorator documents intent but does not prevent instantiation.
    @abstractmethod
    def _run_engine_for_media(self, index, langCode):
        """Transcribe the file at ``index`` of the file list (subclass hook)."""
        pass

    def _updateProgressFileYofN(self, currentIndex, countFiles):
        """Emit the "File Y of N" label for the zero-based ``currentIndex``."""
        self.signalProgressFileYofN.emit("File " + str(currentIndex + 1) + " of " + str(countFiles))

    def _generatePathOutputFile(self, sourceFile):
        """Return ``[srt_path, txt_path]`` for ``sourceFile``.

        Both paths live in the configured output folder and reuse the
        source file's name with its last extension replaced.
        """
        # [0] is the filename, [1] is the file extension
        fileName = os.path.splitext(os.path.basename(sourceFile))[0]

        pathOutputFolder = Path(self.obj_transcription_parameters.outputFolder)
        outputFileSRT = pathOutputFolder / (fileName + ".srt")
        outputFileTXT = pathOutputFolder / (fileName + ".txt")
        return [outputFileSRT, outputFileTXT]

    @staticmethod
    def cancel():
        """Ask the engine controller to cancel the running operation."""
        CtrEngine.cancel_operation()
92 | 


--------------------------------------------------------------------------------
/patches/autosub-0.4.0.patch:
--------------------------------------------------------------------------------
  1 | --- __init__-0.4.0.py	2019-02-09 21:21:16.335586891 +0700
  2 | +++ __init__.py	2019-02-10 21:25:41.864964164 +0700
  3 | @@ -8,16 +8,22 @@
  4 |  
  5 |  import argparse
  6 |  import audioop
  7 | -import json
  8 |  import math
  9 |  import multiprocessing
 10 |  import os
 11 | +from json import JSONDecodeError
 12 |  import subprocess
 13 |  import sys
 14 |  import tempfile
 15 |  import wave
 16 |  
 17 | +import json
 18 |  import requests
 19 | +try:
 20 | +    from json.decoder import JSONDecodeError
 21 | +except ImportError:
 22 | +    JSONDecodeError = ValueError
 23 | +
 24 |  from googleapiclient.discovery import build
 25 |  from progressbar import ProgressBar, Percentage, Bar, ETA
 26 |  
 27 | @@ -61,8 +67,10 @@
 28 |              start, end = region
 29 |              start = max(0, start - self.include_before)
 30 |              end += self.include_after
 31 | -            temp = tempfile.NamedTemporaryFile(suffix='.flac')
 32 | -            command = ["ffmpeg", "-ss", str(start), "-t", str(end - start),
 33 | +            #delete=False necessary for running on Windows
 34 | +            temp = tempfile.NamedTemporaryFile(suffix='.flac', delete=False)
 35 | +            program_ffmpeg = which("ffmpeg")
 36 | +            command = [str(program_ffmpeg), "-ss", str(start), "-t", str(end - start),
 37 |                         "-y", "-i", self.source_path,
 38 |                         "-loglevel", "error", temp.name]
 39 |              use_shell = True if os.name == "nt" else False
 40 | @@ -102,6 +110,8 @@
 41 |                      except IndexError:
 42 |                          # no result
 43 |                          continue
 44 | +                    except JSONDecodeError:
 45 | +                        continue
 46 |  
 47 |          except KeyboardInterrupt:
 48 |              return None
 49 | @@ -149,17 +159,25 @@
 50 |          Checks whether a file is executable.
 51 |          """
 52 |          return os.path.isfile(file_path) and os.access(file_path, os.X_OK)
 53 | -
 54 | +    #necessary to run on Windows
 55 | +    if os.name == "nt":
 56 | +        program += ".exe"
 57 |      fpath, _ = os.path.split(program)
 58 |      if fpath:
 59 |          if is_exe(program):
 60 |              return program
 61 |      else:
 62 | -        for path in os.environ["PATH"].split(os.pathsep):
 63 | -            path = path.strip('"')
 64 | -            exe_file = os.path.join(path, program)
 65 | -            if is_exe(exe_file):
 66 | -                return exe_file
 67 | +        #looks for file in the script execution folder before checking on system path
 68 | +        current_dir = os.getcwd()
 69 | +        local_program = os.path.join(current_dir, program)
 70 | +        if is_exe(local_program):
 71 | +            return local_program
 72 | +        else:
 73 | +            for path in os.environ["PATH"].split(os.pathsep):
 74 | +                path = path.strip('"')
 75 | +                exe_file = os.path.join(path, program)
 76 | +                if is_exe(exe_file):
 77 | +                    return exe_file
 78 |      return None
 79 |  
 80 |  
 81 | @@ -171,10 +189,11 @@
 82 |      if not os.path.isfile(filename):
 83 |          print("The given file does not exist: {}".format(filename))
 84 |          raise Exception("Invalid filepath: {}".format(filename))
 85 | -    if not which("ffmpeg"):
 86 | +    program_ffmpeg = which("ffmpeg")
 87 | +    if not program_ffmpeg:
 88 |          print("ffmpeg: Executable not found on machine.")
 89 |          raise Exception("Dependency not found: ffmpeg")
 90 | -    command = ["ffmpeg", "-y", "-i", filename,
 91 | +    command = [str(program_ffmpeg), "-y", "-i", filename,
 92 |                 "-ac", str(channels), "-ar", str(rate),
 93 |                 "-loglevel", "error", temp.name]
 94 |      use_shell = True if os.name == "nt" else False
 95 | @@ -233,6 +252,12 @@
 96 |      """
 97 |      Given an input audio/video file, generate subtitles in the specified language and format.
 98 |      """
 99 | +
100 | +    if sys.platform == "darwin":
101 | +        #fork does not work on macOS -> use forkserver (sys.platform also exists on Windows, os.uname does not)
102 | +        #force=True: set_start_method raises RuntimeError when the start method was already set in this process
103 | +        multiprocessing.set_start_method('forkserver', force=True)
104 | +
105 |      audio_filename, audio_rate = extract_audio(source_path)
106 |  
107 |      regions = find_speech_regions(audio_filename)
108 | 


--------------------------------------------------------------------------------
/pytranscriber/gui/繁體中文 - Chinese Traditional.ts:
--------------------------------------------------------------------------------
  1 | <?xml version="1.0" encoding="utf-8"?>
  2 | <!DOCTYPE TS>
  3 | <TS version="2.1">
  4 | <context>
  5 |     <name>window</name>
  6 |     <message>
  7 |         <location filename="gui.py" line="129"/>
  8 |         <source>pyTranscriber - v1.8 - 17/08/2022</source>
  9 |         <translation>pyTranscriber - v1.8 - 17/08/2022</translation>
 10 |     </message>
 11 |     <message>
 12 |         <location filename="gui.py" line="130"/>
 13 |         <source>Select file(s)</source>
 14 |         <translation>選擇檔案</translation>
 15 |     </message>
 16 |     <message>
 17 |         <location filename="gui.py" line="131"/>
 18 |         <source>Transcribe Audio / Generate Subtitles</source>
 19 |         <translation>轉譯音訊 / 生成字幕</translation>
 20 |     </message>
 21 |     <message>
 22 |         <location filename="gui.py" line="132"/>
 23 |         <source>Open Output Folder</source>
 24 |         <translation>開啟輸出位置</translation>
 25 |     </message>
 26 |     <message>
 27 |         <location filename="gui.py" line="133"/>
 28 |         <source>Output Location</source>
 29 |         <translation>選取輸出位置</translation>
 30 |     </message>
 31 |     <message>
 32 |         <location filename="gui.py" line="134"/>
 33 |         <source>List of files to generate transcribe audio / generate subtitles</source>
 34 |         <translation>轉譯音訊 / 生成字幕檔案清單</translation>
 35 |     </message>
 36 |     <message>
 37 |         <location filename="gui.py" line="135"/>
 38 |         <source>Remove file(s)</source>
 39 |         <translation>移除檔案</translation>
 40 |     </message>
 41 |     <message>
 42 |         <location filename="gui.py" line="136"/>
 43 |         <source>Cancel</source>
 44 |         <translation>取消</translation>
 45 |     </message>
 46 |     <message>
 47 |         <location filename="gui.py" line="137"/>
 48 |         <source>Open output files automatically</source>
 49 |         <translation>完成後自動開啟輸出檔案</translation>
 50 |     </message>
 51 |     <message>
 52 |         <location filename="gui.py" line="138"/>
 53 |         <source>Audio Language:</source>
 54 |         <translation>選擇音訊語言</translation>
 55 |     </message>
 56 |     <message>
 57 |         <location filename="gui.py" line="139"/>
 58 |         <source>Abo&amp;ut</source>
 59 |         <translation>關於</translation>
 60 |     </message>
 61 |     <message>
 62 |         <location filename="gui.py" line="140"/>
 63 |         <source>&amp;Settings</source>
 64 |         <translation>設定</translation>
 65 |     </message>
 66 |     <message>
 67 |         <location filename="gui.py" line="141"/>
 68 |         <source>&amp;Language</source>
 69 |         <translation>語言</translation>
 70 |     </message>
 71 |     <message>
 72 |         <location filename="gui.py" line="142"/>
 73 |         <source>&amp;License</source>
 74 |         <translation>&amp;License</translation>
 75 |     </message>
 76 |     <message>
 77 |         <location filename="gui.py" line="143"/>
 78 |         <source>&amp;Funding at Github Sponsors</source>
 79 |         <translation>在 Github 上成為贊助者</translation>
 80 |     </message>
 81 |     <message>
 82 |         <location filename="gui.py" line="144"/>
 83 |         <source>&amp;More about pyTranscriber</source>
 84 |         <translation>&amp;關於 pyTranscriber</translation>
 85 |     </message>
 86 |     <message>
 87 |         <location filename="gui.py" line="145"/>
 88 |         <source>&amp;Proxy</source>
 89 |         <translation>代理伺服器(Proxy)</translation>
 90 |     </message>
 91 |     <message>
 92 |         <location filename="gui.py" line="146"/>
 93 |         <source>Proxy setting</source>
 94 |         <translation>代理伺服器設定</translation>
 95 |     </message>
 96 |     <message>
 97 |         <location filename="gui.py" line="147"/>
 98 |         <source>English</source>
 99 |         <translation type="unfinished"></translation>
100 |     </message>
101 |     <message>
102 |         <location filename="gui.py" line="148"/>
103 |         <source>繁體中文 - Chinese Traditional</source>
104 |         <translation type="unfinished"></translation>
105 |     </message>
106 |     <message>
107 |         <location filename="gui.py" line="149"/>
108 |         <source>简体中文 - Chinese Simplified</source>
109 |         <translation type="unfinished"></translation>
110 |     </message>
111 |     <message>
112 |         <location filename="gui.py" line="150"/>
113 |         <source>Português</source>
114 |         <translation type="unfinished"></translation>
115 |     </message>
116 | </context>
117 | </TS>
118 | 


--------------------------------------------------------------------------------
/.github/workflows/win-nuitka.yml:
--------------------------------------------------------------------------------
  1 | on:
  2 |   push:
  3 |     branches:
  4 |       - master
  5 |       - develop
  6 |   pull_request:
  7 | 
  8 | jobs:
  9 |   build:
 10 |     runs-on: windows-latest
 11 | 
 12 |     steps:
 13 |       - name: Checkout repository
 14 |         uses: actions/checkout@v4
 15 |         with:
 16 |           fetch-depth: 0
 17 |       
 18 |       - name: Get latest Git tag
 19 |         id: get_version
 20 |         run: |
 21 |           $VERSION = git describe --tags --abbrev=0 2>$null
 22 |           if (-not $VERSION) {
 23 |             $VERSION = "v0.1.0"  # Default version if no tags are found
 24 |           }
 25 |           echo "VERSION=$VERSION" | Out-File -FilePath $env:GITHUB_ENV -Encoding utf8
 26 |           Write-Host "Resolved version: $VERSION"
 27 |         shell: pwsh
 28 | 
 29 |       - name: Setup FFmpeg
 30 |         uses: federicocarboni/setup-ffmpeg@v3.1
 31 |         with:
 32 |           ffmpeg-version: release  # Specify the desired FFmpeg version
 33 |           architecture: x64 
 34 |           
 35 |       - name: Add FFmpeg to PATH  # $env:Path edits end with this step's shell; writing to GITHUB_PATH persists the directory for later steps
 36 |         run: |
 37 |           $ffmpegPath = (Get-Command ffmpeg).Source
 38 |           Split-Path -Parent $ffmpegPath | Out-File -FilePath $env:GITHUB_PATH -Encoding utf8 -Append
 39 |           $ffmpegPath 
 40 |         shell: pwsh
 41 |           
 42 |       - name: Verify FFmpeg installation  # default shell is pwsh, where plain "where" is the Where-Object alias; where.exe is the path lookup tool
 43 |         run: |
 44 |           where.exe ffmpeg
 45 |           ffmpeg -version 
 46 |           
 47 |       - name: Set up Python 3.8
 48 |         uses: actions/setup-python@v4
 49 |         with:
 50 |           python-version: "3.8"
 51 | 
 52 |       - name: Set up Python virtual environment
 53 |         run: |
 54 |           python -m venv .venv
 55 |      
 56 |       - name: Activate virtual environment  # NOTE(review): each run step starts a new shell, so this activation does not carry over; later pip/nuitka calls appear to use the setup-python interpreter - confirm this is intended
 57 |         run: |
 58 |           .\.venv\Scripts\Activate
 59 |       
 60 |       - name: Install dependencies
 61 |         run: |
 62 |           pip install --upgrade pip
 63 |           pip install -r requirements.txt
 64 |           pip install nuitka
 65 |      
 66 |       - name: Verificar existência do diretório assets do whisper
 67 |         run: |
 68 |           $whisperPath = (python -c "import whisper; print(whisper.__file__)").Trim()
 69 |           $assetsPath = Join-Path (Split-Path $whisperPath) 'assets'
 70 |           if (Test-Path $assetsPath) {
 71 |             Write-Host "O diretório 'assets' existe em: $assetsPath"
 72 |           } else {
 73 |             Write-Host "O diretório 'assets' NÃO existe."
 74 |             exit 1
 75 |           }
 76 |           
 77 |       - name: Compile with Nuitka
 78 |         run: |
 79 |           $whisperPath = (python -c "import whisper; print(whisper.__file__)").Trim()
 80 |           $assetsPath = Join-Path (Split-Path $whisperPath) 'assets'
 81 |           $ffmpegPath = (Get-Command ffmpeg).Source
 82 |           nuitka `
 83 |             --assume-yes-for-downloads `
 84 |             --enable-plugin=pyqt5 `
 85 |             --include-data-files="pytranscriber.sqlite=pytranscriber.sqlite" `
 86 |             --include-data-files="$ffmpegPath=ffmpeg.exe" `
 87 |             --include-data-files="pytranscriber/gui/*.qm=pytranscriber/gui/" `
 88 |             --include-data-files="$assetsPath\*=whisper/assets/" `
 89 |             main.py `
 90 |             --onefile `
 91 |             --output-dir=dist `
 92 |             --windows-console-mode=disable
 93 |       
 94 |       - name: Rename and zip the .exe bundle with version number
 95 |         run: |
 96 |           Set-Location -Path dist
 97 |           Write-Host "Renaming main.exe to pyTranscriber-$env:VERSION.exe"
 98 |           Rename-Item -Force main.exe "pyTranscriber-$env:VERSION.exe"
 99 |           # Write-Host "Creating zip archive: pyTranscriber-$env:VERSION.zip"
100 |           # Compress-Archive -Path "pyTranscriber-$env:VERSION.exe" -DestinationPath "pyTranscriber-win-$env:VERSION.zip"
101 |         shell: pwsh 
102 |      
103 |       - name: Upload built executable
104 |         uses: actions/upload-artifact@v4
105 |         with:
106 |           name: pyTranscriber-win-${{ env.VERSION }}
107 |           path: ./dist/pyTranscriber-${{ env.VERSION }}.exe # Adjust this path if Nuitka outputs elsewhere
108 | 
109 |   download:
110 |     runs-on: windows-latest
111 |     needs: build
112 |     steps:
113 |       - name: Download built executable
114 |         uses: actions/download-artifact@v4
115 |         with:
116 |           path: ./output
117 | 
118 |       - name: List downloaded files
119 |         run: dir ./output
120 | 


--------------------------------------------------------------------------------
/.github/workflows/win-pyinstaller-dev2.yml:
--------------------------------------------------------------------------------
  1 | name: Windows PyInstaller
  2 | 
  3 | on:
  4 |   push:
  5 |     branches:
  6 |       - master
  7 |       - develop
  8 |   pull_request:
  9 | 
 10 | jobs:
 11 |   build:
 12 |     runs-on: windows-latest
 13 |     strategy:
 14 |       matrix:
 15 |         python-version: ["3.8", "3.10", "3.12"]  # Runs one build per listed Python version in parallel
 16 |     steps:
 17 |       - name: Checkout repository
 18 |         uses: actions/checkout@v4
 19 |         with:
 20 |           fetch-depth: 0  # Fetch all tags
 21 |       
 22 |       - name: Get latest Git tag
 23 |         id: get_version
 24 |         run: |
 25 |           $VERSION = git describe --tags --abbrev=0 2>$null
 26 |           if (-not $VERSION) {
 27 |             $VERSION = "v0.1.0"  # Default version if no tags are found
 28 |           }
 29 |           echo "VERSION=$VERSION" | Out-File -FilePath $env:GITHUB_ENV -Encoding utf8
 30 |           Write-Host "Resolved version: $VERSION"
 31 |         shell: pwsh
 32 |         
 33 |       - name: Setup FFmpeg
 34 |         uses: federicocarboni/setup-ffmpeg@v3.1
 35 |         with:
 36 |           ffmpeg-version: release
 37 |           architecture: x64 
 38 | 
 39 |       - name: Verify FFmpeg installation  # default shell is pwsh, where plain "where" is the Where-Object alias; where.exe is the path lookup tool
 40 |         run: |
 41 |           where.exe ffmpeg
 42 |           ffmpeg -version 
 43 | 
 44 |       - name: Set up Python ${{ matrix.python-version }}
 45 |         uses: actions/setup-python@v4
 46 |         with:
 47 |           python-version: ${{ matrix.python-version }}
 48 | 
 49 |       - name: Create and activate virtual environment for Python ${{ matrix.python-version }}
 50 |         run: |
 51 |           python -m venv .venv-${{ matrix.python-version }}
 52 |           .\.venv-${{ matrix.python-version }}\Scripts\Activate
 53 |         shell: pwsh
 54 | 
 55 |       - name: Install dependencies for Python ${{ matrix.python-version }}
 56 |         run: |
 57 |           .\.venv-${{ matrix.python-version }}\Scripts\Activate
 58 |           python -m ensurepip --upgrade
 59 |           python -m pip install --upgrade pip
 60 |           python -m pip install -r requirements.txt
 61 |           python -m pip install pyinstaller
 62 |         shell: pwsh
 63 | 
 64 |       - name: Verify whisper assets directory for Python ${{ matrix.python-version }}
 65 |         run: |
 66 |           .\.venv-${{ matrix.python-version }}\Scripts\Activate
 67 |           $whisperPath = (python -c "import whisper; print(whisper.__file__)").Trim()
 68 |           $assetsPath = Join-Path (Split-Path $whisperPath) 'assets'
 69 |           if (Test-Path $assetsPath) {
 70 |             Write-Host "O diretório 'assets' existe em: $assetsPath"
 71 |             echo "ASSETS_PATH=$assetsPath" >> $env:GITHUB_ENV
 72 |           } else {
 73 |             Write-Host "O diretório 'assets' NÃO existe."
 74 |             exit 1
 75 |           }
 76 |         shell: pwsh
 77 | 
 78 |       - name: Compile with PyInstaller for Python ${{ matrix.python-version }}
 79 |         run: |
 80 |           .\.venv-${{ matrix.python-version }}\Scripts\Activate
 81 |           $ffmpegPath = (Get-Command ffmpeg).Source
 82 |           pyinstaller main.py `
 83 |             --onefile `
 84 |             --path="$(Get-Location)" `
 85 |             --add-binary="$ffmpegPath;." `
 86 |             --add-binary="pytranscriber.sqlite;." `
 87 |             --add-data="pytranscriber/gui/*.qm;pytranscriber/gui/" `
 88 |             --add-data="${env:ASSETS_PATH};whisper/assets" `
 89 |             --clean
 90 | 
 91 |         shell: pwsh
 92 |         
 93 |       - name: Rename and zip the .exe bundle with version number
 94 |         run: |
 95 |           Set-Location -Path dist
 96 |           Write-Host "Renaming main.exe to pyTranscriber-$env:VERSION.exe"
 97 |           Rename-Item -Force main.exe "pyTranscriber-$env:VERSION.exe"
 98 |           # Write-Host "Creating zip archive: pyTranscriber-win-$env:VERSION.zip"
 99 |           # Compress-Archive -Path "pyTranscriber-$env:VERSION.exe" -DestinationPath "pyTranscriber-win-$env:VERSION.zip"
100 |         shell: pwsh 
101 |         
102 |       - name: Upload built executable for Python ${{ matrix.python-version }}
103 |         uses: actions/upload-artifact@v4
104 |         with:
105 |           name: pyTranscriber-win-${{ env.VERSION }}-py${{ matrix.python-version }}
106 |           path: ./dist/pyTranscriber-${{ env.VERSION }}.exe
107 | 
108 |   download:
109 |     runs-on: windows-latest
110 |     needs: build
111 |     steps:
112 |       - name: Download built executables
113 |         uses: actions/download-artifact@v4
114 |         with:
115 |           path: ./output
116 | 
117 |       - name: List downloaded files
118 |         run: dir ./output
119 | 


--------------------------------------------------------------------------------
/pytranscriber/gui/Português.ts:
--------------------------------------------------------------------------------
  1 | <?xml version="1.0" encoding="utf-8"?>
  2 | <!DOCTYPE TS>
  3 | <TS version="2.1">
  4 | <context>
  5 |     <name>window</name>
  6 |     <message>
  7 |         <location filename="gui.py" line="129"/>
  8 |         <source>pyTranscriber - v1.8 - 17/08/2022</source>
  9 |         <translation>pyTranscriber - v1.8 - 17/08/2022</translation>
 10 |     </message>
 11 |     <message>
 12 |         <location filename="gui.py" line="130"/>
 13 |         <source>Select file(s)</source>
 14 |         <translation>Selecionar arquivo(s)</translation>
 15 |     </message>
 16 |     <message>
 17 |         <location filename="gui.py" line="131"/>
 18 |         <source>Transcribe Audio / Generate Subtitles</source>
 19 |         <translation>Transcrever áudio / Gerar Legendas</translation>
 20 |     </message>
 21 |     <message>
 22 |         <location filename="gui.py" line="132"/>
 23 |         <source>Open Output Folder</source>
 24 |         <translation>Abrir Pasta de Destino</translation>
 25 |     </message>
 26 |     <message>
 27 |         <location filename="gui.py" line="133"/>
 28 |         <source>Output Location</source>
 29 |         <translation>Pasta de Destino</translation>
 30 |     </message>
 31 |     <message>
 32 |         <location filename="gui.py" line="134"/>
 33 |         <source>List of files to generate transcribe audio / generate subtitles</source>
 34 |         <translation>Lista de arquivos para gerar legendas/transcrever áudio</translation>
 35 |     </message>
 36 |     <message>
 37 |         <location filename="gui.py" line="135"/>
 38 |         <source>Remove file(s)</source>
 39 |         <translation>Remover arquivo(s)</translation>
 40 |     </message>
 41 |     <message>
 42 |         <location filename="gui.py" line="136"/>
 43 |         <source>Cancel</source>
 44 |         <translation>Cancelar</translation>
 45 |     </message>
 46 |     <message>
 47 |         <location filename="gui.py" line="137"/>
 48 |         <source>Open output files automatically</source>
 49 |         <translation>Abrir arquivos de saída automaticamente</translation>
 50 |     </message>
 51 |     <message>
 52 |         <location filename="gui.py" line="138"/>
 53 |         <source>Audio Language:</source>
 54 |         <translation>Idioma do áudio:</translation>
 55 |     </message>
 56 |     <message>
 57 |         <location filename="gui.py" line="139"/>
 58 |         <source>Abo&amp;ut</source>
 59 |         <translation>Sob&amp;re</translation>
 60 |     </message>
 61 |     <message>
 62 |         <location filename="gui.py" line="140"/>
 63 |         <source>&amp;Settings</source>
 64 |         <translation>&amp;Configurações</translation>
 65 |     </message>
 66 |     <message>
 67 |         <location filename="gui.py" line="141"/>
 68 |         <source>&amp;Language</source>
 69 |         <translation>&amp;Idioma</translation>
 70 |     </message>
 71 |     <message>
 72 |         <location filename="gui.py" line="142"/>
 73 |         <source>&amp;License</source>
 74 |         <translation>&amp;Licença</translation>
 75 |     </message>
 76 |     <message>
 77 |         <location filename="gui.py" line="143"/>
 78 |         <source>&amp;Funding at Github Sponsors</source>
 79 |         <translation>Patrocínio no GitHub Sponsors</translation>
 80 |     </message>
 81 |     <message>
 82 |         <location filename="gui.py" line="144"/>
 83 |         <source>&amp;More about pyTranscriber</source>
 84 |         <translation>&amp;Sobre o pyTranscriber</translation>
 85 |     </message>
 86 |     <message>
 87 |         <location filename="gui.py" line="145"/>
 88 |         <source>&amp;Proxy</source>
 89 |         <translation>Proxy</translation>
 90 |     </message>
 91 |     <message>
 92 |         <location filename="gui.py" line="146"/>
 93 |         <source>Proxy setting</source>
 94 |         <translation>Configurações de Proxy</translation>
 95 |     </message>
 96 |     <message>
 97 |         <location filename="gui.py" line="147"/>
 98 |         <source>English</source>
 99 |         <translation type="unfinished"></translation>
100 |     </message>
101 |     <message>
102 |         <location filename="gui.py" line="148"/>
103 |         <source>繁體中文 - Chinese Traditional</source>
104 |         <translation type="unfinished"></translation>
105 |     </message>
106 |     <message>
107 |         <location filename="gui.py" line="149"/>
108 |         <source>简体中文 - Chinese Simplified</source>
109 |         <translation type="unfinished"></translation>
110 |     </message>
111 |     <message>
112 |         <location filename="gui.py" line="150"/>
113 |         <source>Português</source>
114 |         <translation type="unfinished"></translation>
115 |     </message>
116 | </context>
117 | </TS>
118 | 


--------------------------------------------------------------------------------
/pytranscriber/gui/proxy.py:
--------------------------------------------------------------------------------
 1 | # -*- coding: utf-8 -*-
 2 | 
 3 | # Form implementation generated from reading ui file '.\proxy.ui'
 4 | #
 5 | # Created by: PyQt5 UI code generator 5.15.4
 6 | #
 7 | # WARNING: Any manual changes made to this file will be lost when pyuic5 is
 8 | # run again.  Do not edit this file unless you know what you are doing.
 9 | 
10 | 
11 | from PyQt5 import QtCore, QtGui, QtWidgets
12 | 
13 | 
14 | class Ui_Dialog(object):  # pyuic5-generated form class for the "Proxy setting" dialog; per the file header, regenerating from proxy.ui overwrites manual edits
15 |     def setupUi(self, Dialog):  # builds the widget tree on the given Dialog and wires its signal/slot connections
16 |         Dialog.setObjectName("Dialog")
17 |         Dialog.resize(500, 120)
18 |         Dialog.setAutoFillBackground(False)
19 |         Dialog.setSizeGripEnabled(False)
20 |         self.verticalLayout = QtWidgets.QVBoxLayout(Dialog)
21 |         self.verticalLayout.setObjectName("verticalLayout")
22 |         self.groupBox = QtWidgets.QGroupBox(Dialog)  # untitled group box that holds the radio buttons and the URL row
23 |         self.groupBox.setTitle("")
24 |         self.groupBox.setObjectName("groupBox")
25 |         self.verticalLayout_2 = QtWidgets.QVBoxLayout(self.groupBox)
26 |         self.verticalLayout_2.setObjectName("verticalLayout_2")
27 |         self.radioButtonNone = QtWidgets.QRadioButton(self.groupBox)
28 |         sizePolicy = QtWidgets.QSizePolicy(QtWidgets.QSizePolicy.Minimum, QtWidgets.QSizePolicy.Fixed)
29 |         sizePolicy.setHorizontalStretch(0)
30 |         sizePolicy.setVerticalStretch(0)
31 |         sizePolicy.setHeightForWidth(self.radioButtonNone.sizePolicy().hasHeightForWidth())
32 |         self.radioButtonNone.setSizePolicy(sizePolicy)
33 |         self.radioButtonNone.setChecked(True)  # "None" is the initially checked option
34 |         self.radioButtonNone.setObjectName("radioButtonNone")
35 |         self.verticalLayout_2.addWidget(self.radioButtonNone)
36 |         self.radioButtonHTTP = QtWidgets.QRadioButton(self.groupBox)
37 |         self.radioButtonHTTP.setEnabled(True)
38 |         sizePolicy = QtWidgets.QSizePolicy(QtWidgets.QSizePolicy.Minimum, QtWidgets.QSizePolicy.Fixed)
39 |         sizePolicy.setHorizontalStretch(0)
40 |         sizePolicy.setVerticalStretch(0)
41 |         sizePolicy.setHeightForWidth(self.radioButtonHTTP.sizePolicy().hasHeightForWidth())
42 |         self.radioButtonHTTP.setSizePolicy(sizePolicy)
43 |         self.radioButtonHTTP.setObjectName("radioButtonHTTP")
44 |         self.verticalLayout_2.addWidget(self.radioButtonHTTP)
45 |         self.gridLayout = QtWidgets.QGridLayout()  # single row: label (col 0), URL field (col 1), Test button (col 2)
46 |         self.gridLayout.setObjectName("gridLayout")
47 |         self.lineEditHttpProxy = QtWidgets.QLineEdit(self.groupBox)
48 |         self.lineEditHttpProxy.setToolTip("")
49 |         self.lineEditHttpProxy.setStatusTip("")
50 |         self.lineEditHttpProxy.setInputMethodHints(QtCore.Qt.ImhUrlCharactersOnly)
51 |         self.lineEditHttpProxy.setObjectName("lineEditHttpProxy")
52 |         self.gridLayout.addWidget(self.lineEditHttpProxy, 0, 1, 1, 1)
53 |         self.label = QtWidgets.QLabel(self.groupBox)
54 |         self.label.setObjectName("label")
55 |         self.gridLayout.addWidget(self.label, 0, 0, 1, 1)
56 |         self.pushButtonTest = QtWidgets.QPushButton(self.groupBox)
57 |         self.pushButtonTest.setEnabled(True)
58 |         self.pushButtonTest.setObjectName("pushButtonTest")
59 |         self.gridLayout.addWidget(self.pushButtonTest, 0, 2, 1, 1)
60 |         self.verticalLayout_2.addLayout(self.gridLayout)
61 |         self.verticalLayout.addWidget(self.groupBox)
62 |         self.buttonBox = QtWidgets.QDialogButtonBox(Dialog)
63 |         self.buttonBox.setOrientation(QtCore.Qt.Horizontal)
64 |         self.buttonBox.setStandardButtons(QtWidgets.QDialogButtonBox.Cancel|QtWidgets.QDialogButtonBox.Ok)  # standard Cancel and Ok buttons
65 |         self.buttonBox.setObjectName("buttonBox")
66 |         self.verticalLayout.addWidget(self.buttonBox)
67 | 
68 |         self.retranslateUi(Dialog)  # apply the display texts before wiring the signals
69 |         self.buttonBox.accepted.connect(Dialog.accept)
70 |         self.buttonBox.rejected.connect(Dialog.reject)
71 |         self.radioButtonNone.clicked['bool'].connect(self.lineEditHttpProxy.setDisabled)  # clicking "None" passes its checked state to setDisabled on the URL field...
72 |         self.radioButtonNone.clicked['bool'].connect(self.pushButtonTest.setDisabled)  # ...and on the Test button
73 |         self.radioButtonHTTP.clicked['bool'].connect(self.pushButtonTest.setEnabled)  # clicking "HTTP" passes its checked state to setEnabled on the Test button...
74 |         self.radioButtonHTTP.clicked['bool'].connect(self.lineEditHttpProxy.setEnabled)  # ...and on the URL field
75 |         QtCore.QMetaObject.connectSlotsByName(Dialog)
76 | 
77 |     def retranslateUi(self, Dialog):  # sets every user-visible text through QCoreApplication.translate in the "Dialog" context
78 |         _translate = QtCore.QCoreApplication.translate
79 |         Dialog.setWindowTitle(_translate("Dialog", "Proxy setting"))
80 |         self.radioButtonNone.setText(_translate("Dialog", "None"))
81 |         self.radioButtonHTTP.setText(_translate("Dialog", "HTTP"))
82 |         self.lineEditHttpProxy.setPlaceholderText(_translate("Dialog", "http://127.0.0.1:1080"))
83 |         self.label.setText(_translate("Dialog", "URL:"))
84 |         self.pushButtonTest.setText(_translate("Dialog", "Test"))
85 | 


--------------------------------------------------------------------------------
/pytranscriber/gui/proxy/window_proxy.py:
--------------------------------------------------------------------------------
 1 | # -*- coding: utf-8 -*-
 2 | 
 3 | # Form implementation generated from reading ui file 'window_proxy.ui'
 4 | #
 5 | # Created by: PyQt5 UI code generator 5.15.4
 6 | #
 7 | # WARNING: Any manual changes made to this file will be lost when pyuic5 is
 8 | # run again.  Do not edit this file unless you know what you are doing.
 9 | 
10 | 
11 | from PyQt5 import QtCore, QtGui, QtWidgets
12 | 
13 | 
14 | class Ui_Dialog(object):
15 |     def setupUi(self, Dialog):
16 |         Dialog.setObjectName("Dialog")
17 |         Dialog.resize(381, 171)
18 |         Dialog.setAutoFillBackground(False)
19 |         Dialog.setSizeGripEnabled(False)
20 |         self.verticalLayout = QtWidgets.QVBoxLayout(Dialog)
21 |         self.verticalLayout.setObjectName("verticalLayout")
22 |         self.groupBox = QtWidgets.QGroupBox(Dialog)
23 |         self.groupBox.setTitle("")
24 |         self.groupBox.setObjectName("groupBox")
25 |         self.verticalLayout_2 = QtWidgets.QVBoxLayout(self.groupBox)
26 |         self.verticalLayout_2.setObjectName("verticalLayout_2")
27 |         self.radioButtonNone = QtWidgets.QRadioButton(self.groupBox)
28 |         sizePolicy = QtWidgets.QSizePolicy(QtWidgets.QSizePolicy.Minimum, QtWidgets.QSizePolicy.Fixed)
29 |         sizePolicy.setHorizontalStretch(0)
30 |         sizePolicy.setVerticalStretch(0)
31 |         sizePolicy.setHeightForWidth(self.radioButtonNone.sizePolicy().hasHeightForWidth())
32 |         self.radioButtonNone.setSizePolicy(sizePolicy)
33 |         font = QtGui.QFont()
34 |         font.setPointSize(9)
35 |         self.radioButtonNone.setFont(font)
36 |         self.radioButtonNone.setChecked(True)
37 |         self.radioButtonNone.setObjectName("radioButtonNone")
38 |         self.verticalLayout_2.addWidget(self.radioButtonNone)
39 |         self.radioButtonHTTP = QtWidgets.QRadioButton(self.groupBox)
40 |         self.radioButtonHTTP.setEnabled(True)
41 |         sizePolicy = QtWidgets.QSizePolicy(QtWidgets.QSizePolicy.Minimum, QtWidgets.QSizePolicy.Fixed)
42 |         sizePolicy.setHorizontalStretch(0)
43 |         sizePolicy.setVerticalStretch(0)
44 |         sizePolicy.setHeightForWidth(self.radioButtonHTTP.sizePolicy().hasHeightForWidth())
45 |         self.radioButtonHTTP.setSizePolicy(sizePolicy)
46 |         font = QtGui.QFont()
47 |         font.setPointSize(9)
48 |         self.radioButtonHTTP.setFont(font)
49 |         self.radioButtonHTTP.setObjectName("radioButtonHTTP")
50 |         self.verticalLayout_2.addWidget(self.radioButtonHTTP)
51 |         self.gridLayout = QtWidgets.QGridLayout()
52 |         self.gridLayout.setObjectName("gridLayout")
53 |         self.pushButtonTest = QtWidgets.QPushButton(self.groupBox)
54 |         self.pushButtonTest.setEnabled(True)
55 |         font = QtGui.QFont()
56 |         font.setPointSize(9)
57 |         self.pushButtonTest.setFont(font)
58 |         self.pushButtonTest.setObjectName("pushButtonTest")
59 |         self.gridLayout.addWidget(self.pushButtonTest, 0, 2, 1, 1)
60 |         self.lineEditHttpProxy = QtWidgets.QLineEdit(self.groupBox)
61 |         self.lineEditHttpProxy.setToolTip("")
62 |         self.lineEditHttpProxy.setStatusTip("")
63 |         self.lineEditHttpProxy.setInputMethodHints(QtCore.Qt.ImhUrlCharactersOnly)
64 |         self.lineEditHttpProxy.setObjectName("lineEditHttpProxy")
65 |         self.gridLayout.addWidget(self.lineEditHttpProxy, 0, 1, 1, 1)
66 |         self.label = QtWidgets.QLabel(self.groupBox)
67 |         font = QtGui.QFont()
68 |         font.setPointSize(9)
69 |         self.label.setFont(font)
70 |         self.label.setObjectName("label")
71 |         self.gridLayout.addWidget(self.label, 0, 0, 1, 1)
72 |         self.bSave = QtWidgets.QPushButton(self.groupBox)
73 |         font = QtGui.QFont()
74 |         font.setPointSize(9)
75 |         self.bSave.setFont(font)
76 |         self.bSave.setObjectName("bSave")
77 |         self.gridLayout.addWidget(self.bSave, 1, 2, 1, 1)
78 |         self.verticalLayout_2.addLayout(self.gridLayout)
79 |         self.verticalLayout.addWidget(self.groupBox)
80 | 
81 |         self.retranslateUi(Dialog)
82 |         self.radioButtonNone.clicked['bool'].connect(self.lineEditHttpProxy.setDisabled)
83 |         self.radioButtonNone.clicked['bool'].connect(self.pushButtonTest.setDisabled)
84 |         self.radioButtonHTTP.clicked['bool'].connect(self.lineEditHttpProxy.setEnabled)
85 |         QtCore.QMetaObject.connectSlotsByName(Dialog)
86 | 
    def retranslateUi(self, Dialog):
        """Set every user-visible string of the proxy dialog.

        Each string is passed through ``QCoreApplication.translate`` with the
        context name ``"Dialog"`` before being applied to its widget.

        Parameters
        ----------
        Dialog
            The dialog object whose window title is set.
        """
        # Short local alias for the translate function used on every line below.
        _translate = QtCore.QCoreApplication.translate
        Dialog.setWindowTitle(_translate("Dialog", "Proxy setting"))
        # Radio buttons: "Disabled" labels radioButtonNone, "Enabled" labels radioButtonHTTP.
        self.radioButtonNone.setText(_translate("Dialog", "Disabled"))
        self.radioButtonHTTP.setText(_translate("Dialog", "Enabled"))
        self.pushButtonTest.setText(_translate("Dialog", "Test"))
        # Placeholder shown in the empty URL field as an example proxy address.
        self.lineEditHttpProxy.setPlaceholderText(_translate("Dialog", "http://127.0.0.1:1080"))
        self.label.setText(_translate("Dialog", "URL:"))
        self.bSave.setText(_translate("Dialog", "Save"))
96 | 


--------------------------------------------------------------------------------
/pytranscriber/model/whisper.py:
--------------------------------------------------------------------------------
  1 | '''
  2 |    (C) 2025 Raryel C. Souza
  3 |     This program is free software: you can redistribute it and/or modify
  4 |     it under the terms of the GNU General Public License as published by
  5 |     the Free Software Foundation, either version 3 of the License, or
  6 |     (at your option) any later version.
  7 |     This program is distributed in the hope that it will be useful,
  8 |     but WITHOUT ANY WARRANTY; without even the implied warranty of
  9 |     MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
 10 |     GNU General Public License for more details.
 11 |     You should have received a copy of the GNU General Public License
 12 |     along with this program.  If not, see <https://www.gnu.org/licenses/>.
 13 | '''
 14 | 
 15 | class Whisper:
 16 | 
 17 |     supported_languages_list = None
 18 |     supported_languages_dict = {
 19 |                 "en": "english",
 20 |                 "zh": "chinese",
 21 |                 "de": "german",
 22 |                 "es": "spanish",
 23 |                 "ru": "russian",
 24 |                 "ko": "korean",
 25 |                 "fr": "french",
 26 |                 "ja": "japanese",
 27 |                 "pt": "portuguese",
 28 |                 "tr": "turkish",
 29 |                 "pl": "polish",
 30 |                 "ca": "catalan",
 31 |                 "nl": "dutch",
 32 |                 "ar": "arabic",
 33 |                 "sv": "swedish",
 34 |                 "it": "italian",
 35 |                 "id": "indonesian",
 36 |                 "hi": "hindi",
 37 |                 "fi": "finnish",
 38 |                 "vi": "vietnamese",
 39 |                 "he": "hebrew",
 40 |                 "uk": "ukrainian",
 41 |                 "el": "greek",
 42 |                 "ms": "malay",
 43 |                 "cs": "czech",
 44 |                 "ro": "romanian",
 45 |                 "da": "danish",
 46 |                 "hu": "hungarian",
 47 |                 "ta": "tamil",
 48 |                 "no": "norwegian",
 49 |                 "th": "thai",
 50 |                 "ur": "urdu",
 51 |                 "hr": "croatian",
 52 |                 "bg": "bulgarian",
 53 |                 "lt": "lithuanian",
 54 |                 "la": "latin",
 55 |                 "mi": "maori",
 56 |                 "ml": "malayalam",
 57 |                 "cy": "welsh",
 58 |                 "sk": "slovak",
 59 |                 "te": "telugu",
 60 |                 "fa": "persian",
 61 |                 "lv": "latvian",
 62 |                 "bn": "bengali",
 63 |                 "sr": "serbian",
 64 |                 "az": "azerbaijani",
 65 |                 "sl": "slovenian",
 66 |                 "kn": "kannada",
 67 |                 "et": "estonian",
 68 |                 "mk": "macedonian",
 69 |                 "br": "breton",
 70 |                 "eu": "basque",
 71 |                 "is": "icelandic",
 72 |                 "hy": "armenian",
 73 |                 "ne": "nepali",
 74 |                 "mn": "mongolian",
 75 |                 "bs": "bosnian",
 76 |                 "kk": "kazakh",
 77 |                 "sq": "albanian",
 78 |                 "sw": "swahili",
 79 |                 "gl": "galician",
 80 |                 "mr": "marathi",
 81 |                 "pa": "punjabi",
 82 |                 "si": "sinhala",
 83 |                 "km": "khmer",
 84 |                 "sn": "shona",
 85 |                 "yo": "yoruba",
 86 |                 "so": "somali",
 87 |                 "af": "afrikaans",
 88 |                 "oc": "occitan",
 89 |                 "ka": "georgian",
 90 |                 "be": "belarusian",
 91 |                 "tg": "tajik",
 92 |                 "sd": "sindhi",
 93 |                 "gu": "gujarati",
 94 |                 "am": "amharic",
 95 |                 "yi": "yiddish",
 96 |                 "lo": "lao",
 97 |                 "uz": "uzbek",
 98 |                 "fo": "faroese",
 99 |                 "ht": "haitian creole",
100 |                 "ps": "pashto",
101 |                 "tk": "turkmen",
102 |                 "nn": "nynorsk",
103 |                 "mt": "maltese",
104 |                 "sa": "sanskrit",
105 |                 "lb": "luxembourgish",
106 |                 "my": "myanmar",
107 |                 "bo": "tibetan",
108 |                 "tl": "tagalog",
109 |                 "mg": "malagasy",
110 |                 "as": "assamese",
111 |                 "tt": "tatar",
112 |                 "haw": "hawaiian",
113 |                 "ln": "lingala",
114 |                 "ha": "hausa",
115 |                 "ba": "bashkir",
116 |                 "jw": "javanese",
117 |                 "su": "sundanese",
118 |                 "yue": "cantonese",
119 |             }
120 | 
121 |     @staticmethod
122 |     def convert_dict_to_list():
123 |         Whisper.supported_languages_list = list()
124 |         for (k, v) in Whisper.supported_languages_dict.items():
125 |             Whisper.supported_languages_list.append(k + " - " + v)
126 | 
127 |     @staticmethod
128 |     def get_supported_languages():
129 |         if Whisper.supported_languages_list is None:
130 |             Whisper.convert_dict_to_list()
131 |         return Whisper.supported_languages_list


--------------------------------------------------------------------------------
/pytranscriber/control/ctr_whisper.py:
--------------------------------------------------------------------------------
  1 | '''
  2 |    (C) 2025 Raryel C. Souza
  3 |     This program is free software: you can redistribute it and/or modify
  4 |     it under the terms of the GNU General Public License as published by
  5 |     the Free Software Foundation, either version 3 of the License, or
  6 |     (at your option) any later version.
  7 |     This program is distributed in the hope that it will be useful,
  8 |     but WITHOUT ANY WARRANTY; without even the implied warranty of
  9 |     MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
 10 |     GNU General Public License for more details.
 11 |     You should have received a copy of the GNU General Public License
 12 |     along with this program.  If not, see <https://www.gnu.org/licenses/>.
 13 | '''
 14 | 
 15 | from PyQt5.QtCore import pyqtSignal, QObject
 16 | from PyQt5.QtWidgets import QMessageBox
 17 | import os
 18 | import sys
 19 | import whisper
 20 | import datetime
 21 | import shutil
 22 | from pytranscriber.control.ctr_engine import CtrEngine
 23 | 
 24 | 
 25 | class CtrWhisper(CtrEngine, QObject):
 26 |     errorSignal = pyqtSignal(str)  # Define the signal
 27 |     MODEL_DIR = None
 28 | 
 29 |     @classmethod
 30 |     def initialize(cls):
 31 |         """Initialize MODEL_DIR before using the class."""
 32 |         if cls.MODEL_DIR is None:
 33 |             cls.MODEL_DIR = cls.get_whisper_model_dir()
 34 | 
 35 |     def __init__(self):
 36 |         super().__init__()
 37 |         self.errorSignal.connect(self.show_error_message)  # Connect signal to slot
 38 | 
 39 |     @staticmethod
 40 |     def get_whisper_model_dir():
 41 |         base_path = os.path.expanduser("~/pytranscriber")  # User's home directory
 42 | 
 43 |         model_dir = os.path.join(base_path, "whisper_models")
 44 |         os.makedirs(model_dir, exist_ok=True)  # Ensure directory exists
 45 |         return model_dir
 46 | 
 47 |     @staticmethod
 48 |     def generate_subtitles(source_path, src_language, outputSRT=None, outputTXT=None, model='base'):
 49 |         CtrWhisper.patch_ffmpeg()  # Ensure FFmpeg is available
 50 | 
 51 |         model = whisper.load_model(model, download_root=CtrWhisper.MODEL_DIR)
 52 |         result = model.transcribe(source_path, verbose=True, language=src_language)
 53 | 
 54 |         if CtrEngine.is_operation_canceled():
 55 |             return -1
 56 | 
 57 |         content_srt = CtrWhisper.generate_srt_file_content(result["segments"])
 58 |         content_txt = CtrWhisper.generate_txt_file_content(result["segments"])
 59 | 
 60 |         CtrWhisper.save_output_file(outputSRT, content_srt)
 61 |         CtrWhisper.save_output_file(outputTXT, content_txt)
 62 | 
 63 |         return outputSRT
 64 | 
 65 |     @staticmethod
 66 |     def show_error_message(message):
 67 |         """Displays the error message in a PyQt5 QMessageBox."""
 68 |         msg_box = QMessageBox()
 69 |         msg_box.setIcon(QMessageBox.Critical)
 70 |         msg_box.setWindowTitle("Error")
 71 |         msg_box.setText(message)
 72 |         msg_box.exec_()
 73 | 
 74 |     @staticmethod
 75 |     def generate_srt_file_content(transcribed_segments):
 76 |         content = ""
 77 | 
 78 |         def format_timestamp(seconds):
 79 |             """Convert seconds to SRT-compliant timestamp (HH:MM:SS,mmm)."""
 80 |             td = datetime.timedelta(seconds=seconds)
 81 |             total_seconds = int(td.total_seconds())
 82 |             millis = int(round((td.total_seconds() - total_seconds) * 1000))
 83 |             hours = total_seconds // 3600
 84 |             minutes = (total_seconds % 3600) // 60
 85 |             secs = total_seconds % 60
 86 |             return f"{hours:02d}:{minutes:02d}:{secs:02d},{millis:03d}"
 87 | 
 88 |         for i, s in enumerate(transcribed_segments, start=1):
 89 |             start_time = format_timestamp(s["start"])
 90 |             end_time = format_timestamp(s["end"])
 91 |             content += f"{i}\n{start_time} --> {end_time}\n{s['text'].strip()}\n\n"
 92 | 
 93 |         return content
 94 | 
 95 |     @staticmethod
 96 |     def generate_txt_file_content(transcribed_segments):
 97 |         content = ""
 98 |         for s in transcribed_segments:
 99 |             content = content + str(s["text"])
100 |         return content
101 | 
102 |     #forces whisper to use the embedded ffmpeg in frozen app
103 |     @staticmethod
104 |     def patch_ffmpeg():
105 |         """Ensure FFmpeg is correctly detected and patched for PyInstaller frozen apps."""
106 |         if getattr(sys, "frozen", False):  # Running as a bundled executable
107 |             ffmpeg_path = os.path.join(sys._MEIPASS, "ffmpeg")
108 |         else:
109 |             ffmpeg_path = shutil.which("ffmpeg")  # Use system-wide FFmpeg
110 | 
111 |         if not ffmpeg_path:
112 |             raise FileNotFoundError("FFmpeg not found!")
113 | 
114 |         os.environ["FFMPEG_PATH"] = ffmpeg_path
115 |         os.environ["PATH"] += os.pathsep + os.path.dirname(ffmpeg_path)
116 | 
117 |         # Monkey-patch shutil.which to always return the correct FFmpeg path
118 |         original_which = shutil.which
119 | 
120 |         def patched_which(cmd, *args, **kwargs):
121 |             if cmd == "ffmpeg":
122 |                 return ffmpeg_path
123 |             return original_which(cmd, *args, **kwargs)
124 | 
125 |         shutil.which = patched_which  # Apply the patch


--------------------------------------------------------------------------------
/whisper/audio.py:
--------------------------------------------------------------------------------
  1 | import os
  2 | from functools import lru_cache
  3 | from subprocess import CalledProcessError, run
  4 | from typing import Optional, Union
  5 | 
  6 | import numpy as np
  7 | import torch
  8 | import torch.nn.functional as F
  9 | 
 10 | from .utils import exact_div
 11 | 
# hard-coded audio hyperparameters
# NOTE: exact_div comes from .utils and is not shown in this file; the values
# quoted in the trailing comments are the original author's.
SAMPLE_RATE = 16000  # default resampling rate used by load_audio (passed to ffmpeg "-ar")
N_FFT = 400  # FFT size and Hann-window length passed to torch.stft
HOP_LENGTH = 160  # hop length passed to torch.stft
CHUNK_LENGTH = 30  # chunk duration in seconds
N_SAMPLES = CHUNK_LENGTH * SAMPLE_RATE  # 480000 samples in a 30-second chunk
N_FRAMES = exact_div(N_SAMPLES, HOP_LENGTH)  # 3000 frames in a mel spectrogram input

N_SAMPLES_PER_TOKEN = HOP_LENGTH * 2  # the initial convolutions have stride 2
FRAMES_PER_SECOND = exact_div(SAMPLE_RATE, HOP_LENGTH)  # 10ms per audio frame
TOKENS_PER_SECOND = exact_div(SAMPLE_RATE, N_SAMPLES_PER_TOKEN)  # 20ms per audio token
 23 | 
 24 | 
 25 | def load_audio(file: str, sr: int = SAMPLE_RATE):
 26 |     """
 27 |     Open an audio file and read as mono waveform, resampling as necessary
 28 | 
 29 |     Parameters
 30 |     ----------
 31 |     file: str
 32 |         The audio file to open
 33 | 
 34 |     sr: int
 35 |         The sample rate to resample the audio if necessary
 36 | 
 37 |     Returns
 38 |     -------
 39 |     A NumPy array containing the audio waveform, in float32 dtype.
 40 |     """
 41 | 
 42 |     # This launches a subprocess to decode audio while down-mixing
 43 |     # and resampling as necessary.  Requires the ffmpeg CLI in PATH.
 44 |     # fmt: off
 45 |     cmd = [
 46 |         "ffmpeg",
 47 |         "-nostdin",
 48 |         "-threads", "0",
 49 |         "-i", file,
 50 |         "-f", "s16le",
 51 |         "-ac", "1",
 52 |         "-acodec", "pcm_s16le",
 53 |         "-ar", str(sr),
 54 |         "-"
 55 |     ]
 56 |     # fmt: on
 57 |     try:
 58 |         out = run(cmd, capture_output=True, check=True).stdout
 59 |     except CalledProcessError as e:
 60 |         raise RuntimeError(f"Failed to load audio: {e.stderr.decode()}") from e
 61 | 
 62 |     return np.frombuffer(out, np.int16).flatten().astype(np.float32) / 32768.0
 63 | 
 64 | 
 65 | def pad_or_trim(array, length: int = N_SAMPLES, *, axis: int = -1):
 66 |     """
 67 |     Pad or trim the audio array to N_SAMPLES, as expected by the encoder.
 68 |     """
 69 |     if torch.is_tensor(array):
 70 |         if array.shape[axis] > length:
 71 |             array = array.index_select(
 72 |                 dim=axis, index=torch.arange(length, device=array.device)
 73 |             )
 74 | 
 75 |         if array.shape[axis] < length:
 76 |             pad_widths = [(0, 0)] * array.ndim
 77 |             pad_widths[axis] = (0, length - array.shape[axis])
 78 |             array = F.pad(array, [pad for sizes in pad_widths[::-1] for pad in sizes])
 79 |     else:
 80 |         if array.shape[axis] > length:
 81 |             array = array.take(indices=range(length), axis=axis)
 82 | 
 83 |         if array.shape[axis] < length:
 84 |             pad_widths = [(0, 0)] * array.ndim
 85 |             pad_widths[axis] = (0, length - array.shape[axis])
 86 |             array = np.pad(array, pad_widths)
 87 | 
 88 |     return array
 89 | 
 90 | 
 91 | @lru_cache(maxsize=None)
 92 | def mel_filters(device, n_mels: int) -> torch.Tensor:
 93 |     """
 94 |     load the mel filterbank matrix for projecting STFT into a Mel spectrogram.
 95 |     Allows decoupling librosa dependency; saved using:
 96 | 
 97 |         np.savez_compressed(
 98 |             "mel_filters.npz",
 99 |             mel_80=librosa.filters.mel(sr=16000, n_fft=400, n_mels=80),
100 |             mel_128=librosa.filters.mel(sr=16000, n_fft=400, n_mels=128),
101 |         )
102 |     """
103 |     assert n_mels in {80, 128}, f"Unsupported n_mels: {n_mels}"
104 | 
105 |     filters_path = os.path.join(os.path.dirname(__file__), "assets", "mel_filters.npz")
106 |     with np.load(filters_path, allow_pickle=False) as f:
107 |         return torch.from_numpy(f[f"mel_{n_mels}"]).to(device)
108 | 
109 | 
def log_mel_spectrogram(
    audio: Union[str, np.ndarray, torch.Tensor],
    n_mels: int = 80,
    padding: int = 0,
    device: Optional[Union[str, torch.device]] = None,
):
    """
    Compute the log-Mel spectrogram of an audio waveform.

    Parameters
    ----------
    audio: Union[str, np.ndarray, torch.Tensor], shape = (*)
        A path to an audio file, or a NumPy array or Tensor holding the
        waveform sampled at 16 kHz

    n_mels: int
        Number of Mel-frequency filters; forwarded to ``mel_filters``, which
        accepts 80 or 128

    padding: int
        Number of zero samples appended on the right

    device: Optional[Union[str, torch.device]]
        When given, the waveform is moved to this device before the STFT

    Returns
    -------
    torch.Tensor, shape = (n_mels, n_frames)
        The normalized log-Mel spectrogram
    """
    if not torch.is_tensor(audio):
        # A string is treated as a file path and decoded first.
        waveform = load_audio(audio) if isinstance(audio, str) else audio
        audio = torch.from_numpy(waveform)

    if device is not None:
        audio = audio.to(device)
    if padding > 0:
        audio = F.pad(audio, (0, padding))

    hann = torch.hann_window(N_FFT).to(audio.device)
    spectrum = torch.stft(audio, N_FFT, HOP_LENGTH, window=hann, return_complex=True)
    # Drop the final STFT frame and take the squared magnitude.
    power = spectrum[..., :-1].abs() ** 2

    mel_spec = mel_filters(audio.device, n_mels) @ power

    # Clamp before log10 to avoid log(0), limit the range to 8 below the
    # maximum, then shift and scale.
    log_spec = torch.clamp(mel_spec, min=1e-10).log10()
    floor = log_spec.max() - 8.0
    log_spec = torch.maximum(log_spec, floor)
    return (log_spec + 4.0) / 4.0
158 | 


--------------------------------------------------------------------------------
/pytranscriber/gui/proxy/window_proxy.ui:
--------------------------------------------------------------------------------
  1 | <?xml version="1.0" encoding="UTF-8"?>
  2 | <ui version="4.0">
  3 |  <class>Dialog</class>
  4 |  <widget class="QDialog" name="Dialog">
  5 |   <property name="geometry">
  6 |    <rect>
  7 |     <x>0</x>
  8 |     <y>0</y>
  9 |     <width>381</width>
 10 |     <height>171</height>
 11 |    </rect>
 12 |   </property>
 13 |   <property name="windowTitle">
 14 |    <string>Proxy setting</string>
 15 |   </property>
 16 |   <property name="autoFillBackground">
 17 |    <bool>false</bool>
 18 |   </property>
 19 |   <property name="sizeGripEnabled">
 20 |    <bool>false</bool>
 21 |   </property>
 22 |   <layout class="QVBoxLayout" name="verticalLayout">
 23 |    <item>
 24 |     <widget class="QGroupBox" name="groupBox">
 25 |      <property name="title">
 26 |       <string/>
 27 |      </property>
 28 |      <layout class="QVBoxLayout" name="verticalLayout_2">
 29 |       <item>
 30 |        <widget class="QRadioButton" name="radioButtonNone">
 31 |         <property name="sizePolicy">
 32 |          <sizepolicy hsizetype="Minimum" vsizetype="Fixed">
 33 |           <horstretch>0</horstretch>
 34 |           <verstretch>0</verstretch>
 35 |          </sizepolicy>
 36 |         </property>
 37 |         <property name="font">
 38 |          <font>
 39 |           <pointsize>9</pointsize>
 40 |          </font>
 41 |         </property>
 42 |         <property name="text">
 43 |          <string>Disabled</string>
 44 |         </property>
 45 |         <property name="checked">
 46 |          <bool>true</bool>
 47 |         </property>
 48 |        </widget>
 49 |       </item>
 50 |       <item>
 51 |        <widget class="QRadioButton" name="radioButtonHTTP">
 52 |         <property name="enabled">
 53 |          <bool>true</bool>
 54 |         </property>
 55 |         <property name="sizePolicy">
 56 |          <sizepolicy hsizetype="Minimum" vsizetype="Fixed">
 57 |           <horstretch>0</horstretch>
 58 |           <verstretch>0</verstretch>
 59 |          </sizepolicy>
 60 |         </property>
 61 |         <property name="font">
 62 |          <font>
 63 |           <pointsize>9</pointsize>
 64 |          </font>
 65 |         </property>
 66 |         <property name="text">
 67 |          <string>Enabled</string>
 68 |         </property>
 69 |        </widget>
 70 |       </item>
 71 |       <item>
 72 |        <layout class="QGridLayout" name="gridLayout">
 73 |         <item row="0" column="2">
 74 |          <widget class="QPushButton" name="pushButtonTest">
 75 |           <property name="enabled">
 76 |            <bool>true</bool>
 77 |           </property>
 78 |           <property name="font">
 79 |            <font>
 80 |             <pointsize>9</pointsize>
 81 |            </font>
 82 |           </property>
 83 |           <property name="text">
 84 |            <string>Test</string>
 85 |           </property>
 86 |          </widget>
 87 |         </item>
 88 |         <item row="0" column="1">
 89 |          <widget class="QLineEdit" name="lineEditHttpProxy">
 90 |           <property name="toolTip">
 91 |            <string/>
 92 |           </property>
 93 |           <property name="statusTip">
 94 |            <string/>
 95 |           </property>
 96 |           <property name="inputMethodHints">
 97 |            <set>Qt::ImhUrlCharactersOnly</set>
 98 |           </property>
 99 |           <property name="placeholderText">
100 |            <string>http://127.0.0.1:1080</string>
101 |           </property>
102 |          </widget>
103 |         </item>
104 |         <item row="0" column="0">
105 |          <widget class="QLabel" name="label">
106 |           <property name="font">
107 |            <font>
108 |             <pointsize>9</pointsize>
109 |            </font>
110 |           </property>
111 |           <property name="text">
112 |            <string>URL:</string>
113 |           </property>
114 |          </widget>
115 |         </item>
116 |         <item row="1" column="2">
117 |          <widget class="QPushButton" name="bSave">
118 |           <property name="font">
119 |            <font>
120 |             <pointsize>9</pointsize>
121 |            </font>
122 |           </property>
123 |           <property name="text">
124 |            <string>Save</string>
125 |           </property>
126 |          </widget>
127 |         </item>
128 |        </layout>
129 |       </item>
130 |      </layout>
131 |     </widget>
132 |    </item>
133 |   </layout>
134 |  </widget>
135 |  <resources/>
136 |  <connections>
137 |   <connection>
138 |    <sender>radioButtonNone</sender>
139 |    <signal>clicked(bool)</signal>
140 |    <receiver>lineEditHttpProxy</receiver>
141 |    <slot>setDisabled(bool)</slot>
142 |    <hints>
143 |     <hint type="sourcelabel">
144 |      <x>130</x>
145 |      <y>19</y>
146 |     </hint>
147 |     <hint type="destinationlabel">
148 |      <x>111</x>
149 |      <y>60</y>
150 |     </hint>
151 |    </hints>
152 |   </connection>
153 |   <connection>
154 |    <sender>radioButtonNone</sender>
155 |    <signal>clicked(bool)</signal>
156 |    <receiver>pushButtonTest</receiver>
157 |    <slot>setDisabled(bool)</slot>
158 |    <hints>
159 |     <hint type="sourcelabel">
160 |      <x>130</x>
161 |      <y>19</y>
162 |     </hint>
163 |     <hint type="destinationlabel">
164 |      <x>219</x>
165 |      <y>60</y>
166 |     </hint>
167 |    </hints>
168 |   </connection>
169 |   <connection>
170 |    <sender>radioButtonHTTP</sender>
171 |    <signal>clicked(bool)</signal>
172 |    <receiver>lineEditHttpProxy</receiver>
173 |    <slot>setEnabled(bool)</slot>
174 |    <hints>
175 |     <hint type="sourcelabel">
176 |      <x>130</x>
177 |      <y>36</y>
178 |     </hint>
179 |     <hint type="destinationlabel">
180 |      <x>111</x>
181 |      <y>60</y>
182 |     </hint>
183 |    </hints>
184 |   </connection>
185 |  </connections>
186 | </ui>
187 | 


--------------------------------------------------------------------------------
/pytranscriber/control/thread_exec_autosub.py:
--------------------------------------------------------------------------------
  1 | '''
  2 |    (C) 2025 Raryel C. Souza
  3 |     This program is free software: you can redistribute it and/or modify
  4 |     it under the terms of the GNU General Public License as published by
  5 |     the Free Software Foundation, either version 3 of the License, or
  6 |     (at your option) any later version.
  7 |     This program is distributed in the hope that it will be useful,
  8 |     but WITHOUT ANY WARRANTY; without even the implied warranty of
  9 |     MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
 10 |     GNU General Public License for more details.
 11 |     You should have received a copy of the GNU General Public License
 12 |     along with this program.  If not, see <https://www.gnu.org/licenses/>.
 13 | '''
 14 | 
 15 | from PyQt5.QtCore import QThread
 16 | from PyQt5.QtCore import pyqtSignal
 17 | from pathlib import Path
 18 | from pytranscriber.util.srtparser import SRTParser
 19 | from pytranscriber.util.util import MyUtil
 20 | from pytranscriber.control.ctr_autosub import Ctr_Autosub
 21 | import os
 22 | import traceback
 23 | 
 24 | 
 25 | class Thread_Exec_Autosub(QThread):
 26 |     signalLockGUI = pyqtSignal()
 27 |     signalResetGUIAfterCancel = pyqtSignal()
 28 |     signalResetGUIAfterSuccess = pyqtSignal()
 29 |     signalProgress = pyqtSignal(str, int)
 30 |     signalProgressFileYofN = pyqtSignal(str)
 31 |     signalErrorMsg = pyqtSignal(str)
 32 | 
 33 |     def __init__(self, objParamAutosub):
 34 |         self.objParamAutosub = objParamAutosub
 35 |         self.running = True
 36 |         QThread.__init__(self)
 37 | 
 38 |     def __updateProgressFileYofN(self, currentIndex, countFiles ):
 39 |         self.signalProgressFileYofN.emit("File " + str(currentIndex+1) + " of " +str(countFiles))
 40 | 
 41 |     def listenerProgress(self, string, percent):
 42 |         self.signalProgress.emit(string, percent)
 43 | 
 44 |     def __generatePathOutputFile(self, sourceFile):
 45 |         #extract the filename without extension from the path
 46 |         base = os.path.basename(sourceFile)
 47 |         #[0] is filename, [1] is file extension
 48 |         fileName = os.path.splitext(base)[0]
 49 | 
 50 |         #the output file has same name as input file, located on output Folder
 51 |         #with extension .srt
 52 |         pathOutputFolder = Path(self.objParamAutosub.outputFolder)
 53 |         outputFileSRT = pathOutputFolder / (fileName + ".srt")
 54 |         outputFileTXT = pathOutputFolder / (fileName + ".txt")
 55 |         return [outputFileSRT, outputFileTXT]
 56 | 
 57 |     def __runAutosubForMedia(self, index, langCode):
 58 |         sourceFile = self.objParamAutosub.listFiles[index]
 59 |         outputFiles = self.__generatePathOutputFile(sourceFile)
 60 |         outputFileSRT = outputFiles[0]
 61 |         outputFileTXT = outputFiles[1]
 62 | 
 63 |         #run autosub
 64 |         try:
 65 |             fOutput = Ctr_Autosub.generate_subtitles(source_path = sourceFile,
 66 |                                         output = outputFileSRT,
 67 |                                         src_language = langCode,
 68 |                                         listener_progress = self.listenerProgress, proxies=self.objParamAutosub.proxies)
 69 |         except Exception as e:
 70 |             error_msg = f"""Error! Unable to generate subtitles: {traceback.format_exc()}"""
 71 |             self.signalErrorMsg.emit(error_msg)  # Emit the full traceback
 72 | 
 73 |         #if nothing was returned
 74 |         if not fOutput:
 75 |             self.signalErrorMsg.emit("Error! Unable to generate subtitles for file " + sourceFile + ".")
 76 |         elif fOutput != -1:
 77 |             #if the operation was not canceled
 78 | 
 79 |             #updated the progress message
 80 |             self.listenerProgress("Finished", 100)
 81 | 
 82 |             #parses the .srt subtitle file and export text to .txt file
 83 |             SRTParser.extractTextFromSRT(str(outputFileSRT))
 84 | 
 85 |             if self.objParamAutosub.boolOpenOutputFilesAuto:
 86 |                 #open both SRT and TXT output files
 87 |                 MyUtil.open_file(outputFileTXT)
 88 |                 MyUtil.open_file(outputFileSRT)
 89 | 
 90 |     def __loopSelectedFiles(self):
 91 |         self.signalLockGUI.emit()
 92 | 
 93 |         langCode = self.objParamAutosub.langCode
 94 | 
 95 |         #if output directory does not exist, creates it
 96 |         pathOutputFolder = Path(self.objParamAutosub.outputFolder)
 97 | 
 98 |         if not os.path.exists(pathOutputFolder):
 99 |             os.mkdir(pathOutputFolder)
100 |         #if there the output file is not a directory
101 |         if not os.path.isdir(pathOutputFolder):
102 |             #force the user to select a different output directory
103 |             self.signalErrorMsg.emit("Error! Invalid output folder. Please choose another one.")
104 |         else:
105 |             #go ahead with autosub process
106 |             nFiles = len(self.objParamAutosub.listFiles)
107 |             for i in range(nFiles):
108 |                 #does not continue the loop if user clicked cancel button
109 |                 if not Ctr_Autosub.is_operation_canceled():
110 |                     self.__updateProgressFileYofN(i, nFiles)
111 |                     self.__runAutosubForMedia(i, langCode)
112 | 
113 |             #if operation is canceled does not clear the file list
114 |             if Ctr_Autosub.is_operation_canceled():
115 |                 self.signalResetGUIAfterCancel.emit()
116 |             else:
117 |                 self.signalResetGUIAfterSuccess.emit()
118 | 
119 | 
120 |     def run(self):
121 |         Ctr_Autosub.init()
122 |         self.__loopSelectedFiles()
123 |         self.running = False
124 | 
125 |     def cancel(self):
126 |        Ctr_Autosub.cancel_operation()
127 | 


--------------------------------------------------------------------------------
/pytranscriber/control/ctr_autosub.py:
--------------------------------------------------------------------------------
  1 | '''
  2 |    (C) 2025 Raryel C. Souza
  3 |     This program is free software: you can redistribute it and/or modify
  4 |     it under the terms of the GNU General Public License as published by
  5 |     the Free Software Foundation, either version 3 of the License, or
  6 |     (at your option) any later version.
  7 |     This program is distributed in the hope that it will be useful,
  8 |     but WITHOUT ANY WARRANTY; without even the implied warranty of
  9 |     MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
 10 |     GNU General Public License for more details.
 11 |     You should have received a copy of the GNU General Public License
 12 |     along with this program.  If not, see <https://www.gnu.org/licenses/>.
 13 | '''
 14 | 
 15 | from autosub import FLACConverter
 16 | from autosub import SpeechRecognizer
 17 | from autosub import extract_audio
 18 | from autosub import find_speech_regions
 19 | from autosub import DEFAULT_CONCURRENCY
 20 | from autosub import DEFAULT_SUBTITLE_FORMAT
 21 | from autosub import GOOGLE_SPEECH_API_KEY
 22 | from autosub.formatters import FORMATTERS
 23 | 
 24 | import multiprocessing
 25 | import time
 26 | import os
 27 | 
 28 | from pytranscriber.util.util import MyUtil
 29 | 
 30 | 
 31 | class Ctr_Autosub:
 32 | 
 33 |     cancel = False
 34 | 
 35 |     @staticmethod
 36 |     def init():
 37 |         Ctr_Autosub.cancel = False
 38 | 
 39 |     @staticmethod
 40 |     def is_operation_canceled():
 41 |         return Ctr_Autosub.cancel
 42 | 
 43 | 
 44 |     @staticmethod
 45 |     def output_progress(listener_progress, str_task, progress_percent):
 46 |         # only update progress if not requested to cancel
 47 |         if not Ctr_Autosub.cancel:
 48 |             listener_progress(str_task, progress_percent)
 49 | 
 50 |     @staticmethod
 51 |     def cancel_operation():
 52 |         Ctr_Autosub.cancel = True
 53 | 
 54 |         while Ctr_Autosub.step == 0:
 55 |             time.sleep(0.1)
 56 | 
 57 |         # the first step involves ffmpeg and cannot be stopped safely
 58 |         if Ctr_Autosub.step == 1:
 59 |             # close wait for threads to finish their work first
 60 |             Ctr_Autosub.pool.close()
 61 |             Ctr_Autosub.pool.join()
 62 | 
 63 |         else:
 64 |             # terminates the threads immediately
 65 |             Ctr_Autosub.pool.terminate()
 66 |             Ctr_Autosub.pool.join()
 67 | 
 68 |     @staticmethod
 69 |     def generate_subtitles(# pylint: disable=too-many-locals,too-many-arguments
 70 |             source_path,
 71 |             src_language,
 72 |             listener_progress,
 73 |             output=None,
 74 |             concurrency=DEFAULT_CONCURRENCY,
 75 |             subtitle_file_format=DEFAULT_SUBTITLE_FORMAT,
 76 |             proxies=None
 77 |         ):
 78 | 
 79 |         # windows not support forkserver... only spawn
 80 |         if os.name != "nt" and "Darwin" in os.uname():
 81 |             # necessary for running on MacOS
 82 |             # method can be set only once, otherwise crash
 83 |             #from python 3.8 above the default for macos is spawn and not fork
 84 |             if 'spawn' != multiprocessing.get_start_method(allow_none=True):
 85 |                 multiprocessing.set_start_method('spawn')
 86 |         Ctr_Autosub.cancel = False
 87 |         Ctr_Autosub.step = 0
 88 |         """
 89 |         Given an input audio/video file, generate subtitles in the specified language and format.
 90 |         """
 91 |         audio_filename, audio_rate = extract_audio(source_path)
 92 | 
 93 |         regions = find_speech_regions(audio_filename)
 94 | 
 95 |         converter = FLACConverter(source_path=audio_filename)
 96 |         recognizer = SpeechRecognizer(language=src_language, rate=audio_rate,
 97 |                                       api_key=GOOGLE_SPEECH_API_KEY, proxies=proxies)
 98 |         transcripts = []
 99 |         if regions:
100 |             try:
101 |                 if Ctr_Autosub.cancel:
102 |                     return -1
103 | 
104 |                 str_task_1 = "Step 1 of 2: Converting speech regions to FLAC files "
105 |                 len_regions = len(regions)
106 |                 extracted_regions = []
107 |                 Ctr_Autosub.pool = multiprocessing.Pool(concurrency)
108 |                 for i, extracted_region in enumerate(Ctr_Autosub.pool.imap(converter, regions)):
109 |                     Ctr_Autosub.step = 1
110 |                     extracted_regions.append(extracted_region)
111 |                     progress_percent = MyUtil.percentage(i, len_regions)
112 |                     Ctr_Autosub.output_progress(listener_progress, str_task_1, progress_percent)
113 |                 if Ctr_Autosub.cancel:
114 |                     return -1
115 |                 else:
116 |                     Ctr_Autosub.pool.close()
117 |                     Ctr_Autosub.pool.join()
118 | 
119 |                 str_task_2 = "Step 2 of 2: Performing speech recognition "
120 |                 Ctr_Autosub.pool = multiprocessing.Pool(concurrency)
121 |                 for i, transcript in enumerate(Ctr_Autosub.pool.imap(recognizer, extracted_regions)):
122 |                     Ctr_Autosub.step = 2
123 |                     transcripts.append(transcript)
124 |                     progress_percent = MyUtil.percentage(i, len_regions)
125 |                     Ctr_Autosub.output_progress(listener_progress, str_task_2, progress_percent)
126 | 
127 |                 if Ctr_Autosub.cancel:
128 |                     return -1
129 |                 else:
130 |                     Ctr_Autosub.pool.close()
131 |                     Ctr_Autosub.pool.join()
132 | 
133 |             except KeyboardInterrupt:
134 |                 Ctr_Autosub.pbar.finish()
135 |                 Ctr_Autosub.pool.terminate()
136 |                 Ctr_Autosub.pool.join()
137 |                 raise
138 | 
139 |         timed_subtitles = [(r, t) for r, t in zip(regions, transcripts) if t]
140 |         formatter = FORMATTERS.get(subtitle_file_format)
141 |         formatted_subtitles = formatter(timed_subtitles)
142 | 
143 |         dest = output
144 | 
145 |         if not dest:
146 |             base = os.path.splitext(source_path)[0]
147 |             dest = "{base}.{format}".format(base=base, format=subtitle_file_format)
148 | 
149 |         with open(dest, 'wb') as output_file:
150 |             output_file.write(formatted_subtitles.encode("utf-8"))
151 | 
152 |         os.remove(audio_filename)
153 | 
154 |         if Ctr_Autosub.cancel:
155 |             return -1
156 |         else:
157 |             Ctr_Autosub.pool.close()
158 |             Ctr_Autosub.pool.join()
159 | 
160 |         return dest
161 | 


--------------------------------------------------------------------------------
/pytranscriber/gui/proxy.ui:
--------------------------------------------------------------------------------
  1 | <?xml version="1.0" encoding="UTF-8"?>
  2 | <ui version="4.0">
  3 |  <class>Dialog</class>
  4 |  <widget class="QDialog" name="Dialog">
  5 |   <property name="geometry">
  6 |    <rect>
  7 |     <x>0</x>
  8 |     <y>0</y>
  9 |     <width>381</width>
 10 |     <height>115</height>
 11 |    </rect>
 12 |   </property>
 13 |   <property name="windowTitle">
 14 |    <string>Proxy setting</string>
 15 |   </property>
 16 |   <property name="autoFillBackground">
 17 |    <bool>false</bool>
 18 |   </property>
 19 |   <property name="sizeGripEnabled">
 20 |    <bool>false</bool>
 21 |   </property>
 22 |   <layout class="QVBoxLayout" name="verticalLayout">
 23 |    <item>
 24 |     <widget class="QGroupBox" name="groupBox">
 25 |      <property name="title">
 26 |       <string/>
 27 |      </property>
 28 |      <layout class="QVBoxLayout" name="verticalLayout_2">
 29 |       <item>
 30 |        <widget class="QRadioButton" name="radioButtonNone">
 31 |         <property name="sizePolicy">
 32 |          <sizepolicy hsizetype="Minimum" vsizetype="Fixed">
 33 |           <horstretch>0</horstretch>
 34 |           <verstretch>0</verstretch>
 35 |          </sizepolicy>
 36 |         </property>
 37 |         <property name="font">
 38 |          <font>
 39 |           <pointsize>9</pointsize>
 40 |          </font>
 41 |         </property>
 42 |         <property name="text">
 43 |          <string>None</string>
 44 |         </property>
 45 |         <property name="checked">
 46 |          <bool>true</bool>
 47 |         </property>
 48 |        </widget>
 49 |       </item>
 50 |       <item>
 51 |        <widget class="QRadioButton" name="radioButtonHTTP">
 52 |         <property name="enabled">
 53 |          <bool>true</bool>
 54 |         </property>
 55 |         <property name="sizePolicy">
 56 |          <sizepolicy hsizetype="Minimum" vsizetype="Fixed">
 57 |           <horstretch>0</horstretch>
 58 |           <verstretch>0</verstretch>
 59 |          </sizepolicy>
 60 |         </property>
 61 |         <property name="font">
 62 |          <font>
 63 |           <pointsize>9</pointsize>
 64 |          </font>
 65 |         </property>
 66 |         <property name="text">
 67 |          <string>HTTP</string>
 68 |         </property>
 69 |        </widget>
 70 |       </item>
 71 |       <item>
 72 |        <layout class="QGridLayout" name="gridLayout">
 73 |         <item row="0" column="1">
 74 |          <widget class="QLineEdit" name="lineEditHttpProxy">
 75 |           <property name="toolTip">
 76 |            <string/>
 77 |           </property>
 78 |           <property name="statusTip">
 79 |            <string/>
 80 |           </property>
 81 |           <property name="inputMethodHints">
 82 |            <set>Qt::ImhUrlCharactersOnly</set>
 83 |           </property>
 84 |           <property name="placeholderText">
 85 |            <string>http://127.0.0.1:1080</string>
 86 |           </property>
 87 |          </widget>
 88 |         </item>
 89 |         <item row="0" column="0">
 90 |          <widget class="QLabel" name="label">
 91 |           <property name="font">
 92 |            <font>
 93 |             <pointsize>9</pointsize>
 94 |            </font>
 95 |           </property>
 96 |           <property name="text">
 97 |            <string>URL:</string>
 98 |           </property>
 99 |          </widget>
100 |         </item>
101 |         <item row="0" column="2">
102 |          <widget class="QPushButton" name="pushButtonTest">
103 |           <property name="enabled">
104 |            <bool>true</bool>
105 |           </property>
106 |           <property name="font">
107 |            <font>
108 |             <pointsize>9</pointsize>
109 |            </font>
110 |           </property>
111 |           <property name="text">
112 |            <string>Test</string>
113 |           </property>
114 |          </widget>
115 |         </item>
116 |        </layout>
117 |       </item>
118 |      </layout>
119 |     </widget>
120 |    </item>
121 |    <item>
122 |     <widget class="QDialogButtonBox" name="buttonBox">
123 |      <property name="orientation">
124 |       <enum>Qt::Horizontal</enum>
125 |      </property>
126 |      <property name="standardButtons">
127 |       <set>QDialogButtonBox::Cancel|QDialogButtonBox::Ok</set>
128 |      </property>
129 |     </widget>
130 |    </item>
131 |   </layout>
132 |  </widget>
133 |  <resources/>
134 |  <connections>
135 |   <connection>
136 |    <sender>buttonBox</sender>
137 |    <signal>accepted()</signal>
138 |    <receiver>Dialog</receiver>
139 |    <slot>accept()</slot>
140 |    <hints>
141 |     <hint type="sourcelabel">
142 |      <x>224</x>
143 |      <y>100</y>
144 |     </hint>
145 |     <hint type="destinationlabel">
146 |      <x>157</x>
147 |      <y>108</y>
148 |     </hint>
149 |    </hints>
150 |   </connection>
151 |   <connection>
152 |    <sender>buttonBox</sender>
153 |    <signal>rejected()</signal>
154 |    <receiver>Dialog</receiver>
155 |    <slot>reject()</slot>
156 |    <hints>
157 |     <hint type="sourcelabel">
158 |      <x>255</x>
159 |      <y>102</y>
160 |     </hint>
161 |     <hint type="destinationlabel">
162 |      <x>261</x>
163 |      <y>108</y>
164 |     </hint>
165 |    </hints>
166 |   </connection>
167 |   <connection>
168 |    <sender>radioButtonNone</sender>
169 |    <signal>clicked(bool)</signal>
170 |    <receiver>lineEditHttpProxy</receiver>
171 |    <slot>setDisabled(bool)</slot>
172 |    <hints>
173 |     <hint type="sourcelabel">
174 |      <x>130</x>
175 |      <y>19</y>
176 |     </hint>
177 |     <hint type="destinationlabel">
178 |      <x>111</x>
179 |      <y>60</y>
180 |     </hint>
181 |    </hints>
182 |   </connection>
183 |   <connection>
184 |    <sender>radioButtonNone</sender>
185 |    <signal>clicked(bool)</signal>
186 |    <receiver>pushButtonTest</receiver>
187 |    <slot>setDisabled(bool)</slot>
188 |    <hints>
189 |     <hint type="sourcelabel">
190 |      <x>130</x>
191 |      <y>19</y>
192 |     </hint>
193 |     <hint type="destinationlabel">
194 |      <x>219</x>
195 |      <y>60</y>
196 |     </hint>
197 |    </hints>
198 |   </connection>
199 |   <connection>
200 |    <sender>radioButtonHTTP</sender>
201 |    <signal>clicked(bool)</signal>
202 |    <receiver>pushButtonTest</receiver>
203 |    <slot>setEnabled(bool)</slot>
204 |    <hints>
205 |     <hint type="sourcelabel">
206 |      <x>130</x>
207 |      <y>36</y>
208 |     </hint>
209 |     <hint type="destinationlabel">
210 |      <x>219</x>
211 |      <y>60</y>
212 |     </hint>
213 |    </hints>
214 |   </connection>
215 |   <connection>
216 |    <sender>radioButtonHTTP</sender>
217 |    <signal>clicked(bool)</signal>
218 |    <receiver>lineEditHttpProxy</receiver>
219 |    <slot>setEnabled(bool)</slot>
220 |    <hints>
221 |     <hint type="sourcelabel">
222 |      <x>130</x>
223 |      <y>36</y>
224 |     </hint>
225 |     <hint type="destinationlabel">
226 |      <x>111</x>
227 |      <y>60</y>
228 |     </hint>
229 |    </hints>
230 |   </connection>
231 |  </connections>
232 | </ui>
233 | 


--------------------------------------------------------------------------------
/whisper/__init__.py:
--------------------------------------------------------------------------------
  1 | import hashlib
  2 | import io
  3 | import os
  4 | import urllib
  5 | import warnings
  6 | from typing import List, Optional, Union
  7 | 
  8 | import torch
  9 | from tqdm import tqdm
 10 | 
 11 | from .audio import load_audio, log_mel_spectrogram, pad_or_trim
 12 | from .decoding import DecodingOptions, DecodingResult, decode, detect_language
 13 | from .model import ModelDimensions, Whisper
 14 | from .transcribe import transcribe
 15 | from .version import __version__
 16 | 
# Official model names mapped to their checkpoint download URLs.
# `_download` takes the second-to-last path segment of each URL as the expected
# SHA256 digest of the file. "large" and "turbo" map to the same URLs as
# "large-v3" and "large-v3-turbo" respectively.
_MODELS = {
    "tiny.en": "https://openaipublic.azureedge.net/main/whisper/models/d3dd57d32accea0b295c96e26691aa14d8822fac7d9d27d5dc00b4ca2826dd03/tiny.en.pt",
    "tiny": "https://openaipublic.azureedge.net/main/whisper/models/65147644a518d12f04e32d6f3b26facc3f8dd46e5390956a9424a650c0ce22b9/tiny.pt",
    "base.en": "https://openaipublic.azureedge.net/main/whisper/models/25a8566e1d0c1e2231d1c762132cd20e0f96a85d16145c3a00adf5d1ac670ead/base.en.pt",
    "base": "https://openaipublic.azureedge.net/main/whisper/models/ed3a0b6b1c0edf879ad9b11b1af5a0e6ab5db9205f891f668f8b0e6c6326e34e/base.pt",
    "small.en": "https://openaipublic.azureedge.net/main/whisper/models/f953ad0fd29cacd07d5a9eda5624af0f6bcf2258be67c92b79389873d91e0872/small.en.pt",
    "small": "https://openaipublic.azureedge.net/main/whisper/models/9ecf779972d90ba49c06d968637d720dd632c55bbf19d441fb42bf17a411e794/small.pt",
    "medium.en": "https://openaipublic.azureedge.net/main/whisper/models/d7440d1dc186f76616474e0ff0b3b6b879abc9d1a4926b7adfa41db2d497ab4f/medium.en.pt",
    "medium": "https://openaipublic.azureedge.net/main/whisper/models/345ae4da62f9b3d59415adc60127b97c714f32e89e936602e85993674d08dcb1/medium.pt",
    "large-v1": "https://openaipublic.azureedge.net/main/whisper/models/e4b87e7e0bf463eb8e6956e646f1e277e901512310def2c24bf0e11bd3c28e9a/large-v1.pt",
    "large-v2": "https://openaipublic.azureedge.net/main/whisper/models/81f7c96c852ee8fc832187b0132e569d6c3065a3252ed18e56effd0b6a73e524/large-v2.pt",
    "large-v3": "https://openaipublic.azureedge.net/main/whisper/models/e5b1a55b89c1367dacf97e3e19bfd829a01529dbfdeefa8caeb59b3f1b81dadb/large-v3.pt",
    "large": "https://openaipublic.azureedge.net/main/whisper/models/e5b1a55b89c1367dacf97e3e19bfd829a01529dbfdeefa8caeb59b3f1b81dadb/large-v3.pt",
    "large-v3-turbo": "https://openaipublic.azureedge.net/main/whisper/models/aff26ae408abcba5fbf8813c21e62b0941638c5f6eebfb145be0c9839262a19a/large-v3-turbo.pt",
    "turbo": "https://openaipublic.azureedge.net/main/whisper/models/aff26ae408abcba5fbf8813c21e62b0941638c5f6eebfb145be0c9839262a19a/large-v3-turbo.pt",
}
 33 | 
# base85-encoded (n_layers, n_heads) boolean arrays indicating the cross-attention heads that are
# highly correlated to the word-level timing, i.e. the alignment between audio and text tokens.
# Keyed by the same names as _MODELS; `load_model` hands the entry for an
# official model to `Whisper.set_alignment_heads`.
_ALIGNMENT_HEADS = {
    "tiny.en": b"ABzY8J1N>@0{>%R00Bk>$p{7v037`oCl~+#00",
    "tiny": b"ABzY8bu8Lr0{>%RKn9Fp%m@SkK7Kt=7ytkO",
    "base.en": b"ABzY8;40c<0{>%RzzG;p*o+Vo09|#PsxSZm00",
    "base": b"ABzY8KQ!870{>%RzyTQH3`Q^yNP!>##QT-<FaQ7m",
    "small.en": b"ABzY8>?_)10{>%RpeA61k&I|OI3I$65C{;;pbCHh0B{qLQ;+}v00",
    "small": b"ABzY8DmU6=0{>%Rpa?J`kvJ6qF(V^F86#Xh7JUGMK}P<N0000",
    "medium.en": b"ABzY8usPae0{>%R7<zz_OvQ{)4kMa0BMw6u5rT}kRKX;$NfYBv00*Hl@qhsU00",
    "medium": b"ABzY8B0Jh+0{>%R7}kK1fFL7w6%<-Pf*t^=N)Qr&0RR9",
    "large-v1": b"ABzY8r9j$a0{>%R7#4sLmoOs{s)o3~84-RPdcFk!JR<kSfC2yj",
    "large-v2": b"ABzY8zd+h!0{>%R7=D0pU<_bnWW*tkYAhobTNnu$jnkEkXqp)j;w1Tzk)UH3X%SZd&fFZ2fC2yj",
    "large-v3": b"ABzY8gWO1E0{>%R7(9S+Kn!D~%ngiGaR?*L!iJG9p-nab0JQ=-{D1-g00",
    "large": b"ABzY8gWO1E0{>%R7(9S+Kn!D~%ngiGaR?*L!iJG9p-nab0JQ=-{D1-g00",
    "large-v3-turbo": b"ABzY8j^C+e0{>%RARaKHP%t(lGR*)0g!tONPyhe`",
    "turbo": b"ABzY8j^C+e0{>%RARaKHP%t(lGR*)0g!tONPyhe`",
}
 52 | 
 53 | 
 54 | def _download(url: str, root: str, in_memory: bool) -> Union[bytes, str]:
 55 |     os.makedirs(root, exist_ok=True)
 56 | 
 57 |     expected_sha256 = url.split("/")[-2]
 58 |     download_target = os.path.join(root, os.path.basename(url))
 59 | 
 60 |     if os.path.exists(download_target) and not os.path.isfile(download_target):
 61 |         raise RuntimeError(f"{download_target} exists and is not a regular file")
 62 | 
 63 |     if os.path.isfile(download_target):
 64 |         with open(download_target, "rb") as f:
 65 |             model_bytes = f.read()
 66 |         if hashlib.sha256(model_bytes).hexdigest() == expected_sha256:
 67 |             return model_bytes if in_memory else download_target
 68 |         else:
 69 |             warnings.warn(
 70 |                 f"{download_target} exists, but the SHA256 checksum does not match; re-downloading the file"
 71 |             )
 72 | 
 73 |     with urllib.request.urlopen(url) as source, open(download_target, "wb") as output:
 74 |         with tqdm(
 75 |             total=int(source.info().get("Content-Length")),
 76 |             ncols=80,
 77 |             unit="iB",
 78 |             unit_scale=True,
 79 |             unit_divisor=1024,
 80 |         ) as loop:
 81 |             while True:
 82 |                 buffer = source.read(8192)
 83 |                 if not buffer:
 84 |                     break
 85 | 
 86 |                 output.write(buffer)
 87 |                 loop.update(len(buffer))
 88 | 
 89 |     model_bytes = open(download_target, "rb").read()
 90 |     if hashlib.sha256(model_bytes).hexdigest() != expected_sha256:
 91 |         raise RuntimeError(
 92 |             "Model has been downloaded but the SHA256 checksum does not not match. Please retry loading the model."
 93 |         )
 94 | 
 95 |     return model_bytes if in_memory else download_target
 96 | 
 97 | 
 98 | def available_models() -> List[str]:
 99 |     """Returns the names of available models"""
100 |     return list(_MODELS.keys())
101 | 
102 | 
def load_model(
    name: str,
    device: Optional[Union[str, torch.device]] = None,
    download_root: Optional[str] = None,
    in_memory: bool = False,
) -> Whisper:
    """
    Load a Whisper ASR model

    Parameters
    ----------
    name : str
        one of the official model names listed by `whisper.available_models()`, or
        path to a model checkpoint containing the model dimensions and the model state_dict.
    device : Union[str, torch.device]
        the PyTorch device to put the model into; defaults to "cuda" when
        available, otherwise "cpu"
    download_root: str
        path to download the model files; by default, it uses the "whisper"
        directory under $XDG_CACHE_HOME, falling back to "~/.cache/whisper"
    in_memory: bool
        whether to preload the model weights into host memory

    Returns
    -------
    model : Whisper
        The Whisper ASR model instance

    Raises
    ------
    RuntimeError
        if `name` is neither an official model name nor an existing file
    """

    if device is None:
        device = "cuda" if torch.cuda.is_available() else "cpu"
    if download_root is None:
        default = os.path.join(os.path.expanduser("~"), ".cache")
        download_root = os.path.join(os.getenv("XDG_CACHE_HOME", default), "whisper")

    if name in _MODELS:
        checkpoint_file = _download(_MODELS[name], download_root, in_memory)
        alignment_heads = _ALIGNMENT_HEADS[name]
    elif os.path.isfile(name):
        if in_memory:
            # read through a context manager so the handle is closed promptly
            with open(name, "rb") as checkpoint_fp:
                checkpoint_file = checkpoint_fp.read()
        else:
            checkpoint_file = name
        # custom checkpoints carry no alignment-head table
        alignment_heads = None
    else:
        raise RuntimeError(
            f"Model {name} not found; available models = {available_models()}"
        )

    with (
        io.BytesIO(checkpoint_file) if in_memory else open(checkpoint_file, "rb")
    ) as fp:
        # NOTE(security): torch.load unpickles the file. Official models are
        # checksum-verified by _download, but a user-supplied path is not;
        # only load checkpoints from trusted sources.
        checkpoint = torch.load(fp, map_location=device)
    del checkpoint_file

    dims = ModelDimensions(**checkpoint["dims"])
    model = Whisper(dims)
    model.load_state_dict(checkpoint["model_state_dict"])

    if alignment_heads is not None:
        model.set_alignment_heads(alignment_heads)

    return model.to(device)
161 | 


--------------------------------------------------------------------------------
/pytranscriber/model/google_speech.py:
--------------------------------------------------------------------------------
  1 | '''
  2 |    (C) 2025 Raryel C. Souza
  3 |     This program is free software: you can redistribute it and/or modify
  4 |     it under the terms of the GNU General Public License as published by
  5 |     the Free Software Foundation, either version 3 of the License, or
  6 |     (at your option) any later version.
  7 |     This program is distributed in the hope that it will be useful,
  8 |     but WITHOUT ANY WARRANTY; without even the implied warranty of
  9 |     MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
 10 |     GNU General Public License for more details.
 11 |     You should have received a copy of the GNU General Public License
 12 |     along with this program.  If not, see <https://www.gnu.org/licenses/>.
 13 | '''
 14 | 
 15 | class Google_Speech:
 16 |     supported_languages_list = ["en-US - English (United States)",
 17 |                                     "cmn-Hans-CN - Chinese (Simplified, China)",
 18 |                                     "cmn-Hant-TW - Chinese (Traditional, Taiwan)",
 19 |                                     "yue-Hant-HK - Cantonese (Traditional, HK)",
 20 |                                     "en-AU - English (Australia)",
 21 |                                     "en-CA - English (Canada)",
 22 |                                     "en-GB - English (United Kingdom)",
 23 |                                     "en-HK - English (Hong Kong)",
 24 |                                     "en-IN - English (India)",
 25 |                                     "en-GB - English (Ireland)",
 26 |                                     "en-NZ - English (New Zealand)",
 27 |                                     "en-PH - English (Philippines)",
 28 |                                     "en-SG - English (Singapore)",
 29 |                                     "af - Afrikaans",
 30 |                                     "ar - Arabic",
 31 |                                     'ar-DZ - Arabic (Algeria)',
 32 |                                     'ar-EG - Arabic (Egypt)',
 33 |                                     'ar-IQ - Arabic (Iraq)',
 34 |                                     'ar-IS - Arabic (Israel)',
 35 |                                     'ar-JO - Arabic (Jordan)',
 36 |                                     'ar-KW - Arabic (Kuwait)',
 37 |                                     'ar-LB - Arabic (Lebanon)',
 38 |                                     'ar-MA - Arabic (Morocco)',
 39 |                                     'ar-OM - Arabic (Oman)',
 40 |                                     'ar-QA - Arabic (Qatar)',
 41 |                                     'ar-SA - Arabic (Saudi Arabia)',
 42 |                                     'ar-PS - Arabic (State of Palestine)',
 43 |                                     'ar-TN - Arabic (Tunisia)',
 44 |                                     'ar-AE - Arabic (United Arab Emirates)',
 45 |                                     'ar-YE - Arabic (Yemen)',
 46 |                                     "az - Azerbaijani",
 47 |                                     "be - Belarusian",
 48 |                                     "bg - Bulgarian",
 49 |                                     "bn - Bengali",
 50 |                                     "bs - Bosnian",
 51 |                                     "ca - Catalan",
 52 |                                     "ceb -Cebuano",
 53 |                                     "cs - Czech",
 54 |                                     "cy - Welsh",
 55 |                                     "da - Danish",
 56 |                                     "de - German",
 57 |                                     'de-AT - German (Austria)',
 58 |                                     'de-CH - German (Switzerland)',
 59 |                                     "el - Greek",
 60 |                                     "eo - Esperanto",
 61 |                                     'es-ES - Spanish (Spain)',
 62 |                                     'es-AR - Spanish (Argentina)',
 63 |                                     'es-BO - Spanish (Bolivia)',
 64 |                                     'es-CL - Spanish (Chile)',
 65 |                                     'es-CO - Spanish (Colombia)',
 66 |                                     'es-CR - Spanish (Costa Rica)',
 67 |                                     'es-DO - Spanish (Dominican Republic)',
 68 |                                     'es-EC - Spanish (Ecuador)',
 69 |                                     'es-GT - Spanish (Guatemala)',
 70 |                                     'es-HN - Spanish (Honduras)',
 71 |                                     'es-MX - Spanish (Mexico)',
 72 |                                     'es-NI - Spanish (Nicaragua)',
 73 |                                     'es-PA - Spanish (Panama)',
 74 |                                     'es-PE - Spanish (Peru)',
 75 |                                     'es-PR - Spanish (Puerto Rico)',
 76 |                                     'es-PY - Spanish (Paraguay)',
 77 |                                     'es-SV - Spanish (El Salvador)',
 78 |                                     'es-UY - Spanish (Uruguay)',
 79 |                                     'es-US - Spanish (United States)',
 80 |                                     'es-VE - Spanish (Venezuela)',
 81 |                                     "et - Estonian",
 82 |                                     "eu - Basque",
 83 |                                     "fa - Persian",
 84 |                                     'fil-PH - Filipino (Philippines)',
 85 |                                     "fi - Finnish",
 86 |                                     "fr - French",
 87 |                                     'fr-BE - French (Belgium)',
 88 |                                     'fr-CA - French (Canada)',
 89 |                                     'fr-CH - French (Switzerland)',
 90 |                                     "ga - Irish",
 91 |                                     "gl - Galician",
 92 |                                     "gu -Gujarati",
 93 |                                     "ha - Hausa",
 94 |                                     "hi - Hindi",
 95 |                                     "hmn - Hmong",
 96 |                                     "hr - Croatian",
 97 |                                     "ht - Haitian Creole",
 98 |                                     "hu - Hungarian",
 99 |                                     "hy - Armenian",
100 |                                     "id - Indonesian",
101 |                                     "ig - Igbo",
102 |                                     "is - Icelandic",
103 |                                     "it - Italian",
104 |                                     'it-CH - Italian (Switzerland)',
105 |                                     "iw - Hebrew",
106 |                                     "ja - Japanese",
107 |                                     "jw - Javanese",
108 |                                     "ka - Georgian",
109 |                                     "kk - Kazakh",
110 |                                     "km - Khmer",
111 |                                     "kn - Kannada",
112 |                                     "ko - Korean",
113 |                                     "la - Latin",
114 |                                     "lo - Lao",
115 |                                     "lt - Lithuanian",
116 |                                     "lv - Latvian",
117 |                                     "mg - Malagasy",
118 |                                     "mi - Maori",
119 |                                     "mk - Macedonian",
120 |                                     "ml - Malayalam",
121 |                                     "mn - Mongolian",
122 |                                     "mr - Marathi",
123 |                                     "ms - Malay",
124 |                                     "mt - Maltese",
125 |                                     "my - Myanmar (Burmese)",
126 |                                     "ne - Nepali",
127 |                                     "nl - Dutch",
128 |                                     "no - Norwegian",
129 |                                     "ny - Chichewa",
130 |                                     "pa - Punjabi",
131 |                                     "pl - Polish",
132 |                                     "pt-BR - Portuguese (Brazil)",
133 |                                     "pt-PT - Portuguese (Portugal)",
134 |                                     "ro - Romanian",
135 |                                     "ru - Russian",
136 |                                     "si - Sinhala",
137 |                                     "sk - Slovak",
138 |                                     "sl - Slovenian",
139 |                                     "so - Somali",
140 |                                     "sq - Albanian",
141 |                                     "sr - Serbian",
142 |                                     "st - Sesotho",
143 |                                     "su - Sudanese",
144 |                                     "sv - Swedish",
145 |                                     "sw - Swahili",
146 |                                     "ta - Tamil",
147 |                                     'ta-IN - Tamil (India)',
148 |                                     'ta-MY - Tamil (Malaysia)',
149 |                                     'ta-SG - Tamil (Singapore)',
150 |                                     'ta-LK - Tamil (Sri Lanka)',
151 |                                     "te - Telugu",
152 |                                     "tg - Tajik",
153 |                                     "th - Thai",
154 |                                     "tl - Filipino",
155 |                                     "tr - Turkish",
156 |                                     "uk - Ukrainian",
157 |                                     "ur - Urdu",
158 |                                     "uz - Uzbek",
159 |                                     "vi - Vietnamese",
160 |                                     "yi - Yiddish",
161 |                                     "yo - Yoruba",
162 |                                     "zu - Zulu"]
    @staticmethod
    def get_supported_languages():
        """Return ``Google_Speech.supported_languages_list``.

        The class attribute itself is returned, not a copy, so any mutation
        performed by a caller is visible to every other user of the class.
        """
        return Google_Speech.supported_languages_list


--------------------------------------------------------------------------------
/whisper/utils.py:
--------------------------------------------------------------------------------
  1 | import json
  2 | import os
  3 | import re
  4 | import sys
  5 | import zlib
  6 | from typing import Callable, List, Optional, TextIO
  7 | 
  8 | system_encoding = sys.getdefaultencoding()
  9 | 
 10 | if system_encoding != "utf-8":
 11 | 
 12 |     def make_safe(string):
 13 |         # replaces any character not representable using the system default encoding with an '?',
 14 |         # avoiding UnicodeEncodeError (https://github.com/openai/whisper/discussions/729).
 15 |         return string.encode(system_encoding, errors="replace").decode(system_encoding)
 16 | 
 17 | else:
 18 | 
 19 |     def make_safe(string):
 20 |         # utf-8 can encode any Unicode code point, so no need to do the round-trip encoding
 21 |         return string
 22 | 
 23 | 
 24 | def exact_div(x, y):
 25 |     assert x % y == 0
 26 |     return x // y
 27 | 
 28 | 
 29 | def str2bool(string):
 30 |     str2val = {"True": True, "False": False}
 31 |     if string in str2val:
 32 |         return str2val[string]
 33 |     else:
 34 |         raise ValueError(f"Expected one of {set(str2val.keys())}, got {string}")
 35 | 
 36 | 
 37 | def optional_int(string):
 38 |     return None if string == "None" else int(string)
 39 | 
 40 | 
 41 | def optional_float(string):
 42 |     return None if string == "None" else float(string)
 43 | 
 44 | 
 45 | def compression_ratio(text) -> float:
 46 |     text_bytes = text.encode("utf-8")
 47 |     return len(text_bytes) / len(zlib.compress(text_bytes))
 48 | 
 49 | 
 50 | def format_timestamp(
 51 |     seconds: float, always_include_hours: bool = False, decimal_marker: str = "."
 52 | ):
 53 |     assert seconds >= 0, "non-negative timestamp expected"
 54 |     milliseconds = round(seconds * 1000.0)
 55 | 
 56 |     hours = milliseconds // 3_600_000
 57 |     milliseconds -= hours * 3_600_000
 58 | 
 59 |     minutes = milliseconds // 60_000
 60 |     milliseconds -= minutes * 60_000
 61 | 
 62 |     seconds = milliseconds // 1_000
 63 |     milliseconds -= seconds * 1_000
 64 | 
 65 |     hours_marker = f"{hours:02d}:" if always_include_hours or hours > 0 else ""
 66 |     return (
 67 |         f"{hours_marker}{minutes:02d}:{seconds:02d}{decimal_marker}{milliseconds:03d}"
 68 |     )
 69 | 
 70 | 
 71 | def get_start(segments: List[dict]) -> Optional[float]:
 72 |     return next(
 73 |         (w["start"] for s in segments for w in s["words"]),
 74 |         segments[0]["start"] if segments else None,
 75 |     )
 76 | 
 77 | 
 78 | def get_end(segments: List[dict]) -> Optional[float]:
 79 |     return next(
 80 |         (w["end"] for s in reversed(segments) for w in reversed(s["words"])),
 81 |         segments[-1]["end"] if segments else None,
 82 |     )
 83 | 
 84 | 
 85 | class ResultWriter:
 86 |     extension: str
 87 | 
 88 |     def __init__(self, output_dir: str):
 89 |         self.output_dir = output_dir
 90 | 
 91 |     def __call__(
 92 |         self, result: dict, audio_path: str, options: Optional[dict] = None, **kwargs
 93 |     ):
 94 |         audio_basename = os.path.basename(audio_path)
 95 |         audio_basename = os.path.splitext(audio_basename)[0]
 96 |         output_path = os.path.join(
 97 |             self.output_dir, audio_basename + "." + self.extension
 98 |         )
 99 | 
100 |         with open(output_path, "w", encoding="utf-8") as f:
101 |             self.write_result(result, file=f, options=options, **kwargs)
102 | 
103 |     def write_result(
104 |         self, result: dict, file: TextIO, options: Optional[dict] = None, **kwargs
105 |     ):
106 |         raise NotImplementedError
107 | 
108 | 
class WriteTXT(ResultWriter):
    """Plain-text writer: one stripped segment text per line."""

    extension: str = "txt"

    def write_result(
        self, result: dict, file: TextIO, options: Optional[dict] = None, **kwargs
    ):
        """Print every segment's stripped "text" to ``file``, flushing after each line."""
        for entry in result["segments"]:
            line = entry["text"].strip()
            print(line, file=file, flush=True)
117 | 
118 | 
class SubtitlesWriter(ResultWriter):
    """
    Shared base for subtitle writers.

    Subclasses provide ``always_include_hours`` and ``decimal_marker`` (used by
    ``format_timestamp``) and consume ``iterate_result``, which yields
    ``(start, end, text)`` tuples with already-formatted timestamps.
    """

    # Both attributes are forwarded to the module-level format_timestamp().
    always_include_hours: bool
    decimal_marker: str

    def iterate_result(
        self,
        result: dict,
        options: Optional[dict] = None,
        *,
        max_line_width: Optional[int] = None,
        max_line_count: Optional[int] = None,
        highlight_words: bool = False,
        max_words_per_line: Optional[int] = None,
    ):
        """
        Yield ``(start, end, text)`` tuples for every subtitle cue in ``result``.

        If the first segment has a "words" entry, cues are assembled from the
        per-word timings and the layout settings below apply. Otherwise one cue
        is yielded per segment from its "start", "end" and "text".

        Each keyword setting falls back to the same-named key in ``options``
        whenever the explicit value is falsy (so an explicit 0 also falls back).

        Parameters
        ----------
        result: dict
            Must contain "segments", a list of segment dicts.
        options: Optional[dict]
            Fallback source for the keyword settings.
        max_line_width: Optional[int]
            Character budget per line; 1000 is used when unset.
        max_line_count: Optional[int]
            Lines allowed in one cue before a new cue is started.
        highlight_words: bool
            When true, one cue per word is yielded with that word wrapped in
            ``<u>...</u>``, plus a plain cue for any gap before the word.
        max_words_per_line: Optional[int]
            Words of a segment are processed in chunks of this size; 1000 is
            used when unset.
        """
        options = options or {}
        max_line_width = max_line_width or options.get("max_line_width")
        max_line_count = max_line_count or options.get("max_line_count")
        highlight_words = highlight_words or options.get("highlight_words", False)
        max_words_per_line = max_words_per_line or options.get("max_words_per_line")
        # Segment boundaries are kept as cue boundaries unless BOTH a line
        # count and a line width were requested.
        preserve_segments = max_line_count is None or max_line_width is None
        max_line_width = max_line_width or 1000
        max_words_per_line = max_words_per_line or 1000

        def iterate_subtitles():
            # Yields lists of word-timing dicts (copies of the originals whose
            # "word" may have been stripped or prefixed with "\n").
            line_len = 0
            line_count = 1
            # the next subtitle to yield (a list of word timings with whitespace)
            subtitle: List[dict] = []
            # start time of the most recently appended word
            last: float = get_start(result["segments"]) or 0.0
            for segment in result["segments"]:
                chunk_index = 0
                words_count = max_words_per_line
                while chunk_index < len(segment["words"]):
                    remaining_words = len(segment["words"]) - chunk_index
                    # shrink the final chunk to the words that are left
                    if max_words_per_line > len(segment["words"]) - chunk_index:
                        words_count = remaining_words
                    for i, original_timing in enumerate(
                        segment["words"][chunk_index : chunk_index + words_count]
                    ):
                        # work on a copy so the caller's result is not mutated
                        timing = original_timing.copy()
                        # a gap of more than 3.0 between consecutive word starts
                        long_pause = (
                            not preserve_segments and timing["start"] - last > 3.0
                        )
                        has_room = line_len + len(timing["word"]) <= max_line_width
                        # i is the index within the current chunk, so this fires
                        # on the first word of every chunk, not only of a segment
                        seg_break = i == 0 and len(subtitle) > 0 and preserve_segments
                        if (
                            line_len > 0
                            and has_room
                            and not long_pause
                            and not seg_break
                        ):
                            # line continuation
                            line_len += len(timing["word"])
                        else:
                            # new line
                            timing["word"] = timing["word"].strip()
                            # `and` binds tighter than `or`: seg_break alone is
                            # enough to start a new subtitle
                            if (
                                len(subtitle) > 0
                                and max_line_count is not None
                                and (long_pause or line_count >= max_line_count)
                                or seg_break
                            ):
                                # subtitle break
                                yield subtitle
                                subtitle = []
                                line_count = 1
                            elif line_len > 0:
                                # line break
                                line_count += 1
                                timing["word"] = "\n" + timing["word"]
                            line_len = len(timing["word"].strip())
                        subtitle.append(timing)
                        last = timing["start"]
                    # always advance by the configured chunk size
                    chunk_index += max_words_per_line
            # flush the trailing, not yet yielded subtitle
            if len(subtitle) > 0:
                yield subtitle

        if len(result["segments"]) > 0 and "words" in result["segments"][0]:
            for subtitle in iterate_subtitles():
                subtitle_start = self.format_timestamp(subtitle[0]["start"])
                subtitle_end = self.format_timestamp(subtitle[-1]["end"])
                subtitle_text = "".join([word["word"] for word in subtitle])
                if highlight_words:
                    # `last` is the formatted end of the previous cue
                    last = subtitle_start
                    all_words = [timing["word"] for timing in subtitle]
                    for i, this_word in enumerate(subtitle):
                        start = self.format_timestamp(this_word["start"])
                        end = self.format_timestamp(this_word["end"])
                        # cover the gap before this word with the plain text
                        if last != start:
                            yield last, start, subtitle_text

                        # underline only the i-th word; leading whitespace
                        # stays outside the <u> tag
                        yield start, end, "".join(
                            [
                                re.sub(r"^(\s*)(.*)$", r"\1<u>\2</u>", word)
                                if j == i
                                else word
                                for j, word in enumerate(all_words)
                            ]
                        )
                        last = end
                else:
                    yield subtitle_start, subtitle_end, subtitle_text
        else:
            # no word-level timings: one cue per segment
            for segment in result["segments"]:
                segment_start = self.format_timestamp(segment["start"])
                segment_end = self.format_timestamp(segment["end"])
                # "-->" is the cue timing separator the writers emit, so it is
                # rewritten inside the text
                segment_text = segment["text"].strip().replace("-->", "->")
                yield segment_start, segment_end, segment_text

    def format_timestamp(self, seconds: float):
        """Format ``seconds`` using this writer's hour and decimal-marker settings."""
        return format_timestamp(
            seconds=seconds,
            always_include_hours=self.always_include_hours,
            decimal_marker=self.decimal_marker,
        )
234 | 
235 | 
class WriteVTT(SubtitlesWriter):
    """WebVTT writer: "WEBVTT" header, then one "start --> end" cue per subtitle."""

    extension: str = "vtt"
    always_include_hours: bool = False
    decimal_marker: str = "."

    def write_result(
        self, result: dict, file: TextIO, options: Optional[dict] = None, **kwargs
    ):
        """Print the header and every cue from ``iterate_result`` to ``file``."""
        print("WEBVTT\n", file=file)
        cues = self.iterate_result(result, options, **kwargs)
        for cue in cues:
            start, end, text = cue
            print(f"{start} --> {end}\n{text}\n", file=file, flush=True)
247 | 
248 | 
class WriteSRT(SubtitlesWriter):
    """SubRip writer: numbered cues with comma decimal marker and hours always shown."""

    extension: str = "srt"
    always_include_hours: bool = True
    decimal_marker: str = ","

    def write_result(
        self, result: dict, file: TextIO, options: Optional[dict] = None, **kwargs
    ):
        """Print every cue from ``iterate_result`` to ``file``, numbered from 1."""
        cues = self.iterate_result(result, options, **kwargs)
        for i, cue in enumerate(cues, start=1):
            start, end, text = cue
            print(f"{i}\n{start} --> {end}\n{text}\n", file=file, flush=True)
261 | 
262 | 
class WriteTSV(ResultWriter):
    """
    Tab-separated writer: a "start / end / text" header row, then one row per
    segment holding the start and end times as integer milliseconds and the
    segment text.

    Integer milliseconds avoid any interference from an environment whose
    language settings would print the decimal point of a float as a comma, and
    they are cheaper to parse and store, e.g. in C++.
    """

    extension: str = "tsv"

    def write_result(
        self, result: dict, file: TextIO, options: Optional[dict] = None, **kwargs
    ):
        """Print the header and one row per segment to ``file``."""
        print("start", "end", "text", sep="\t", file=file)
        for row in result["segments"]:
            for bound in ("start", "end"):
                print(round(1000 * row[bound]), file=file, end="\t")
            # tabs inside the text would break the column layout
            cell = row["text"].strip().replace("\t", " ")
            print(cell, file=file, flush=True)
283 | 
284 | 
class WriteJSON(ResultWriter):
    """JSON writer: dumps the whole result dict with ``json.dump`` defaults."""

    extension: str = "json"

    def write_result(
        self, result: dict, file: TextIO, options: Optional[dict] = None, **kwargs
    ):
        """Serialize ``result`` to ``file``; ``options`` and ``kwargs`` are unused."""
        json.dump(obj=result, fp=file)
292 | 
293 | 
def get_writer(
    output_format: str, output_dir: str
) -> Callable[[dict, TextIO, dict], None]:
    """Return a writer callable for ``output_format`` bound to ``output_dir``.

    ``output_format`` is one of "txt", "vtt", "srt", "tsv", "json", or "all".
    For "all", the returned function invokes one writer of every format in
    turn. Any other unknown format raises ``KeyError``.

    The returned callable is invoked as ``writer(result, path, options,
    **kwargs)``. Its second positional argument is handed to
    ``ResultWriter.__call__`` as ``audio_path``, although the inner function
    names it ``file``.
    """
    registry = {
        "txt": WriteTXT,
        "vtt": WriteVTT,
        "srt": WriteSRT,
        "tsv": WriteTSV,
        "json": WriteJSON,
    }

    if output_format != "all":
        return registry[output_format](output_dir)

    instances = [writer_cls(output_dir) for writer_cls in registry.values()]

    def write_all(
        result: dict, file: TextIO, options: Optional[dict] = None, **kwargs
    ):
        for instance in instances:
            instance(result, file, options, **kwargs)

    return write_all
317 | 


--------------------------------------------------------------------------------
/pytranscriber/gui/main/window_main.ui:
--------------------------------------------------------------------------------
  1 | <?xml version="1.0" encoding="UTF-8"?>
  2 | <ui version="4.0">
  3 |  <class>window</class>
  4 |  <widget class="QMainWindow" name="window">
  5 |   <property name="geometry">
  6 |    <rect>
  7 |     <x>0</x>
  8 |     <y>0</y>
  9 |     <width>1045</width>
 10 |     <height>610</height>
 11 |    </rect>
 12 |   </property>
 13 |   <property name="windowTitle">
 14 |    <string>pyTranscriber v2.1 - 13/07/2025</string>
 15 |   </property>
 16 |   <widget class="QWidget" name="centralwidget">
 17 |    <widget class="QPushButton" name="bSelectMedia">
 18 |     <property name="geometry">
 19 |      <rect>
 20 |       <x>10</x>
 21 |       <y>10</y>
 22 |       <width>141</width>
 23 |       <height>34</height>
 24 |      </rect>
 25 |     </property>
 26 |     <property name="text">
 27 |      <string>Select file(s)</string>
 28 |     </property>
 29 |    </widget>
 30 |    <widget class="QPushButton" name="bConvert">
 31 |     <property name="enabled">
 32 |      <bool>false</bool>
 33 |     </property>
 34 |     <property name="geometry">
 35 |      <rect>
 36 |       <x>200</x>
 37 |       <y>380</y>
 38 |       <width>341</width>
 39 |       <height>34</height>
 40 |      </rect>
 41 |     </property>
 42 |     <property name="text">
 43 |      <string>Transcribe Audio / Generate Subtitles</string>
 44 |     </property>
 45 |    </widget>
 46 |    <widget class="QProgressBar" name="progressBar">
 47 |     <property name="geometry">
 48 |      <rect>
 49 |       <x>20</x>
 50 |       <y>470</y>
 51 |       <width>1021</width>
 52 |       <height>23</height>
 53 |      </rect>
 54 |     </property>
 55 |     <property name="value">
 56 |      <number>0</number>
 57 |     </property>
 58 |    </widget>
 59 |    <widget class="QLabel" name="labelCurrentOperation">
 60 |     <property name="geometry">
 61 |      <rect>
 62 |       <x>20</x>
 63 |       <y>420</y>
 64 |       <width>871</width>
 65 |       <height>41</height>
 66 |      </rect>
 67 |     </property>
 68 |     <property name="text">
 69 |      <string/>
 70 |     </property>
 71 |    </widget>
 72 |    <widget class="QPushButton" name="bOpenOutputFolder">
 73 |     <property name="geometry">
 74 |      <rect>
 75 |       <x>550</x>
 76 |       <y>380</y>
 77 |       <width>241</width>
 78 |       <height>34</height>
 79 |      </rect>
 80 |     </property>
 81 |     <property name="text">
 82 |      <string>Open Output Folder</string>
 83 |     </property>
 84 |    </widget>
 85 |    <widget class="QPushButton" name="bSelectOutputFolder">
 86 |     <property name="geometry">
 87 |      <rect>
 88 |       <x>10</x>
 89 |       <y>180</y>
 90 |       <width>141</width>
 91 |       <height>34</height>
 92 |      </rect>
 93 |     </property>
 94 |     <property name="text">
 95 |      <string>Output Location</string>
 96 |     </property>
 97 |    </widget>
 98 |    <widget class="QLineEdit" name="qleOutputFolder">
 99 |     <property name="geometry">
100 |      <rect>
101 |       <x>160</x>
102 |       <y>180</y>
103 |       <width>861</width>
104 |       <height>32</height>
105 |      </rect>
106 |     </property>
107 |     <property name="text">
108 |      <string/>
109 |     </property>
110 |     <property name="readOnly">
111 |      <bool>true</bool>
112 |     </property>
113 |    </widget>
114 |    <widget class="QGroupBox" name="groupBox">
115 |     <property name="geometry">
116 |      <rect>
117 |       <x>160</x>
118 |       <y>10</y>
119 |       <width>871</width>
120 |       <height>161</height>
121 |      </rect>
122 |     </property>
123 |     <property name="title">
124 |      <string>List of files to generate transcribe audio / generate subtitles</string>
125 |     </property>
126 |     <property name="alignment">
127 |      <set>Qt::AlignLeading|Qt::AlignLeft|Qt::AlignTop</set>
128 |     </property>
129 |     <property name="flat">
130 |      <bool>false</bool>
131 |     </property>
132 |     <property name="checkable">
133 |      <bool>false</bool>
134 |     </property>
135 |     <widget class="QListWidget" name="qlwListFilesSelected">
136 |      <property name="geometry">
137 |       <rect>
138 |        <x>10</x>
139 |        <y>30</y>
140 |        <width>851</width>
141 |        <height>121</height>
142 |       </rect>
143 |      </property>
144 |     </widget>
145 |    </widget>
146 |    <widget class="QPushButton" name="bRemoveFile">
147 |     <property name="geometry">
148 |      <rect>
149 |       <x>10</x>
150 |       <y>50</y>
151 |       <width>141</width>
152 |       <height>34</height>
153 |      </rect>
154 |     </property>
155 |     <property name="text">
156 |      <string>Remove file(s)</string>
157 |     </property>
158 |    </widget>
159 |    <widget class="QLabel" name="labelProgressFileIndex">
160 |     <property name="geometry">
161 |      <rect>
162 |       <x>20</x>
163 |       <y>500</y>
164 |       <width>131</width>
165 |       <height>41</height>
166 |      </rect>
167 |     </property>
168 |     <property name="text">
169 |      <string/>
170 |     </property>
171 |    </widget>
172 |    <widget class="QPushButton" name="bCancel">
173 |     <property name="geometry">
174 |      <rect>
175 |       <x>470</x>
176 |       <y>510</y>
177 |       <width>108</width>
178 |       <height>36</height>
179 |      </rect>
180 |     </property>
181 |     <property name="text">
182 |      <string>Cancel</string>
183 |     </property>
184 |    </widget>
185 |    <widget class="QCheckBox" name="chbxOpenOutputFilesAuto">
186 |     <property name="geometry">
187 |      <rect>
188 |       <x>10</x>
189 |       <y>220</y>
190 |       <width>291</width>
191 |       <height>32</height>
192 |      </rect>
193 |     </property>
194 |     <property name="text">
195 |      <string>Open output files automatically</string>
196 |     </property>
197 |     <property name="checked">
198 |      <bool>true</bool>
199 |     </property>
200 |    </widget>
201 |    <widget class="QWidget" name="horizontalLayoutWidget">
202 |     <property name="geometry">
203 |      <rect>
204 |       <x>200</x>
205 |       <y>250</y>
206 |       <width>591</width>
207 |       <height>38</height>
208 |      </rect>
209 |     </property>
210 |     <layout class="QHBoxLayout" name="horizontalLayout_5">
211 |      <item>
212 |       <widget class="QLabel" name="labelSelectLang">
213 |        <property name="text">
214 |         <string>Audio Language:</string>
215 |        </property>
216 |       </widget>
217 |      </item>
218 |      <item>
219 |       <widget class="QComboBox" name="cbSelectLang">
220 |        <property name="sizeAdjustPolicy">
221 |         <enum>QComboBox::AdjustToContents</enum>
222 |        </property>
223 |       </widget>
224 |      </item>
225 |     </layout>
226 |    </widget>
227 |    <widget class="QWidget" name="horizontalLayoutWidget_2">
228 |     <property name="geometry">
229 |      <rect>
230 |       <x>200</x>
231 |       <y>290</y>
232 |       <width>591</width>
233 |       <height>41</height>
234 |      </rect>
235 |     </property>
236 |     <layout class="QHBoxLayout" name="horizontalLayout">
237 |      <item>
238 |       <widget class="QLabel" name="lEngine">
239 |        <property name="text">
240 |         <string>Engine:</string>
241 |        </property>
242 |       </widget>
243 |      </item>
244 |      <item>
245 |       <widget class="QRadioButton" name="rbGoogleEngine">
246 |        <property name="text">
247 |         <string>Google Speech (cloud processing)</string>
248 |        </property>
249 |        <property name="checked">
250 |         <bool>true</bool>
251 |        </property>
252 |       </widget>
253 |      </item>
254 |      <item>
255 |       <widget class="QRadioButton" name="rbWhisper">
256 |        <property name="enabled">
257 |         <bool>true</bool>
258 |        </property>
259 |        <property name="text">
260 |         <string>openAI Whisper (local processing)</string>
261 |        </property>
262 |        <property name="checkable">
263 |         <bool>true</bool>
264 |        </property>
265 |       </widget>
266 |      </item>
267 |     </layout>
268 |    </widget>
269 |    <widget class="QWidget" name="horizontalLayoutWidget_3">
270 |     <property name="geometry">
271 |      <rect>
272 |       <x>200</x>
273 |       <y>330</y>
274 |       <width>611</width>
275 |       <height>31</height>
276 |      </rect>
277 |     </property>
278 |     <layout class="QHBoxLayout" name="horizontalLayout_2">
279 |      <item>
280 |       <widget class="QLabel" name="lModels">
281 |        <property name="enabled">
282 |         <bool>true</bool>
283 |        </property>
284 |        <property name="text">
285 |         <string>Models:</string>
286 |        </property>
287 |       </widget>
288 |      </item>
289 |      <item>
290 |       <widget class="QRadioButton" name="rbModelTiny">
291 |        <property name="text">
292 |         <string>Tiny</string>
293 |        </property>
294 |        <property name="checked">
295 |         <bool>true</bool>
296 |        </property>
297 |       </widget>
298 |      </item>
299 |      <item>
300 |       <widget class="QRadioButton" name="rbModelBase">
301 |        <property name="enabled">
302 |         <bool>true</bool>
303 |        </property>
304 |        <property name="text">
305 |         <string>Base</string>
306 |        </property>
307 |        <property name="checkable">
308 |         <bool>true</bool>
309 |        </property>
310 |       </widget>
311 |      </item>
312 |      <item>
313 |       <widget class="QRadioButton" name="rbModelSmall">
314 |        <property name="enabled">
315 |         <bool>true</bool>
316 |        </property>
317 |        <property name="text">
318 |         <string>Small</string>
319 |        </property>
320 |        <property name="checkable">
321 |         <bool>true</bool>
322 |        </property>
323 |       </widget>
324 |      </item>
325 |      <item>
326 |       <widget class="QRadioButton" name="rbModelMedium">
327 |        <property name="enabled">
328 |         <bool>true</bool>
329 |        </property>
330 |        <property name="text">
331 |         <string>Medium</string>
332 |        </property>
333 |        <property name="checkable">
334 |         <bool>true</bool>
335 |        </property>
336 |       </widget>
337 |      </item>
338 |      <item>
339 |       <widget class="QRadioButton" name="rbModelLarge">
340 |        <property name="enabled">
341 |         <bool>true</bool>
342 |        </property>
343 |        <property name="text">
344 |         <string>Large</string>
345 |        </property>
346 |        <property name="checkable">
347 |         <bool>true</bool>
348 |        </property>
349 |       </widget>
350 |      </item>
351 |     </layout>
352 |    </widget>
353 |   </widget>
354 |   <widget class="QMenuBar" name="menubar">
355 |    <property name="geometry">
356 |     <rect>
357 |      <x>0</x>
358 |      <y>0</y>
359 |      <width>1045</width>
360 |      <height>23</height>
361 |     </rect>
362 |    </property>
363 |    <widget class="QMenu" name="menuAbout">
364 |     <property name="title">
365 |      <string>Abo&amp;ut</string>
366 |     </property>
367 |     <addaction name="actionLicense"/>
368 |     <addaction name="actionDonation"/>
369 |     <addaction name="actionAbout_pyTranscriber"/>
370 |    </widget>
371 |    <widget class="QMenu" name="menuProxy">
372 |     <property name="title">
373 |      <string>&amp;Settings</string>
374 |     </property>
375 |     <addaction name="actionProxy"/>
376 |    </widget>
377 |    <widget class="QMenu" name="menuLanguage">
378 |     <property name="title">
379 |      <string>&amp;Language</string>
380 |     </property>
381 |     <addaction name="actionEnglish"/>
382 |     <addaction name="actionChineseTraditional"/>
383 |     <addaction name="actionChineseSimplified"/>
384 |     <addaction name="actionPortuguese"/>
385 |    </widget>
386 |    <addaction name="menuProxy"/>
387 |    <addaction name="menuLanguage"/>
388 |    <addaction name="menuAbout"/>
389 |   </widget>
390 |   <widget class="QStatusBar" name="statusbar"/>
391 |   <action name="actionLicense">
392 |    <property name="text">
393 |     <string>&amp;License</string>
394 |    </property>
395 |   </action>
396 |   <action name="actionDonation">
397 |    <property name="text">
398 |     <string>&amp;Funding at Github Sponsors</string>
399 |    </property>
400 |   </action>
401 |   <action name="actionAbout_pyTranscriber">
402 |    <property name="text">
403 |     <string>&amp;About pyTranscriber</string>
404 |    </property>
405 |   </action>
406 |   <action name="actionProxy">
407 |    <property name="text">
408 |     <string>&amp;Proxy</string>
409 |    </property>
410 |    <property name="toolTip">
411 |     <string>Proxy setting</string>
412 |    </property>
413 |   </action>
414 |   <action name="actionEnglish">
415 |    <property name="text">
416 |     <string>English</string>
417 |    </property>
418 |   </action>
419 |   <action name="actionChineseTraditional">
420 |    <property name="text">
421 |     <string>繁體中文 - Chinese Traditional</string>
422 |    </property>
423 |   </action>
424 |   <action name="actionChineseSimplified">
425 |    <property name="text">
426 |     <string>简体中文 - Chinese Simplified</string>
427 |    </property>
428 |   </action>
429 |   <action name="actionPortuguese">
430 |    <property name="text">
431 |     <string>Português</string>
432 |    </property>
433 |   </action>
434 |  </widget>
435 |  <resources/>
436 |  <connections/>
437 | </ui>
438 | 


--------------------------------------------------------------------------------
/whisper/model.py:
--------------------------------------------------------------------------------
  1 | import base64
  2 | import gzip
  3 | from contextlib import contextmanager
  4 | from dataclasses import dataclass
  5 | from typing import Dict, Iterable, Optional, Tuple
  6 | 
  7 | import numpy as np
  8 | import torch
  9 | import torch.nn.functional as F
 10 | from torch import Tensor, nn
 11 | 
 12 | from .decoding import decode as decode_function
 13 | from .decoding import detect_language as detect_language_function
 14 | from .transcribe import transcribe as transcribe_function
 15 | 
 16 | try:
 17 |     from torch.nn.functional import scaled_dot_product_attention
 18 | 
 19 |     SDPA_AVAILABLE = True
 20 | except (ImportError, RuntimeError, OSError):
 21 |     scaled_dot_product_attention = None
 22 |     SDPA_AVAILABLE = False
 23 | 
 24 | 
@dataclass
class ModelDimensions:
    """Ten integer size parameters for a model.

    NOTE(review): the per-field meanings below are inferred from the field
    names only; confirm against the classes that consume this dataclass.
    """

    # presumably the audio (encoder) side: mel bins, context length, width,
    # attention heads, layers
    n_mels: int
    n_audio_ctx: int
    n_audio_state: int
    n_audio_head: int
    n_audio_layer: int
    # presumably the text (decoder) side: vocabulary size, context length,
    # width, attention heads, layers
    n_vocab: int
    n_text_ctx: int
    n_text_state: int
    n_text_head: int
    n_text_layer: int
 38 | 
 39 | class LayerNorm(nn.LayerNorm):
 40 |     def forward(self, x: Tensor) -> Tensor:
 41 |         return super().forward(x.float()).type(x.dtype)
 42 | 
 43 | 
 44 | class Linear(nn.Linear):
 45 |     def forward(self, x: Tensor) -> Tensor:
 46 |         return F.linear(
 47 |             x,
 48 |             self.weight.to(x.dtype),
 49 |             None if self.bias is None else self.bias.to(x.dtype),
 50 |         )
 51 | 
 52 | 
 53 | class Conv1d(nn.Conv1d):
 54 |     def _conv_forward(
 55 |         self, x: Tensor, weight: Tensor, bias: Optional[Tensor]
 56 |     ) -> Tensor:
 57 |         return super()._conv_forward(
 58 |             x, weight.to(x.dtype), None if bias is None else bias.to(x.dtype)
 59 |         )
 60 | 
 61 | 
 62 | def sinusoids(length, channels, max_timescale=10000):
 63 |     """Returns sinusoids for positional embedding"""
 64 |     assert channels % 2 == 0
 65 |     log_timescale_increment = np.log(max_timescale) / (channels // 2 - 1)
 66 |     inv_timescales = torch.exp(-log_timescale_increment * torch.arange(channels // 2))
 67 |     scaled_time = torch.arange(length)[:, np.newaxis] * inv_timescales[np.newaxis, :]
 68 |     return torch.cat([torch.sin(scaled_time), torch.cos(scaled_time)], dim=1)
 69 | 
 70 | 
 71 | @contextmanager
 72 | def disable_sdpa():
 73 |     prev_state = MultiHeadAttention.use_sdpa
 74 |     try:
 75 |         MultiHeadAttention.use_sdpa = False
 76 |         yield
 77 |     finally:
 78 |         MultiHeadAttention.use_sdpa = prev_state
 79 | 
 80 | 
 81 | class MultiHeadAttention(nn.Module):
 82 |     use_sdpa = True
 83 | 
 84 |     def __init__(self, n_state: int, n_head: int):
 85 |         super().__init__()
 86 |         self.n_head = n_head
 87 |         self.query = Linear(n_state, n_state)
 88 |         self.key = Linear(n_state, n_state, bias=False)
 89 |         self.value = Linear(n_state, n_state)
 90 |         self.out = Linear(n_state, n_state)
 91 | 
 92 |     def forward(
 93 |         self,
 94 |         x: Tensor,
 95 |         xa: Optional[Tensor] = None,
 96 |         mask: Optional[Tensor] = None,
 97 |         kv_cache: Optional[dict] = None,
 98 |     ):
 99 |         q = self.query(x)
100 | 
101 |         if kv_cache is None or xa is None or self.key not in kv_cache:
102 |             # hooks, if installed (i.e. kv_cache is not None), will prepend the cached kv tensors;
103 |             # otherwise, perform key/value projections for self- or cross-attention as usual.
104 |             k = self.key(x if xa is None else xa)
105 |             v = self.value(x if xa is None else xa)
106 |         else:
107 |             # for cross-attention, calculate keys and values once and reuse in subsequent calls.
108 |             k = kv_cache[self.key]
109 |             v = kv_cache[self.value]
110 | 
111 |         wv, qk = self.qkv_attention(q, k, v, mask)
112 |         return self.out(wv), qk
113 | 
114 |     def qkv_attention(
115 |         self, q: Tensor, k: Tensor, v: Tensor, mask: Optional[Tensor] = None
116 |     ) -> Tuple[torch.Tensor, Optional[torch.Tensor]]:
117 |         n_batch, n_ctx, n_state = q.shape
118 |         scale = (n_state // self.n_head) ** -0.25
119 |         q = q.view(*q.shape[:2], self.n_head, -1).permute(0, 2, 1, 3)
120 |         k = k.view(*k.shape[:2], self.n_head, -1).permute(0, 2, 1, 3)
121 |         v = v.view(*v.shape[:2], self.n_head, -1).permute(0, 2, 1, 3)
122 | 
123 |         if SDPA_AVAILABLE and MultiHeadAttention.use_sdpa:
124 |             a = scaled_dot_product_attention(
125 |                 q, k, v, is_causal=mask is not None and n_ctx > 1
126 |             )
127 |             out = a.permute(0, 2, 1, 3).flatten(start_dim=2)
128 |             qk = None
129 |         else:
130 |             qk = (q * scale) @ (k * scale).transpose(-1, -2)
131 |             if mask is not None:
132 |                 qk = qk + mask[:n_ctx, :n_ctx]
133 |             qk = qk.float()
134 | 
135 |             w = F.softmax(qk, dim=-1).to(q.dtype)
136 |             out = (w @ v).permute(0, 2, 1, 3).flatten(start_dim=2)
137 |             qk = qk.detach()
138 | 
139 |         return out, qk
140 | 
141 | 
class ResidualAttentionBlock(nn.Module):
    """Self-attention, optional cross-attention and an MLP, each residual.

    Every sub-layer is fed the output of its own ``*_ln`` module and its result
    is added back onto the running activation.
    """

    def __init__(self, n_state: int, n_head: int, cross_attention: bool = False):
        super().__init__()

        # self-attention sub-layer
        self.attn = MultiHeadAttention(n_state, n_head)
        self.attn_ln = LayerNorm(n_state)

        # cross-attention sub-layer; both attributes stay None when not requested
        if cross_attention:
            self.cross_attn = MultiHeadAttention(n_state, n_head)
            self.cross_attn_ln = LayerNorm(n_state)
        else:
            self.cross_attn = None
            self.cross_attn_ln = None

        # feed-forward sub-layer with a hidden width of four times n_state
        hidden_width = n_state * 4
        self.mlp = nn.Sequential(
            Linear(n_state, hidden_width),
            nn.GELU(),
            Linear(hidden_width, n_state),
        )
        self.mlp_ln = LayerNorm(n_state)

    def forward(
        self,
        x: Tensor,
        xa: Optional[Tensor] = None,
        mask: Optional[Tensor] = None,
        kv_cache: Optional[dict] = None,
    ):
        """
        Run the block on ``x``.

        ``mask`` is forwarded to self-attention only; ``xa`` is forwarded to
        cross-attention only; ``kv_cache`` is forwarded to both.
        """
        # the attention modules return (output, qk); only the output is used
        self_out, _ = self.attn(self.attn_ln(x), mask=mask, kv_cache=kv_cache)
        x = x + self_out

        if self.cross_attn is not None:
            cross_out, _ = self.cross_attn(
                self.cross_attn_ln(x), xa, kv_cache=kv_cache
            )
            x = x + cross_out

        return x + self.mlp(self.mlp_ln(x))
172 | 
173 | 
class AudioEncoder(nn.Module):
    """Two 1-D convolutions followed by a stack of residual attention blocks.

    A fixed sinusoidal positional table (a buffer, not a parameter) is added to
    the convolution output before the attention stack.
    """

    def __init__(
        self, n_mels: int, n_ctx: int, n_state: int, n_head: int, n_layer: int
    ):
        super().__init__()
        # convolutional stem; the second convolution uses stride 2
        self.conv1 = Conv1d(n_mels, n_state, kernel_size=3, padding=1)
        self.conv2 = Conv1d(n_state, n_state, kernel_size=3, stride=2, padding=1)
        self.register_buffer("positional_embedding", sinusoids(n_ctx, n_state))

        # self-attention-only blocks (no cross-attention in the encoder)
        layers = [ResidualAttentionBlock(n_state, n_head) for _ in range(n_layer)]
        self.blocks: Iterable[ResidualAttentionBlock] = nn.ModuleList(layers)
        self.ln_post = LayerNorm(n_state)

    def forward(self, x: Tensor):
        """
        x : torch.Tensor, shape = (batch_size, n_mels, n_ctx)
            the mel spectrogram of the audio

        Returns the output of the final `ln_post` module applied after the
        attention stack.
        """
        for conv in (self.conv1, self.conv2):
            x = F.gelu(conv(x))
        # move the channel axis last so it lines up with the positional table
        x = x.permute(0, 2, 1)

        assert x.shape[1:] == self.positional_embedding.shape, "incorrect audio shape"
        # cast back so adding the buffer cannot change the activation dtype
        with_positions = x + self.positional_embedding
        x = with_positions.to(x.dtype)

        for layer in self.blocks:
            x = layer(x)

        return self.ln_post(x)
205 | 
206 | 
class TextDecoder(nn.Module):
    """Token decoder: embeddings, cross-attending residual blocks, tied output.

    The output logits are produced by multiplying with the transposed token
    embedding matrix, so no separate output projection is stored.
    """

    def __init__(
        self, n_vocab: int, n_ctx: int, n_state: int, n_head: int, n_layer: int
    ):
        super().__init__()

        self.token_embedding = nn.Embedding(n_vocab, n_state)
        # learned positions; allocated uninitialised here
        self.positional_embedding = nn.Parameter(torch.empty(n_ctx, n_state))

        layers = [
            ResidualAttentionBlock(n_state, n_head, cross_attention=True)
            for _ in range(n_layer)
        ]
        self.blocks: Iterable[ResidualAttentionBlock] = nn.ModuleList(layers)
        self.ln = LayerNorm(n_state)

        # -inf strictly above the diagonal, 0 elsewhere; kept out of the
        # state dict (persistent=False)
        causal_mask = torch.empty(n_ctx, n_ctx).fill_(-np.inf).triu_(1)
        self.register_buffer("mask", causal_mask, persistent=False)

    def forward(self, x: Tensor, xa: Tensor, kv_cache: Optional[dict] = None):
        """
        x : torch.LongTensor, shape = (batch_size, <= n_ctx)
            the text tokens
        xa : torch.Tensor, shape = (batch_size, n_audio_ctx, n_audio_state)
            the encoded audio features to be attended on

        Returns the float logits obtained from the tied token embedding.
        """
        # With a non-empty cache, positions continue from the length (dim 1)
        # of the first cached tensor; otherwise they start at 0.
        if kv_cache:
            first_cached = next(iter(kv_cache.values()))
            offset = first_cached.shape[1]
        else:
            offset = 0

        embedded = self.token_embedding(x)
        positions = self.positional_embedding[offset : offset + x.shape[-1]]
        x = (embedded + positions).to(xa.dtype)

        for layer in self.blocks:
            x = layer(x, xa, mask=self.mask, kv_cache=kv_cache)

        x = self.ln(x)
        # reuse the embedding matrix, cast to the activation dtype, as the
        # output projection
        output_weight = self.token_embedding.weight.to(x.dtype)
        return (x @ torch.transpose(output_weight, 0, 1)).float()
250 | 
251 | 
class Whisper(nn.Module):
    """Top-level model pairing an `AudioEncoder` with a `TextDecoder`.

    Besides the two sub-modules it holds a non-persistent sparse boolean buffer
    ``alignment_heads`` of shape ``(n_text_layer, n_text_head)``, and exposes
    the module-level `detect_language_function`, `transcribe_function` and
    `decode_function` as methods.
    """

    def __init__(self, dims: ModelDimensions):
        super().__init__()
        self.dims = dims
        self.encoder = AudioEncoder(
            self.dims.n_mels,
            self.dims.n_audio_ctx,
            self.dims.n_audio_state,
            self.dims.n_audio_head,
            self.dims.n_audio_layer,
        )
        self.decoder = TextDecoder(
            self.dims.n_vocab,
            self.dims.n_text_ctx,
            self.dims.n_text_state,
            self.dims.n_text_head,
            self.dims.n_text_layer,
        )
        # use the last half among the decoder layers for time alignment by default;
        # to use a specific set of heads, see `set_alignment_heads()` below.
        all_heads = torch.zeros(
            self.dims.n_text_layer, self.dims.n_text_head, dtype=torch.bool
        )
        all_heads[self.dims.n_text_layer // 2 :] = True
        self.register_buffer("alignment_heads", all_heads.to_sparse(), persistent=False)

    def set_alignment_heads(self, dump: bytes):
        """
        Replace the ``alignment_heads`` buffer from a serialized mask.

        ``dump`` is base85-decoded, gzip-decompressed and read as a flat boolean
        array, which is then reshaped to ``(n_text_layer, n_text_head)`` and
        stored in sparse form.
        """
        # np.frombuffer yields a read-only view; copy it before handing the
        # memory to torch
        array = np.frombuffer(
            gzip.decompress(base64.b85decode(dump)), dtype=bool
        ).copy()
        mask = torch.from_numpy(array).reshape(
            self.dims.n_text_layer, self.dims.n_text_head
        )
        self.register_buffer("alignment_heads", mask.to_sparse(), persistent=False)

    def embed_audio(self, mel: torch.Tensor):
        """Run only the encoder on ``mel`` and return its output."""
        return self.encoder(mel)

    def logits(self, tokens: torch.Tensor, audio_features: torch.Tensor):
        """Run only the decoder on ``tokens`` given already-encoded audio."""
        return self.decoder(tokens, audio_features)

    def forward(
        self, mel: torch.Tensor, tokens: torch.Tensor
    ) -> torch.Tensor:
        """
        Encode ``mel`` and decode ``tokens`` against it in one call.

        Returns the decoder output (a logits tensor), not a dictionary.
        """
        return self.decoder(tokens, self.encoder(mel))

    @property
    def device(self):
        """Device of the first parameter yielded by ``self.parameters()``."""
        return next(self.parameters()).device

    @property
    def is_multilingual(self):
        """True when the vocabulary has at least 51865 entries."""
        return self.dims.n_vocab >= 51865

    @property
    def num_languages(self):
        """``n_vocab - 51765``, minus one more when `is_multilingual` is True."""
        return self.dims.n_vocab - 51765 - int(self.is_multilingual)

    def install_kv_cache_hooks(self, cache: Optional[dict] = None):
        """
        The `MultiHeadAttention` module optionally accepts `kv_cache` which stores the key and value
        tensors calculated for the previous positions. This method returns a dictionary that stores
        all caches, and the necessary hooks for the key and value projection modules that save the
        intermediate tensors to be reused during later calculations.

        The dictionary passed in as `cache` is shallow-copied, never mutated; hooks are installed
        on the decoder's attention modules only.

        Returns
        -------
        cache : Dict[nn.Module, torch.Tensor]
            A dictionary object mapping the key/value projection modules to its cache
        hooks : List[RemovableHandle]
            List of PyTorch RemovableHandle objects to stop the hooks to be called
        """
        cache = {**cache} if cache is not None else {}
        hooks = []

        def save_to_cache(module, _, output):
            if module not in cache or output.shape[1] > self.dims.n_text_ctx:
                # save as-is, for the first token or cross attention
                cache[module] = output
            else:
                # later steps: append the new positions along the sequence axis
                cache[module] = torch.cat([cache[module], output], dim=1).detach()
            # the returned tensor replaces the projection's output, so the
            # attention module sees the full cached sequence
            return cache[module]

        def install_hooks(layer: nn.Module):
            if isinstance(layer, MultiHeadAttention):
                hooks.append(layer.key.register_forward_hook(save_to_cache))
                hooks.append(layer.value.register_forward_hook(save_to_cache))

        self.decoder.apply(install_hooks)
        return cache, hooks

    # bind the module-level implementations as methods of the model
    detect_language = detect_language_function
    transcribe = transcribe_function
    decode = decode_function
346 | 


--------------------------------------------------------------------------------
/pytranscriber/gui/main/window_main.py:
--------------------------------------------------------------------------------
  1 | # -*- coding: utf-8 -*-
  2 | 
  3 | # Form implementation generated from reading ui file 'window_main.ui'
  4 | #
  5 | # Created by: PyQt5 UI code generator 5.15.4
  6 | #
  7 | # WARNING: Any manual changes made to this file will be lost when pyuic5 is
  8 | # run again.  Do not edit this file unless you know what you are doing.
  9 | 
 10 | 
 11 | from PyQt5 import QtCore, QtGui, QtWidgets
 12 | 
 13 | 
class Ui_window(object):
    """Widget tree and texts of the pyTranscriber main window.

    This class is produced by the PyQt5 UI code generator from
    'window_main.ui'; manual changes are lost when pyuic5 is run again, so
    lasting changes belong in the .ui file.
    """

    def setupUi(self, window):
        """Create every widget, menu and action and attach them to `window`.

        Widgets under the central widget are placed with fixed geometries;
        the three option rows (language, engine, model) each use a
        QHBoxLayout inside a fixed-geometry container widget. All visible
        texts are assigned afterwards by `retranslateUi`.
        """
        window.setObjectName("window")
        window.resize(1045, 610)
        self.centralwidget = QtWidgets.QWidget(window)
        self.centralwidget.setObjectName("centralwidget")
        # --- file selection, conversion and progress controls ---
        self.bSelectMedia = QtWidgets.QPushButton(self.centralwidget)
        self.bSelectMedia.setGeometry(QtCore.QRect(10, 10, 141, 34))
        self.bSelectMedia.setObjectName("bSelectMedia")
        self.bConvert = QtWidgets.QPushButton(self.centralwidget)
        # the convert button starts out disabled
        self.bConvert.setEnabled(False)
        self.bConvert.setGeometry(QtCore.QRect(200, 380, 341, 34))
        self.bConvert.setObjectName("bConvert")
        self.progressBar = QtWidgets.QProgressBar(self.centralwidget)
        self.progressBar.setGeometry(QtCore.QRect(20, 470, 1021, 23))
        self.progressBar.setProperty("value", 0)
        self.progressBar.setObjectName("progressBar")
        self.labelCurrentOperation = QtWidgets.QLabel(self.centralwidget)
        self.labelCurrentOperation.setGeometry(QtCore.QRect(20, 420, 871, 41))
        self.labelCurrentOperation.setText("")
        self.labelCurrentOperation.setObjectName("labelCurrentOperation")
        # --- output folder controls ---
        self.bOpenOutputFolder = QtWidgets.QPushButton(self.centralwidget)
        self.bOpenOutputFolder.setGeometry(QtCore.QRect(550, 380, 241, 34))
        self.bOpenOutputFolder.setObjectName("bOpenOutputFolder")
        self.bSelectOutputFolder = QtWidgets.QPushButton(self.centralwidget)
        self.bSelectOutputFolder.setGeometry(QtCore.QRect(10, 180, 141, 34))
        self.bSelectOutputFolder.setObjectName("bSelectOutputFolder")
        self.qleOutputFolder = QtWidgets.QLineEdit(self.centralwidget)
        self.qleOutputFolder.setGeometry(QtCore.QRect(160, 180, 861, 32))
        self.qleOutputFolder.setText("")
        # the output path field is read-only
        self.qleOutputFolder.setReadOnly(True)
        self.qleOutputFolder.setObjectName("qleOutputFolder")
        # --- group box holding the list of selected files ---
        self.groupBox = QtWidgets.QGroupBox(self.centralwidget)
        self.groupBox.setGeometry(QtCore.QRect(160, 10, 871, 161))
        self.groupBox.setAlignment(QtCore.Qt.AlignLeading|QtCore.Qt.AlignLeft|QtCore.Qt.AlignTop)
        self.groupBox.setFlat(False)
        self.groupBox.setCheckable(False)
        self.groupBox.setObjectName("groupBox")
        self.qlwListFilesSelected = QtWidgets.QListWidget(self.groupBox)
        self.qlwListFilesSelected.setGeometry(QtCore.QRect(10, 30, 851, 121))
        self.qlwListFilesSelected.setObjectName("qlwListFilesSelected")
        self.bRemoveFile = QtWidgets.QPushButton(self.centralwidget)
        self.bRemoveFile.setGeometry(QtCore.QRect(10, 50, 141, 34))
        self.bRemoveFile.setObjectName("bRemoveFile")
        self.labelProgressFileIndex = QtWidgets.QLabel(self.centralwidget)
        self.labelProgressFileIndex.setGeometry(QtCore.QRect(20, 500, 131, 41))
        self.labelProgressFileIndex.setText("")
        self.labelProgressFileIndex.setObjectName("labelProgressFileIndex")
        self.bCancel = QtWidgets.QPushButton(self.centralwidget)
        self.bCancel.setGeometry(QtCore.QRect(470, 510, 108, 36))
        self.bCancel.setObjectName("bCancel")
        # checked by default
        self.chbxOpenOutputFilesAuto = QtWidgets.QCheckBox(self.centralwidget)
        self.chbxOpenOutputFilesAuto.setGeometry(QtCore.QRect(10, 220, 291, 32))
        self.chbxOpenOutputFilesAuto.setChecked(True)
        self.chbxOpenOutputFilesAuto.setObjectName("chbxOpenOutputFilesAuto")
        # --- row 1: audio language label + combo box ---
        self.horizontalLayoutWidget = QtWidgets.QWidget(self.centralwidget)
        self.horizontalLayoutWidget.setGeometry(QtCore.QRect(200, 250, 591, 38))
        self.horizontalLayoutWidget.setObjectName("horizontalLayoutWidget")
        self.horizontalLayout_5 = QtWidgets.QHBoxLayout(self.horizontalLayoutWidget)
        self.horizontalLayout_5.setContentsMargins(0, 0, 0, 0)
        self.horizontalLayout_5.setObjectName("horizontalLayout_5")
        self.labelSelectLang = QtWidgets.QLabel(self.horizontalLayoutWidget)
        self.labelSelectLang.setObjectName("labelSelectLang")
        self.horizontalLayout_5.addWidget(self.labelSelectLang)
        # the combo box is created empty here; no items are added in this class
        self.cbSelectLang = QtWidgets.QComboBox(self.horizontalLayoutWidget)
        self.cbSelectLang.setSizeAdjustPolicy(QtWidgets.QComboBox.AdjustToContents)
        self.cbSelectLang.setObjectName("cbSelectLang")
        self.horizontalLayout_5.addWidget(self.cbSelectLang)
        # --- row 2: engine choice (Google Speech is checked by default) ---
        self.horizontalLayoutWidget_2 = QtWidgets.QWidget(self.centralwidget)
        self.horizontalLayoutWidget_2.setGeometry(QtCore.QRect(200, 290, 591, 41))
        self.horizontalLayoutWidget_2.setObjectName("horizontalLayoutWidget_2")
        self.horizontalLayout = QtWidgets.QHBoxLayout(self.horizontalLayoutWidget_2)
        self.horizontalLayout.setContentsMargins(0, 0, 0, 0)
        self.horizontalLayout.setObjectName("horizontalLayout")
        self.lEngine = QtWidgets.QLabel(self.horizontalLayoutWidget_2)
        self.lEngine.setObjectName("lEngine")
        self.horizontalLayout.addWidget(self.lEngine)
        self.rbGoogleEngine = QtWidgets.QRadioButton(self.horizontalLayoutWidget_2)
        self.rbGoogleEngine.setChecked(True)
        self.rbGoogleEngine.setObjectName("rbGoogleEngine")
        self.horizontalLayout.addWidget(self.rbGoogleEngine)
        self.rbWhisper = QtWidgets.QRadioButton(self.horizontalLayoutWidget_2)
        self.rbWhisper.setEnabled(True)
        self.rbWhisper.setCheckable(True)
        self.rbWhisper.setObjectName("rbWhisper")
        self.horizontalLayout.addWidget(self.rbWhisper)
        # --- row 3: model size choice (Tiny is checked by default) ---
        self.horizontalLayoutWidget_3 = QtWidgets.QWidget(self.centralwidget)
        self.horizontalLayoutWidget_3.setGeometry(QtCore.QRect(200, 330, 611, 31))
        self.horizontalLayoutWidget_3.setObjectName("horizontalLayoutWidget_3")
        self.horizontalLayout_2 = QtWidgets.QHBoxLayout(self.horizontalLayoutWidget_3)
        self.horizontalLayout_2.setContentsMargins(0, 0, 0, 0)
        self.horizontalLayout_2.setObjectName("horizontalLayout_2")
        self.lModels = QtWidgets.QLabel(self.horizontalLayoutWidget_3)
        self.lModels.setEnabled(True)
        self.lModels.setObjectName("lModels")
        self.horizontalLayout_2.addWidget(self.lModels)
        self.rbModelTiny = QtWidgets.QRadioButton(self.horizontalLayoutWidget_3)
        self.rbModelTiny.setChecked(True)
        self.rbModelTiny.setObjectName("rbModelTiny")
        self.horizontalLayout_2.addWidget(self.rbModelTiny)
        self.rbModelBase = QtWidgets.QRadioButton(self.horizontalLayoutWidget_3)
        self.rbModelBase.setEnabled(True)
        self.rbModelBase.setCheckable(True)
        self.rbModelBase.setObjectName("rbModelBase")
        self.horizontalLayout_2.addWidget(self.rbModelBase)
        self.rbModelSmall = QtWidgets.QRadioButton(self.horizontalLayoutWidget_3)
        self.rbModelSmall.setEnabled(True)
        self.rbModelSmall.setCheckable(True)
        self.rbModelSmall.setObjectName("rbModelSmall")
        self.horizontalLayout_2.addWidget(self.rbModelSmall)
        self.rbModelMedium = QtWidgets.QRadioButton(self.horizontalLayoutWidget_3)
        self.rbModelMedium.setEnabled(True)
        self.rbModelMedium.setCheckable(True)
        self.rbModelMedium.setObjectName("rbModelMedium")
        self.horizontalLayout_2.addWidget(self.rbModelMedium)
        self.rbModelLarge = QtWidgets.QRadioButton(self.horizontalLayoutWidget_3)
        self.rbModelLarge.setEnabled(True)
        self.rbModelLarge.setCheckable(True)
        self.rbModelLarge.setObjectName("rbModelLarge")
        self.horizontalLayout_2.addWidget(self.rbModelLarge)
        window.setCentralWidget(self.centralwidget)
        # --- menu bar, status bar and actions ---
        self.menubar = QtWidgets.QMenuBar(window)
        self.menubar.setGeometry(QtCore.QRect(0, 0, 1045, 23))
        self.menubar.setObjectName("menubar")
        self.menuAbout = QtWidgets.QMenu(self.menubar)
        self.menuAbout.setObjectName("menuAbout")
        self.menuProxy = QtWidgets.QMenu(self.menubar)
        self.menuProxy.setObjectName("menuProxy")
        self.menuLanguage = QtWidgets.QMenu(self.menubar)
        self.menuLanguage.setObjectName("menuLanguage")
        window.setMenuBar(self.menubar)
        self.statusbar = QtWidgets.QStatusBar(window)
        self.statusbar.setObjectName("statusbar")
        window.setStatusBar(self.statusbar)
        self.actionLicense = QtWidgets.QAction(window)
        self.actionLicense.setObjectName("actionLicense")
        self.actionDonation = QtWidgets.QAction(window)
        self.actionDonation.setObjectName("actionDonation")
        self.actionAbout_pyTranscriber = QtWidgets.QAction(window)
        self.actionAbout_pyTranscriber.setObjectName("actionAbout_pyTranscriber")
        self.actionProxy = QtWidgets.QAction(window)
        self.actionProxy.setObjectName("actionProxy")
        self.actionEnglish = QtWidgets.QAction(window)
        self.actionEnglish.setObjectName("actionEnglish")
        self.actionChineseTraditional = QtWidgets.QAction(window)
        self.actionChineseTraditional.setObjectName("actionChineseTraditional")
        self.actionChineseSimplified = QtWidgets.QAction(window)
        self.actionChineseSimplified.setObjectName("actionChineseSimplified")
        self.actionPortuguese = QtWidgets.QAction(window)
        self.actionPortuguese.setObjectName("actionPortuguese")
        # populate the menus, then add them to the bar in the order
        # menuProxy, menuLanguage, menuAbout
        self.menuAbout.addAction(self.actionLicense)
        self.menuAbout.addAction(self.actionDonation)
        self.menuAbout.addAction(self.actionAbout_pyTranscriber)
        self.menuProxy.addAction(self.actionProxy)
        self.menuLanguage.addAction(self.actionEnglish)
        self.menuLanguage.addAction(self.actionChineseTraditional)
        self.menuLanguage.addAction(self.actionChineseSimplified)
        self.menuLanguage.addAction(self.actionPortuguese)
        self.menubar.addAction(self.menuProxy.menuAction())
        self.menubar.addAction(self.menuLanguage.menuAction())
        self.menubar.addAction(self.menuAbout.menuAction())

        # assign all texts, then let Qt auto-connect any slots on `window`
        # that follow its on_<objectName>_<signal> naming convention
        self.retranslateUi(window)
        QtCore.QMetaObject.connectSlotsByName(window)

    def retranslateUi(self, window):
        """Assign every user-visible text through Qt's translation lookup.

        Each string is looked up in the "window" translation context. Only
        texts are changed here; no widgets are created.
        """
        _translate = QtCore.QCoreApplication.translate
        window.setWindowTitle(_translate("window", "pyTranscriber v2.1 - 13/07/2025"))
        self.bSelectMedia.setText(_translate("window", "Select file(s)"))
        self.bConvert.setText(_translate("window", "Transcribe Audio / Generate Subtitles"))
        self.bOpenOutputFolder.setText(_translate("window", "Open Output Folder"))
        self.bSelectOutputFolder.setText(_translate("window", "Output Location"))
        self.groupBox.setTitle(_translate("window", "List of files to generate transcribe audio / generate subtitles"))
        self.bRemoveFile.setText(_translate("window", "Remove file(s)"))
        self.bCancel.setText(_translate("window", "Cancel"))
        self.chbxOpenOutputFilesAuto.setText(_translate("window", "Open output files automatically"))
        self.labelSelectLang.setText(_translate("window", "Audio Language:"))
        self.lEngine.setText(_translate("window", "Engine:"))
        self.rbGoogleEngine.setText(_translate("window", "Google Speech (cloud processing)"))
        self.rbWhisper.setText(_translate("window", "openAI Whisper (local processing)"))
        self.lModels.setText(_translate("window", "Models:"))
        self.rbModelTiny.setText(_translate("window", "Tiny"))
        self.rbModelBase.setText(_translate("window", "Base"))
        self.rbModelSmall.setText(_translate("window", "Small"))
        self.rbModelMedium.setText(_translate("window", "Medium"))
        self.rbModelLarge.setText(_translate("window", "Large"))
        # the menu object named menuProxy carries the title "&Settings"
        self.menuAbout.setTitle(_translate("window", "Abo&ut"))
        self.menuProxy.setTitle(_translate("window", "&Settings"))
        self.menuLanguage.setTitle(_translate("window", "&Language"))
        self.actionLicense.setText(_translate("window", "&License"))
        self.actionDonation.setText(_translate("window", "&Funding at Github Sponsors"))
        self.actionAbout_pyTranscriber.setText(_translate("window", "&More about pyTranscriber"))
        self.actionProxy.setText(_translate("window", "&Proxy"))
        self.actionProxy.setToolTip(_translate("window", "Proxy setting"))
        self.actionEnglish.setText(_translate("window", "English"))
        self.actionChineseTraditional.setText(_translate("window", "繁體中文 - Chinese Traditional"))
        self.actionChineseSimplified.setText(_translate("window", "简体中文 - Chinese Simplified"))
        self.actionPortuguese.setText(_translate("window", "Português"))
212 | 


--------------------------------------------------------------------------------