├── .github ├── FUNDING.yml ├── ISSUE_TEMPLATE │ ├── bug_report.md │ └── feature_request.md └── workflows │ ├── linux-nuitka.yml │ ├── linux-pyinstaller.yml │ ├── mac-pyinstaller.yml │ ├── win-nuitka.yml │ └── win-pyinstaller-dev2.yml ├── .gitignore ├── LICENSE ├── MANIFEST.in ├── Pipfile ├── README.md ├── __init__.py ├── autosub ├── __init__-0.4.0.py ├── __init__.py ├── constants.py └── formatters.py ├── deployment ├── freeze-linux-nuitka.sh ├── freeze-linux.sh ├── freeze-nuitka-win.bat ├── freeze-win.sh ├── nuitka-win-standalone.bat └── win │ ├── script-installer-windows-standalone.iss │ └── script-installer-windows.iss ├── doc ├── entitlements.plist ├── lightning.jpeg ├── pyTranscriber.png ├── screenshot1.png ├── screenshot2.png ├── screenshot3.png └── technical_details.md ├── freeze-mac.sh ├── main.py ├── nuitka-win-standalone.bat ├── patches ├── autosub-0.3.13.patch ├── autosub-0.4.0.patch └── note.txt ├── pytranscriber.sqlite ├── pytranscriber ├── __init__.py ├── control │ ├── __init__.py │ ├── ctr_autosub.py │ ├── ctr_db.py │ ├── ctr_engine.py │ ├── ctr_main.py │ ├── ctr_proxy.py │ ├── ctr_whisper.py │ ├── thread_cancel_autosub.py │ ├── thread_exec_autosub.py │ ├── thread_exec_generic.py │ └── thread_exec_whisper.py ├── gui │ ├── Português.qm │ ├── Português.ts │ ├── __init__.py │ ├── main │ │ ├── view_main.py │ │ ├── window_main.py │ │ └── window_main.ui │ ├── message_util.py │ ├── proxy.py │ ├── proxy.ui │ ├── proxy │ │ ├── __init__.py │ │ ├── view_proxy.py │ │ ├── window_proxy.py │ │ └── window_proxy.ui │ ├── 简体中文 - Chinese Simplified.qm │ ├── 简体中文 - Chinese Simplified.ts │ ├── 繁體中文 - Chinese Traditional.qm │ └── 繁體中文 - Chinese Traditional.ts ├── model │ ├── __init__.py │ ├── google_speech.py │ ├── transcription_parameters.py │ └── whisper.py └── util │ ├── __init__.py │ ├── srtparser.py │ └── util.py ├── requirements.txt ├── script-installer-windows-standalone.iss └── whisper ├── __init__.py ├── __main__.py ├── assets ├── gpt2.tiktoken ├── 
mel_filters.npz └── multilingual.tiktoken ├── audio.py ├── decoding.py ├── model.py ├── normalizers ├── __init__.py ├── basic.py ├── english.json └── english.py ├── timing.py ├── tokenizer.py ├── transcribe.py ├── triton_ops.py ├── utils.py └── version.py /.github/FUNDING.yml: -------------------------------------------------------------------------------- 1 | # These are supported funding model platforms 2 | 3 | github: @raryelcostasouza # Replace with up to 4 GitHub Sponsors-enabled usernames e.g., [user1, user2] 4 | patreon: # Replace with a single Patreon username 5 | open_collective: # Replace with a single Open Collective username 6 | ko_fi: pytranscriber # Replace with a single Ko-fi username 7 | tidelift: # Replace with a single Tidelift platform-name/package-name e.g., npm/babel 8 | community_bridge: # Replace with a single Community Bridge project-name e.g., cloud-foundry 9 | liberapay: # Replace with a single Liberapay username 10 | issuehunt: # Replace with a single IssueHunt username 11 | otechie: # Replace with a single Otechie username 12 | lfx_crowdfunding: # Replace with a single LFX Crowdfunding project-name e.g., cloud-foundry 13 | custom: # Replace with up to 4 custom sponsorship URLs e.g., ['link1', 'link2'] 14 | -------------------------------------------------------------------------------- /.github/ISSUE_TEMPLATE/bug_report.md: -------------------------------------------------------------------------------- 1 | --- 2 | name: Bug report 3 | about: Create a report to help us improve 4 | title: '' 5 | labels: '' 6 | assignees: '' 7 | 8 | --- 9 | 10 | **Describe the bug** 11 | A clear and concise description of what the bug is. 12 | 13 | **To Reproduce** 14 | Steps to reproduce the behavior: 15 | 1. Go to '...' 16 | 2. Click on '....' 17 | 3. Scroll down to '....' 18 | 4. See error 19 | 20 | **Expected behavior** 21 | A clear and concise description of what you expected to happen. 
22 | 23 | **Screenshots** 24 | If applicable, add screenshots to help explain your problem. 25 | 26 | **Desktop (please complete the following information):** 27 | - OS: [e.g. iOS] 28 | - Browser [e.g. chrome, safari] 29 | - Version [e.g. 22] 30 | 31 | **Smartphone (please complete the following information):** 32 | - Device: [e.g. iPhone6] 33 | - OS: [e.g. iOS8.1] 34 | - Browser [e.g. stock browser, safari] 35 | - Version [e.g. 22] 36 | 37 | **Additional context** 38 | Add any other context about the problem here. 39 | -------------------------------------------------------------------------------- /.github/ISSUE_TEMPLATE/feature_request.md: -------------------------------------------------------------------------------- 1 | --- 2 | name: Feature request 3 | about: Suggest an idea for this project 4 | title: '' 5 | labels: '' 6 | assignees: '' 7 | 8 | --- 9 | 10 | **Is your feature request related to a problem? Please describe.** 11 | A clear and concise description of what the problem is. Ex. I'm always frustrated when [...] 12 | 13 | **Describe the solution you'd like** 14 | A clear and concise description of what you want to happen. 15 | 16 | **Describe alternatives you've considered** 17 | A clear and concise description of any alternative solutions or features you've considered. 18 | 19 | **Additional context** 20 | Add any other context or screenshots about the feature request here. 
21 | -------------------------------------------------------------------------------- /.github/workflows/linux-nuitka.yml: -------------------------------------------------------------------------------- 1 | name: Linux Nuitka Pipeline 2 | 3 | on: 4 | push: 5 | branches: 6 | - master 7 | - develop 8 | pull_request: 9 | branches: 10 | - master 11 | - develop 12 | 13 | jobs: 14 | build: 15 | runs-on: ubuntu-22.04 # Ensure the job runs only on Ubuntu 22.04 16 | 17 | steps: 18 | - name: Checkout repository 19 | uses: actions/checkout@v3 20 | with: 21 | fetch-depth: 0 # Fetch all tags 22 | 23 | - name: Get latest Git tag 24 | id: get_version 25 | run: | 26 | VERSION=$(git describe --tags --abbrev=0 2>/dev/null || echo "v0.1.0") 27 | echo "VERSION=$VERSION" >> $GITHUB_ENV 28 | echo "Resolved version: $VERSION" 29 | 30 | - name: Install missing system libraries (XCB, TBB, etc.) 31 | run: | 32 | sudo apt-get update 33 | sudo apt-get install -y \ 34 | libxcb1 \ 35 | libxcb-keysyms1 \ 36 | libxcb-shape0 \ 37 | libxcb-xkb1 \ 38 | libxcb-render-util0 \ 39 | libxcb-image0 \ 40 | libxcb-xinerama0 \ 41 | libxkbcommon-x11-0 \ 42 | libxcb-icccm4 \ 43 | libtbb12 \ 44 | ccache \ 45 | libsox-dev 46 | 47 | 48 | - name: Install FFmpeg 49 | run: sudo apt update && sudo apt install -y ffmpeg 50 | 51 | - name: Verify FFmpeg installation 52 | run: | 53 | which ffmpeg 54 | ffmpeg -version 55 | 56 | - name: Set up Python 3.8 57 | uses: actions/setup-python@v4 58 | with: 59 | python-version: "3.8" 60 | 61 | - name: Set up Python virtual environment 62 | run: | 63 | python -m venv .venv 64 | 65 | - name: Install dependencies 66 | run: | 67 | source .venv/bin/activate 68 | pip install --upgrade pip 69 | pip install -r requirements.txt 70 | pip install nuitka 71 | 72 | - name: Verify Whisper assets directory 73 | run: | 74 | source .venv/bin/activate 75 | whisperPath=$(python -c "import whisper; print(whisper.__file__)") 76 | assetsPath=$(dirname $whisperPath)/assets 77 | if [ -d "$assetsPath" ]; 
then 78 | echo "The 'assets' directory exists at: $assetsPath" 79 | else 80 | echo "The 'assets' directory DOES NOT exist." 81 | exit 1 82 | fi 83 | 84 | - name: Compile with Nuitka 85 | run: | 86 | source .venv/bin/activate 87 | ffmpegPath=$(which ffmpeg) 88 | nuitka \ 89 | --assume-yes-for-downloads \ 90 | --enable-plugin=pyqt5 \ 91 | --include-data-files="pytranscriber.sqlite=pytranscriber.sqlite" \ 92 | --include-data-files="$ffmpegPath=ffmpeg" \ 93 | --include-data-files="pytranscriber/gui/*.qm=pytranscriber/gui/" \ 94 | --include-package-data="whisper:assets/*=whisper/assets" \ 95 | main.py \ 96 | --onefile \ 97 | --output-dir=dist 98 | 99 | - name: Zip the binary with version number 100 | run: | 101 | cd dist 102 | mv main.bin "pyTranscriber-${VERSION}" 103 | 104 | - name: Upload built executable 105 | uses: actions/upload-artifact@v4 106 | with: 107 | name: pyTranscriber-linux-nuitka-${{ env.VERSION }} 108 | path: ./dist/pyTranscriber-${{ env.VERSION }} # Adjust this path if Nuitka outputs elsewhere 109 | 110 | download: 111 | runs-on: ubuntu-22.04 112 | needs: build 113 | steps: 114 | - name: Download built executable 115 | uses: actions/download-artifact@v4 116 | with: 117 | path: ./output 118 | 119 | - name: List downloaded files 120 | run: dir ./output 121 | -------------------------------------------------------------------------------- /.github/workflows/linux-pyinstaller.yml: -------------------------------------------------------------------------------- 1 | name: Linux PyInstaller 2 | 3 | on: 4 | push: 5 | branches: 6 | - master 7 | - develop 8 | pull_request: 9 | 10 | jobs: 11 | build: 12 | runs-on: ubuntu-22.04 13 | 14 | steps: 15 | - name: Checkout repository 16 | uses: actions/checkout@v3 17 | with: 18 | fetch-depth: 0 # Fetch all tags 19 | 20 | - name: Get latest Git tag 21 | id: get_version 22 | run: | 23 | VERSION=$(git describe --tags --abbrev=0 2>/dev/null || echo "v0.1.0") 24 | echo "VERSION=$VERSION" >> $GITHUB_ENV 25 | echo "Resolved 
version: $VERSION" 26 | 27 | - name: Install missing system libraries (XCB, TBB, etc.) 28 | run: | 29 | sudo apt-get update 30 | sudo apt-get install -y \ 31 | libxcb1 \ 32 | libxcb-keysyms1 \ 33 | libxcb-shape0 \ 34 | libxcb-xkb1 \ 35 | libxcb-render-util0 \ 36 | libxcb-image0 \ 37 | libxcb-xinerama0 \ 38 | libxkbcommon-x11-0 \ 39 | libxcb-icccm4 \ 40 | libtbb12 \ 41 | libsox-dev 42 | 43 | - name: Install FFmpeg 44 | run: sudo apt update && sudo apt install -y ffmpeg 45 | 46 | - name: Verify FFmpeg installation 47 | run: | 48 | which ffmpeg 49 | ffmpeg -version 50 | 51 | - name: Set up Python 3.8 52 | uses: actions/setup-python@v4 53 | with: 54 | python-version: "3.8" 55 | 56 | - name: Set up Python virtual environment 57 | run: | 58 | python -m venv .venv 59 | 60 | - name: Activate virtual environment and install dependencies 61 | run: | 62 | source .venv/bin/activate 63 | pip install --upgrade pip 64 | pip install -r requirements.txt 65 | pip install pyinstaller 66 | 67 | - name: Verify existence of Whisper assets directory 68 | run: | 69 | source .venv/bin/activate # Activate the virtual environment 70 | ASSETS_PATH=$(python -c "import whisper; import os; print(os.path.join(os.path.dirname(whisper.__file__), 'assets'))") 71 | if [ -d "$ASSETS_PATH" ]; then 72 | echo "The 'assets' directory exists at: $ASSETS_PATH" 73 | echo "ASSETS_PATH=$ASSETS_PATH" >> $GITHUB_ENV 74 | else 75 | echo "The 'assets' directory does NOT exist." 76 | exit 1 77 | fi 78 | 79 | - name: Compile with pyInstaller 80 | run: | 81 | source .venv/bin/activate 82 | FFMPPEG_PATH=$(which ffmpeg) 83 | pyinstaller main.py \ 84 | --path="$(pwd)" \ 85 | --onefile \ 86 | --add-binary="$FFMPPEG_PATH:." \ 87 | --add-binary="pytranscriber.sqlite:." 
\ 88 | --add-data="pytranscriber/gui/*.qm:pytranscriber/gui/" \ 89 | --add-data="$ASSETS_PATH:whisper/assets" 90 | 91 | - name: Rename and zip the binary with version number 92 | run: | 93 | cd dist 94 | mv main "pyTranscriber-${VERSION}" 95 | 96 | - name: Upload built executable 97 | uses: actions/upload-artifact@v4 98 | with: 99 | name: pyTranscriber-linux-pyinstaller-${{ env.VERSION }} 100 | path: ./dist/pyTranscriber-${{ env.VERSION }} 101 | 102 | download: 103 | runs-on: ubuntu-22.04 104 | needs: build 105 | steps: 106 | - name: Download built executable 107 | uses: actions/download-artifact@v4 108 | with: 109 | path: ./output 110 | 111 | - name: List downloaded files 112 | run: ls -la ./output 113 | -------------------------------------------------------------------------------- /.github/workflows/mac-pyinstaller.yml: -------------------------------------------------------------------------------- 1 | name: MacOS PyInstaller 2 | 3 | on: 4 | workflow_dispatch: 5 | push: 6 | branches: 7 | - master 8 | - develop 9 | 10 | 11 | jobs: 12 | build: 13 | runs-on: macos-14 # Use macOS ARM64 runner 14 | steps: 15 | - name: Checkout repository 16 | uses: actions/checkout@v4 17 | with: 18 | fetch-depth: 0 # Ensure full history and tags are available 19 | 20 | - name: Get latest Git tag 21 | id: get_version 22 | run: | 23 | VERSION=$(git describe --tags --abbrev=0 2>/dev/null || echo "v0.1.0") 24 | echo "VERSION=$VERSION" >> $GITHUB_ENV 25 | echo "Resolved version: $VERSION" 26 | 27 | - name: Setup FFmpeg 28 | uses: federicocarboni/setup-ffmpeg@v3.1 29 | with: 30 | ffmpeg-version: release 31 | architecture: x64 32 | 33 | - name: Verify FFmpeg installation 34 | run: | 35 | which ffmpeg 36 | ffmpeg -version 37 | 38 | - name: Set up Python 3.8 39 | uses: actions/setup-python@v4 40 | with: 41 | python-version: "3.8" 42 | 43 | - name: Set up Python virtual environment 44 | run: | 45 | python -m venv .venv 46 | 47 | - name: Activate virtual environment and install dependencies 
48 | run: | 49 | source .venv/bin/activate 50 | pip install --upgrade pip 51 | pip install -r requirements.txt 52 | pip install pyinstaller 53 | 54 | - name: Verify existence of Whisper assets directory 55 | run: | 56 | source .venv/bin/activate # Activate the virtual environment 57 | ASSETS_PATH=$(python -c "import whisper; import os; print(os.path.join(os.path.dirname(whisper.__file__), 'assets'))") 58 | if [ -d "$ASSETS_PATH" ]; then 59 | echo "The 'assets' directory exists at: $ASSETS_PATH" 60 | echo "ASSETS_PATH=$ASSETS_PATH" >> $GITHUB_ENV 61 | else 62 | echo "The 'assets' directory does NOT exist." 63 | exit 1 64 | fi 65 | 66 | - name: Compile with pyInstaller 67 | run: | 68 | source .venv/bin/activate 69 | FFMPPEG_PATH=$(which ffmpeg) 70 | pyinstaller main.py \ 71 | --windowed \ 72 | --path="$(pwd)" \ 73 | --add-binary="$FFMPPEG_PATH:." \ 74 | --add-binary="pytranscriber.sqlite:." \ 75 | --add-data="pytranscriber/gui/*.qm:pytranscriber/gui/" \ 76 | --add-data="$ASSETS_PATH:whisper/assets" 77 | 78 | - name: Zip the .app bundle with version number 79 | run: | 80 | cd dist 81 | mv main.app "pyTranscriber-${VERSION}.app" 82 | zip -r "pyTranscriber-macos-${VERSION}.zip" "pyTranscriber-${VERSION}.app" 83 | 84 | - name: Upload built executable with version number 85 | uses: actions/upload-artifact@v4 86 | with: 87 | name: pyTranscriber-macos-${{ env.VERSION }} 88 | path: ./dist/pyTranscriber-macos-${{ env.VERSION }}.zip # Path adjusted for macOS 89 | 90 | download: 91 | runs-on: macos-14 # macOS ARM64 runner for downloading 92 | needs: build 93 | steps: 94 | - name: Download built executable 95 | uses: actions/download-artifact@v4 96 | with: 97 | path: ./output 98 | 99 | - name: List downloaded files 100 | run: ls -la ./output 101 | -------------------------------------------------------------------------------- /.github/workflows/win-nuitka.yml: -------------------------------------------------------------------------------- 1 | on: 2 | push: 3 | branches: 4 | - 
master 5 | - develop 6 | pull_request: 7 | 8 | jobs: 9 | build: 10 | runs-on: windows-latest 11 | 12 | steps: 13 | - name: Checkout repository 14 | uses: actions/checkout@v4 15 | with: 16 | fetch-depth: 0 17 | 18 | - name: Get latest Git tag 19 | id: get_version 20 | run: | 21 | $VERSION = git describe --tags --abbrev=0 2>$null 22 | if (-not $VERSION) { 23 | $VERSION = "v0.1.0" # Default version if no tags are found 24 | } 25 | echo "VERSION=$VERSION" | Out-File -FilePath $env:GITHUB_ENV -Encoding utf8 26 | Write-Host "Resolved version: $VERSION" 27 | shell: pwsh 28 | 29 | - name: Setup FFmpeg 30 | uses: federicocarboni/setup-ffmpeg@v3.1 31 | with: 32 | ffmpeg-version: release # Specify the desired FFmpeg version 33 | architecture: x64 34 | 35 | - name: Add FFmpeg to PATH 36 | run: | 37 | $ffmpegPath = (Get-Command ffmpeg).Source 38 | $env:Path += ";$($ffmpegPath.Substring(0, $ffmpegPath.LastIndexOf('\')))" 39 | $ffmpegPath 40 | shell: pwsh 41 | 42 | - name: Verify FFmpeg installation 43 | run: | 44 | where ffmpeg 45 | ffmpeg -version 46 | 47 | - name: Set up Python 3.8 48 | uses: actions/setup-python@v4 49 | with: 50 | python-version: "3.8" 51 | 52 | - name: Set up Python virtual environment 53 | run: | 54 | python -m venv .venv 55 | 56 | - name: Activate virtual environment 57 | run: | 58 | .\.venv\Scripts\Activate 59 | 60 | - name: Install dependencies 61 | run: | 62 | pip install --upgrade pip 63 | pip install -r requirements.txt 64 | pip install nuitka 65 | 66 | - name: Verificar existência do diretório assets do whisper 67 | run: | 68 | $whisperPath = (python -c "import whisper; print(whisper.__file__)").Trim() 69 | $assetsPath = Join-Path (Split-Path $whisperPath) 'assets' 70 | if (Test-Path $assetsPath) { 71 | Write-Host "O diretório 'assets' existe em: $assetsPath" 72 | } else { 73 | Write-Host "O diretório 'assets' NÃO existe." 
74 | exit 1 75 | } 76 | 77 | - name: Compile with Nuitka 78 | run: | 79 | $whisperPath = (python -c "import whisper; print(whisper.__file__)").Trim() 80 | $assetsPath = Join-Path (Split-Path $whisperPath) 'assets' 81 | $ffmpegPath = (Get-Command ffmpeg).Source 82 | nuitka ` 83 | --assume-yes-for-downloads ` 84 | --enable-plugin=pyqt5 ` 85 | --include-data-files="pytranscriber.sqlite=pytranscriber.sqlite" ` 86 | --include-data-files="$ffmpegPath=ffmpeg.exe" ` 87 | --include-data-files="pytranscriber/gui/*.qm=pytranscriber/gui/" ` 88 | --include-data-files="$assetsPath\*=whisper/assets/" ` 89 | main.py ` 90 | --onefile ` 91 | --output-dir=dist ` 92 | --windows-console-mode=disable 93 | 94 | - name: Rename and zip the .exe bundle with version number 95 | run: | 96 | Set-Location -Path dist 97 | Write-Host "Renaming main.exe to pyTranscriber-$env:VERSION.exe" 98 | Rename-Item -Force main.exe "pyTranscriber-$env:VERSION.exe" 99 | Write-Host "Creating zip archive: pyTranscriber-$env:VERSION.zip" 100 | Compress-Archive -Path "pyTranscriber-$env:VERSION.exe" -DestinationPath "pyTranscriber-win-$env:VERSION.zip" 101 | shell: pwsh 102 | 103 | - name: Upload built executable 104 | uses: actions/upload-artifact@v4 105 | with: 106 | name: pyTranscriber-win-${{ env.VERSION }} 107 | path: ./dist/pyTranscriber-win-${{ env.VERSION }}.zip # Adjust this path if Nuitka outputs elsewhere 108 | 109 | download: 110 | runs-on: windows-latest 111 | needs: build 112 | steps: 113 | - name: Download built executable 114 | uses: actions/download-artifact@v4 115 | with: 116 | path: ./output 117 | 118 | - name: List downloaded files 119 | run: dir ./output 120 | -------------------------------------------------------------------------------- /.github/workflows/win-pyinstaller-dev2.yml: -------------------------------------------------------------------------------- 1 | name: Windows PyInstaller 2 | 3 | on: 4 | push: 5 | branches: 6 | - master 7 | - develop 8 | pull_request: 9 | 10 | jobs: 11 | 
build: 12 | runs-on: windows-latest 13 | strategy: 14 | matrix: 15 | python-version: ["3.8", "3.10", "3.12"] # Paraleliza builds para cada versão do Python 16 | steps: 17 | - name: Checkout repository 18 | uses: actions/checkout@v4 19 | with: 20 | fetch-depth: 0 # Fetch all tags 21 | 22 | - name: Get latest Git tag 23 | id: get_version 24 | run: | 25 | $VERSION = git describe --tags --abbrev=0 2>$null 26 | if (-not $VERSION) { 27 | $VERSION = "v0.1.0" # Default version if no tags are found 28 | } 29 | echo "VERSION=$VERSION" | Out-File -FilePath $env:GITHUB_ENV -Encoding utf8 30 | Write-Host "Resolved version: $VERSION" 31 | shell: pwsh 32 | 33 | - name: Setup FFmpeg 34 | uses: federicocarboni/setup-ffmpeg@v3.1 35 | with: 36 | ffmpeg-version: release 37 | architecture: x64 38 | 39 | - name: Verify FFmpeg installation 40 | run: | 41 | where ffmpeg 42 | ffmpeg -version 43 | 44 | - name: Set up Python ${{ matrix.python-version }} 45 | uses: actions/setup-python@v4 46 | with: 47 | python-version: ${{ matrix.python-version }} 48 | 49 | - name: Create and activate virtual environment for Python ${{ matrix.python-version }} 50 | run: | 51 | python -m venv .venv-${{ matrix.python-version }} 52 | .\.venv-${{ matrix.python-version }}\Scripts\Activate 53 | shell: pwsh 54 | 55 | - name: Install dependencies for Python ${{ matrix.python-version }} 56 | run: | 57 | .\.venv-${{ matrix.python-version }}\Scripts\Activate 58 | python -m ensurepip --upgrade 59 | python -m pip install --upgrade pip 60 | python -m pip install -r requirements.txt 61 | python -m pip install pyinstaller 62 | shell: pwsh 63 | 64 | - name: Verify whisper assets directory for Python ${{ matrix.python-version }} 65 | run: | 66 | .\.venv-${{ matrix.python-version }}\Scripts\Activate 67 | $whisperPath = (python -c "import whisper; print(whisper.__file__)").Trim() 68 | $assetsPath = Join-Path (Split-Path $whisperPath) 'assets' 69 | if (Test-Path $assetsPath) { 70 | Write-Host "O diretório 'assets' existe em: 
$assetsPath" 71 | echo "ASSETS_PATH=$assetsPath" >> $env:GITHUB_ENV 72 | } else { 73 | Write-Host "O diretório 'assets' NÃO existe." 74 | exit 1 75 | } 76 | shell: pwsh 77 | 78 | - name: Compile with PyInstaller for Python ${{ matrix.python-version }} 79 | run: | 80 | .\.venv-${{ matrix.python-version }}\Scripts\Activate 81 | $ffmpegPath = (Get-Command ffmpeg).Source 82 | pyinstaller main.py ` 83 | --onefile ` 84 | --path="$(Get-Location)" ` 85 | --add-binary="$ffmpegPath;." ` 86 | --add-binary="pytranscriber.sqlite;." ` 87 | --add-data="pytranscriber/gui/*.qm;pytranscriber/gui/" ` 88 | --add-data="${env:ASSETS_PATH};whisper/assets" ` 89 | --clean 90 | 91 | shell: pwsh 92 | 93 | - name: Rename and zip the .exe bundle with version number 94 | run: | 95 | Set-Location -Path dist 96 | Write-Host "Renaming main.exe to pyTranscriber-$env:VERSION.exe" 97 | Rename-Item -Force main.exe "pyTranscriber-$env:VERSION.exe" 98 | Write-Host "Creating zip archive: pyTranscriber-win-$env:VERSION.zip" 99 | Compress-Archive -Path "pyTranscriber-$env:VERSION.exe" -DestinationPath "pyTranscriber-win-$env:VERSION.zip" 100 | shell: pwsh 101 | 102 | - name: Upload built executable for Python ${{ matrix.python-version }} 103 | uses: actions/upload-artifact@v4 104 | with: 105 | name: pyTranscriber-win-${{ env.VERSION }}-py${{ matrix.python-version }} 106 | path: ./dist/pyTranscriber-win-${{ env.VERSION }}.zip 107 | 108 | download: 109 | runs-on: windows-latest 110 | needs: build 111 | steps: 112 | - name: Download built executables 113 | uses: actions/download-artifact@v4 114 | with: 115 | path: ./output 116 | 117 | - name: List downloaded files 118 | run: dir ./output 119 | -------------------------------------------------------------------------------- /.gitignore: -------------------------------------------------------------------------------- 1 | lib/ 2 | python-libs/ 3 | bin/ 4 | *.spec 5 | *pyc 6 | *.egg-info 7 | *html 8 | build/ 9 | tests/ 10 | dist/ 11 | .DS_Store 12 | MANIFEST 13 | 
*#* 14 | ffmpeg* 15 | notes.txt 16 | 17 | Pipfile.lock 18 | 19 | Pipfile 20 | -------------------------------------------------------------------------------- /MANIFEST.in: -------------------------------------------------------------------------------- 1 | include README.md 2 | include LICENSE -------------------------------------------------------------------------------- /Pipfile: -------------------------------------------------------------------------------- 1 | [[source]] 2 | url = "https://pypi.org/simple" 3 | verify_ssl = true 4 | name = "pypi" 5 | 6 | [packages] 7 | autosub = "*" 8 | pyqt5 = "==5.15.4" 9 | pyinstaller = "*" 10 | macholib = "*" 11 | cachetools = "==4.2.4" 12 | certifi = "==2021.10.8" 13 | chardet = "==4.0.0" 14 | charset-normalizer = "==2.0.6" 15 | google-api-core = "==2.1.0" 16 | google-api-python-client = "==2.24.0" 17 | google-auth = "==2.3.0" 18 | google-auth-httplib2 = "==0.1.0" 19 | google-auth-oauthlib = "==0.4.6" 20 | googleapis-common-protos = "==1.53.0" 21 | httplib2 = "==0.20.1" 22 | idna = "==3.2" 23 | oauthlib = "==3.1.1" 24 | progressbar = "==2.5" 25 | protobuf = "==3.18.1" 26 | pyasn1 = "==0.4.8" 27 | pyasn1-modules = "==0.2.8" 28 | pyparsing = "==2.4.7" 29 | pyqt5-qt5 = "==5.15.2" 30 | pyqt5-sip = "==12.9.0" 31 | pysrt = "==1.1.2" 32 | requests = "==2.26.0" 33 | requests-oauthlib = "==1.3.0" 34 | rsa = "==4.7.2" 35 | six = "==1.16.0" 36 | uritemplate = "==3.0.1" 37 | urllib3 = "==1.26.7" 38 | nuitka = "*" 39 | orderedset = "*" 40 | zstandard = "*" 41 | 42 | [dev-packages] 43 | 44 | [requires] 45 | python_version = "3.8" 46 | -------------------------------------------------------------------------------- /README.md: -------------------------------------------------------------------------------- 1 | # pyTranscriber 2 | 3 | [](https://www.paypal.com/cgi-bin/webscr?cmd=_donations&business=YHB854YHPJCU8&item_name=Donation+pyTranscriber&currency_code=BRL) 4 | 
[](https://github.com/raryelcostasouza/pyTranscriber/raw/master/doc/lightning.jpeg) 5 | 6 | [](https://ko-fi.com/A0A6AIR3D) 7 | 8 | MOVED TO NEW WEBSITE - <a href="https://pytranscriber.github.io">https://pytranscriber.github.io</a> 9 | 10 | More than 640k downloads since first version. Thanks! 11 | Check live statistics at <a href="https://somsubhra.github.io/github-release-stats/?username=raryelcostasouza&repository=pyTranscriber&page=1&per_page=30">GitHub Release Stats</a> 12 | 13 | # Thanks to the people helping with funding 14 | Jixing Zhao, Narsu Narsu, Lucas Thorn, Soonkj Jung, Sergio Moreno, Yooki Adair, Adrien Jarton, YP, JOY_EASY, RodrigoRios, Zhou Mi, Dongmei Chen, Jung Yi Hung, Tah Kwang Tomas Tso 15 | 16 | # UPDATE - v2.0-stable - 07/07/2025 17 | 1. Added binary for Linux (GLIBC 2.35 or newer) 18 | 19 | # UPDATE - v2.0-stable - 22/05/2025 20 | 1. Fixed issue with Cantonese language not working using whisper engine 21 | 2. Fixed srt file generation not being compliant with srt syntax 22 | 23 | # UPDATE - v2.0-RC_1 - 04/03/2025 24 | 1. Added support for <a href="https://openai.com/index/whisper/">openAI Whisper</a> with local processing of media files as alternative to Google Speech API (where the entire media file is uploaded to Google servers for processing) 25 | 2. Added saving/loading of settings to a local SQLite db 26 | 27 | 28 | # UPDATE - v1.9 - 22/12/2022 29 | 1. Windows/Linux version compiled with Nuitka (https://github.com/Nuitka/Nuitka) instead of pyInstaller to improve stability and fix random crashes while transcribing audio. If you still experience issues please report them in the Issues section. 30 | 2. Support for Ogg/ogv/mkv/webm media files on file selector 31 | 32 | # UPDATE - v1.8 - 17/08/2022 33 | 1. Fixed bug: language codes for Chinese Languages updated according to the Speech API. Changed to "cmn-Hans-CN" and "cmn-Hant-TW" (instead of "zh" / "zh-TW"). The output was always mistakenly coming in Cantonese (yue-Hant-HK). 
Now they come properly in Traditional Chinese and Simplified Chinese. Thanks to "Specter Hi" for reporting! 34 | 2. Added GUI language switch feature 35 | 3. Updated link to funding campaign at GitHub Sponsors 36 | 37 | # UPDATE - v1.7 - 08/08/2022 38 | 1. add proxy setting 39 | 2. change the function 'pytranscriber.util.MyUtil.is_internet_connected' 40 | 3. add requirements.txt 41 | 4. rebuilt using pyInstaller 5.3 - more stability to prevent multithreading crashes on Windows 42 | 5. Added pipfile 43 | 44 |  45 | 46 | -------------------------------------------------------------------------------- /__init__.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/raryelcostasouza/pyTranscriber/38ff7bf01985a893f0483be5547c83228e74b6ec/__init__.py -------------------------------------------------------------------------------- /autosub/constants.py: -------------------------------------------------------------------------------- 1 | """ 2 | Defines constants used by autosub. 
3 | """ 4 | 5 | from __future__ import unicode_literals 6 | 7 | GOOGLE_SPEECH_API_KEY = "AIzaSyBOti4mM-6x9WDnZIjIeyEU21OpBXqWBgw" 8 | GOOGLE_SPEECH_API_URL = "http://www.google.com/speech-api/v2/recognize?client=chromium&lang={lang}&key={key}" # pylint: disable=line-too-long 9 | 10 | LANGUAGE_CODES = { 11 | 'af': 'Afrikaans', 12 | 'ar': 'Arabic', 13 | 'az': 'Azerbaijani', 14 | 'be': 'Belarusian', 15 | 'bg': 'Bulgarian', 16 | 'bn': 'Bengali', 17 | 'bs': 'Bosnian', 18 | 'ca': 'Catalan', 19 | 'ceb': 'Cebuano', 20 | 'cs': 'Czech', 21 | 'cy': 'Welsh', 22 | 'da': 'Danish', 23 | 'de': 'German', 24 | 'el': 'Greek', 25 | 'en-AU': 'English (Australia)', 26 | 'en-CA': 'English (Canada)', 27 | 'en-GB': 'English (United Kingdom)', 28 | 'en-IN': 'English (India)', 29 | 'en-IE': 'English (Ireland)', 30 | 'en-NZ': 'English (New Zealand)', 31 | 'en-PH': 'English (Philippines)', 32 | 'en-SG': 'English (Singapore)', 33 | 'en-US': 'English (United States)', 34 | 'eo': 'Esperanto', 35 | 'es-AR': 'Spanish (Argentina)', 36 | 'es-CL': 'Spanish (Chile)', 37 | 'es-ES': 'Spanish (Spain)', 38 | 'es-US': 'Spanish (United States)', 39 | 'es-MX': 'Spanish (Mexico)', 40 | 'es': 'Spanish', 41 | 'et': 'Estonian', 42 | 'eu': 'Basque', 43 | 'fa': 'Persian', 44 | 'fi': 'Finnish', 45 | 'fr': 'French', 46 | 'ga': 'Irish', 47 | 'gl': 'Galician', 48 | 'gu': 'Gujarati', 49 | 'ha': 'Hausa', 50 | 'hi': 'Hindi', 51 | 'hmn': 'Hmong', 52 | 'hr': 'Croatian', 53 | 'ht': 'Haitian Creole', 54 | 'hu': 'Hungarian', 55 | 'hy': 'Armenian', 56 | 'id': 'Indonesian', 57 | 'ig': 'Igbo', 58 | 'is': 'Icelandic', 59 | 'it': 'Italian', 60 | 'iw': 'Hebrew', 61 | 'ja': 'Japanese', 62 | 'jw': 'Javanese', 63 | 'ka': 'Georgian', 64 | 'kk': 'Kazakh', 65 | 'km': 'Khmer', 66 | 'kn': 'Kannada', 67 | 'ko': 'Korean', 68 | 'la': 'Latin', 69 | 'lo': 'Lao', 70 | 'lt': 'Lithuanian', 71 | 'lv': 'Latvian', 72 | 'mg': 'Malagasy', 73 | 'mi': 'Maori', 74 | 'mk': 'Macedonian', 75 | 'ml': 'Malayalam', 76 | 'mn': 'Mongolian', 77 | 'mr': 'Marathi', 
78 | 'ms': 'Malay', 79 | 'mt': 'Maltese', 80 | 'my': 'Myanmar (Burmese)', 81 | 'ne': 'Nepali', 82 | 'nl': 'Dutch', 83 | 'no': 'Norwegian', 84 | 'ny': 'Chichewa', 85 | 'pa': 'Punjabi', 86 | 'pl': 'Polish', 87 | 'pt-BR': 'Portuguese (Brazil)', 88 | 'pt-PT': 'Portuguese (Portugal)', 89 | 'ro': 'Romanian', 90 | 'ru': 'Russian', 91 | 'si': 'Sinhala', 92 | 'sk': 'Slovak', 93 | 'sl': 'Slovenian', 94 | 'so': 'Somali', 95 | 'sq': 'Albanian', 96 | 'sr': 'Serbian', 97 | 'st': 'Sesotho', 98 | 'su': 'Sudanese', 99 | 'sv': 'Swedish', 100 | 'sw': 'Swahili', 101 | 'ta': 'Tamil', 102 | 'te': 'Telugu', 103 | 'tg': 'Tajik', 104 | 'th': 'Thai', 105 | 'tl': 'Filipino', 106 | 'tr': 'Turkish', 107 | 'uk': 'Ukrainian', 108 | 'ur': 'Urdu', 109 | 'uz': 'Uzbek', 110 | 'vi': 'Vietnamese', 111 | 'yi': 'Yiddish', 112 | 'yo': 'Yoruba', 113 | 'yue-Hant-HK': 'Cantonese, (Traditional HK)', 114 | 'zh': 'Chinese (Simplified, China)', 115 | 'zh-HK': 'Chinese (Simplified, Hong Kong)', 116 | 'zh-TW': 'Chinese (Traditional, Taiwan)', 117 | 'zu': 'Zulu', 118 | } 119 | -------------------------------------------------------------------------------- /autosub/formatters.py: -------------------------------------------------------------------------------- 1 | """ 2 | Defines subtitle formatters used by autosub. 3 | """ 4 | 5 | # -*- coding: utf-8 -*- 6 | from __future__ import unicode_literals 7 | 8 | import json 9 | 10 | import pysrt 11 | import six 12 | 13 | 14 | def srt_formatter(subtitles, padding_before=0, padding_after=0): 15 | """ 16 | Serialize a list of subtitles according to the SRT format, with optional time padding. 
17 | """ 18 | sub_rip_file = pysrt.SubRipFile() 19 | for i, ((start, end), text) in enumerate(subtitles, start=1): 20 | item = pysrt.SubRipItem() 21 | item.index = i 22 | item.text = six.text_type(text) 23 | item.start.seconds = max(0, start - padding_before) 24 | item.end.seconds = end + padding_after 25 | sub_rip_file.append(item) 26 | return '\n'.join(six.text_type(item) for item in sub_rip_file) 27 | 28 | 29 | def vtt_formatter(subtitles, padding_before=0, padding_after=0): 30 | """ 31 | Serialize a list of subtitles according to the VTT format, with optional time padding. 32 | """ 33 | text = srt_formatter(subtitles, padding_before, padding_after) 34 | text = 'WEBVTT\n\n' + text.replace(',', '.') 35 | return text 36 | 37 | 38 | def json_formatter(subtitles): 39 | """ 40 | Serialize a list of subtitles as a JSON blob. 41 | """ 42 | subtitle_dicts = [ 43 | { 44 | 'start': start, 45 | 'end': end, 46 | 'content': text, 47 | } 48 | for ((start, end), text) 49 | in subtitles 50 | ] 51 | return json.dumps(subtitle_dicts) 52 | 53 | 54 | def raw_formatter(subtitles): 55 | """ 56 | Serialize a list of subtitles as a newline-delimited string. 
57 | """ 58 | return ' '.join(text for (_rng, text) in subtitles) 59 | 60 | 61 | FORMATTERS = { 62 | 'srt': srt_formatter, 63 | 'vtt': vtt_formatter, 64 | 'json': json_formatter, 65 | 'raw': raw_formatter, 66 | } 67 | -------------------------------------------------------------------------------- /deployment/freeze-linux-nuitka.sh: -------------------------------------------------------------------------------- 1 | #!/bin/bash 2 | 3 | pipenv shell 4 | nuitka3 --enable-plugin=pyqt5 --include-data-files="ffmpeg"="./" \ 5 | --include-data-files="pytranscriber/gui/*.qm"="pytranscriber/gui/" \ 6 | --include-data-files="venv/lib/python3.8/site-packages/whisper/assets" \ 7 | main.py \ 8 | --onefile -------------------------------------------------------------------------------- /deployment/freeze-linux.sh: -------------------------------------------------------------------------------- 1 | #!/bin/bash 2 | 3 | pipenv shell 4 | pyinstaller main.py main.spec --path="$(pwd)" --add-binary="ffmpeg:." --add-data="pytranscriber/gui/*.qm:pytranscriber/gui/" --onefile --clean 5 | -------------------------------------------------------------------------------- /deployment/freeze-nuitka-win.bat: -------------------------------------------------------------------------------- 1 | nuitka --enable-plugin=pyqt5 --include-data-files="ffmpeg.exe"="./" --include-data-files="pytranscriber/gui/*.qm"="pytranscriber/gui/" main.py --onefile --disable-console -------------------------------------------------------------------------------- /deployment/freeze-win.sh: -------------------------------------------------------------------------------- 1 | #!/bin/bash 2 | 3 | pipenv shell 4 | pyinstaller main.py --path=$pwd --add-binary="ffmpeg.exe;." 
--add-data="pytranscriber/gui/*.qm;pytranscriber/gui/" --onefile --clean 5 | -------------------------------------------------------------------------------- /deployment/nuitka-win-standalone.bat: -------------------------------------------------------------------------------- 1 | nuitka --enable-plugin=pyqt5 main.py --disable-console --standalone -------------------------------------------------------------------------------- /deployment/win/script-installer-windows-standalone.iss: -------------------------------------------------------------------------------- 1 | ; Script generated by the Inno Setup Script Wizard. 2 | ; SEE THE DOCUMENTATION FOR DETAILS ON CREATING INNO SETUP SCRIPT FILES! 3 | 4 | [Setup] 5 | ; NOTE: The value of AppId uniquely identifies this application. 6 | ; Do not use the same AppId value in installers for other applications. 7 | ; (To generate a new GUID, click Tools | Generate GUID inside the IDE.) 8 | AppId={{5240AB76-FC62-4BFA-A1EF-FA49AF701F80} 9 | AppName=pyTranscriber 10 | AppVersion=1.9 11 | AppVerName=pyTranscriber 1.9 12 | AppPublisher=Raryel C. 
Souza 13 | AppPublisherURL=https://github.com/raryelcostasouza/pyTranscriber 14 | AppSupportURL=https://github.com/raryelcostasouza/pyTranscriber 15 | AppUpdatesURL=https://github.com/raryelcostasouza/pyTranscriber 16 | DefaultDirName={pf}\pyTranscriber 17 | DisableDirPage=yes 18 | DisableProgramGroupPage=yes 19 | LicenseFile=.\LICENSE 20 | OutputBaseFilename=setup 21 | Compression=lzma 22 | SolidCompression=yes 23 | 24 | [Languages] 25 | Name: "english"; MessagesFile: "compiler:Default.isl" 26 | 27 | [Tasks] 28 | Name: "desktopicon"; Description: "{cm:CreateDesktopIcon}"; GroupDescription: "{cm:AdditionalIcons}"; Flags: unchecked 29 | 30 | [Files] 31 | Source: ".\main.dist\*"; DestDir: "{app}"; Flags: ignoreversion recursesubdirs 32 | ; NOTE: Don't use "Flags: ignoreversion" on any shared system files 33 | 34 | [Icons] 35 | Name: "{commonprograms}\pyTranscriber"; Filename: "{app}\pyTranscriber.exe" 36 | Name: "{commondesktop}\pyTranscriber"; Filename: "{app}\pyTranscriber.exe"; Tasks: desktopicon 37 | 38 | [Run] 39 | Filename: "{app}\pyTranscriber.exe"; Description: "{cm:LaunchProgram,pyTranscriber}"; Flags: nowait postinstall skipifsilent 40 | 41 | -------------------------------------------------------------------------------- /deployment/win/script-installer-windows.iss: -------------------------------------------------------------------------------- 1 | ; Script generated by the Inno Setup Script Wizard. 2 | ; SEE THE DOCUMENTATION FOR DETAILS ON CREATING INNO SETUP SCRIPT FILES! 3 | 4 | [Setup] 5 | ; NOTE: The value of AppId uniquely identifies this application. 6 | ; Do not use the same AppId value in installers for other applications. 7 | ; (To generate a new GUID, click Tools | Generate GUID inside the IDE.) 8 | AppId={{5240AB76-FC62-4BFA-A1EF-FA49AF701F80} 9 | AppName=pyTranscriber 10 | AppVersion=1.9 11 | AppVerName=pyTranscriber 1.9 12 | AppPublisher=Raryel C. 
Souza 13 | AppPublisherURL=https://github.com/raryelcostasouza/pyTranscriber 14 | AppSupportURL=https://github.com/raryelcostasouza/pyTranscriber 15 | AppUpdatesURL=https://github.com/raryelcostasouza/pyTranscriber 16 | DefaultDirName={pf}\pyTranscriber 17 | DisableDirPage=yes 18 | DisableProgramGroupPage=yes 19 | LicenseFile=.\LICENSE 20 | OutputBaseFilename=setup 21 | Compression=lzma 22 | SolidCompression=yes 23 | 24 | [Languages] 25 | Name: "english"; MessagesFile: "compiler:Default.isl" 26 | 27 | [Tasks] 28 | Name: "desktopicon"; Description: "{cm:CreateDesktopIcon}"; GroupDescription: "{cm:AdditionalIcons}"; Flags: unchecked 29 | 30 | [Files] 31 | Source: ".\dist\pyTranscriber.exe"; DestDir: "{app}"; Flags: ignoreversion 32 | ; NOTE: Don't use "Flags: ignoreversion" on any shared system files 33 | 34 | [Icons] 35 | Name: "{commonprograms}\pyTranscriber"; Filename: "{app}\pyTranscriber.exe" 36 | Name: "{commondesktop}\pyTranscriber"; Filename: "{app}\pyTranscriber.exe"; Tasks: desktopicon 37 | 38 | [Run] 39 | Filename: "{app}\pyTranscriber.exe"; Description: "{cm:LaunchProgram,pyTranscriber}"; Flags: nowait postinstall skipifsilent 40 | 41 | -------------------------------------------------------------------------------- /doc/entitlements.plist: -------------------------------------------------------------------------------- 1 | <?xml version="1.0" encoding="UTF-8"?> 2 | <!DOCTYPE plist PUBLIC "-//Apple//DTD PLIST 1.0//EN" "http://www.apple.com/DTDs/PropertyList-1.0.dtd"> 3 | <plist version="1.0"> 4 | <dict> 5 | <!-- These are required for binaries built by PyInstaller --> 6 | <key>com.apple.security.cs.allow-jit</key> 7 | <true/> 8 | <key>com.apple.security.cs.allow-unsigned-executable-memory</key> 9 | <true/> 10 | <key>com.apple.security.cs.disable-library-validation</key><true/> 11 | </dict> 12 | </plist> 13 | -------------------------------------------------------------------------------- /doc/lightning.jpeg: 
-------------------------------------------------------------------------------- https://raw.githubusercontent.com/raryelcostasouza/pyTranscriber/38ff7bf01985a893f0483be5547c83228e74b6ec/doc/lightning.jpeg -------------------------------------------------------------------------------- /doc/pyTranscriber.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/raryelcostasouza/pyTranscriber/38ff7bf01985a893f0483be5547c83228e74b6ec/doc/pyTranscriber.png -------------------------------------------------------------------------------- /doc/screenshot1.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/raryelcostasouza/pyTranscriber/38ff7bf01985a893f0483be5547c83228e74b6ec/doc/screenshot1.png -------------------------------------------------------------------------------- /doc/screenshot2.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/raryelcostasouza/pyTranscriber/38ff7bf01985a893f0483be5547c83228e74b6ec/doc/screenshot2.png -------------------------------------------------------------------------------- /doc/screenshot3.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/raryelcostasouza/pyTranscriber/38ff7bf01985a893f0483be5547c83228e74b6ec/doc/screenshot3.png -------------------------------------------------------------------------------- /doc/technical_details.md: -------------------------------------------------------------------------------- 1 | <h1>For Developers - Technical Details</h1> 2 | 3 | This app consists basically of a friendly pyQt5 graphical interface for a customized version of <a href="https://github.com/agermanidis/autosub">Autosub 0.4.0</a> that can run on Linux, Windows and MacOS. All the hard work of processing the audio and generating the subtitles is done by Autosub. 
4 | 5 | <h2>Dependencies to build</h2> 6 | 7 | <ol> 8 | <li>pip3 install pipenv 9 | <li>pipenv install (installs all dependencies from the Pipfile) 10 | <li>Download the <a href="https://johnvansickle.com/ffmpeg/">static ffmpeg binary</a> and move it to the project root folder </ol> 11 | 12 | # How to run? 13 | $ pipenv shell 14 | $ python3 main.py 15 | 16 | 17 | # How to edit the GUI? 18 | Install Qt5 Designer and open the .ui file of the window you want to edit (pytranscriber/gui/main/window_main.ui or pytranscriber/gui/proxy/window_proxy.ui) 19 | 20 | # How to convert the .ui file (qt5designer project file) to .py? 21 | $ pyuic5 window_main.ui -o window_main.py 22 | 23 | # How to generate the python bundled binary package version with ffmpeg included? 24 | 25 | # Linux: 26 | $ pyinstaller main.py --path="$(pwd)" --add-binary="ffmpeg:." --onefile --clean 27 | 28 | # Windows: 29 | $ pyinstaller main.py --path=$pwd --add-binary="ffmpeg.exe;." --onefile --clean 30 | 31 | # Mac: 32 | $ pyinstaller main.py --path="$(pwd)" --add-binary="ffmpeg:." --clean --windowed 33 | 34 | 35 | The output binary will be in the subfolder dist/main and has all dependencies included. For more details, check the PyInstaller documentation. 36 | 37 | # On Linux, how to generate a statically linked binary so it can run even on systems with an older glibc installed? 38 | 39 | As explained in the <a href="https://github.com/pyinstaller/pyinstaller/wiki/FAQ">PyInstaller FAQ</a>: 40 | > The executable that PyInstaller builds is not fully static, in that it still depends on the system libc. Under Linux, the ABI of GLIBC is backward compatible, but not forward compatible. So if you link against a newer GLIBC, you can't run the resulting executable on an older system. 41 | 42 | > <b>Solution 1)</b> To compile the Python interpreter with its modules (and also probably bootloader) on the oldest system you have around, so that it gets linked with the oldest version of GLIBC. 43 | 44 | > <b>Solution 2)</b> To use a tool like StaticX to create a fully-static bundled version of your PyInstaller application.
StaticX bundles all dependencies, including libc and ld.so. (Python code :arrow_right: PyInstaller :arrow_right: StaticX :arrow_right: Fully-static application)" 45 | 46 | <b>Install staticx and patchelf (dependency)</b> 47 | 48 | $ pip3 install --user patchelf-wrapper 49 | 50 | $ pip3 install --user staticx 51 | 52 | <b>After generating the binary with pyinstaller, open the dist folder and run: </b> 53 | 54 | $ staticx main main-static 55 | 56 | The newly created main-static contains all library dependencies, including glibc, so it should be able to run even on very old systems. 57 | 58 | Note: In my Manjaro system the first time I run this command I got an error related to "libmpdec.so.2 => not found". Installing the package <b>mpdecimal</b> on the package manager solved the issue. 59 | -------------------------------------------------------------------------------- /freeze-mac.sh: -------------------------------------------------------------------------------- 1 | #!/bin/bash 2 | 3 | pipenv shell 4 | pyinstaller main.py \ 5 | --path="$(pwd)" \ 6 | --add-binary="ffmpeg-bin/ffmpeg:." \ 7 | --add-binary="pytranscriber.sqlite:." \ 8 | --add-data="pytranscriber/gui/*.qm:pytranscriber/gui/" \ 9 | --add-data="venv/lib/python3.8/site-packages/whisper/assets:whisper/assets" \ 10 | --clean \ 11 | --windowed \ 12 | --noconfirm 13 | 14 | -------------------------------------------------------------------------------- /main.py: -------------------------------------------------------------------------------- 1 | # (C) 2019 Raryel C. Souza 2 | # This program is free software: you can redistribute it and/or modify 3 | # it under the terms of the GNU General Public License as published by 4 | # the Free Software Foundation, either version 3 of the License, or 5 | # (at your option) any later version. 
6 | # This program is distributed in the hope that it will be useful, 7 | # but WITHOUT ANY WARRANTY; without even the implied warranty of 8 | # MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the 9 | # GNU General Public License for more details. 10 | # You should have received a copy of the GNU General Public License 11 | # along with this program. If not, see <https://www.gnu.org/licenses/>. 12 | 13 | from pytranscriber.control.ctr_main import Ctr_Main 14 | from pytranscriber.gui.message_util import MessageUtil 15 | import multiprocessing 16 | import sys 17 | 18 | if __name__ == '__main__': 19 | multiprocessing.freeze_support() 20 | 21 | try: 22 | ctrMain = Ctr_Main() 23 | sys.exit(0) 24 | except Exception as ex: 25 | MessageUtil.show_error_message(str(ex), "Main Error") 26 | sys.exit(1) 27 | 28 | 29 | -------------------------------------------------------------------------------- /nuitka-win-standalone.bat: -------------------------------------------------------------------------------- 1 | nuitka --enable-plugin=pyqt5 main.py --disable-console --standalone -------------------------------------------------------------------------------- /patches/autosub-0.3.13.patch: -------------------------------------------------------------------------------- 1 | --- __init__-old.py 2019-01-27 11:18:19.560918050 +0700 2 | +++ __init__.py 2019-01-24 09:27:17.057865917 +0700 3 | @@ -262,6 +262,14 @@ 4 | 5 | return 0 6 | 7 | +def percentage(currentval, maxval): 8 | + return 100 * currentval / float(maxval) 9 | + 10 | + 11 | +def output_progress(listener_progress, str_task, progress_percent): 12 | + if listener_progress != None: 13 | + listener_progress(str_task,progress_percent) 14 | + 15 | 16 | def generate_subtitles( 17 | source_path, 18 | @@ -271,6 +279,7 @@ 19 | dst_language=DEFAULT_DST_LANGUAGE, 20 | subtitle_file_format=DEFAULT_SUBTITLE_FORMAT, 21 | api_key=None, 22 | + listener_progress=None, 23 | ): 24 | audio_filename, audio_rate = extract_audio(source_path) 
25 | 26 | @@ -284,21 +293,28 @@ 27 | transcripts = [] 28 | if regions: 29 | try: 30 | - widgets = ["Converting speech regions to FLAC files: ", Percentage(), ' ', Bar(), ' ', 31 | + str_task_1 = "Converting speech regions to FLAC files: " 32 | + widgets = [str_task_1, Percentage(), ' ', Bar(), ' ', 33 | ETA()] 34 | - pbar = ProgressBar(widgets=widgets, maxval=len(regions)).start() 35 | + len_regions = len(regions) 36 | + pbar = ProgressBar(widgets=widgets, maxval=len_regions).start() 37 | extracted_regions = [] 38 | for i, extracted_region in enumerate(pool.imap(converter, regions)): 39 | extracted_regions.append(extracted_region) 40 | pbar.update(i) 41 | + progress_percent= percentage(i, len_regions) 42 | + output_progress(listener_progress,str_task_1,progress_percent) 43 | pbar.finish() 44 | 45 | - widgets = ["Performing speech recognition: ", Percentage(), ' ', Bar(), ' ', ETA()] 46 | + str_task_2 = "Performing speech recognition: " 47 | + widgets = [str_task_2, Percentage(), ' ', Bar(), ' ', ETA()] 48 | pbar = ProgressBar(widgets=widgets, maxval=len(regions)).start() 49 | 50 | for i, transcript in enumerate(pool.imap(recognizer, extracted_regions)): 51 | transcripts.append(transcript) 52 | pbar.update(i) 53 | + progress_percent= percentage(i, len_regions) 54 | + output_progress(listener_progress,str_task_2,progress_percent) 55 | pbar.finish() 56 | 57 | if not is_same_language(src_language, dst_language): 58 | @@ -349,4 +365,5 @@ 59 | 60 | 61 | if __name__ == '__main__': 62 | + multiprocessing.freeze_support() 63 | sys.exit(main()) 64 | -------------------------------------------------------------------------------- /patches/autosub-0.4.0.patch: -------------------------------------------------------------------------------- 1 | --- __init__-0.4.0.py 2019-02-09 21:21:16.335586891 +0700 2 | +++ __init__.py 2019-02-10 21:25:41.864964164 +0700 3 | @@ -8,16 +8,22 @@ 4 | 5 | import argparse 6 | import audioop 7 | -import json 8 | import math 9 | import 
multiprocessing 10 | import os 11 | +from json import JSONDecodeError 12 | import subprocess 13 | import sys 14 | import tempfile 15 | import wave 16 | 17 | +import json 18 | import requests 19 | +try: 20 | + from json.decoder import JSONDecodeError 21 | +except ImportError: 22 | + JSONDecodeError = ValueError 23 | + 24 | from googleapiclient.discovery import build 25 | from progressbar import ProgressBar, Percentage, Bar, ETA 26 | 27 | @@ -61,8 +67,10 @@ 28 | start, end = region 29 | start = max(0, start - self.include_before) 30 | end += self.include_after 31 | - temp = tempfile.NamedTemporaryFile(suffix='.flac') 32 | - command = ["ffmpeg", "-ss", str(start), "-t", str(end - start), 33 | + #delete=False necessary for running on Windows 34 | + temp = tempfile.NamedTemporaryFile(suffix='.flac', delete=False) 35 | + program_ffmpeg = which("ffmpeg") 36 | + command = [str(program_ffmpeg), "-ss", str(start), "-t", str(end - start), 37 | "-y", "-i", self.source_path, 38 | "-loglevel", "error", temp.name] 39 | use_shell = True if os.name == "nt" else False 40 | @@ -102,6 +110,8 @@ 41 | except IndexError: 42 | # no result 43 | continue 44 | + except JSONDecodeError: 45 | + continue 46 | 47 | except KeyboardInterrupt: 48 | return None 49 | @@ -149,17 +159,25 @@ 50 | Checks whether a file is executable. 
51 | """ 52 | return os.path.isfile(file_path) and os.access(file_path, os.X_OK) 53 | - 54 | + #necessary to run on Windows 55 | + if os.name == "nt": 56 | + program += ".exe" 57 | fpath, _ = os.path.split(program) 58 | if fpath: 59 | if is_exe(program): 60 | return program 61 | else: 62 | - for path in os.environ["PATH"].split(os.pathsep): 63 | - path = path.strip('"') 64 | - exe_file = os.path.join(path, program) 65 | - if is_exe(exe_file): 66 | - return exe_file 67 | + #looks for file in the script execution folder before checking on system path 68 | + current_dir = os.getcwd() 69 | + local_program = os.path.join(current_dir, program) 70 | + if is_exe(local_program): 71 | + return local_program 72 | + else: 73 | + for path in os.environ["PATH"].split(os.pathsep): 74 | + path = path.strip('"') 75 | + exe_file = os.path.join(path, program) 76 | + if is_exe(exe_file): 77 | + return exe_file 78 | return None 79 | 80 | 81 | @@ -171,10 +189,11 @@ 82 | if not os.path.isfile(filename): 83 | print("The given file does not exist: {}".format(filename)) 84 | raise Exception("Invalid filepath: {}".format(filename)) 85 | - if not which("ffmpeg"): 86 | + program_ffmpeg = which("ffmpeg") 87 | + if not program_ffmpeg: 88 | print("ffmpeg: Executable not found on machine.") 89 | raise Exception("Dependency not found: ffmpeg") 90 | - command = ["ffmpeg", "-y", "-i", filename, 91 | + command = [str(program_ffmpeg), "-y", "-i", filename, 92 | "-ac", str(channels), "-ar", str(rate), 93 | "-loglevel", "error", temp.name] 94 | use_shell = True if os.name == "nt" else False 95 | @@ -233,6 +252,12 @@ 96 | """ 97 | Given an input audio/video file, generate subtitles in the specified language and format. 
98 | """ 99 | + 100 | + if "Darwin" in os.uname(): 101 | + #the default unix fork method does not work on Mac OS 102 | + #need to use forkserver 103 | + multiprocessing.set_start_method('forkserver') 104 | + 105 | audio_filename, audio_rate = extract_audio(source_path) 106 | 107 | regions = find_speech_regions(audio_filename) 108 | -------------------------------------------------------------------------------- /patches/note.txt: -------------------------------------------------------------------------------- 1 | The autosub version used for pyTranscriber had to be customized a little bit. 2 | The patch in this folder was made comparing the original autosub/__init__.py file from version 0.4.0 to the customized version I made. 3 | -------------------------------------------------------------------------------- /pytranscriber.sqlite: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/raryelcostasouza/pyTranscriber/38ff7bf01985a893f0483be5547c83228e74b6ec/pytranscriber.sqlite -------------------------------------------------------------------------------- /pytranscriber/__init__.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/raryelcostasouza/pyTranscriber/38ff7bf01985a893f0483be5547c83228e74b6ec/pytranscriber/__init__.py -------------------------------------------------------------------------------- /pytranscriber/control/__init__.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/raryelcostasouza/pyTranscriber/38ff7bf01985a893f0483be5547c83228e74b6ec/pytranscriber/control/__init__.py -------------------------------------------------------------------------------- /pytranscriber/control/ctr_autosub.py: -------------------------------------------------------------------------------- 1 | ''' 2 | (C) 2025 Raryel C. 
class Ctr_Autosub:
    """
    Runs the autosub pipeline (audio extraction, FLAC conversion, speech
    recognition) and exposes a cancellation hook.

    All state is stored on the class itself, so a single transcription is
    handled at a time.
    """

    # Set by cancel_operation(); checked between the pipeline stages.
    cancel = False
    # Worker pool of the stage in progress; None until a stage has created one.
    pool = None
    # ``step`` is assigned by generate_subtitles():
    # 0 = preparing, 1 = FLAC conversion, 2 = speech recognition.

    @staticmethod
    def init():
        """Clear the cancellation flag."""
        Ctr_Autosub.cancel = False

    @staticmethod
    def is_operation_canceled():
        """Return True once cancel_operation() has been requested."""
        return Ctr_Autosub.cancel

    @staticmethod
    def output_progress(listener_progress, str_task, progress_percent):
        """Forward a progress update to the listener unless a cancel is pending."""
        # only update progress if not requested to cancel
        if not Ctr_Autosub.cancel:
            listener_progress(str_task, progress_percent)

    @staticmethod
    def cancel_operation():
        """
        Request cancellation and shut down the worker pool of the running stage.

        Blocks until generate_subtitles() has reached stage 1 or 2.
        """
        Ctr_Autosub.cancel = True

        # NOTE(review): if a run ends while step is still 0 (cancelled before
        # the first pool result, or no speech regions found) this loop never
        # exits -- confirm how callers guard against that.
        while Ctr_Autosub.step == 0:
            time.sleep(0.1)

        # the first step involves ffmpeg and cannot be stopped safely
        if Ctr_Autosub.step == 1:
            # close and wait for the workers to finish their work first
            Ctr_Autosub.pool.close()
            Ctr_Autosub.pool.join()
        else:
            # terminates the workers immediately
            Ctr_Autosub.pool.terminate()
            Ctr_Autosub.pool.join()

    @staticmethod
    def generate_subtitles(  # pylint: disable=too-many-locals,too-many-arguments
            source_path,
            src_language,
            listener_progress,
            output=None,
            concurrency=DEFAULT_CONCURRENCY,
            subtitle_file_format=DEFAULT_SUBTITLE_FORMAT,
            proxies=None
    ):
        """
        Given an input audio/video file, generate subtitles in the specified language and format.

        :param source_path: path of the audio/video file to transcribe.
        :param src_language: language code handed to the speech recognizer.
        :param listener_progress: callable(task_description, percent) used for progress updates.
        :param output: destination file; defaults to ``source_path`` with the format as extension.
        :param concurrency: number of worker processes per stage.
        :param subtitle_file_format: key into FORMATTERS selecting the serializer.
        :param proxies: proxy mapping forwarded to the speech recognizer.
        :return: the path of the written subtitle file, or -1 if the operation was cancelled.
        """
        # windows not support forkserver... only spawn
        if os.name != "nt" and "Darwin" in os.uname():
            # necessary for running on MacOS
            # method can be set only once, otherwise crash
            # from python 3.8 above the default for macos is spawn and not fork
            if 'spawn' != multiprocessing.get_start_method(allow_none=True):
                multiprocessing.set_start_method('spawn')

        Ctr_Autosub.cancel = False
        Ctr_Autosub.step = 0

        audio_filename, audio_rate = extract_audio(source_path)

        regions = find_speech_regions(audio_filename)

        converter = FLACConverter(source_path=audio_filename)
        recognizer = SpeechRecognizer(language=src_language, rate=audio_rate,
                                      api_key=GOOGLE_SPEECH_API_KEY, proxies=proxies)
        transcripts = []
        if regions:
            try:
                if Ctr_Autosub.cancel:
                    return -1

                str_task_1 = "Step 1 of 2: Converting speech regions to FLAC files "
                len_regions = len(regions)
                extracted_regions = []
                Ctr_Autosub.pool = multiprocessing.Pool(concurrency)
                for i, extracted_region in enumerate(Ctr_Autosub.pool.imap(converter, regions)):
                    Ctr_Autosub.step = 1
                    extracted_regions.append(extracted_region)
                    progress_percent = MyUtil.percentage(i, len_regions)
                    Ctr_Autosub.output_progress(listener_progress, str_task_1, progress_percent)
                if Ctr_Autosub.cancel:
                    # cancel_operation() takes care of shutting the pool down
                    return -1
                Ctr_Autosub.pool.close()
                Ctr_Autosub.pool.join()

                str_task_2 = "Step 2 of 2: Performing speech recognition "
                Ctr_Autosub.pool = multiprocessing.Pool(concurrency)
                for i, transcript in enumerate(Ctr_Autosub.pool.imap(recognizer, extracted_regions)):
                    Ctr_Autosub.step = 2
                    transcripts.append(transcript)
                    progress_percent = MyUtil.percentage(i, len_regions)
                    Ctr_Autosub.output_progress(listener_progress, str_task_2, progress_percent)

                if Ctr_Autosub.cancel:
                    return -1
                Ctr_Autosub.pool.close()
                Ctr_Autosub.pool.join()

            except KeyboardInterrupt:
                # No progress bar exists in this controller; just stop the
                # workers and let the interrupt propagate.
                if Ctr_Autosub.pool is not None:
                    Ctr_Autosub.pool.terminate()
                    Ctr_Autosub.pool.join()
                raise

        # keep only the regions for which the recognizer returned text
        timed_subtitles = [(r, t) for r, t in zip(regions, transcripts) if t]
        formatter = FORMATTERS.get(subtitle_file_format)
        formatted_subtitles = formatter(timed_subtitles)

        dest = output

        if not dest:
            base = os.path.splitext(source_path)[0]
            dest = "{base}.{format}".format(base=base, format=subtitle_file_format)

        with open(dest, 'wb') as output_file:
            output_file.write(formatted_subtitles.encode("utf-8"))

        os.remove(audio_filename)

        if Ctr_Autosub.cancel:
            return -1

        # When no speech regions were found no pool was ever created.
        if Ctr_Autosub.pool is not None:
            Ctr_Autosub.pool.close()
            Ctr_Autosub.pool.join()

        return dest
class CtrDB:
    """
    Thin persistence layer over the ``pytranscriber.sqlite`` file located
    three directory levels above this module.

    Every load/save/clear call opens the connection on demand and closes it
    again when done.
    """

    # sqlite3 connection, or None while disconnected
    conn = None
    # title used for every error dialog raised by this class
    DB_ERROR = "DB Error"

    def connect(self):
        """
        Return a cursor, opening the database connection first if needed.

        Shows an error dialog and terminates the program (exit status 1) when
        the database cannot be opened.
        """
        if self.conn:
            return self.conn.cursor()

        try:
            db_path = PurePath(__file__).parent.parent.parent.joinpath('pytranscriber.sqlite')
            self.conn = sqlite3.connect(str(db_path))
            return self.conn.cursor()
        except Exception as ex:
            MessageUtil.show_error_message("ConnectDB" + str(ex), self.DB_ERROR)
            # SystemExit is raised directly: the ``exit`` helper is injected by
            # the ``site`` module and is not guaranteed to exist in frozen builds.
            raise SystemExit(1) from ex

    def close(self):
        """Close the connection if one is open; safe to call repeatedly."""
        if self.conn is not None:
            self.conn.close()
            self.conn = None

    def _load_one_row(self, table_name):
        """Return the first row of ``table_name`` (or None if empty / on error)."""
        cur = self.connect()
        try:
            # table_name only ever comes from the constants used in this class
            cur.execute('SELECT * FROM ' + table_name)
            return cur.fetchone()
        except sqlite3.Error as e:
            MessageUtil.show_error_message("LoadOneRow " + str(e), self.DB_ERROR)
            return None
        finally:
            self.close()

    def _run_write(self, query, params, error_label):
        """
        Execute one write statement, commit it and close the connection.

        Returns True on success; on sqlite3.Error shows a dialog prefixed with
        ``error_label`` and returns False.
        """
        cur = self.connect()
        try:
            cur.execute(query, params)
            self.conn.commit()
            return True
        except sqlite3.Error as e:
            MessageUtil.show_error_message(error_label + " " + str(e), self.DB_ERROR)
            return False
        finally:
            self.close()

    def _save_single_column(self, query, value):
        """Run ``query`` with ``value`` as its single bound parameter."""
        self._run_write(query, (value,), "SaveSingleColumn")

    def _truncate_table(self, table_name):
        """Delete every row of ``table_name``."""
        self._run_write('DELETE FROM ' + table_name, (), "TruncateTable")

    def load_last_language(self):
        """Return the stored row of the Language table, or None."""
        return self._load_one_row('Language')

    def clear_last_language(self):
        """Remove the stored language."""
        self._truncate_table('Language')

    def save_last_language(self, language):
        """Insert ``language`` into the Language table."""
        self._run_write('INSERT INTO Language (last_language) VALUES (?)',
                        (language,), "SaveLastLanguage")

    def load_proxy(self):
        """Return the stored row of the Proxy table, or None."""
        return self._load_one_row('Proxy')

    def clear_proxy(self):
        """Remove the stored proxy address."""
        self._truncate_table('Proxy')

    def save_proxy(self, proxy):
        """Insert ``proxy['https']`` into the Proxy table and confirm with a dialog."""
        saved = self._run_write('INSERT INTO Proxy (proxy_address) VALUES (?)',
                                (proxy['https'],), "SaveProxy")
        if saved:
            MessageUtil.show_info_message('Proxy address saved successfully', 'Proxy settings saved')
class CtrEngine:
    """Shared cancellation flag and output helper for the transcription engines."""

    # Defined at class level so is_operation_canceled() works even before
    # init() has been called.
    cancel = False

    @staticmethod
    def init():
        """Reset the cancellation flag before a new operation starts."""
        CtrEngine.cancel = False

    @staticmethod
    def is_operation_canceled():
        """Return True if cancel_operation() was called since the last init()."""
        return CtrEngine.cancel

    @staticmethod
    def cancel_operation():
        """Flag the running operation as cancelled."""
        CtrEngine.cancel = True

    @staticmethod
    def save_output_file(output_path, file_content):
        """Write ``file_content`` (a str) to ``output_path`` encoded as UTF-8."""
        # the context manager closes the file even if encoding or writing fails
        with open(output_path, 'wb') as f:
            f.write(file_content.encode("utf-8"))
class Ctr_Main:
    """Main controller: wires the database, proxy controller and main view."""

    def __init__(self):
        """Create the collaborators, restore the saved GUI language, show the view."""
        self.ctrDB = CtrDB()
        self.ctrProxy = Ctr_Proxy(self)

        self.last_language = None

        # The view receives this controller, so it is built after the
        # collaborators above have been assigned.
        self.viewMain = ViewMain(self)

        self._load_last_language()
        self.viewMain.show()

    def save_last_language(self, language):
        """Replace the stored GUI language with ``language``."""
        database = self.ctrDB
        # Clear first, then insert the new value.
        database.clear_last_language()
        database.save_last_language(language)

    def _load_last_language(self):
        """Apply the stored GUI language to the main view, if one was saved."""
        row = self.ctrDB.load_last_language()
        if row is None:
            return

        # Index 1 of the loaded row holds the language value.
        self.last_language = row[1]
        self.viewMain.set_gui_language(self.last_language)
class Ctr_Proxy:
    """Controller for the proxy settings dialog and the stored proxy address."""

    # Class-level default meaning "no proxy"; set_proxy_setting() rebinds an
    # instance attribute instead of mutating this dict.
    proxy = {
        'http': None,
        'https': None
    }

    def __init__(self, ctrMain):
        """Keep a reference to the main controller; the view is created lazily."""
        self.ctrMain = ctrMain
        self.viewProxy = None

    def show(self):
        """Create the proxy view on first use and show it."""
        if self.viewProxy is None:
            self.viewProxy = ViewProxy(self)
        self.viewProxy.show()

    def save(self):
        """Persist the current proxy, or report that the proxy was disabled."""
        database = self.ctrMain.ctrDB
        database.clear_proxy()

        if not self.proxy['https']:
            # Nothing to store: the cleared table means the proxy is disabled.
            MessageUtil.show_info_message('Proxy disabled successfully', 'Proxy disabled')
            return

        database.save_proxy(self.proxy)

    def load_data(self):
        """Load the stored proxy, if any, and push it to the view."""
        if self.viewProxy is None:
            self.viewProxy = ViewProxy(self)

        stored = self.ctrMain.ctrDB.load_proxy()
        if stored is not None:
            # Index 1 of the loaded row holds the proxy address.
            self.set_proxy_setting(stored[1], False)

    def test_proxy_setting(self, proxy_addr):
        """Check connectivity through ``proxy_addr`` and report the result."""
        candidate = {'http': proxy_addr, 'https': proxy_addr}

        if MyUtil.is_internet_connected(candidate):
            MessageUtil.show_info_message('Successfully connected to Google.', 'Success')
        else:
            MessageUtil.show_error_message('Error connecting to Google.','Error')

    def set_proxy_setting(self, proxy_addr, frontend_request=False):
        """Use ``proxy_addr`` for both http and https.

        When ``frontend_request`` is true the new value is saved; otherwise
        the view is refreshed with it.
        """
        self.proxy = {'http': proxy_addr, 'https': proxy_addr}
        if frontend_request:
            self.save()
            return
        self.viewProxy.refresh_gui(proxy_addr)

    def get_proxy_setting(self):
        """Return the current proxy mapping."""
        return self.proxy
class CtrWhisper(CtrEngine, QObject):
    """Whisper-based transcription engine.

    Loads a whisper model, transcribes a media file and writes the result as
    an SRT subtitle file and a plain-text file.
    """

    errorSignal = pyqtSignal(str)  # carries an error message to show_error_message
    MODEL_DIR = None  # set by initialize(); passed to whisper as download_root

    @classmethod
    def initialize(cls):
        """Initialize MODEL_DIR before using the class."""
        if cls.MODEL_DIR is None:
            cls.MODEL_DIR = cls.get_whisper_model_dir()

    def __init__(self):
        super().__init__()
        self.errorSignal.connect(self.show_error_message)  # Connect signal to slot

    @staticmethod
    def get_whisper_model_dir():
        """Return ``~/pytranscriber/whisper_models``, creating it if needed."""
        base_path = os.path.expanduser("~/pytranscriber")  # User's home directory

        model_dir = os.path.join(base_path, "whisper_models")
        os.makedirs(model_dir, exist_ok=True)  # Ensure directory exists
        return model_dir

    @staticmethod
    def generate_subtitles(source_path, src_language, outputSRT=None, outputTXT=None, model='base'):
        """Transcribe ``source_path`` and write the SRT and TXT outputs.

        Args:
            source_path: Path of the media file to transcribe.
            src_language: Language passed to whisper's ``transcribe``.
            outputSRT: Destination path of the subtitle file.
            outputTXT: Destination path of the plain-text file.
            model: Name of the whisper model to load.

        Returns:
            ``outputSRT`` on success, or ``-1`` if the operation was canceled
            while the transcription was running (nothing is written then).

        Raises:
            FileNotFoundError: If FFmpeg cannot be located.
        """
        CtrWhisper.patch_ffmpeg()  # Ensure FFmpeg is available

        whisper_model = whisper.load_model(model, download_root=CtrWhisper.MODEL_DIR)
        result = whisper_model.transcribe(source_path, verbose=True, language=src_language)

        if CtrEngine.is_operation_canceled():
            return -1

        segments = result["segments"]
        content_srt = CtrWhisper.generate_srt_file_content(segments)
        content_txt = CtrWhisper.generate_txt_file_content(segments)

        CtrWhisper.save_output_file(outputSRT, content_srt)
        CtrWhisper.save_output_file(outputTXT, content_txt)

        return outputSRT

    @staticmethod
    def show_error_message(message):
        """Displays the error message in a PyQt5 QMessageBox."""
        msg_box = QMessageBox()
        msg_box.setIcon(QMessageBox.Critical)
        msg_box.setWindowTitle("Error")
        msg_box.setText(message)
        msg_box.exec_()

    @staticmethod
    def _format_srt_timestamp(seconds):
        """Convert seconds to an SRT timestamp (HH:MM:SS,mmm).

        The value is rounded to whole milliseconds *before* it is split into
        fields, so a value such as 1.9996 becomes ``00:00:02,000`` instead of
        the malformed ``00:00:01,1000`` that rounding the fraction alone gave.
        """
        total_millis = int(round(seconds * 1000))
        hours, remainder = divmod(total_millis, 3_600_000)
        minutes, remainder = divmod(remainder, 60_000)
        secs, millis = divmod(remainder, 1000)
        return f"{hours:02d}:{minutes:02d}:{secs:02d},{millis:03d}"

    @staticmethod
    def generate_srt_file_content(transcribed_segments):
        """Build the SRT text for the given segments.

        Each segment must provide ``"start"``, ``"end"`` and ``"text"``.
        Entries are numbered from 1 and separated by a blank line.
        """
        fmt = CtrWhisper._format_srt_timestamp
        entries = [
            f"{i}\n{fmt(s['start'])} --> {fmt(s['end'])}\n{s['text'].strip()}\n\n"
            for i, s in enumerate(transcribed_segments, start=1)
        ]
        return "".join(entries)

    @staticmethod
    def generate_txt_file_content(transcribed_segments):
        """Concatenate the ``"text"`` of every segment, without separators."""
        return "".join(str(s["text"]) for s in transcribed_segments)

    # forces whisper to use the embedded ffmpeg in frozen app
    @staticmethod
    def patch_ffmpeg():
        """Ensure FFmpeg is correctly detected and patched for PyInstaller frozen apps.

        Safe to call repeatedly: the FFmpeg directory is added to ``PATH`` at
        most once and ``shutil.which`` is wrapped only once.

        Raises:
            FileNotFoundError: If no FFmpeg executable can be located.
        """
        if getattr(sys, "frozen", False):  # Running as a bundled executable
            ffmpeg_path = os.path.join(sys._MEIPASS, "ffmpeg")
        else:
            ffmpeg_path = shutil.which("ffmpeg")  # Use system-wide FFmpeg

        if not ffmpeg_path:
            raise FileNotFoundError("FFmpeg not found!")

        os.environ["FFMPEG_PATH"] = ffmpeg_path

        # Append the directory only if it is missing; this method runs for
        # every transcription and would otherwise grow PATH on each call.
        ffmpeg_dir = os.path.dirname(ffmpeg_path)
        current_path = os.environ.get("PATH", "")
        if ffmpeg_dir not in current_path.split(os.pathsep):
            os.environ["PATH"] = (
                current_path + os.pathsep + ffmpeg_dir if current_path else ffmpeg_dir
            )

        # Already wrapped by an earlier call: do not stack another wrapper.
        if getattr(shutil.which, "_pytranscriber_ffmpeg_patch", False):
            return

        # Monkey-patch shutil.which to always return the correct FFmpeg path
        original_which = shutil.which

        def patched_which(cmd, *args, **kwargs):
            if cmd == "ffmpeg":
                return ffmpeg_path
            return original_which(cmd, *args, **kwargs)

        patched_which._pytranscriber_ffmpeg_patch = True
        shutil.which = patched_which  # Apply the patch
class Thread_Cancel_Autosub(QThread):
    """Thread that cancels a worker object and signals when the call returned."""

    # Emitted after the worker's cancel() call has returned.
    signalTerminated = pyqtSignal()

    def __init__(self, pObjWT):
        """Store the worker whose ``cancel()`` will be invoked by ``run``."""
        super().__init__()
        self.objWT = pObjWT

    def run(self):
        """Cancel the worker, then emit ``signalTerminated``."""
        worker = self.objWT
        worker.cancel()
        self.signalTerminated.emit()
class Thread_Exec_Autosub(QThread):
    """Worker thread that runs Autosub over every selected media file.

    Progress, errors and GUI state changes are reported through the signals
    declared below.
    """

    signalLockGUI = pyqtSignal()
    signalResetGUIAfterCancel = pyqtSignal()
    signalResetGUIAfterSuccess = pyqtSignal()
    signalProgress = pyqtSignal(str, int)
    signalProgressFileYofN = pyqtSignal(str)
    signalErrorMsg = pyqtSignal(str)

    def __init__(self, objParamAutosub):
        """Store the transcription parameters and mark the thread as running."""
        self.objParamAutosub = objParamAutosub
        self.running = True
        QThread.__init__(self)

    def __updateProgressFileYofN(self, currentIndex, countFiles):
        """Emit the "File Y of N" message for the zero-based ``currentIndex``."""
        self.signalProgressFileYofN.emit("File " + str(currentIndex+1) + " of " +str(countFiles))

    def listenerProgress(self, string, percent):
        """Forward a progress message and percentage through ``signalProgress``."""
        self.signalProgress.emit(string, percent)

    def __generatePathOutputFile(self, sourceFile):
        """Return ``[srt_path, txt_path]`` for ``sourceFile``.

        Both outputs keep the input's base name (extension stripped) and are
        located in the configured output folder.
        """
        # extract the filename without extension from the path
        base = os.path.basename(sourceFile)
        # [0] is filename, [1] is file extension
        fileName = os.path.splitext(base)[0]

        pathOutputFolder = Path(self.objParamAutosub.outputFolder)
        outputFileSRT = pathOutputFolder / (fileName + ".srt")
        outputFileTXT = pathOutputFolder / (fileName + ".txt")
        return [outputFileSRT, outputFileTXT]

    def __runAutosubForMedia(self, index, langCode):
        """Generate subtitles for the file at ``index`` and report the outcome."""
        sourceFile = self.objParamAutosub.listFiles[index]
        outputFileSRT, outputFileTXT = self.__generatePathOutputFile(sourceFile)

        # Must be bound before the try block: when generate_subtitles raises,
        # the check below would otherwise fail with UnboundLocalError.
        fOutput = None

        # run autosub
        try:
            fOutput = Ctr_Autosub.generate_subtitles(source_path = sourceFile,
                                                     output = outputFileSRT,
                                                     src_language = langCode,
                                                     listener_progress = self.listenerProgress,
                                                     proxies=self.objParamAutosub.proxies)
        except Exception:
            error_msg = f"""Error! Unable to generate subtitles: {traceback.format_exc()}"""
            self.signalErrorMsg.emit(error_msg)  # Emit the full traceback

        # if nothing was returned
        if not fOutput:
            self.signalErrorMsg.emit("Error! Unable to generate subtitles for file " + sourceFile + ".")
        elif fOutput != -1:
            # -1 means the operation was canceled; anything else is success

            # updated the progress message
            self.listenerProgress("Finished", 100)

            # parses the .srt subtitle file and export text to .txt file
            SRTParser.extractTextFromSRT(str(outputFileSRT))

            if self.objParamAutosub.boolOpenOutputFilesAuto:
                # open both SRT and TXT output files
                MyUtil.open_file(outputFileTXT)
                MyUtil.open_file(outputFileSRT)

    def __loopSelectedFiles(self):
        """Process every selected file, stopping early if the user cancels."""
        self.signalLockGUI.emit()

        langCode = self.objParamAutosub.langCode

        # if output directory does not exist, creates it
        pathOutputFolder = Path(self.objParamAutosub.outputFolder)

        if not os.path.exists(pathOutputFolder):
            # makedirs also creates missing parent folders, where mkdir raised
            os.makedirs(pathOutputFolder, exist_ok=True)

        # if the output path is not a directory
        if not os.path.isdir(pathOutputFolder):
            # force the user to select a different output directory
            self.signalErrorMsg.emit("Error! Invalid output folder. Please choose another one.")
        else:
            # go ahead with autosub process
            nFiles = len(self.objParamAutosub.listFiles)
            for i in range(nFiles):
                # does not continue the loop if user clicked cancel button
                if Ctr_Autosub.is_operation_canceled():
                    break
                self.__updateProgressFileYofN(i, nFiles)
                self.__runAutosubForMedia(i, langCode)

            # if operation is canceled does not clear the file list
            if Ctr_Autosub.is_operation_canceled():
                self.signalResetGUIAfterCancel.emit()
            else:
                self.signalResetGUIAfterSuccess.emit()

    def run(self):
        """Thread entry point: reset the cancel flag and process the files."""
        Ctr_Autosub.init()
        self.__loopSelectedFiles()
        self.running = False

    def cancel(self):
        """Request cancellation of the running Autosub operation."""
        Ctr_Autosub.cancel_operation()
class ThreadExecGeneric(QThread):
    """Base worker thread shared by the transcription engines.

    Subclasses implement ``_run_engine_for_media``; this class provides the
    loop over the selected files, output path generation and cancellation.
    """

    signalLockGUI = pyqtSignal()
    signalResetGUIAfterCancel = pyqtSignal()
    signalResetGUIAfterSuccess = pyqtSignal()
    signalProgress = pyqtSignal(str, int)
    signalProgressFileYofN = pyqtSignal(str)
    signalErrorMsg = pyqtSignal(str)

    def __init__(self, obj_transcription_parameters):
        """Store the transcription parameters and mark the thread as running."""
        self.obj_transcription_parameters = obj_transcription_parameters
        self.running = True
        QThread.__init__(self)

    def listenerProgress(self, string, percent):
        """Forward a progress message and percentage through ``signalProgress``."""
        self.signalProgress.emit(string, percent)

    def _loopSelectedFiles(self):
        """Run the engine for every selected file, stopping early on cancel."""
        self.signalLockGUI.emit()

        langCode = self.obj_transcription_parameters.langCode

        # if output directory does not exist, creates it
        pathOutputFolder = Path(self.obj_transcription_parameters.outputFolder)

        if not os.path.exists(pathOutputFolder):
            # makedirs also creates missing parent folders, where mkdir raised
            os.makedirs(pathOutputFolder, exist_ok=True)

        # if the output path is not a directory
        if not os.path.isdir(pathOutputFolder):
            # force the user to select a different output directory
            self.signalErrorMsg.emit("Error! Invalid output folder. Please choose another one.")
        else:
            # go ahead with the transcription process
            nFiles = len(self.obj_transcription_parameters.listFiles)
            for i in range(nFiles):
                # does not continue the loop if user clicked cancel button
                if CtrEngine.is_operation_canceled():
                    break
                self._updateProgressFileYofN(i, nFiles)
                self._run_engine_for_media(i, langCode)

            # if operation is canceled does not clear the file list
            if CtrEngine.is_operation_canceled():
                self.signalResetGUIAfterCancel.emit()
            else:
                self.signalResetGUIAfterSuccess.emit()

    @abstractmethod
    def _run_engine_for_media(self, index, langCode):
        """Transcribe the file at ``index`` using ``langCode``.

        Hook for subclasses; the base implementation does nothing.
        """
        pass

    def _updateProgressFileYofN(self, currentIndex, countFiles):
        """Emit the "File Y of N" message for the zero-based ``currentIndex``."""
        self.signalProgressFileYofN.emit("File " + str(currentIndex + 1) + " of " + str(countFiles))

    def _generatePathOutputFile(self, sourceFile):
        """Return ``[srt_path, txt_path]`` for ``sourceFile``.

        Both outputs keep the input's base name (extension stripped) and are
        located in the configured output folder.
        """
        # extract the filename without extension from the path
        base = os.path.basename(sourceFile)
        # [0] is filename, [1] is file extension
        fileName = os.path.splitext(base)[0]

        pathOutputFolder = Path(self.obj_transcription_parameters.outputFolder)
        outputFileSRT = pathOutputFolder / (fileName + ".srt")
        outputFileTXT = pathOutputFolder / (fileName + ".txt")
        return [outputFileSRT, outputFileTXT]

    @staticmethod
    def cancel():
        """Request cancellation of the running operation."""
        CtrEngine.cancel_operation()
class Thread_Exec_Whisper(ThreadExecGeneric):
    """Worker thread that transcribes the selected files with Whisper."""

    def run(self):
        """Thread entry point: reset the cancel flag and process the files."""
        CtrWhisper.init()
        super()._loopSelectedFiles()
        self.running = False

    def _run_engine_for_media(self, index, langCode):
        """Transcribe the file at ``index`` and report the outcome via signals."""
        sourceFile = self.obj_transcription_parameters.listFiles[index]
        outputFileSRT, outputFileTXT = self._generatePathOutputFile(sourceFile)

        # Stays None when the engine raises, which is reported as a failure below.
        fOutput = None
        try:
            fOutput = CtrWhisper.generate_subtitles(
                source_path=sourceFile,
                outputSRT=outputFileSRT,
                outputTXT=outputFileTXT,
                src_language=langCode,
                model=self.obj_transcription_parameters.get_model_whisper(),
            )
        except Exception:
            # Emit the full traceback
            self.signalErrorMsg.emit(
                f"""Error! Unable to generate subtitles: {traceback.format_exc()}"""
            )

        # Nothing was returned: report the failure for this file.
        if not fOutput:
            self.signalErrorMsg.emit("Error! Unable to generate subtitles for file " + sourceFile + ".")
            return

        # -1 means the operation was canceled: nothing more to do.
        if fOutput == -1:
            return

        # Success: update the progress message.
        self.listenerProgress("Finished", 100)

        if self.obj_transcription_parameters.boolOpenOutputFilesAuto:
            # Open both output files, TXT first and then SRT.
            MyUtil.open_file(outputFileTXT)
            MyUtil.open_file(outputFileSRT)
line="134"/> 33 | <source>List of files to generate transcribe audio / generate subtitles</source> 34 | <translation>Lista de arquivos para gerar legendas/transcrever áudio</translation> 35 | </message> 36 | <message> 37 | <location filename="gui.py" line="135"/> 38 | <source>Remove file(s)</source> 39 | <translation>Remover arquivo(s)</translation> 40 | </message> 41 | <message> 42 | <location filename="gui.py" line="136"/> 43 | <source>Cancel</source> 44 | <translation>Cancelar</translation> 45 | </message> 46 | <message> 47 | <location filename="gui.py" line="137"/> 48 | <source>Open output files automatically</source> 49 | <translation>Abrir arquivos de saída automaticamente</translation> 50 | </message> 51 | <message> 52 | <location filename="gui.py" line="138"/> 53 | <source>Audio Language:</source> 54 | <translation>Idioma do áudio:</translation> 55 | </message> 56 | <message> 57 | <location filename="gui.py" line="139"/> 58 | <source>Abo&ut</source> 59 | <translation>Sob&re</translation> 60 | </message> 61 | <message> 62 | <location filename="gui.py" line="140"/> 63 | <source>&Settings</source> 64 | <translation>&Configurações</translation> 65 | </message> 66 | <message> 67 | <location filename="gui.py" line="141"/> 68 | <source>&Language</source> 69 | <translation>&Idioma</translation> 70 | </message> 71 | <message> 72 | <location filename="gui.py" line="142"/> 73 | <source>&License</source> 74 | <translation>&Licença</translation> 75 | </message> 76 | <message> 77 | <location filename="gui.py" line="143"/> 78 | <source>&Funding at Github Sponsors</source> 79 | <translation>Patrocínio no GitHub Sponsors</translation> 80 | </message> 81 | <message> 82 | <location filename="gui.py" line="144"/> 83 | <source>&More about pyTranscriber</source> 84 | <translation>&Sobre o pyTranscriber</translation> 85 | </message> 86 | <message> 87 | <location filename="gui.py" line="145"/> 88 | <source>&Proxy</source> 89 | <translation>Proxy</translation> 90 | </message> 91 | 
<message> 92 | <location filename="gui.py" line="146"/> 93 | <source>Proxy setting</source> 94 | <translation>Configurações de Proxy</translation> 95 | </message> 96 | <message> 97 | <location filename="gui.py" line="147"/> 98 | <source>English</source> 99 | <translation type="unfinished"></translation> 100 | </message> 101 | <message> 102 | <location filename="gui.py" line="148"/> 103 | <source>繁體中文 - Chinese Traditional</source> 104 | <translation type="unfinished"></translation> 105 | </message> 106 | <message> 107 | <location filename="gui.py" line="149"/> 108 | <source>简体中文 - Chinese Simplified</source> 109 | <translation type="unfinished"></translation> 110 | </message> 111 | <message> 112 | <location filename="gui.py" line="150"/> 113 | <source>Português</source> 114 | <translation type="unfinished"></translation> 115 | </message> 116 | </context> 117 | </TS> 118 | -------------------------------------------------------------------------------- /pytranscriber/gui/__init__.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/raryelcostasouza/pyTranscriber/38ff7bf01985a893f0483be5547c83228e74b6ec/pytranscriber/gui/__init__.py -------------------------------------------------------------------------------- /pytranscriber/gui/main/window_main.py: -------------------------------------------------------------------------------- 1 | # -*- coding: utf-8 -*- 2 | 3 | # Form implementation generated from reading ui file 'window_main.ui' 4 | # 5 | # Created by: PyQt5 UI code generator 5.15.4 6 | # 7 | # WARNING: Any manual changes made to this file will be lost when pyuic5 is 8 | # run again. Do not edit this file unless you know what you are doing. 
9 | 10 | 11 | from PyQt5 import QtCore, QtGui, QtWidgets 12 | 13 | 14 | class Ui_window(object): 15 | def setupUi(self, window): 16 | window.setObjectName("window") 17 | window.resize(1045, 610) 18 | self.centralwidget = QtWidgets.QWidget(window) 19 | self.centralwidget.setObjectName("centralwidget") 20 | self.bSelectMedia = QtWidgets.QPushButton(self.centralwidget) 21 | self.bSelectMedia.setGeometry(QtCore.QRect(10, 10, 141, 34)) 22 | self.bSelectMedia.setObjectName("bSelectMedia") 23 | self.bConvert = QtWidgets.QPushButton(self.centralwidget) 24 | self.bConvert.setEnabled(False) 25 | self.bConvert.setGeometry(QtCore.QRect(200, 380, 341, 34)) 26 | self.bConvert.setObjectName("bConvert") 27 | self.progressBar = QtWidgets.QProgressBar(self.centralwidget) 28 | self.progressBar.setGeometry(QtCore.QRect(20, 470, 1021, 23)) 29 | self.progressBar.setProperty("value", 0) 30 | self.progressBar.setObjectName("progressBar") 31 | self.labelCurrentOperation = QtWidgets.QLabel(self.centralwidget) 32 | self.labelCurrentOperation.setGeometry(QtCore.QRect(20, 420, 871, 41)) 33 | self.labelCurrentOperation.setText("") 34 | self.labelCurrentOperation.setObjectName("labelCurrentOperation") 35 | self.bOpenOutputFolder = QtWidgets.QPushButton(self.centralwidget) 36 | self.bOpenOutputFolder.setGeometry(QtCore.QRect(550, 380, 241, 34)) 37 | self.bOpenOutputFolder.setObjectName("bOpenOutputFolder") 38 | self.bSelectOutputFolder = QtWidgets.QPushButton(self.centralwidget) 39 | self.bSelectOutputFolder.setGeometry(QtCore.QRect(10, 180, 141, 34)) 40 | self.bSelectOutputFolder.setObjectName("bSelectOutputFolder") 41 | self.qleOutputFolder = QtWidgets.QLineEdit(self.centralwidget) 42 | self.qleOutputFolder.setGeometry(QtCore.QRect(160, 180, 861, 32)) 43 | self.qleOutputFolder.setText("") 44 | self.qleOutputFolder.setReadOnly(True) 45 | self.qleOutputFolder.setObjectName("qleOutputFolder") 46 | self.groupBox = QtWidgets.QGroupBox(self.centralwidget) 47 | 
self.groupBox.setGeometry(QtCore.QRect(160, 10, 871, 161)) 48 | self.groupBox.setAlignment(QtCore.Qt.AlignLeading|QtCore.Qt.AlignLeft|QtCore.Qt.AlignTop) 49 | self.groupBox.setFlat(False) 50 | self.groupBox.setCheckable(False) 51 | self.groupBox.setObjectName("groupBox") 52 | self.qlwListFilesSelected = QtWidgets.QListWidget(self.groupBox) 53 | self.qlwListFilesSelected.setGeometry(QtCore.QRect(10, 30, 851, 121)) 54 | self.qlwListFilesSelected.setObjectName("qlwListFilesSelected") 55 | self.bRemoveFile = QtWidgets.QPushButton(self.centralwidget) 56 | self.bRemoveFile.setGeometry(QtCore.QRect(10, 50, 141, 34)) 57 | self.bRemoveFile.setObjectName("bRemoveFile") 58 | self.labelProgressFileIndex = QtWidgets.QLabel(self.centralwidget) 59 | self.labelProgressFileIndex.setGeometry(QtCore.QRect(20, 500, 131, 41)) 60 | self.labelProgressFileIndex.setText("") 61 | self.labelProgressFileIndex.setObjectName("labelProgressFileIndex") 62 | self.bCancel = QtWidgets.QPushButton(self.centralwidget) 63 | self.bCancel.setGeometry(QtCore.QRect(470, 510, 108, 36)) 64 | self.bCancel.setObjectName("bCancel") 65 | self.chbxOpenOutputFilesAuto = QtWidgets.QCheckBox(self.centralwidget) 66 | self.chbxOpenOutputFilesAuto.setGeometry(QtCore.QRect(10, 220, 291, 32)) 67 | self.chbxOpenOutputFilesAuto.setChecked(True) 68 | self.chbxOpenOutputFilesAuto.setObjectName("chbxOpenOutputFilesAuto") 69 | self.horizontalLayoutWidget = QtWidgets.QWidget(self.centralwidget) 70 | self.horizontalLayoutWidget.setGeometry(QtCore.QRect(200, 250, 591, 38)) 71 | self.horizontalLayoutWidget.setObjectName("horizontalLayoutWidget") 72 | self.horizontalLayout_5 = QtWidgets.QHBoxLayout(self.horizontalLayoutWidget) 73 | self.horizontalLayout_5.setContentsMargins(0, 0, 0, 0) 74 | self.horizontalLayout_5.setObjectName("horizontalLayout_5") 75 | self.labelSelectLang = QtWidgets.QLabel(self.horizontalLayoutWidget) 76 | self.labelSelectLang.setObjectName("labelSelectLang") 77 | 
self.horizontalLayout_5.addWidget(self.labelSelectLang) 78 | self.cbSelectLang = QtWidgets.QComboBox(self.horizontalLayoutWidget) 79 | self.cbSelectLang.setSizeAdjustPolicy(QtWidgets.QComboBox.AdjustToContents) 80 | self.cbSelectLang.setObjectName("cbSelectLang") 81 | self.horizontalLayout_5.addWidget(self.cbSelectLang) 82 | self.horizontalLayoutWidget_2 = QtWidgets.QWidget(self.centralwidget) 83 | self.horizontalLayoutWidget_2.setGeometry(QtCore.QRect(200, 290, 591, 41)) 84 | self.horizontalLayoutWidget_2.setObjectName("horizontalLayoutWidget_2") 85 | self.horizontalLayout = QtWidgets.QHBoxLayout(self.horizontalLayoutWidget_2) 86 | self.horizontalLayout.setContentsMargins(0, 0, 0, 0) 87 | self.horizontalLayout.setObjectName("horizontalLayout") 88 | self.lEngine = QtWidgets.QLabel(self.horizontalLayoutWidget_2) 89 | self.lEngine.setObjectName("lEngine") 90 | self.horizontalLayout.addWidget(self.lEngine) 91 | self.rbGoogleEngine = QtWidgets.QRadioButton(self.horizontalLayoutWidget_2) 92 | self.rbGoogleEngine.setChecked(True) 93 | self.rbGoogleEngine.setObjectName("rbGoogleEngine") 94 | self.horizontalLayout.addWidget(self.rbGoogleEngine) 95 | self.rbWhisper = QtWidgets.QRadioButton(self.horizontalLayoutWidget_2) 96 | self.rbWhisper.setEnabled(True) 97 | self.rbWhisper.setCheckable(True) 98 | self.rbWhisper.setObjectName("rbWhisper") 99 | self.horizontalLayout.addWidget(self.rbWhisper) 100 | self.horizontalLayoutWidget_3 = QtWidgets.QWidget(self.centralwidget) 101 | self.horizontalLayoutWidget_3.setGeometry(QtCore.QRect(200, 330, 611, 31)) 102 | self.horizontalLayoutWidget_3.setObjectName("horizontalLayoutWidget_3") 103 | self.horizontalLayout_2 = QtWidgets.QHBoxLayout(self.horizontalLayoutWidget_3) 104 | self.horizontalLayout_2.setContentsMargins(0, 0, 0, 0) 105 | self.horizontalLayout_2.setObjectName("horizontalLayout_2") 106 | self.lModels = QtWidgets.QLabel(self.horizontalLayoutWidget_3) 107 | self.lModels.setEnabled(True) 108 | 
self.lModels.setObjectName("lModels") 109 | self.horizontalLayout_2.addWidget(self.lModels) 110 | self.rbModelTiny = QtWidgets.QRadioButton(self.horizontalLayoutWidget_3) 111 | self.rbModelTiny.setChecked(True) 112 | self.rbModelTiny.setObjectName("rbModelTiny") 113 | self.horizontalLayout_2.addWidget(self.rbModelTiny) 114 | self.rbModelBase = QtWidgets.QRadioButton(self.horizontalLayoutWidget_3) 115 | self.rbModelBase.setEnabled(True) 116 | self.rbModelBase.setCheckable(True) 117 | self.rbModelBase.setObjectName("rbModelBase") 118 | self.horizontalLayout_2.addWidget(self.rbModelBase) 119 | self.rbModelSmall = QtWidgets.QRadioButton(self.horizontalLayoutWidget_3) 120 | self.rbModelSmall.setEnabled(True) 121 | self.rbModelSmall.setCheckable(True) 122 | self.rbModelSmall.setObjectName("rbModelSmall") 123 | self.horizontalLayout_2.addWidget(self.rbModelSmall) 124 | self.rbModelMedium = QtWidgets.QRadioButton(self.horizontalLayoutWidget_3) 125 | self.rbModelMedium.setEnabled(True) 126 | self.rbModelMedium.setCheckable(True) 127 | self.rbModelMedium.setObjectName("rbModelMedium") 128 | self.horizontalLayout_2.addWidget(self.rbModelMedium) 129 | self.rbModelLarge = QtWidgets.QRadioButton(self.horizontalLayoutWidget_3) 130 | self.rbModelLarge.setEnabled(True) 131 | self.rbModelLarge.setCheckable(True) 132 | self.rbModelLarge.setObjectName("rbModelLarge") 133 | self.horizontalLayout_2.addWidget(self.rbModelLarge) 134 | window.setCentralWidget(self.centralwidget) 135 | self.menubar = QtWidgets.QMenuBar(window) 136 | self.menubar.setGeometry(QtCore.QRect(0, 0, 1045, 23)) 137 | self.menubar.setObjectName("menubar") 138 | self.menuAbout = QtWidgets.QMenu(self.menubar) 139 | self.menuAbout.setObjectName("menuAbout") 140 | self.menuProxy = QtWidgets.QMenu(self.menubar) 141 | self.menuProxy.setObjectName("menuProxy") 142 | self.menuLanguage = QtWidgets.QMenu(self.menubar) 143 | self.menuLanguage.setObjectName("menuLanguage") 144 | window.setMenuBar(self.menubar) 145 | 
def retranslateUi(self, window):
    """Apply the translatable texts of the main window.

    Every string is looked up through ``QCoreApplication.translate`` with
    the context name ``"window"`` and then assigned to the window title, the
    buttons, labels, radio buttons, menus and menu actions created by
    ``setupUi``.

    Args:
        window: the main window object whose title is set here.
    """
    # NOTE(review): this looks like pyuic5 output (same shape as the other
    # generated UI modules) -- presumably regenerated from window_main.ui, so
    # manual edits may be overwritten; confirm before changing the calls.
    # The calls are kept as literal translate(context, text) pairs; rewriting
    # them as a data table would likely hide the strings from the tool that
    # extracts them for the .ts catalogues -- TODO confirm.
    _translate = QtCore.QCoreApplication.translate
    window.setWindowTitle(_translate("window", "pyTranscriber v2.0 - 24/05/2025"))
    # Buttons, labels and check box of the central widget.
    self.bSelectMedia.setText(_translate("window", "Select file(s)"))
    self.bConvert.setText(_translate("window", "Transcribe Audio / Generate Subtitles"))
    self.bOpenOutputFolder.setText(_translate("window", "Open Output Folder"))
    self.bSelectOutputFolder.setText(_translate("window", "Output Location"))
    self.groupBox.setTitle(_translate("window", "List of files to generate transcribe audio / generate subtitles"))
    self.bRemoveFile.setText(_translate("window", "Remove file(s)"))
    self.bCancel.setText(_translate("window", "Cancel"))
    self.chbxOpenOutputFilesAuto.setText(_translate("window", "Open output files automatically"))
    self.labelSelectLang.setText(_translate("window", "Audio Language:"))
    # Engine selection row.
    self.lEngine.setText(_translate("window", "Engine:"))
    self.rbGoogleEngine.setText(_translate("window", "Google Speech (cloud processing)"))
    self.rbWhisper.setText(_translate("window", "openAI Whisper (local processing)"))
    # Model size selection row.
    self.lModels.setText(_translate("window", "Models:"))
    self.rbModelTiny.setText(_translate("window", "Tiny"))
    self.rbModelBase.setText(_translate("window", "Base"))
    self.rbModelSmall.setText(_translate("window", "Small"))
    self.rbModelMedium.setText(_translate("window", "Medium"))
    self.rbModelLarge.setText(_translate("window", "Large"))
    # Menu bar: menu titles followed by the actions inside them.
    self.menuAbout.setTitle(_translate("window", "Abo&ut"))
    self.menuProxy.setTitle(_translate("window", "&Settings"))
    self.menuLanguage.setTitle(_translate("window", "&Language"))
    self.actionLicense.setText(_translate("window", "&License"))
    self.actionDonation.setText(_translate("window", "&Funding at Github Sponsors"))
    self.actionAbout_pyTranscriber.setText(_translate("window", "&More about pyTranscriber"))
    self.actionProxy.setText(_translate("window", "&Proxy"))
    self.actionProxy.setToolTip(_translate("window", "Proxy setting"))
    # Interface language choices offered in the Language menu.
    self.actionEnglish.setText(_translate("window", "English"))
    self.actionChineseTraditional.setText(_translate("window", "繁體中文 - Chinese Traditional"))
    self.actionChineseSimplified.setText(_translate("window", "简体中文 - Chinese Simplified"))
    self.actionPortuguese.setText(_translate("window", "Português"))
name="labelCurrentOperation"> 60 | <property name="geometry"> 61 | <rect> 62 | <x>20</x> 63 | <y>420</y> 64 | <width>871</width> 65 | <height>41</height> 66 | </rect> 67 | </property> 68 | <property name="text"> 69 | <string/> 70 | </property> 71 | </widget> 72 | <widget class="QPushButton" name="bOpenOutputFolder"> 73 | <property name="geometry"> 74 | <rect> 75 | <x>550</x> 76 | <y>380</y> 77 | <width>241</width> 78 | <height>34</height> 79 | </rect> 80 | </property> 81 | <property name="text"> 82 | <string>Open Output Folder</string> 83 | </property> 84 | </widget> 85 | <widget class="QPushButton" name="bSelectOutputFolder"> 86 | <property name="geometry"> 87 | <rect> 88 | <x>10</x> 89 | <y>180</y> 90 | <width>141</width> 91 | <height>34</height> 92 | </rect> 93 | </property> 94 | <property name="text"> 95 | <string>Output Location</string> 96 | </property> 97 | </widget> 98 | <widget class="QLineEdit" name="qleOutputFolder"> 99 | <property name="geometry"> 100 | <rect> 101 | <x>160</x> 102 | <y>180</y> 103 | <width>861</width> 104 | <height>32</height> 105 | </rect> 106 | </property> 107 | <property name="text"> 108 | <string/> 109 | </property> 110 | <property name="readOnly"> 111 | <bool>true</bool> 112 | </property> 113 | </widget> 114 | <widget class="QGroupBox" name="groupBox"> 115 | <property name="geometry"> 116 | <rect> 117 | <x>160</x> 118 | <y>10</y> 119 | <width>871</width> 120 | <height>161</height> 121 | </rect> 122 | </property> 123 | <property name="title"> 124 | <string>List of files to generate transcribe audio / generate subtitles</string> 125 | </property> 126 | <property name="alignment"> 127 | <set>Qt::AlignLeading|Qt::AlignLeft|Qt::AlignTop</set> 128 | </property> 129 | <property name="flat"> 130 | <bool>false</bool> 131 | </property> 132 | <property name="checkable"> 133 | <bool>false</bool> 134 | </property> 135 | <widget class="QListWidget" name="qlwListFilesSelected"> 136 | <property name="geometry"> 137 | <rect> 138 | <x>10</x> 139 | 
<y>30</y> 140 | <width>851</width> 141 | <height>121</height> 142 | </rect> 143 | </property> 144 | </widget> 145 | </widget> 146 | <widget class="QPushButton" name="bRemoveFile"> 147 | <property name="geometry"> 148 | <rect> 149 | <x>10</x> 150 | <y>50</y> 151 | <width>141</width> 152 | <height>34</height> 153 | </rect> 154 | </property> 155 | <property name="text"> 156 | <string>Remove file(s)</string> 157 | </property> 158 | </widget> 159 | <widget class="QLabel" name="labelProgressFileIndex"> 160 | <property name="geometry"> 161 | <rect> 162 | <x>20</x> 163 | <y>500</y> 164 | <width>131</width> 165 | <height>41</height> 166 | </rect> 167 | </property> 168 | <property name="text"> 169 | <string/> 170 | </property> 171 | </widget> 172 | <widget class="QPushButton" name="bCancel"> 173 | <property name="geometry"> 174 | <rect> 175 | <x>470</x> 176 | <y>510</y> 177 | <width>108</width> 178 | <height>36</height> 179 | </rect> 180 | </property> 181 | <property name="text"> 182 | <string>Cancel</string> 183 | </property> 184 | </widget> 185 | <widget class="QCheckBox" name="chbxOpenOutputFilesAuto"> 186 | <property name="geometry"> 187 | <rect> 188 | <x>10</x> 189 | <y>220</y> 190 | <width>291</width> 191 | <height>32</height> 192 | </rect> 193 | </property> 194 | <property name="text"> 195 | <string>Open output files automatically</string> 196 | </property> 197 | <property name="checked"> 198 | <bool>true</bool> 199 | </property> 200 | </widget> 201 | <widget class="QWidget" name="horizontalLayoutWidget"> 202 | <property name="geometry"> 203 | <rect> 204 | <x>200</x> 205 | <y>250</y> 206 | <width>591</width> 207 | <height>38</height> 208 | </rect> 209 | </property> 210 | <layout class="QHBoxLayout" name="horizontalLayout_5"> 211 | <item> 212 | <widget class="QLabel" name="labelSelectLang"> 213 | <property name="text"> 214 | <string>Audio Language:</string> 215 | </property> 216 | </widget> 217 | </item> 218 | <item> 219 | <widget class="QComboBox" name="cbSelectLang"> 
220 | <property name="sizeAdjustPolicy"> 221 | <enum>QComboBox::AdjustToContents</enum> 222 | </property> 223 | </widget> 224 | </item> 225 | </layout> 226 | </widget> 227 | <widget class="QWidget" name="horizontalLayoutWidget_2"> 228 | <property name="geometry"> 229 | <rect> 230 | <x>200</x> 231 | <y>290</y> 232 | <width>591</width> 233 | <height>41</height> 234 | </rect> 235 | </property> 236 | <layout class="QHBoxLayout" name="horizontalLayout"> 237 | <item> 238 | <widget class="QLabel" name="lEngine"> 239 | <property name="text"> 240 | <string>Engine:</string> 241 | </property> 242 | </widget> 243 | </item> 244 | <item> 245 | <widget class="QRadioButton" name="rbGoogleEngine"> 246 | <property name="text"> 247 | <string>Google Speech (cloud processing)</string> 248 | </property> 249 | <property name="checked"> 250 | <bool>true</bool> 251 | </property> 252 | </widget> 253 | </item> 254 | <item> 255 | <widget class="QRadioButton" name="rbWhisper"> 256 | <property name="enabled"> 257 | <bool>true</bool> 258 | </property> 259 | <property name="text"> 260 | <string>openAI Whisper (local processing)</string> 261 | </property> 262 | <property name="checkable"> 263 | <bool>true</bool> 264 | </property> 265 | </widget> 266 | </item> 267 | </layout> 268 | </widget> 269 | <widget class="QWidget" name="horizontalLayoutWidget_3"> 270 | <property name="geometry"> 271 | <rect> 272 | <x>200</x> 273 | <y>330</y> 274 | <width>611</width> 275 | <height>31</height> 276 | </rect> 277 | </property> 278 | <layout class="QHBoxLayout" name="horizontalLayout_2"> 279 | <item> 280 | <widget class="QLabel" name="lModels"> 281 | <property name="enabled"> 282 | <bool>true</bool> 283 | </property> 284 | <property name="text"> 285 | <string>Models:</string> 286 | </property> 287 | </widget> 288 | </item> 289 | <item> 290 | <widget class="QRadioButton" name="rbModelTiny"> 291 | <property name="text"> 292 | <string>Tiny</string> 293 | </property> 294 | <property name="checked"> 295 | 
<bool>true</bool> 296 | </property> 297 | </widget> 298 | </item> 299 | <item> 300 | <widget class="QRadioButton" name="rbModelBase"> 301 | <property name="enabled"> 302 | <bool>true</bool> 303 | </property> 304 | <property name="text"> 305 | <string>Base</string> 306 | </property> 307 | <property name="checkable"> 308 | <bool>true</bool> 309 | </property> 310 | </widget> 311 | </item> 312 | <item> 313 | <widget class="QRadioButton" name="rbModelSmall"> 314 | <property name="enabled"> 315 | <bool>true</bool> 316 | </property> 317 | <property name="text"> 318 | <string>Small</string> 319 | </property> 320 | <property name="checkable"> 321 | <bool>true</bool> 322 | </property> 323 | </widget> 324 | </item> 325 | <item> 326 | <widget class="QRadioButton" name="rbModelMedium"> 327 | <property name="enabled"> 328 | <bool>true</bool> 329 | </property> 330 | <property name="text"> 331 | <string>Medium</string> 332 | </property> 333 | <property name="checkable"> 334 | <bool>true</bool> 335 | </property> 336 | </widget> 337 | </item> 338 | <item> 339 | <widget class="QRadioButton" name="rbModelLarge"> 340 | <property name="enabled"> 341 | <bool>true</bool> 342 | </property> 343 | <property name="text"> 344 | <string>Large</string> 345 | </property> 346 | <property name="checkable"> 347 | <bool>true</bool> 348 | </property> 349 | </widget> 350 | </item> 351 | </layout> 352 | </widget> 353 | </widget> 354 | <widget class="QMenuBar" name="menubar"> 355 | <property name="geometry"> 356 | <rect> 357 | <x>0</x> 358 | <y>0</y> 359 | <width>1045</width> 360 | <height>23</height> 361 | </rect> 362 | </property> 363 | <widget class="QMenu" name="menuAbout"> 364 | <property name="title"> 365 | <string>Abo&ut</string> 366 | </property> 367 | <addaction name="actionLicense"/> 368 | <addaction name="actionDonation"/> 369 | <addaction name="actionAbout_pyTranscriber"/> 370 | </widget> 371 | <widget class="QMenu" name="menuProxy"> 372 | <property name="title"> 373 | 
<string>&Settings</string> 374 | </property> 375 | <addaction name="actionProxy"/> 376 | </widget> 377 | <widget class="QMenu" name="menuLanguage"> 378 | <property name="title"> 379 | <string>&Language</string> 380 | </property> 381 | <addaction name="actionEnglish"/> 382 | <addaction name="actionChineseTraditional"/> 383 | <addaction name="actionChineseSimplified"/> 384 | <addaction name="actionPortuguese"/> 385 | </widget> 386 | <addaction name="menuProxy"/> 387 | <addaction name="menuLanguage"/> 388 | <addaction name="menuAbout"/> 389 | </widget> 390 | <widget class="QStatusBar" name="statusbar"/> 391 | <action name="actionLicense"> 392 | <property name="text"> 393 | <string>&License</string> 394 | </property> 395 | </action> 396 | <action name="actionDonation"> 397 | <property name="text"> 398 | <string>&Funding at Github Sponsors</string> 399 | </property> 400 | </action> 401 | <action name="actionAbout_pyTranscriber"> 402 | <property name="text"> 403 | <string>&About pyTranscriber</string> 404 | </property> 405 | </action> 406 | <action name="actionProxy"> 407 | <property name="text"> 408 | <string>&Proxy</string> 409 | </property> 410 | <property name="toolTip"> 411 | <string>Proxy setting</string> 412 | </property> 413 | </action> 414 | <action name="actionEnglish"> 415 | <property name="text"> 416 | <string>English</string> 417 | </property> 418 | </action> 419 | <action name="actionChineseTraditional"> 420 | <property name="text"> 421 | <string>繁體中文 - Chinese Traditional</string> 422 | </property> 423 | </action> 424 | <action name="actionChineseSimplified"> 425 | <property name="text"> 426 | <string>简体中文 - Chinese Simplified</string> 427 | </property> 428 | </action> 429 | <action name="actionPortuguese"> 430 | <property name="text"> 431 | <string>Português</string> 432 | </property> 433 | </action> 434 | </widget> 435 | <resources/> 436 | <connections/> 437 | </ui> 438 | -------------------------------------------------------------------------------- 
class MessageUtil:
    """Static helpers that pop up a QMessageBox with an icon, title and text."""

    @staticmethod
    def _popup(icon, text, title):
        """Create a message box with the given icon, title and text and run it."""
        box = QMessageBox()
        box.setIcon(icon)

        box.setWindowTitle(title)
        box.setText(text)
        box.exec()

    @staticmethod
    def show_info_message(info_msg, title=""):
        """Show ``info_msg`` in a box carrying the Information icon.

        Args:
            info_msg: text displayed in the box.
            title: window title of the box; empty by default.
        """
        MessageUtil._popup(QMessageBox.Information, info_msg, title)

    @staticmethod
    def show_error_message(error_msg, title="Error"):
        """Show ``error_msg`` in a box carrying the Critical icon.

        Args:
            error_msg: text displayed in the box.
            title: window title of the box; ``"Error"`` by default.
        """
        MessageUtil._popup(QMessageBox.Critical, error_msg, title)
class Ui_Dialog(object):
    """Generated UI definition of the proxy settings dialog (from proxy.ui).

    The file header warns that manual changes are lost when pyuic5 is run
    again, so only comments are added here.
    """
    # NOTE(review): the repository tree also lists a ``proxy/`` package next
    # to this module, and the view class imports ``Ui_Dialog`` from
    # ``pytranscriber.gui.proxy.window_proxy`` -- this module may be an
    # unused older copy; confirm before relying on it.

    def setupUi(self, Dialog):
        """Create the dialog's widgets and layouts and connect their signals.

        Args:
            Dialog: the dialog object that receives the layouts and widgets.
        """
        Dialog.setObjectName("Dialog")
        Dialog.resize(500, 120)
        Dialog.setAutoFillBackground(False)
        Dialog.setSizeGripEnabled(False)
        # Outer vertical layout: the group box on top, the button box below.
        self.verticalLayout = QtWidgets.QVBoxLayout(Dialog)
        self.verticalLayout.setObjectName("verticalLayout")
        self.groupBox = QtWidgets.QGroupBox(Dialog)
        self.groupBox.setTitle("")
        self.groupBox.setObjectName("groupBox")
        self.verticalLayout_2 = QtWidgets.QVBoxLayout(self.groupBox)
        self.verticalLayout_2.setObjectName("verticalLayout_2")
        # "None" radio button, checked by default.
        self.radioButtonNone = QtWidgets.QRadioButton(self.groupBox)
        sizePolicy = QtWidgets.QSizePolicy(QtWidgets.QSizePolicy.Minimum, QtWidgets.QSizePolicy.Fixed)
        sizePolicy.setHorizontalStretch(0)
        sizePolicy.setVerticalStretch(0)
        sizePolicy.setHeightForWidth(self.radioButtonNone.sizePolicy().hasHeightForWidth())
        self.radioButtonNone.setSizePolicy(sizePolicy)
        self.radioButtonNone.setChecked(True)
        self.radioButtonNone.setObjectName("radioButtonNone")
        self.verticalLayout_2.addWidget(self.radioButtonNone)
        # "HTTP" radio button, same size policy as the one above.
        self.radioButtonHTTP = QtWidgets.QRadioButton(self.groupBox)
        self.radioButtonHTTP.setEnabled(True)
        sizePolicy = QtWidgets.QSizePolicy(QtWidgets.QSizePolicy.Minimum, QtWidgets.QSizePolicy.Fixed)
        sizePolicy.setHorizontalStretch(0)
        sizePolicy.setVerticalStretch(0)
        sizePolicy.setHeightForWidth(self.radioButtonHTTP.sizePolicy().hasHeightForWidth())
        self.radioButtonHTTP.setSizePolicy(sizePolicy)
        self.radioButtonHTTP.setObjectName("radioButtonHTTP")
        self.verticalLayout_2.addWidget(self.radioButtonHTTP)
        # Grid row 0: label (column 0), URL line edit (column 1), Test button (column 2).
        self.gridLayout = QtWidgets.QGridLayout()
        self.gridLayout.setObjectName("gridLayout")
        self.lineEditHttpProxy = QtWidgets.QLineEdit(self.groupBox)
        self.lineEditHttpProxy.setToolTip("")
        self.lineEditHttpProxy.setStatusTip("")
        self.lineEditHttpProxy.setInputMethodHints(QtCore.Qt.ImhUrlCharactersOnly)
        self.lineEditHttpProxy.setObjectName("lineEditHttpProxy")
        self.gridLayout.addWidget(self.lineEditHttpProxy, 0, 1, 1, 1)
        self.label = QtWidgets.QLabel(self.groupBox)
        self.label.setObjectName("label")
        self.gridLayout.addWidget(self.label, 0, 0, 1, 1)
        self.pushButtonTest = QtWidgets.QPushButton(self.groupBox)
        self.pushButtonTest.setEnabled(True)
        self.pushButtonTest.setObjectName("pushButtonTest")
        self.gridLayout.addWidget(self.pushButtonTest, 0, 2, 1, 1)
        self.verticalLayout_2.addLayout(self.gridLayout)
        self.verticalLayout.addWidget(self.groupBox)
        # Standard Cancel / Ok buttons.
        self.buttonBox = QtWidgets.QDialogButtonBox(Dialog)
        self.buttonBox.setOrientation(QtCore.Qt.Horizontal)
        self.buttonBox.setStandardButtons(QtWidgets.QDialogButtonBox.Cancel|QtWidgets.QDialogButtonBox.Ok)
        self.buttonBox.setObjectName("buttonBox")
        self.verticalLayout.addWidget(self.buttonBox)

        self.retranslateUi(Dialog)
        # Ok / Cancel map to the dialog's accept / reject slots.
        self.buttonBox.accepted.connect(Dialog.accept)
        self.buttonBox.rejected.connect(Dialog.reject)
        # The bool carried by each radio button's clicked signal is passed to
        # setDisabled (for "None") or setEnabled (for "HTTP") of the URL field
        # and the Test button.
        self.radioButtonNone.clicked['bool'].connect(self.lineEditHttpProxy.setDisabled)
        self.radioButtonNone.clicked['bool'].connect(self.pushButtonTest.setDisabled)
        self.radioButtonHTTP.clicked['bool'].connect(self.pushButtonTest.setEnabled)
        self.radioButtonHTTP.clicked['bool'].connect(self.lineEditHttpProxy.setEnabled)
        QtCore.QMetaObject.connectSlotsByName(Dialog)

    def retranslateUi(self, Dialog):
        """Set the translatable texts of the dialog (translation context "Dialog").

        Args:
            Dialog: the dialog object whose window title is set.
        """
        _translate = QtCore.QCoreApplication.translate
        Dialog.setWindowTitle(_translate("Dialog", "Proxy setting"))
        self.radioButtonNone.setText(_translate("Dialog", "None"))
        self.radioButtonHTTP.setText(_translate("Dialog", "HTTP"))
        self.lineEditHttpProxy.setPlaceholderText(_translate("Dialog", "http://127.0.0.1:1080"))
        self.label.setText(_translate("Dialog", "URL:"))
        self.pushButtonTest.setText(_translate("Dialog", "Test"))
name="text"> 67 | <string>HTTP</string> 68 | </property> 69 | </widget> 70 | </item> 71 | <item> 72 | <layout class="QGridLayout" name="gridLayout"> 73 | <item row="0" column="1"> 74 | <widget class="QLineEdit" name="lineEditHttpProxy"> 75 | <property name="toolTip"> 76 | <string/> 77 | </property> 78 | <property name="statusTip"> 79 | <string/> 80 | </property> 81 | <property name="inputMethodHints"> 82 | <set>Qt::ImhUrlCharactersOnly</set> 83 | </property> 84 | <property name="placeholderText"> 85 | <string>http://127.0.0.1:1080</string> 86 | </property> 87 | </widget> 88 | </item> 89 | <item row="0" column="0"> 90 | <widget class="QLabel" name="label"> 91 | <property name="font"> 92 | <font> 93 | <pointsize>9</pointsize> 94 | </font> 95 | </property> 96 | <property name="text"> 97 | <string>URL:</string> 98 | </property> 99 | </widget> 100 | </item> 101 | <item row="0" column="2"> 102 | <widget class="QPushButton" name="pushButtonTest"> 103 | <property name="enabled"> 104 | <bool>true</bool> 105 | </property> 106 | <property name="font"> 107 | <font> 108 | <pointsize>9</pointsize> 109 | </font> 110 | </property> 111 | <property name="text"> 112 | <string>Test</string> 113 | </property> 114 | </widget> 115 | </item> 116 | </layout> 117 | </item> 118 | </layout> 119 | </widget> 120 | </item> 121 | <item> 122 | <widget class="QDialogButtonBox" name="buttonBox"> 123 | <property name="orientation"> 124 | <enum>Qt::Horizontal</enum> 125 | </property> 126 | <property name="standardButtons"> 127 | <set>QDialogButtonBox::Cancel|QDialogButtonBox::Ok</set> 128 | </property> 129 | </widget> 130 | </item> 131 | </layout> 132 | </widget> 133 | <resources/> 134 | <connections> 135 | <connection> 136 | <sender>buttonBox</sender> 137 | <signal>accepted()</signal> 138 | <receiver>Dialog</receiver> 139 | <slot>accept()</slot> 140 | <hints> 141 | <hint type="sourcelabel"> 142 | <x>224</x> 143 | <y>100</y> 144 | </hint> 145 | <hint type="destinationlabel"> 146 | <x>157</x> 147 | 
<y>108</y> 148 | </hint> 149 | </hints> 150 | </connection> 151 | <connection> 152 | <sender>buttonBox</sender> 153 | <signal>rejected()</signal> 154 | <receiver>Dialog</receiver> 155 | <slot>reject()</slot> 156 | <hints> 157 | <hint type="sourcelabel"> 158 | <x>255</x> 159 | <y>102</y> 160 | </hint> 161 | <hint type="destinationlabel"> 162 | <x>261</x> 163 | <y>108</y> 164 | </hint> 165 | </hints> 166 | </connection> 167 | <connection> 168 | <sender>radioButtonNone</sender> 169 | <signal>clicked(bool)</signal> 170 | <receiver>lineEditHttpProxy</receiver> 171 | <slot>setDisabled(bool)</slot> 172 | <hints> 173 | <hint type="sourcelabel"> 174 | <x>130</x> 175 | <y>19</y> 176 | </hint> 177 | <hint type="destinationlabel"> 178 | <x>111</x> 179 | <y>60</y> 180 | </hint> 181 | </hints> 182 | </connection> 183 | <connection> 184 | <sender>radioButtonNone</sender> 185 | <signal>clicked(bool)</signal> 186 | <receiver>pushButtonTest</receiver> 187 | <slot>setDisabled(bool)</slot> 188 | <hints> 189 | <hint type="sourcelabel"> 190 | <x>130</x> 191 | <y>19</y> 192 | </hint> 193 | <hint type="destinationlabel"> 194 | <x>219</x> 195 | <y>60</y> 196 | </hint> 197 | </hints> 198 | </connection> 199 | <connection> 200 | <sender>radioButtonHTTP</sender> 201 | <signal>clicked(bool)</signal> 202 | <receiver>pushButtonTest</receiver> 203 | <slot>setEnabled(bool)</slot> 204 | <hints> 205 | <hint type="sourcelabel"> 206 | <x>130</x> 207 | <y>36</y> 208 | </hint> 209 | <hint type="destinationlabel"> 210 | <x>219</x> 211 | <y>60</y> 212 | </hint> 213 | </hints> 214 | </connection> 215 | <connection> 216 | <sender>radioButtonHTTP</sender> 217 | <signal>clicked(bool)</signal> 218 | <receiver>lineEditHttpProxy</receiver> 219 | <slot>setEnabled(bool)</slot> 220 | <hints> 221 | <hint type="sourcelabel"> 222 | <x>130</x> 223 | <y>36</y> 224 | </hint> 225 | <hint type="destinationlabel"> 226 | <x>111</x> 227 | <y>60</y> 228 | </hint> 229 | </hints> 230 | </connection> 231 | </connections> 232 | 
</ui> 233 | -------------------------------------------------------------------------------- /pytranscriber/gui/proxy/__init__.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/raryelcostasouza/pyTranscriber/38ff7bf01985a893f0483be5547c83228e74b6ec/pytranscriber/gui/proxy/__init__.py -------------------------------------------------------------------------------- /pytranscriber/gui/proxy/view_proxy.py: -------------------------------------------------------------------------------- 1 | from PyQt5.QtWidgets import QDialog 2 | from pytranscriber.gui.proxy.window_proxy import Ui_Dialog 3 | from pytranscriber.gui.message_util import MessageUtil 4 | 5 | 6 | class ViewProxy: 7 | 8 | def __init__(self, ctr_proxy): 9 | self.ctr_proxy = ctr_proxy 10 | self.proxy_dialog = QDialog() 11 | loaded_proxy_dialog = Ui_Dialog() 12 | loaded_proxy_dialog.setupUi(self.proxy_dialog) 13 | 14 | self.radioButtonNone = loaded_proxy_dialog.radioButtonNone 15 | self.radioButtonHTTP = loaded_proxy_dialog.radioButtonHTTP 16 | self.radioButtonNone.clicked.connect(self.__listener_rbOnClicked) 17 | self.lineEditHttpProxy = loaded_proxy_dialog.lineEditHttpProxy 18 | self.lineEditHttpProxy.textChanged.connect(self.__listenerLineEditInput) 19 | self.pushButtonTest = loaded_proxy_dialog.pushButtonTest 20 | self.bSave = loaded_proxy_dialog.bSave 21 | 22 | self.pushButtonTest.clicked.connect(self.__listener_test) 23 | self.bSave.clicked.connect(self.__listener_save) 24 | self.__clear_proxy_settings() 25 | 26 | def show(self): 27 | self.ctr_proxy.load_data() 28 | self.proxy_dialog.exec_() 29 | 30 | def __clear_proxy_settings(self): 31 | self.radioButtonNone.setChecked(True) 32 | self.lineEditHttpProxy.setEnabled(False) 33 | self.pushButtonTest.setEnabled(False) 34 | 35 | def refresh_gui(self, proxy_address=None): 36 | if not proxy_address: 37 | self.__clear_proxy_settings() 38 | else: 39 | self.radioButtonHTTP.setChecked(True) 40 | 
self.lineEditHttpProxy.setEnabled(True) 41 | self.pushButtonTest.setEnabled(True) 42 | self.lineEditHttpProxy.setText(str(proxy_address)) 43 | 44 | def __listener_test(self): 45 | proxy_input = self.lineEditHttpProxy.text() 46 | 47 | if proxy_input and self.radioButtonHTTP.isChecked(): 48 | self.ctr_proxy.test_proxy_setting(proxy_input) 49 | 50 | def __listener_save(self): 51 | proxy_input = self.lineEditHttpProxy.text() 52 | 53 | if proxy_input and self.radioButtonHTTP.isChecked(): 54 | self.ctr_proxy.set_proxy_setting(proxy_input, True) 55 | elif self.radioButtonNone.isChecked(): 56 | self.ctr_proxy.set_proxy_setting('',True) 57 | 58 | def __listener_rbOnClicked(self): 59 | if self.radioButtonNone.isChecked(): 60 | self.lineEditHttpProxy.setText('') 61 | 62 | def __listenerLineEditInput(self): 63 | if self.lineEditHttpProxy.text(): 64 | self.pushButtonTest.setEnabled(True) 65 | else: 66 | self.pushButtonTest.setEnabled(False) 67 | 68 | -------------------------------------------------------------------------------- /pytranscriber/gui/proxy/window_proxy.py: -------------------------------------------------------------------------------- 1 | # -*- coding: utf-8 -*- 2 | 3 | # Form implementation generated from reading ui file 'window_proxy.ui' 4 | # 5 | # Created by: PyQt5 UI code generator 5.15.4 6 | # 7 | # WARNING: Any manual changes made to this file will be lost when pyuic5 is 8 | # run again. Do not edit this file unless you know what you are doing. 
class Ui_Dialog(object):
    # Generated by pyuic5 from 'window_proxy.ui' (see the file header warning):
    # manual edits here are lost when the form is regenerated.

    def setupUi(self, Dialog):
        # Builds the widget tree of the proxy dialog on the given QDialog and
        # stores every widget/layout as an attribute of this object.
        Dialog.setObjectName("Dialog")
        Dialog.resize(381, 171)
        Dialog.setAutoFillBackground(False)
        Dialog.setSizeGripEnabled(False)
        # Outer layout -> untitled group box -> inner vertical layout.
        self.verticalLayout = QtWidgets.QVBoxLayout(Dialog)
        self.verticalLayout.setObjectName("verticalLayout")
        self.groupBox = QtWidgets.QGroupBox(Dialog)
        self.groupBox.setTitle("")
        self.groupBox.setObjectName("groupBox")
        self.verticalLayout_2 = QtWidgets.QVBoxLayout(self.groupBox)
        self.verticalLayout_2.setObjectName("verticalLayout_2")
        # Radio button "Disabled" (text set in retranslateUi); checked by default.
        self.radioButtonNone = QtWidgets.QRadioButton(self.groupBox)
        sizePolicy = QtWidgets.QSizePolicy(QtWidgets.QSizePolicy.Minimum, QtWidgets.QSizePolicy.Fixed)
        sizePolicy.setHorizontalStretch(0)
        sizePolicy.setVerticalStretch(0)
        sizePolicy.setHeightForWidth(self.radioButtonNone.sizePolicy().hasHeightForWidth())
        self.radioButtonNone.setSizePolicy(sizePolicy)
        font = QtGui.QFont()
        font.setPointSize(9)
        self.radioButtonNone.setFont(font)
        self.radioButtonNone.setChecked(True)
        self.radioButtonNone.setObjectName("radioButtonNone")
        self.verticalLayout_2.addWidget(self.radioButtonNone)
        # Radio button "Enabled" (HTTP proxy).
        self.radioButtonHTTP = QtWidgets.QRadioButton(self.groupBox)
        self.radioButtonHTTP.setEnabled(True)
        sizePolicy = QtWidgets.QSizePolicy(QtWidgets.QSizePolicy.Minimum, QtWidgets.QSizePolicy.Fixed)
        sizePolicy.setHorizontalStretch(0)
        sizePolicy.setVerticalStretch(0)
        sizePolicy.setHeightForWidth(self.radioButtonHTTP.sizePolicy().hasHeightForWidth())
        self.radioButtonHTTP.setSizePolicy(sizePolicy)
        font = QtGui.QFont()
        font.setPointSize(9)
        self.radioButtonHTTP.setFont(font)
        self.radioButtonHTTP.setObjectName("radioButtonHTTP")
        self.verticalLayout_2.addWidget(self.radioButtonHTTP)
        # Grid: row 0 = label | URL line edit | Test button; row 1 = Save button (column 2).
        self.gridLayout = QtWidgets.QGridLayout()
        self.gridLayout.setObjectName("gridLayout")
        self.pushButtonTest = QtWidgets.QPushButton(self.groupBox)
        self.pushButtonTest.setEnabled(True)
        font = QtGui.QFont()
        font.setPointSize(9)
        self.pushButtonTest.setFont(font)
        self.pushButtonTest.setObjectName("pushButtonTest")
        self.gridLayout.addWidget(self.pushButtonTest, 0, 2, 1, 1)
        self.lineEditHttpProxy = QtWidgets.QLineEdit(self.groupBox)
        self.lineEditHttpProxy.setToolTip("")
        self.lineEditHttpProxy.setStatusTip("")
        self.lineEditHttpProxy.setInputMethodHints(QtCore.Qt.ImhUrlCharactersOnly)
        self.lineEditHttpProxy.setObjectName("lineEditHttpProxy")
        self.gridLayout.addWidget(self.lineEditHttpProxy, 0, 1, 1, 1)
        self.label = QtWidgets.QLabel(self.groupBox)
        font = QtGui.QFont()
        font.setPointSize(9)
        self.label.setFont(font)
        self.label.setObjectName("label")
        self.gridLayout.addWidget(self.label, 0, 0, 1, 1)
        self.bSave = QtWidgets.QPushButton(self.groupBox)
        font = QtGui.QFont()
        font.setPointSize(9)
        self.bSave.setFont(font)
        self.bSave.setObjectName("bSave")
        self.gridLayout.addWidget(self.bSave, 1, 2, 1, 1)
        self.verticalLayout_2.addLayout(self.gridLayout)
        self.verticalLayout.addWidget(self.groupBox)

        self.retranslateUi(Dialog)
        # The clicked(bool) value of each radio button is passed straight to the
        # slot: "Disabled" feeds setDisabled on the URL field and the Test
        # button, "Enabled" feeds setEnabled on the URL field only.
        self.radioButtonNone.clicked['bool'].connect(self.lineEditHttpProxy.setDisabled)
        self.radioButtonNone.clicked['bool'].connect(self.pushButtonTest.setDisabled)
        self.radioButtonHTTP.clicked['bool'].connect(self.lineEditHttpProxy.setEnabled)
        QtCore.QMetaObject.connectSlotsByName(Dialog)

    def retranslateUi(self, Dialog):
        # Sets every user-visible string through QCoreApplication.translate
        # using the "Dialog" translation context.
        _translate = QtCore.QCoreApplication.translate
        Dialog.setWindowTitle(_translate("Dialog", "Proxy setting"))
        self.radioButtonNone.setText(_translate("Dialog", "Disabled"))
        self.radioButtonHTTP.setText(_translate("Dialog", "Enabled"))
        self.pushButtonTest.setText(_translate("Dialog", "Test"))
        self.lineEditHttpProxy.setPlaceholderText(_translate("Dialog", "http://127.0.0.1:1080"))
        self.label.setText(_translate("Dialog", "URL:"))
        self.bSave.setText(_translate("Dialog", "Save"))
<verstretch>0</verstretch> 59 | </sizepolicy> 60 | </property> 61 | <property name="font"> 62 | <font> 63 | <pointsize>9</pointsize> 64 | </font> 65 | </property> 66 | <property name="text"> 67 | <string>Enabled</string> 68 | </property> 69 | </widget> 70 | </item> 71 | <item> 72 | <layout class="QGridLayout" name="gridLayout"> 73 | <item row="0" column="2"> 74 | <widget class="QPushButton" name="pushButtonTest"> 75 | <property name="enabled"> 76 | <bool>true</bool> 77 | </property> 78 | <property name="font"> 79 | <font> 80 | <pointsize>9</pointsize> 81 | </font> 82 | </property> 83 | <property name="text"> 84 | <string>Test</string> 85 | </property> 86 | </widget> 87 | </item> 88 | <item row="0" column="1"> 89 | <widget class="QLineEdit" name="lineEditHttpProxy"> 90 | <property name="toolTip"> 91 | <string/> 92 | </property> 93 | <property name="statusTip"> 94 | <string/> 95 | </property> 96 | <property name="inputMethodHints"> 97 | <set>Qt::ImhUrlCharactersOnly</set> 98 | </property> 99 | <property name="placeholderText"> 100 | <string>http://127.0.0.1:1080</string> 101 | </property> 102 | </widget> 103 | </item> 104 | <item row="0" column="0"> 105 | <widget class="QLabel" name="label"> 106 | <property name="font"> 107 | <font> 108 | <pointsize>9</pointsize> 109 | </font> 110 | </property> 111 | <property name="text"> 112 | <string>URL:</string> 113 | </property> 114 | </widget> 115 | </item> 116 | <item row="1" column="2"> 117 | <widget class="QPushButton" name="bSave"> 118 | <property name="font"> 119 | <font> 120 | <pointsize>9</pointsize> 121 | </font> 122 | </property> 123 | <property name="text"> 124 | <string>Save</string> 125 | </property> 126 | </widget> 127 | </item> 128 | </layout> 129 | </item> 130 | </layout> 131 | </widget> 132 | </item> 133 | </layout> 134 | </widget> 135 | <resources/> 136 | <connections> 137 | <connection> 138 | <sender>radioButtonNone</sender> 139 | <signal>clicked(bool)</signal> 140 | <receiver>lineEditHttpProxy</receiver> 141 
| <slot>setDisabled(bool)</slot> 142 | <hints> 143 | <hint type="sourcelabel"> 144 | <x>130</x> 145 | <y>19</y> 146 | </hint> 147 | <hint type="destinationlabel"> 148 | <x>111</x> 149 | <y>60</y> 150 | </hint> 151 | </hints> 152 | </connection> 153 | <connection> 154 | <sender>radioButtonNone</sender> 155 | <signal>clicked(bool)</signal> 156 | <receiver>pushButtonTest</receiver> 157 | <slot>setDisabled(bool)</slot> 158 | <hints> 159 | <hint type="sourcelabel"> 160 | <x>130</x> 161 | <y>19</y> 162 | </hint> 163 | <hint type="destinationlabel"> 164 | <x>219</x> 165 | <y>60</y> 166 | </hint> 167 | </hints> 168 | </connection> 169 | <connection> 170 | <sender>radioButtonHTTP</sender> 171 | <signal>clicked(bool)</signal> 172 | <receiver>lineEditHttpProxy</receiver> 173 | <slot>setEnabled(bool)</slot> 174 | <hints> 175 | <hint type="sourcelabel"> 176 | <x>130</x> 177 | <y>36</y> 178 | </hint> 179 | <hint type="destinationlabel"> 180 | <x>111</x> 181 | <y>60</y> 182 | </hint> 183 | </hints> 184 | </connection> 185 | </connections> 186 | </ui> 187 | -------------------------------------------------------------------------------- /pytranscriber/gui/简体中文 - Chinese Simplified.qm: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/raryelcostasouza/pyTranscriber/38ff7bf01985a893f0483be5547c83228e74b6ec/pytranscriber/gui/简体中文 - Chinese Simplified.qm -------------------------------------------------------------------------------- /pytranscriber/gui/简体中文 - Chinese Simplified.ts: -------------------------------------------------------------------------------- 1 | <?xml version="1.0" encoding="utf-8"?> 2 | <!DOCTYPE TS> 3 | <TS version="2.1" language="zh" sourcelanguage="en"> 4 | <context> 5 | <name>window</name> 6 | <message> 7 | <location filename="gui.py" line="114"/> 8 | <source>pyTranscriber - v1.7 - 08/08/2020</source> 9 | <translation>pyTranscriber -v1.8 - 20/08/2022</translation> 10 | </message> 11 | <message> 
12 | <location filename="gui.py" line="115"/> 13 | <source>Select file(s)</source> 14 | <translation>选择文件</translation> 15 | </message> 16 | <message> 17 | <location filename="gui.py" line="116"/> 18 | <source>Transcribe Audio / Generate Subtitles</source> 19 | <translation>转译音频 / 生成字幕</translation> 20 | </message> 21 | <message> 22 | <location filename="gui.py" line="117"/> 23 | <source>Open Output Folder</source> 24 | <translation>打开导出文件夹</translation> 25 | </message> 26 | <message> 27 | <location filename="gui.py" line="118"/> 28 | <source>Output Location</source> 29 | <translation>导出位置</translation> 30 | </message> 31 | <message> 32 | <location filename="gui.py" line="119"/> 33 | <source>&List of files to generate transcribe audio / generate subtitles</source> 34 | <translation>&转译 / 生成字幕文件列表</translation> 35 | </message> 36 | <message> 37 | <location filename="gui.py" line="120"/> 38 | <source>Remove file(s)</source> 39 | <translation>移除文件</translation> 40 | </message> 41 | <message> 42 | <location filename="gui.py" line="121"/> 43 | <source>Cancel</source> 44 | <translation>取消</translation> 45 | </message> 46 | <message> 47 | <location filename="gui.py" line="122"/> 48 | <source>Open output files automatically</source> 49 | <translation>完成后自动打开文件夹</translation> 50 | </message> 51 | <message> 52 | <location filename="gui.py" line="123"/> 53 | <source>Audio Language:</source> 54 | <translation>选择音频语言</translation> 55 | </message> 56 | <message> 57 | <location filename="gui.py" line="124"/> 58 | <source>Abo&ut</source> 59 | <translation>Abo&ut</translation> 60 | </message> 61 | <message> 62 | <location filename="gui.py" line="125"/> 63 | <source>Settings</source> 64 | <translation>Settings</translation> 65 | </message> 66 | <message> 67 | <location filename="gui.py" line="141"/> 68 | <source>&Language</source> 69 | <translation>语</translation> 70 | </message> 71 | <message> 72 | <location filename="gui.py" line="126"/> 73 | <source>&License</source> 74 | 
<translation>&License</translation> 75 | </message> 76 | <message> 77 | <location filename="gui.py" line="127"/> 78 | <source>&Funding at Github Sponsors</source> 79 | <translation>资助 GitHub 上的项目</translation> 80 | </message> 81 | <message> 82 | <location filename="gui.py" line="128"/> 83 | <source>&More about pyTranscriber</source> 84 | <translation>&关于 pyTranscriber</translation> 85 | </message> 86 | <message> 87 | <location filename="gui.py" line="129"/> 88 | <source>Proxy</source> 89 | <translation>Proxy</translation> 90 | </message> 91 | <message> 92 | <location filename="gui.py" line="130"/> 93 | <source>Proxy settings</source> 94 | <translation>Proxy settings</translation> 95 | </message> 96 | </context> 97 | </TS> 98 | -------------------------------------------------------------------------------- /pytranscriber/gui/繁體中文 - Chinese Traditional.qm: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/raryelcostasouza/pyTranscriber/38ff7bf01985a893f0483be5547c83228e74b6ec/pytranscriber/gui/繁體中文 - Chinese Traditional.qm -------------------------------------------------------------------------------- /pytranscriber/gui/繁體中文 - Chinese Traditional.ts: -------------------------------------------------------------------------------- 1 | <?xml version="1.0" encoding="utf-8"?> 2 | <!DOCTYPE TS> 3 | <TS version="2.1"> 4 | <context> 5 | <name>window</name> 6 | <message> 7 | <location filename="gui.py" line="129"/> 8 | <source>pyTranscriber - v1.8 - 17/08/2022</source> 9 | <translation>pyTranscriber -v1.8 - 20/08/2022</translation> 10 | </message> 11 | <message> 12 | <location filename="gui.py" line="130"/> 13 | <source>Select file(s)</source> 14 | <translation>選擇檔案</translation> 15 | </message> 16 | <message> 17 | <location filename="gui.py" line="131"/> 18 | <source>Transcribe Audio / Generate Subtitles</source> 19 | <translation>轉譯音訊 / 生成字幕</translation> 20 | </message> 21 | <message> 22 | <location 
filename="gui.py" line="132"/> 23 | <source>Open Output Folder</source> 24 | <translation>開啟輸出位置</translation> 25 | </message> 26 | <message> 27 | <location filename="gui.py" line="133"/> 28 | <source>Output Location</source> 29 | <translation>選取輸出位置</translation> 30 | </message> 31 | <message> 32 | <location filename="gui.py" line="134"/> 33 | <source>List of files to generate transcribe audio / generate subtitles</source> 34 | <translation>&轉譯音訊 / 生成字幕檔案清單</translation> 35 | </message> 36 | <message> 37 | <location filename="gui.py" line="135"/> 38 | <source>Remove file(s)</source> 39 | <translation>移除檔案</translation> 40 | </message> 41 | <message> 42 | <location filename="gui.py" line="136"/> 43 | <source>Cancel</source> 44 | <translation>取消</translation> 45 | </message> 46 | <message> 47 | <location filename="gui.py" line="137"/> 48 | <source>Open output files automatically</source> 49 | <translation>完成後自動開啟輸出資料夾</translation> 50 | </message> 51 | <message> 52 | <location filename="gui.py" line="138"/> 53 | <source>Audio Language:</source> 54 | <translation>選擇音訊語言</translation> 55 | </message> 56 | <message> 57 | <location filename="gui.py" line="139"/> 58 | <source>Abo&ut</source> 59 | <translation>關於</translation> 60 | </message> 61 | <message> 62 | <location filename="gui.py" line="140"/> 63 | <source>&Settings</source> 64 | <translation>設定</translation> 65 | </message> 66 | <message> 67 | <location filename="gui.py" line="141"/> 68 | <source>&Language</source> 69 | <translation>語言</translation> 70 | </message> 71 | <message> 72 | <location filename="gui.py" line="142"/> 73 | <source>&License</source> 74 | <translation>&License</translation> 75 | </message> 76 | <message> 77 | <location filename="gui.py" line="143"/> 78 | <source>&Funding at Github Sponsors</source> 79 | <translation>在 Github 上成為贊助者</translation> 80 | </message> 81 | <message> 82 | <location filename="gui.py" line="144"/> 83 | <source>&More about pyTranscriber</source> 84 | <translation>&關於 
pyTranscriber</translation> 85 | </message> 86 | <message> 87 | <location filename="gui.py" line="145"/> 88 | <source>&Proxy</source> 89 | <translation>代理伺服器(Proxy)</translation> 90 | </message> 91 | <message> 92 | <location filename="gui.py" line="146"/> 93 | <source>Proxy setting</source> 94 | <translation>代理伺服器設定</translation> 95 | </message> 96 | <message> 97 | <location filename="gui.py" line="147"/> 98 | <source>English</source> 99 | <translation type="unfinished"></translation> 100 | </message> 101 | <message> 102 | <location filename="gui.py" line="148"/> 103 | <source>繁體中文 - Chinese Traditional</source> 104 | <translation type="unfinished"></translation> 105 | </message> 106 | <message> 107 | <location filename="gui.py" line="149"/> 108 | <source>简体中文 - Chinese Simplified</source> 109 | <translation type="unfinished"></translation> 110 | </message> 111 | <message> 112 | <location filename="gui.py" line="150"/> 113 | <source>Português</source> 114 | <translation type="unfinished"></translation> 115 | </message> 116 | </context> 117 | </TS> 118 | -------------------------------------------------------------------------------- /pytranscriber/model/__init__.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/raryelcostasouza/pyTranscriber/38ff7bf01985a893f0483be5547c83228e74b6ec/pytranscriber/model/__init__.py -------------------------------------------------------------------------------- /pytranscriber/model/google_speech.py: -------------------------------------------------------------------------------- 1 | ''' 2 | (C) 2025 Raryel C. Souza 3 | This program is free software: you can redistribute it and/or modify 4 | it under the terms of the GNU General Public License as published by 5 | the Free Software Foundation, either version 3 of the License, or 6 | (at your option) any later version. 
class Google_Speech:
    """Catalogue of the languages offered for the Google Speech engine.

    Every entry has the form ``"<language code> - <display name>"``; the
    separator is always the three characters ``" - "`` so the code can be
    recovered by splitting on it (or on the first space).
    """

    # Fixes relative to the previous list:
    #   * Ireland used the UK code (en-GB) a second time -> en-IE
    #   * Arabic (Israel) used Iceland's region code (ar-IS) -> ar-IL
    #   * "ceb -Cebuano" / "gu -Gujarati" lacked the space after the dash
    #   * "su" is Sundanese, not "Sudanese"
    supported_languages_list = [
        "en-US - English (United States)",
        "cmn-Hans-CN - Chinese (Simplified, China)",
        "cmn-Hant-TW - Chinese (Traditional, Taiwan)",
        "yue-Hant-HK - Cantonese (Traditional, HK)",
        "en-AU - English (Australia)",
        "en-CA - English (Canada)",
        "en-GB - English (United Kingdom)",
        "en-HK - English (Hong Kong)",
        "en-IN - English (India)",
        "en-IE - English (Ireland)",
        "en-NZ - English (New Zealand)",
        "en-PH - English (Philippines)",
        "en-SG - English (Singapore)",
        "af - Afrikaans",
        "ar - Arabic",
        "ar-DZ - Arabic (Algeria)",
        "ar-EG - Arabic (Egypt)",
        "ar-IQ - Arabic (Iraq)",
        "ar-IL - Arabic (Israel)",
        "ar-JO - Arabic (Jordan)",
        "ar-KW - Arabic (Kuwait)",
        "ar-LB - Arabic (Lebanon)",
        "ar-MA - Arabic (Morocco)",
        "ar-OM - Arabic (Oman)",
        "ar-QA - Arabic (Qatar)",
        "ar-SA - Arabic (Saudi Arabia)",
        "ar-PS - Arabic (State of Palestine)",
        "ar-TN - Arabic (Tunisia)",
        "ar-AE - Arabic (United Arab Emirates)",
        "ar-YE - Arabic (Yemen)",
        "az - Azerbaijani",
        "be - Belarusian",
        "bg - Bulgarian",
        "bn - Bengali",
        "bs - Bosnian",
        "ca - Catalan",
        "ceb - Cebuano",
        "cs - Czech",
        "cy - Welsh",
        "da - Danish",
        "de - German",
        "de-AT - German (Austria)",
        "de-CH - German (Switzerland)",
        "el - Greek",
        "eo - Esperanto",
        "es-ES - Spanish (Spain)",
        "es-AR - Spanish (Argentina)",
        "es-BO - Spanish (Bolivia)",
        "es-CL - Spanish (Chile)",
        "es-CO - Spanish (Colombia)",
        "es-CR - Spanish (Costa Rica)",
        "es-DO - Spanish (Dominican Republic)",
        "es-EC - Spanish (Ecuador)",
        "es-GT - Spanish (Guatemala)",
        "es-HN - Spanish (Honduras)",
        "es-MX - Spanish (Mexico)",
        "es-NI - Spanish (Nicaragua)",
        "es-PA - Spanish (Panama)",
        "es-PE - Spanish (Peru)",
        "es-PR - Spanish (Puerto Rico)",
        "es-PY - Spanish (Paraguay)",
        "es-SV - Spanish (El Salvador)",
        "es-UY - Spanish (Uruguay)",
        "es-US - Spanish (United States)",
        "es-VE - Spanish (Venezuela)",
        "et - Estonian",
        "eu - Basque",
        "fa - Persian",
        "fil-PH - Filipino (Philippines)",
        "fi - Finnish",
        "fr - French",
        "fr-BE - French (Belgium)",
        "fr-CA - French (Canada)",
        "fr-CH - French (Switzerland)",
        "ga - Irish",
        "gl - Galician",
        "gu - Gujarati",
        "ha - Hausa",
        "hi - Hindi",
        "hmn - Hmong",
        "hr - Croatian",
        "ht - Haitian Creole",
        "hu - Hungarian",
        "hy - Armenian",
        "id - Indonesian",
        "ig - Igbo",
        "is - Icelandic",
        "it - Italian",
        "it-CH - Italian (Switzerland)",
        "iw - Hebrew",
        "ja - Japanese",
        "jw - Javanese",
        "ka - Georgian",
        "kk - Kazakh",
        "km - Khmer",
        "kn - Kannada",
        "ko - Korean",
        "la - Latin",
        "lo - Lao",
        "lt - Lithuanian",
        "lv - Latvian",
        "mg - Malagasy",
        "mi - Maori",
        "mk - Macedonian",
        "ml - Malayalam",
        "mn - Mongolian",
        "mr - Marathi",
        "ms - Malay",
        "mt - Maltese",
        "my - Myanmar (Burmese)",
        "ne - Nepali",
        "nl - Dutch",
        "no - Norwegian",
        "ny - Chichewa",
        "pa - Punjabi",
        "pl - Polish",
        "pt-BR - Portuguese (Brazil)",
        "pt-PT - Portuguese (Portugal)",
        "ro - Romanian",
        "ru - Russian",
        "si - Sinhala",
        "sk - Slovak",
        "sl - Slovenian",
        "so - Somali",
        "sq - Albanian",
        "sr - Serbian",
        "st - Sesotho",
        "su - Sundanese",
        "sv - Swedish",
        "sw - Swahili",
        "ta - Tamil",
        "ta-IN - Tamil (India)",
        "ta-MY - Tamil (Malaysia)",
        "ta-SG - Tamil (Singapore)",
        "ta-LK - Tamil (Sri Lanka)",
        "te - Telugu",
        "tg - Tajik",
        "th - Thai",
        "tl - Filipino",
        "tr - Turkish",
        "uk - Ukrainian",
        "ur - Urdu",
        "uz - Uzbek",
        "vi - Vietnamese",
        "yi - Yiddish",
        "yo - Yoruba",
        "zu - Zulu",
    ]

    @staticmethod
    def get_supported_languages():
        """Return the class-level list of ``"code - name"`` entries.

        The list object itself is returned (not a copy), as before.
        """
        return Google_Speech.supported_languages_list
class Transcription_Parameters:
    """Plain value holder describing one transcription job.

    Stores the constructor arguments unchanged as attributes of the same
    name; the Whisper model starts out as ``None`` and is set separately.
    """

    def __init__(self, listFiles, outputFolder, langCode,
                 boolOpenOutputFilesAuto, proxies=None):
        # No Whisper model selected until set_model_whisper() is called.
        self.model_whisper = None
        self.proxies = proxies
        self.boolOpenOutputFilesAuto = boolOpenOutputFilesAuto
        self.langCode = langCode
        self.outputFolder = outputFolder
        self.listFiles = listFiles

    def set_model_whisper(self, model):
        """Remember *model* as the Whisper model for this job."""
        self.model_whisper = model

    def get_model_whisper(self):
        """Return the stored Whisper model (``None`` if never set)."""
        return self.model_whisper
class Whisper:
    """Languages offered for the Whisper engine.

    ``supported_languages_dict`` maps a language code to its lower-case
    name; ``supported_languages_list`` is a cache of ``"code - name"``
    strings that is built from the dict the first time it is requested.
    """

    # Filled on first call to get_supported_languages().
    supported_languages_list = None
    supported_languages_dict = {
        "en": "english",
        "zh": "chinese",
        "de": "german",
        "es": "spanish",
        "ru": "russian",
        "ko": "korean",
        "fr": "french",
        "ja": "japanese",
        "pt": "portuguese",
        "tr": "turkish",
        "pl": "polish",
        "ca": "catalan",
        "nl": "dutch",
        "ar": "arabic",
        "sv": "swedish",
        "it": "italian",
        "id": "indonesian",
        "hi": "hindi",
        "fi": "finnish",
        "vi": "vietnamese",
        "he": "hebrew",
        "uk": "ukrainian",
        "el": "greek",
        "ms": "malay",
        "cs": "czech",
        "ro": "romanian",
        "da": "danish",
        "hu": "hungarian",
        "ta": "tamil",
        "no": "norwegian",
        "th": "thai",
        "ur": "urdu",
        "hr": "croatian",
        "bg": "bulgarian",
        "lt": "lithuanian",
        "la": "latin",
        "mi": "maori",
        "ml": "malayalam",
        "cy": "welsh",
        "sk": "slovak",
        "te": "telugu",
        "fa": "persian",
        "lv": "latvian",
        "bn": "bengali",
        "sr": "serbian",
        "az": "azerbaijani",
        "sl": "slovenian",
        "kn": "kannada",
        "et": "estonian",
        "mk": "macedonian",
        "br": "breton",
        "eu": "basque",
        "is": "icelandic",
        "hy": "armenian",
        "ne": "nepali",
        "mn": "mongolian",
        "bs": "bosnian",
        "kk": "kazakh",
        "sq": "albanian",
        "sw": "swahili",
        "gl": "galician",
        "mr": "marathi",
        "pa": "punjabi",
        "si": "sinhala",
        "km": "khmer",
        "sn": "shona",
        "yo": "yoruba",
        "so": "somali",
        "af": "afrikaans",
        "oc": "occitan",
        "ka": "georgian",
        "be": "belarusian",
        "tg": "tajik",
        "sd": "sindhi",
        "gu": "gujarati",
        "am": "amharic",
        "yi": "yiddish",
        "lo": "lao",
        "uz": "uzbek",
        "fo": "faroese",
        "ht": "haitian creole",
        "ps": "pashto",
        "tk": "turkmen",
        "nn": "nynorsk",
        "mt": "maltese",
        "sa": "sanskrit",
        "lb": "luxembourgish",
        "my": "myanmar",
        "bo": "tibetan",
        "tl": "tagalog",
        "mg": "malagasy",
        "as": "assamese",
        "tt": "tatar",
        "haw": "hawaiian",
        "ln": "lingala",
        "ha": "hausa",
        "ba": "bashkir",
        "jw": "javanese",
        "su": "sundanese",
        "yue": "cantonese",
    }

    @staticmethod
    def convert_dict_to_list():
        """(Re)build the ``"code - name"`` list from the dict, in dict order."""
        Whisper.supported_languages_list = [
            code + " - " + name
            for code, name in Whisper.supported_languages_dict.items()
        ]

    @staticmethod
    def get_supported_languages():
        """Return the cached ``"code - name"`` list, building it on first use."""
        cached = Whisper.supported_languages_list
        if cached is not None:
            return cached
        Whisper.convert_dict_to_list()
        return Whisper.supported_languages_list
class SRTParser(object):
    """Strip the timing structure from an SRT subtitle file, keeping the text."""

    # A cue-counter line consists of digits only. Compiled once and applied
    # with fullmatch(): the previous `match("[0-9]+")` only anchored at the
    # start and therefore also discarded real subtitle text that merely
    # *begins* with a digit (e.g. "3 people came").
    _INDEX_LINE = re.compile("[0-9]+")

    @staticmethod
    def extractTextFromSRT(fileSRT):
        """Write the plain text of *fileSRT* next to it with a ``.txt`` extension.

        fileSRT -- path (str) of the subtitle file. It is read as UTF-8 with
                   undecodable bytes replaced; the output is written as UTF-8.
        """
        import os  # function-scope: the module itself only imports re/sys

        file_encoding = 'utf-8'

        with open(fileSRT, encoding=file_encoding, errors='replace') as f:
            lines = f.readlines()

        text_lines = SRTParser.clean_up(lines)

        # Swap the real extension instead of blindly cutting 4 characters.
        base_name, _ = os.path.splitext(fileSRT)

        with open(base_name + '.txt', 'w', encoding=file_encoding) as f:
            f.writelines(text_lines)

    @staticmethod
    def clean_up(lines):
        """Return only the subtitle text lines of *lines*, unchanged.

        The first element is always skipped (in an SRT file it is the counter
        of the first cue). After that, blank lines, timing lines (containing
        ``-->``) and digit-only counter lines are dropped.
        """
        text_lines = []
        for line in lines[1:]:
            stripped = line.strip()
            if not stripped:
                continue  # blank separator between cues
            if "-->" in line:
                continue  # "00:00:01,000 --> 00:00:02,000"
            if SRTParser._INDEX_LINE.fullmatch(stripped):
                continue  # cue counter
            text_lines.append(line)
        return text_lines
class MyUtil(object):
    """Small stateless helpers: open a file, probe connectivity, compute a percentage."""

    @staticmethod
    def open_file(path):
        """Open *path* with the platform's default application.

        Uses ``os.startfile`` on Windows, ``open`` on macOS and ``xdg-open``
        everywhere else. The launcher process is not waited for.
        """
        system = platform.system()
        if system == "Windows":
            os.startfile(path)
        elif system == "Darwin":
            subprocess.Popen(["open", path])
        else:
            subprocess.Popen(["xdg-open", path])

    @staticmethod
    def is_internet_connected(proxies=None):
        """Return True when https://www.google.com answers with HTTP 200.

        proxies -- optional proxies mapping handed to ``requests``.
        Any exception is reported on stdout and treated as "not connected".
        """
        headers = {'User-Agent': 'Mozilla/5.0 (X11; Linux x86_64; rv:91.0) Gecko/20100101 Firefox/91.0'}
        try:
            # Actually contact the host: tells us whether it is reachable.
            status = MyUtil.send_request('https://www.google.com', proxies=proxies, headers=headers)
        except Exception as e:
            print("Error Name: ", e.__class__.__name__)
            print("Error Message: ", e)
            return False
        return status == 200

    @staticmethod
    def send_request(url,
                     n_retries=0,
                     backoff_factor=0.9,
                     status_codes=(504, 503, 502, 500, 429, 302, 408, 425),
                     proxies=None,
                     headers=None):
        """GET *url* and return its HTTP status code, or -1 on failure.

        n_retries      -- number of connection retries.
        backoff_factor -- back-off factor passed to ``Retry``.
        status_codes   -- statuses forcing a retry; an immutable tuple by
                          default so the default can never be shared-mutated
                          (any collection of ints is still accepted).
        proxies        -- optional proxies mapping for ``requests``.
        headers        -- optional request headers.

        Time-outs (5 s) and other ``requests`` errors, including non-success
        statuses raised by ``raise_for_status``, are printed and mapped to -1.
        """
        retries = Retry(connect=n_retries, backoff_factor=backoff_factor,
                        status_forcelist=status_codes)
        # The context manager closes the session (and its pooled connections)
        # on every path; previously it was never closed.
        with requests.Session() as sess:
            sess.mount("https://", HTTPAdapter(max_retries=retries))
            sess.mount("http://", HTTPAdapter(max_retries=retries))
            try:
                response = sess.get(url, timeout=5, proxies=proxies, headers=headers)
                response.raise_for_status()  # Raises an HTTPError for bad responses
                return response.status_code
            except requests.Timeout:
                print("The request timed out")
            except requests.RequestException as e:
                print(f"An error occurred: {e}")
        return -1

    @staticmethod
    def percentage(currentval, maxval):
        """Return *currentval* as a percentage of *maxval* (float).

        Raises ZeroDivisionError when *maxval* is 0.
        """
        return 100 * currentval / float(maxval)
-------------------------------------------------------------------------------- /requirements.txt: -------------------------------------------------------------------------------- 1 | # CUDA-enabled PyTorch packages (CUDA 12.6) 2 | torch==2.3.0 3 | torchvision==0.18.0 4 | torchaudio==2.3.0 5 | 6 | # Ensure the extra index for PyTorch CUDA wheels 7 | --extra-index-url https://download.pytorch.org/whl/cu126 8 | 9 | # Other dependencies 10 | cachetools==4.2.4 11 | certifi==2024.7.4 12 | chardet==4.0.0 13 | charset-normalizer==2.0.6 14 | google-api-core==2.1.0 15 | google-api-python-client==2.24.0 16 | google-auth==2.3.0 17 | google-auth-httplib2==0.1.0 18 | google-auth-oauthlib==0.4.6 19 | googleapis-common-protos==1.53.0 20 | httplib2==0.20.1 21 | idna==3.7 22 | oauthlib==3.2.2 23 | progressbar==2.5 24 | protobuf==4.21.6 25 | pyasn1==0.4.8 26 | pyasn1-modules==0.2.8 27 | pyparsing==2.4.7 28 | pyqt5==5.15.10 29 | pyqt5-sip==12.13.0 30 | pysrt==1.1.2 31 | requests==2.32.0 32 | requests-oauthlib==1.3.0 33 | rsa==4.7.2 34 | six==1.16.0 35 | uritemplate==3.0.1 36 | urllib3==2.2.2 37 | openai-whisper 38 | platformdirs 39 | -------------------------------------------------------------------------------- /script-installer-windows-standalone.iss: -------------------------------------------------------------------------------- 1 | ; Script generated by the Inno Setup Script Wizard. 2 | ; SEE THE DOCUMENTATION FOR DETAILS ON CREATING INNO SETUP SCRIPT FILES! 3 | 4 | [Setup] 5 | ; NOTE: The value of AppId uniquely identifies this application. 6 | ; Do not use the same AppId value in installers for other applications. 7 | ; (To generate a new GUID, click Tools | Generate GUID inside the IDE.) 8 | AppId={{5240AB76-FC62-4BFA-A1EF-FA49AF701F80} 9 | AppName=pyTranscriber 10 | AppVersion=1.9 11 | AppVerName=pyTranscriber 1.9 12 | AppPublisher=Raryel C. 
Souza 13 | AppPublisherURL=https://github.com/raryelcostasouza/pyTranscriber 14 | AppSupportURL=https://github.com/raryelcostasouza/pyTranscriber 15 | AppUpdatesURL=https://github.com/raryelcostasouza/pyTranscriber 16 | DefaultDirName={pf}\pyTranscriber 17 | DisableDirPage=yes 18 | DisableProgramGroupPage=yes 19 | LicenseFile=.\LICENSE 20 | OutputBaseFilename=setup 21 | Compression=lzma 22 | SolidCompression=yes 23 | 24 | [Languages] 25 | Name: "english"; MessagesFile: "compiler:Default.isl" 26 | 27 | [Tasks] 28 | Name: "desktopicon"; Description: "{cm:CreateDesktopIcon}"; GroupDescription: "{cm:AdditionalIcons}"; Flags: unchecked 29 | 30 | [Files] 31 | Source: ".\main.dist\*"; DestDir: "{app}"; Flags: ignoreversion recursesubdirs 32 | ; NOTE: Don't use "Flags: ignoreversion" on any shared system files 33 | 34 | [Icons] 35 | Name: "{commonprograms}\pyTranscriber"; Filename: "{app}\pyTranscriber.exe" 36 | Name: "{commondesktop}\pyTranscriber"; Filename: "{app}\pyTranscriber.exe"; Tasks: desktopicon 37 | 38 | [Run] 39 | Filename: "{app}\pyTranscriber.exe"; Description: "{cm:LaunchProgram,pyTranscriber}"; Flags: nowait postinstall skipifsilent 40 | 41 | -------------------------------------------------------------------------------- /whisper/__init__.py: -------------------------------------------------------------------------------- 1 | import hashlib 2 | import io 3 | import os 4 | import urllib 5 | import warnings 6 | from typing import List, Optional, Union 7 | 8 | import torch 9 | from tqdm import tqdm 10 | 11 | from .audio import load_audio, log_mel_spectrogram, pad_or_trim 12 | from .decoding import DecodingOptions, DecodingResult, decode, detect_language 13 | from .model import ModelDimensions, Whisper 14 | from .transcribe import transcribe 15 | from .version import __version__ 16 | 17 | _MODELS = { 18 | "tiny.en": "https://openaipublic.azureedge.net/main/whisper/models/d3dd57d32accea0b295c96e26691aa14d8822fac7d9d27d5dc00b4ca2826dd03/tiny.en.pt", 19 | "tiny": 
"https://openaipublic.azureedge.net/main/whisper/models/65147644a518d12f04e32d6f3b26facc3f8dd46e5390956a9424a650c0ce22b9/tiny.pt", 20 | "base.en": "https://openaipublic.azureedge.net/main/whisper/models/25a8566e1d0c1e2231d1c762132cd20e0f96a85d16145c3a00adf5d1ac670ead/base.en.pt", 21 | "base": "https://openaipublic.azureedge.net/main/whisper/models/ed3a0b6b1c0edf879ad9b11b1af5a0e6ab5db9205f891f668f8b0e6c6326e34e/base.pt", 22 | "small.en": "https://openaipublic.azureedge.net/main/whisper/models/f953ad0fd29cacd07d5a9eda5624af0f6bcf2258be67c92b79389873d91e0872/small.en.pt", 23 | "small": "https://openaipublic.azureedge.net/main/whisper/models/9ecf779972d90ba49c06d968637d720dd632c55bbf19d441fb42bf17a411e794/small.pt", 24 | "medium.en": "https://openaipublic.azureedge.net/main/whisper/models/d7440d1dc186f76616474e0ff0b3b6b879abc9d1a4926b7adfa41db2d497ab4f/medium.en.pt", 25 | "medium": "https://openaipublic.azureedge.net/main/whisper/models/345ae4da62f9b3d59415adc60127b97c714f32e89e936602e85993674d08dcb1/medium.pt", 26 | "large-v1": "https://openaipublic.azureedge.net/main/whisper/models/e4b87e7e0bf463eb8e6956e646f1e277e901512310def2c24bf0e11bd3c28e9a/large-v1.pt", 27 | "large-v2": "https://openaipublic.azureedge.net/main/whisper/models/81f7c96c852ee8fc832187b0132e569d6c3065a3252ed18e56effd0b6a73e524/large-v2.pt", 28 | "large-v3": "https://openaipublic.azureedge.net/main/whisper/models/e5b1a55b89c1367dacf97e3e19bfd829a01529dbfdeefa8caeb59b3f1b81dadb/large-v3.pt", 29 | "large": "https://openaipublic.azureedge.net/main/whisper/models/e5b1a55b89c1367dacf97e3e19bfd829a01529dbfdeefa8caeb59b3f1b81dadb/large-v3.pt", 30 | "large-v3-turbo": "https://openaipublic.azureedge.net/main/whisper/models/aff26ae408abcba5fbf8813c21e62b0941638c5f6eebfb145be0c9839262a19a/large-v3-turbo.pt", 31 | "turbo": "https://openaipublic.azureedge.net/main/whisper/models/aff26ae408abcba5fbf8813c21e62b0941638c5f6eebfb145be0c9839262a19a/large-v3-turbo.pt", 32 | } 33 | 34 | # base85-encoded (n_layers, 
n_heads) boolean arrays indicating the cross-attention heads that are 35 | # highly correlated to the word-level timing, i.e. the alignment between audio and text tokens. 36 | _ALIGNMENT_HEADS = { 37 | "tiny.en": b"ABzY8J1N>@0{>%R00Bk>$p{7v037`oCl~+#00", 38 | "tiny": b"ABzY8bu8Lr0{>%RKn9Fp%m@SkK7Kt=7ytkO", 39 | "base.en": b"ABzY8;40c<0{>%RzzG;p*o+Vo09|#PsxSZm00", 40 | "base": b"ABzY8KQ!870{>%RzyTQH3`Q^yNP!>##QT-<FaQ7m", 41 | "small.en": b"ABzY8>?_)10{>%RpeA61k&I|OI3I$65C{;;pbCHh0B{qLQ;+}v00", 42 | "small": b"ABzY8DmU6=0{>%Rpa?J`kvJ6qF(V^F86#Xh7JUGMK}P<N0000", 43 | "medium.en": b"ABzY8usPae0{>%R7<zz_OvQ{)4kMa0BMw6u5rT}kRKX;$NfYBv00*Hl@qhsU00", 44 | "medium": b"ABzY8B0Jh+0{>%R7}kK1fFL7w6%<-Pf*t^=N)Qr&0RR9", 45 | "large-v1": b"ABzY8r9j$a0{>%R7#4sLmoOs{s)o3~84-RPdcFk!JR<kSfC2yj", 46 | "large-v2": b"ABzY8zd+h!0{>%R7=D0pU<_bnWW*tkYAhobTNnu$jnkEkXqp)j;w1Tzk)UH3X%SZd&fFZ2fC2yj", 47 | "large-v3": b"ABzY8gWO1E0{>%R7(9S+Kn!D~%ngiGaR?*L!iJG9p-nab0JQ=-{D1-g00", 48 | "large": b"ABzY8gWO1E0{>%R7(9S+Kn!D~%ngiGaR?*L!iJG9p-nab0JQ=-{D1-g00", 49 | "large-v3-turbo": b"ABzY8j^C+e0{>%RARaKHP%t(lGR*)0g!tONPyhe`", 50 | "turbo": b"ABzY8j^C+e0{>%RARaKHP%t(lGR*)0g!tONPyhe`", 51 | } 52 | 53 | 54 | def _download(url: str, root: str, in_memory: bool) -> Union[bytes, str]: 55 | os.makedirs(root, exist_ok=True) 56 | 57 | expected_sha256 = url.split("/")[-2] 58 | download_target = os.path.join(root, os.path.basename(url)) 59 | 60 | if os.path.exists(download_target) and not os.path.isfile(download_target): 61 | raise RuntimeError(f"{download_target} exists and is not a regular file") 62 | 63 | if os.path.isfile(download_target): 64 | with open(download_target, "rb") as f: 65 | model_bytes = f.read() 66 | if hashlib.sha256(model_bytes).hexdigest() == expected_sha256: 67 | return model_bytes if in_memory else download_target 68 | else: 69 | warnings.warn( 70 | f"{download_target} exists, but the SHA256 checksum does not match; re-downloading the file" 71 | ) 72 | 73 | with urllib.request.urlopen(url) 
as source, open(download_target, "wb") as output: 74 | with tqdm( 75 | total=int(source.info().get("Content-Length")), 76 | ncols=80, 77 | unit="iB", 78 | unit_scale=True, 79 | unit_divisor=1024, 80 | ) as loop: 81 | while True: 82 | buffer = source.read(8192) 83 | if not buffer: 84 | break 85 | 86 | output.write(buffer) 87 | loop.update(len(buffer)) 88 | 89 | model_bytes = open(download_target, "rb").read() 90 | if hashlib.sha256(model_bytes).hexdigest() != expected_sha256: 91 | raise RuntimeError( 92 | "Model has been downloaded but the SHA256 checksum does not not match. Please retry loading the model." 93 | ) 94 | 95 | return model_bytes if in_memory else download_target 96 | 97 | 98 | def available_models() -> List[str]: 99 | """Returns the names of available models""" 100 | return list(_MODELS.keys()) 101 | 102 | 103 | def load_model( 104 | name: str, 105 | device: Optional[Union[str, torch.device]] = None, 106 | download_root: str = None, 107 | in_memory: bool = False, 108 | ) -> Whisper: 109 | """ 110 | Load a Whisper ASR model 111 | 112 | Parameters 113 | ---------- 114 | name : str 115 | one of the official model names listed by `whisper.available_models()`, or 116 | path to a model checkpoint containing the model dimensions and the model state_dict. 
117 | device : Union[str, torch.device] 118 | the PyTorch device to put the model into 119 | download_root: str 120 | path to download the model files; by default, it uses "~/.cache/whisper" 121 | in_memory: bool 122 | whether to preload the model weights into host memory 123 | 124 | Returns 125 | ------- 126 | model : Whisper 127 | The Whisper ASR model instance 128 | """ 129 | 130 | if device is None: 131 | device = "cuda" if torch.cuda.is_available() else "cpu" 132 | if download_root is None: 133 | default = os.path.join(os.path.expanduser("~"), ".cache") 134 | download_root = os.path.join(os.getenv("XDG_CACHE_HOME", default), "whisper") 135 | 136 | if name in _MODELS: 137 | checkpoint_file = _download(_MODELS[name], download_root, in_memory) 138 | alignment_heads = _ALIGNMENT_HEADS[name] 139 | elif os.path.isfile(name): 140 | checkpoint_file = open(name, "rb").read() if in_memory else name 141 | alignment_heads = None 142 | else: 143 | raise RuntimeError( 144 | f"Model {name} not found; available models = {available_models()}" 145 | ) 146 | 147 | with ( 148 | io.BytesIO(checkpoint_file) if in_memory else open(checkpoint_file, "rb") 149 | ) as fp: 150 | checkpoint = torch.load(fp, map_location=device) 151 | del checkpoint_file 152 | 153 | dims = ModelDimensions(**checkpoint["dims"]) 154 | model = Whisper(dims) 155 | model.load_state_dict(checkpoint["model_state_dict"]) 156 | 157 | if alignment_heads is not None: 158 | model.set_alignment_heads(alignment_heads) 159 | 160 | return model.to(device) 161 | -------------------------------------------------------------------------------- /whisper/__main__.py: -------------------------------------------------------------------------------- 1 | from .transcribe import cli 2 | 3 | cli() 4 | -------------------------------------------------------------------------------- /whisper/assets/mel_filters.npz: -------------------------------------------------------------------------------- 
https://raw.githubusercontent.com/raryelcostasouza/pyTranscriber/38ff7bf01985a893f0483be5547c83228e74b6ec/whisper/assets/mel_filters.npz -------------------------------------------------------------------------------- /whisper/audio.py: -------------------------------------------------------------------------------- 1 | import os 2 | from functools import lru_cache 3 | from subprocess import CalledProcessError, run 4 | from typing import Optional, Union 5 | 6 | import numpy as np 7 | import torch 8 | import torch.nn.functional as F 9 | 10 | from .utils import exact_div 11 | 12 | # hard-coded audio hyperparameters 13 | SAMPLE_RATE = 16000 14 | N_FFT = 400 15 | HOP_LENGTH = 160 16 | CHUNK_LENGTH = 30 17 | N_SAMPLES = CHUNK_LENGTH * SAMPLE_RATE # 480000 samples in a 30-second chunk 18 | N_FRAMES = exact_div(N_SAMPLES, HOP_LENGTH) # 3000 frames in a mel spectrogram input 19 | 20 | N_SAMPLES_PER_TOKEN = HOP_LENGTH * 2 # the initial convolutions has stride 2 21 | FRAMES_PER_SECOND = exact_div(SAMPLE_RATE, HOP_LENGTH) # 10ms per audio frame 22 | TOKENS_PER_SECOND = exact_div(SAMPLE_RATE, N_SAMPLES_PER_TOKEN) # 20ms per audio token 23 | 24 | 25 | def load_audio(file: str, sr: int = SAMPLE_RATE): 26 | """ 27 | Open an audio file and read as mono waveform, resampling as necessary 28 | 29 | Parameters 30 | ---------- 31 | file: str 32 | The audio file to open 33 | 34 | sr: int 35 | The sample rate to resample the audio if necessary 36 | 37 | Returns 38 | ------- 39 | A NumPy array containing the audio waveform, in float32 dtype. 40 | """ 41 | 42 | # This launches a subprocess to decode audio while down-mixing 43 | # and resampling as necessary. Requires the ffmpeg CLI in PATH. 
def pad_or_trim(array, length: int = N_SAMPLES, *, axis: int = -1):
    """
    Force `array` to have exactly `length` entries along `axis`.

    Longer inputs keep their first `length` entries; shorter inputs are
    zero-padded at the end of that axis. Both torch tensors and NumPy arrays
    are accepted, and the result is of the same kind as the input. With the
    default `length` this yields the N_SAMPLES-long audio expected by the
    encoder.
    """
    is_tensor = torch.is_tensor(array)
    current = array.shape[axis]

    if current > length:
        # Trimming: keep the leading `length` entries along `axis`.
        if is_tensor:
            keep = torch.arange(length, device=array.device)
            array = array.index_select(dim=axis, index=keep)
        else:
            array = array.take(indices=range(length), axis=axis)
    elif current < length:
        # Padding: only the trailing side of `axis` receives zeros.
        widths = [(0, 0)] * array.ndim
        widths[axis] = (0, length - current)
        if is_tensor:
            # F.pad expects a flat list ordered from the last dimension backwards.
            flat = [amount for pair in reversed(widths) for amount in pair]
            array = F.pad(array, flat)
        else:
            array = np.pad(array, widths)

    return array
def log_mel_spectrogram(
    audio: Union[str, np.ndarray, torch.Tensor],
    n_mels: int = 80,
    padding: int = 0,
    device: Optional[Union[str, torch.device]] = None,
):
    """
    Compute the log-Mel spectrogram of an audio waveform.

    Parameters
    ----------
    audio: Union[str, np.ndarray, torch.Tensor], shape = (*)
        The path to an audio file, or a NumPy array or Tensor containing the
        audio waveform in 16 kHz

    n_mels: int
        The number of Mel-frequency filters; must be a value accepted by
        `mel_filters` (80 or 128)

    padding: int
        Number of zero samples to pad to the right

    device: Optional[Union[str, torch.device]]
        If given, the audio tensor is moved to this device before STFT

    Returns
    -------
    torch.Tensor, shape = (n_mels, n_frames)
        A Tensor that contains the normalized log-Mel spectrogram
    """
    # Normalize the input to a tensor: path -> ndarray -> tensor.
    if isinstance(audio, str):
        audio = load_audio(audio)
    if not torch.is_tensor(audio):
        audio = torch.from_numpy(audio)

    if device is not None:
        audio = audio.to(device)
    if padding > 0:
        audio = F.pad(audio, (0, padding))

    hann = torch.hann_window(N_FFT).to(audio.device)
    spectrum = torch.stft(audio, N_FFT, HOP_LENGTH, window=hann, return_complex=True)
    # Power spectrum; the last STFT frame is dropped.
    power = spectrum[..., :-1].abs() ** 2

    mel = mel_filters(audio.device, n_mels) @ power

    # log10 with a floor, limit the dynamic range to 8 below the maximum,
    # then shift and scale.
    log_mel = torch.clamp(mel, min=1e-10).log10()
    floor = log_mel.max() - 8.0
    log_mel = torch.maximum(log_mel, floor)
    return (log_mel + 4.0) / 4.0
class MultiHeadAttention(nn.Module):
    """Multi-head attention usable for both self- and cross-attention.

    The class-level flag `use_sdpa` selects the fused
    `scaled_dot_product_attention` path when it is available; otherwise the
    attention weights are computed explicitly and returned.
    """

    use_sdpa = True

    def __init__(self, n_state: int, n_head: int):
        super().__init__()
        self.n_head = n_head
        self.query = Linear(n_state, n_state)
        self.key = Linear(n_state, n_state, bias=False)
        self.value = Linear(n_state, n_state)
        self.out = Linear(n_state, n_state)

    def forward(
        self,
        x: Tensor,
        xa: Optional[Tensor] = None,
        mask: Optional[Tensor] = None,
        kv_cache: Optional[dict] = None,
    ):
        """Attend from `x` to itself, or to `xa` when it is given.

        Returns the projected attention output and the pre-softmax attention
        scores (`None` on the fused SDPA path).
        """
        q = self.query(x)

        cached_cross_attention = (
            kv_cache is not None and xa is not None and self.key in kv_cache
        )
        if cached_cross_attention:
            # for cross-attention, keys and values were calculated once already;
            # reuse them instead of projecting again.
            k, v = kv_cache[self.key], kv_cache[self.value]
        else:
            # hooks, if installed (i.e. kv_cache is not None), will prepend the cached kv tensors;
            # otherwise, perform key/value projections for self- or cross-attention as usual.
            source = x if xa is None else xa
            k = self.key(source)
            v = self.value(source)

        wv, qk = self.qkv_attention(q, k, v, mask)
        return self.out(wv), qk

    def qkv_attention(
        self, q: Tensor, k: Tensor, v: Tensor, mask: Optional[Tensor] = None
    ) -> Tuple[torch.Tensor, Optional[torch.Tensor]]:
        """Compute attention over already-projected q/k/v tensors.

        Returns `(out, qk)`, where `qk` holds the detached float attention
        scores on the explicit path and is `None` on the fused SDPA path.
        """
        _, n_ctx, n_state = q.shape
        scale = (n_state // self.n_head) ** -0.25

        def split_heads(t: Tensor) -> Tensor:
            # (batch, ctx, state) -> (batch, head, ctx, state // head)
            return t.view(*t.shape[:2], self.n_head, -1).permute(0, 2, 1, 3)

        q = split_heads(q)
        k = split_heads(k)
        v = split_heads(v)

        if SDPA_AVAILABLE and MultiHeadAttention.use_sdpa:
            attended = scaled_dot_product_attention(
                q, k, v, is_causal=mask is not None and n_ctx > 1
            )
            return attended.permute(0, 2, 1, 3).flatten(start_dim=2), None

        qk = (q * scale) @ (k * scale).transpose(-1, -2)
        if mask is not None:
            qk = qk + mask[:n_ctx, :n_ctx]
        qk = qk.float()

        weights = F.softmax(qk, dim=-1).to(q.dtype)
        out = (weights @ v).permute(0, 2, 1, 3).flatten(start_dim=2)
        return out, qk.detach()
class AudioEncoder(nn.Module):
    """Encode a mel spectrogram with two convolutions and attention blocks."""

    def __init__(
        self, n_mels: int, n_ctx: int, n_state: int, n_head: int, n_layer: int
    ):
        super().__init__()
        self.conv1 = Conv1d(n_mels, n_state, kernel_size=3, padding=1)
        self.conv2 = Conv1d(n_state, n_state, kernel_size=3, stride=2, padding=1)
        self.register_buffer("positional_embedding", sinusoids(n_ctx, n_state))

        layers = [ResidualAttentionBlock(n_state, n_head) for _ in range(n_layer)]
        self.blocks: Iterable[ResidualAttentionBlock] = nn.ModuleList(layers)
        self.ln_post = LayerNorm(n_state)

    def forward(self, x: Tensor):
        """
        x : torch.Tensor, shape = (batch_size, n_mels, n_ctx)
            the mel spectrogram of the audio
        """
        # Convolutional stem, then move the channel axis last.
        hidden = F.gelu(self.conv2(F.gelu(self.conv1(x)))).permute(0, 2, 1)

        assert hidden.shape[1:] == self.positional_embedding.shape, "incorrect audio shape"
        hidden = (hidden + self.positional_embedding).to(hidden.dtype)

        for layer in self.blocks:
            hidden = layer(hidden)

        return self.ln_post(hidden)
class Whisper(nn.Module):
    """Encoder-decoder model built from an `AudioEncoder` and a `TextDecoder`.

    The sizes of both halves come from a `ModelDimensions` instance. A
    non-persistent, sparse boolean buffer `alignment_heads` of shape
    (n_text_layer, n_text_head) marks the decoder heads selected for time
    alignment.
    """

    def __init__(self, dims: ModelDimensions):
        """Build the encoder and decoder described by `dims`."""
        super().__init__()
        self.dims = dims
        self.encoder = AudioEncoder(
            self.dims.n_mels,
            self.dims.n_audio_ctx,
            self.dims.n_audio_state,
            self.dims.n_audio_head,
            self.dims.n_audio_layer,
        )
        self.decoder = TextDecoder(
            self.dims.n_vocab,
            self.dims.n_text_ctx,
            self.dims.n_text_state,
            self.dims.n_text_head,
            self.dims.n_text_layer,
        )
        # use the last half among the decoder layers for time alignment by default;
        # to use a specific set of heads, see `set_alignment_heads()` below.
        all_heads = torch.zeros(
            self.dims.n_text_layer, self.dims.n_text_head, dtype=torch.bool
        )
        all_heads[self.dims.n_text_layer // 2 :] = True
        # persistent=False keeps the buffer out of the state_dict.
        self.register_buffer("alignment_heads", all_heads.to_sparse(), persistent=False)

    def set_alignment_heads(self, dump: bytes):
        """Replace the `alignment_heads` buffer with a serialized head mask.

        Parameters
        ----------
        dump : bytes
            base85-encoded, gzip-compressed boolean array holding
            n_text_layer * n_text_head entries.
        """
        # .copy() makes the array writable; np.frombuffer returns a read-only view.
        array = np.frombuffer(
            gzip.decompress(base64.b85decode(dump)), dtype=bool
        ).copy()
        mask = torch.from_numpy(array).reshape(
            self.dims.n_text_layer, self.dims.n_text_head
        )
        self.register_buffer("alignment_heads", mask.to_sparse(), persistent=False)

    def embed_audio(self, mel: torch.Tensor):
        """Run the audio encoder on `mel` and return its output."""
        return self.encoder(mel)

    def logits(self, tokens: torch.Tensor, audio_features: torch.Tensor):
        """Run the text decoder on `tokens`, attending to `audio_features`."""
        return self.decoder(tokens, audio_features)

    # NOTE(review): the return annotation says Dict[str, Tensor], but the body
    # returns the decoder output directly, which is a logits tensor according
    # to TextDecoder.forward -- confirm and correct the annotation.
    def forward(
        self, mel: torch.Tensor, tokens: torch.Tensor
    ) -> Dict[str, torch.Tensor]:
        """Encode `mel`, then decode `tokens` against the encoded audio."""
        return self.decoder(tokens, self.encoder(mel))

    @property
    def device(self):
        """Device of the first parameter yielded by `self.parameters()`."""
        return next(self.parameters()).device

    @property
    def is_multilingual(self):
        """True when the vocabulary has at least 51865 entries."""
        # NOTE(review): 51865 is presumably the smallest multilingual
        # vocabulary size -- confirm against the tokenizer.
        return self.dims.n_vocab >= 51865

    @property
    def num_languages(self):
        """Number of languages, derived from the vocabulary size."""
        # NOTE(review): 51765 presumably counts the non-language vocabulary
        # entries -- confirm against the tokenizer.
        return self.dims.n_vocab - 51765 - int(self.is_multilingual)

    def install_kv_cache_hooks(self, cache: Optional[dict] = None):
        """
        The `MultiHeadAttention` module optionally accepts `kv_cache` which stores the key and value
        tensors calculated for the previous positions. This method returns a dictionary that stores
        all caches, and the necessary hooks for the key and value projection modules that save the
        intermediate tensors to be reused during later calculations.

        Parameters
        ----------
        cache : Optional[dict]
            An existing cache to start from; it is shallow-copied, so the
            caller's dictionary is not modified.

        Returns
        -------
        cache : Dict[nn.Module, torch.Tensor]
            A dictionary object mapping the key/value projection modules to its cache
        hooks : List[RemovableHandle]
            List of PyTorch RemovableHandle objects to stop the hooks to be called
        """
        cache = {**cache} if cache is not None else {}
        hooks = []

        def save_to_cache(module, _, output):
            # Forward hook: stores/extends the cache entry for `module` and
            # returns it, so the cached tensor replaces the module's output.
            if module not in cache or output.shape[1] > self.dims.n_text_ctx:
                # save as-is, for the first token or cross attention
                cache[module] = output
            else:
                # append the new positions along dim 1
                cache[module] = torch.cat([cache[module], output], dim=1).detach()
            return cache[module]

        def install_hooks(layer: nn.Module):
            # Only the key and value projections of attention modules are hooked.
            if isinstance(layer, MultiHeadAttention):
                hooks.append(layer.key.register_forward_hook(save_to_cache))
                hooks.append(layer.value.register_forward_hook(save_to_cache))

        # Module.apply visits every submodule of the decoder.
        self.decoder.apply(install_hooks)
        return cache, hooks

    # The decoding/transcription entry points imported at module level are
    # exposed as attributes of the class; when they are plain functions, the
    # model instance is passed as their first argument.
    detect_language = detect_language_function
    transcribe = transcribe_function
    decode = decode_function
def remove_symbols_and_diacritics(s: str, keep=""):
    """
    Turn markers, symbols, and punctuation into spaces and strip diacritics.

    The text is first NFKD-normalized. Each resulting character is then
    handled by the first matching rule: characters in `keep` are preserved,
    characters listed in ADDITIONAL_DIACRITICS are replaced by their mapping,
    category 'Mn' characters are dropped, other characters whose category
    starts with M, S or P become a space, and everything else is kept.
    """

    def translate(ch):
        if ch in keep:
            return ch
        if ch in ADDITIONAL_DIACRITICS:
            return ADDITIONAL_DIACRITICS[ch]
        category = unicodedata.category(ch)
        if category == "Mn":
            return ""
        if category[0] in "MSP":
            return " "
        return ch

    return "".join(map(translate, unicodedata.normalize("NFKD", s)))
# Triton kernel that fills the `cost` and `trace` tables of a dynamic time
# warping recurrence, one row per loop iteration.
#
# Arguments (as used by the body):
#   cost, trace   - base pointers of the cost and trace tables
#   x             - base pointer of the input cost rows
#   *_stride      - distance between consecutive rows of the respective table
#   N, M          - problem sizes; the loop runs N + M times and M bounds the
#                   active lanes
#   BLOCK_SIZE    - compile-time number of lanes
#
# NOTE(review): the `k = i + j` comment suggests the caller stores `x` and
# `cost` in a skewed (anti-diagonal) layout so that each row can be processed
# in parallel -- confirm against the calling code, which is not visible here.
@triton.jit
def dtw_kernel(
    cost, trace, x, x_stride, cost_stride, trace_stride, N, M, BLOCK_SIZE: tl.constexpr
):
    offsets = tl.arange(0, BLOCK_SIZE)
    # lanes at or beyond M neither load nor store
    mask = offsets < M

    for k in range(1, N + M + 1):  # k = i + j
        # rows written in the previous iteration are read below
        tl.debug_barrier()

        # the three predecessor cells: row k-1, row k, and row k shifted by one
        p0 = cost + (k - 1) * cost_stride
        p1 = cost + k * cost_stride
        p2 = cost + k * cost_stride + 1

        c0 = tl.load(p0 + offsets, mask=mask)
        c1 = tl.load(p1 + offsets, mask=mask)
        c2 = tl.load(p2 + offsets, mask=mask)

        # new cost = local cost + cheapest predecessor
        x_row = tl.load(x + (k - 1) * x_stride + offsets, mask=mask, other=0)
        cost_row = x_row + tl.minimum(tl.minimum(c0, c1), c2)

        # results go to row k+1, shifted by one column
        cost_ptr = cost + (k + 1) * cost_stride + 1
        tl.store(cost_ptr + offsets, cost_row, mask=mask)

        # record which predecessor was the minimum (0, 1 or 2); on ties several
        # masks are true and the stores are issued in the order 2, 1, 0
        trace_ptr = trace + (k + 1) * trace_stride + 1
        tl.store(trace_ptr + offsets, 2, mask=mask & (c2 <= c0) & (c2 <= c1))
        tl.store(trace_ptr + offsets, 1, mask=mask & (c1 <= c0) & (c1 <= c2))
        tl.store(trace_ptr + offsets, 0, mask=mask & (c0 <= c1) & (c0 <= c2))
"\n\n".join( 77 | [ 78 | "\n".join( 79 | [ 80 | f" smaller = tl.where(row{j} < row{j + 1}, row{j}, row{j + 1})", 81 | f" larger = tl.where(row{j} > row{j + 1}, row{j}, row{j + 1})", 82 | f" row{j} = smaller", 83 | f" row{j + 1} = larger", 84 | ] 85 | ) 86 | for j in range(filter_width - i - 1) 87 | ] 88 | ) 89 | for i in range(filter_width // 2 + 1) 90 | ] 91 | ), 92 | ) 93 | kernel.src = kernel.src.replace("MIDDLE_ROW_HERE", f"row{filter_width // 2}") 94 | 95 | return kernel 96 | 97 | 98 | def median_filter_cuda(x: torch.Tensor, filter_width: int): 99 | """Apply a median filter of given width along the last dimension of x""" 100 | slices = x.contiguous().unfold(-1, filter_width, 1) 101 | grid = np.prod(slices.shape[:-2]) 102 | 103 | kernel = median_kernel(filter_width) 104 | y = torch.empty_like(slices[..., 0]) 105 | 106 | BLOCK_SIZE = 1 << (y.stride(-2) - 1).bit_length() 107 | kernel[(grid,)](y, x, x.stride(-2), y.stride(-2), BLOCK_SIZE=BLOCK_SIZE) 108 | 109 | return y 110 | -------------------------------------------------------------------------------- /whisper/utils.py: -------------------------------------------------------------------------------- 1 | import json 2 | import os 3 | import re 4 | import sys 5 | import zlib 6 | from typing import Callable, List, Optional, TextIO 7 | 8 | system_encoding = sys.getdefaultencoding() 9 | 10 | if system_encoding != "utf-8": 11 | 12 | def make_safe(string): 13 | # replaces any character not representable using the system default encoding with an '?', 14 | # avoiding UnicodeEncodeError (https://github.com/openai/whisper/discussions/729). 
def exact_div(x, y):
    """Return `x // y`, asserting that `y` divides `x` without remainder."""
    assert x % y == 0
    return x // y


def str2bool(string):
    """Convert the literal strings "True" / "False" to the matching bool.

    Raises:
        ValueError: if `string` is anything else.
    """
    str2val = {"True": True, "False": False}
    if string in str2val:
        return str2val[string]
    else:
        raise ValueError(f"Expected one of {set(str2val.keys())}, got {string}")


def optional_int(string):
    """Parse `string` as an int, mapping the literal "None" to None."""
    return None if string == "None" else int(string)


def optional_float(string):
    """Parse `string` as a float, mapping the literal "None" to None."""
    return None if string == "None" else float(string)


def compression_ratio(text) -> float:
    """Return the ratio of the UTF-8 byte length of `text` to its zlib-compressed length."""
    text_bytes = text.encode("utf-8")
    return len(text_bytes) / len(zlib.compress(text_bytes))


def format_timestamp(
    seconds: float, always_include_hours: bool = False, decimal_marker: str = "."
):
    """Format a non-negative number of seconds as `[HH:]MM:SS<marker>mmm`.

    The hours field is emitted only when it is non-zero or when
    `always_include_hours` is set. `decimal_marker` separates the seconds
    from the milliseconds.
    """
    assert seconds >= 0, "non-negative timestamp expected"
    milliseconds = round(seconds * 1000.0)

    hours = milliseconds // 3_600_000
    milliseconds -= hours * 3_600_000

    minutes = milliseconds // 60_000
    milliseconds -= minutes * 60_000

    seconds = milliseconds // 1_000
    milliseconds -= seconds * 1_000

    hours_marker = f"{hours:02d}:" if always_include_hours or hours > 0 else ""
    return (
        f"{hours_marker}{minutes:02d}:{seconds:02d}{decimal_marker}{milliseconds:03d}"
    )


def get_start(segments: List[dict]) -> Optional[float]:
    """Return the start of the first word found in `segments`.

    Falls back to the first segment's own "start" when no segment has any
    word, and to None when `segments` is empty.
    """
    return next(
        (w["start"] for s in segments for w in s["words"]),
        segments[0]["start"] if segments else None,
    )


def get_end(segments: List[dict]) -> Optional[float]:
    """Return the end of the last word found in `segments`.

    Falls back to the last segment's own "end" when no segment has any word,
    and to None when `segments` is empty.
    """
    return next(
        (w["end"] for s in reversed(segments) for w in reversed(s["words"])),
        segments[-1]["end"] if segments else None,
    )


class ResultWriter:
    """Base class for writers that save a transcription result next to a name
    derived from the audio file. Subclasses set `extension` and implement
    `write_result`."""

    extension: str

    def __init__(self, output_dir: str):
        self.output_dir = output_dir

    def __call__(
        self, result: dict, audio_path: str, options: Optional[dict] = None, **kwargs
    ):
        """Write `result` to `<output_dir>/<audio basename>.<extension>` as UTF-8."""
        audio_basename = os.path.basename(audio_path)
        audio_basename = os.path.splitext(audio_basename)[0]
        output_path = os.path.join(
            self.output_dir, audio_basename + "." + self.extension
        )

        with open(output_path, "w", encoding="utf-8") as f:
            self.write_result(result, file=f, options=options, **kwargs)

    def write_result(
        self, result: dict, file: TextIO, options: Optional[dict] = None, **kwargs
    ):
        """Serialize `result` into the open text stream `file` (subclass hook)."""
        raise NotImplementedError


class WriteTXT(ResultWriter):
    """Plain-text writer: one stripped segment text per line."""

    extension: str = "txt"

    def write_result(
        self, result: dict, file: TextIO, options: Optional[dict] = None, **kwargs
    ):
        for segment in result["segments"]:
            print(segment["text"].strip(), file=file, flush=True)


class SubtitlesWriter(ResultWriter):
    """Shared logic for subtitle formats; subclasses pick the timestamp style
    through `always_include_hours` and `decimal_marker`."""

    always_include_hours: bool
    decimal_marker: str

    def iterate_result(
        self,
        result: dict,
        options: Optional[dict] = None,
        *,
        max_line_width: Optional[int] = None,
        max_line_count: Optional[int] = None,
        highlight_words: bool = False,
        max_words_per_line: Optional[int] = None,
    ):
        """Yield `(start, end, text)` tuples, with both times already formatted.

        Each keyword-only setting falls back to the entry of the same name in
        `options` when it is not given. When the first segment carries a
        "words" list, cues are assembled from the word timings; otherwise one
        cue is produced per segment, with "-->" in the text replaced by "->".

        With `highlight_words`, every word of a cue gets its own cue in which
        that word is wrapped in `<u>...</u>`, and the plain cue text is emitted
        for any gap between consecutive words.
        """
        options = options or {}
        max_line_width = max_line_width or options.get("max_line_width")
        max_line_count = max_line_count or options.get("max_line_count")
        highlight_words = highlight_words or options.get("highlight_words", False)
        max_words_per_line = max_words_per_line or options.get("max_words_per_line")
        # Segment boundaries are kept as cue boundaries unless both line limits are set.
        preserve_segments = max_line_count is None or max_line_width is None
        max_line_width = max_line_width or 1000
        max_words_per_line = max_words_per_line or 1000

        def iterate_subtitles():
            line_len = 0
            line_count = 1
            # the next subtitle to yield (a list of word timings with whitespace)
            subtitle: List[dict] = []
            last: float = get_start(result["segments"]) or 0.0
            for segment in result["segments"]:
                chunk_index = 0
                words_count = max_words_per_line
                while chunk_index < len(segment["words"]):
                    remaining_words = len(segment["words"]) - chunk_index
                    if max_words_per_line > len(segment["words"]) - chunk_index:
                        words_count = remaining_words
                    for i, original_timing in enumerate(
                        segment["words"][chunk_index : chunk_index + words_count]
                    ):
                        # copy so that the caller's word dicts are not modified
                        timing = original_timing.copy()
                        long_pause = (
                            not preserve_segments and timing["start"] - last > 3.0
                        )
                        has_room = line_len + len(timing["word"]) <= max_line_width
                        seg_break = i == 0 and len(subtitle) > 0 and preserve_segments
                        if (
                            line_len > 0
                            and has_room
                            and not long_pause
                            and not seg_break
                        ):
                            # line continuation
                            line_len += len(timing["word"])
                        else:
                            # new line
                            timing["word"] = timing["word"].strip()
                            if (
                                len(subtitle) > 0
                                and max_line_count is not None
                                and (long_pause or line_count >= max_line_count)
                                or seg_break
                            ):
                                # subtitle break
                                yield subtitle
                                subtitle = []
                                line_count = 1
                            elif line_len > 0:
                                # line break
                                line_count += 1
                                timing["word"] = "\n" + timing["word"]
                            line_len = len(timing["word"].strip())
                        subtitle.append(timing)
                        last = timing["start"]
                    chunk_index += max_words_per_line
            if len(subtitle) > 0:
                yield subtitle

        if len(result["segments"]) > 0 and "words" in result["segments"][0]:
            for subtitle in iterate_subtitles():
                subtitle_start = self.format_timestamp(subtitle[0]["start"])
                subtitle_end = self.format_timestamp(subtitle[-1]["end"])
                subtitle_text = "".join([word["word"] for word in subtitle])
                if highlight_words:
                    last = subtitle_start
                    all_words = [timing["word"] for timing in subtitle]
                    for i, this_word in enumerate(subtitle):
                        start = self.format_timestamp(this_word["start"])
                        end = self.format_timestamp(this_word["end"])
                        if last != start:
                            # gap before this word: show the cue without highlight
                            yield last, start, subtitle_text

                        # Underline only the word itself: leading whitespace
                        # (group 1) stays outside the <u> tag.
                        yield start, end, "".join(
                            [
                                re.sub(r"^(\s*)(.*)$", r"\1<u>\2</u>", word)
                                if j == i
                                else word
                                for j, word in enumerate(all_words)
                            ]
                        )
                        last = end
                else:
                    yield subtitle_start, subtitle_end, subtitle_text
        else:
            for segment in result["segments"]:
                segment_start = self.format_timestamp(segment["start"])
                segment_end = self.format_timestamp(segment["end"])
                # "-->" is the cue timing separator, so it must not appear in text
                segment_text = segment["text"].strip().replace("-->", "->")
                yield segment_start, segment_end, segment_text

    def format_timestamp(self, seconds: float):
        """Format `seconds` using this writer's hour and decimal-marker settings."""
        return format_timestamp(
            seconds=seconds,
            always_include_hours=self.always_include_hours,
            decimal_marker=self.decimal_marker,
        )


class WriteVTT(SubtitlesWriter):
    """WebVTT writer: "WEBVTT" header, "." decimal marker, hours only when non-zero."""

    extension: str = "vtt"
    always_include_hours: bool = False
    decimal_marker: str = "."

    def write_result(
        self, result: dict, file: TextIO, options: Optional[dict] = None, **kwargs
    ):
        print("WEBVTT\n", file=file)
        for start, end, text in self.iterate_result(result, options, **kwargs):
            print(f"{start} --> {end}\n{text}\n", file=file, flush=True)


class WriteSRT(SubtitlesWriter):
    """SRT writer: cues numbered from 1, "," decimal marker, hours always shown."""

    extension: str = "srt"
    always_include_hours: bool = True
    decimal_marker: str = ","

    def write_result(
        self, result: dict, file: TextIO, options: Optional[dict] = None, **kwargs
    ):
        for i, (start, end, text) in enumerate(
            self.iterate_result(result, options, **kwargs), start=1
        ):
            print(f"{i}\n{start} --> {end}\n{text}\n", file=file, flush=True)


class WriteTSV(ResultWriter):
    """
    Write a transcript to a file in TSV (tab-separated values) format containing lines like:
    <start time in integer milliseconds>\t<end time in integer milliseconds>\t<transcript text>

    Using integer milliseconds as start and end times means there's no chance of interference from
    an environment setting a language encoding that causes the decimal in a floating point number
    to appear as a comma; also is faster and more efficient to parse & store, e.g., in C++.
    """

    extension: str = "tsv"

    def write_result(
        self, result: dict, file: TextIO, options: Optional[dict] = None, **kwargs
    ):
        print("start", "end", "text", sep="\t", file=file)
        for segment in result["segments"]:
            print(round(1000 * segment["start"]), file=file, end="\t")
            print(round(1000 * segment["end"]), file=file, end="\t")
            # tabs inside the text would break the column layout
            print(segment["text"].strip().replace("\t", " "), file=file, flush=True)


class WriteJSON(ResultWriter):
    """JSON writer: dumps the whole result dict as-is."""

    extension: str = "json"

    def write_result(
        self, result: dict, file: TextIO, options: Optional[dict] = None, **kwargs
    ):
        json.dump(result, file)


def get_writer(
    output_format: str, output_dir: str
) -> Callable[[dict, TextIO, dict], None]:
    """Return a writer for `output_format` that saves into `output_dir`.

    `output_format` is one of "txt", "vtt", "srt", "tsv", "json", or "all";
    "all" returns a function that runs every writer in turn. Any other value
    raises KeyError.
    """
    writers = {
        "txt": WriteTXT,
        "vtt": WriteVTT,
        "srt": WriteSRT,
        "tsv": WriteTSV,
        "json": WriteJSON,
    }

    if output_format == "all":
        all_writers = [writer(output_dir) for writer in writers.values()]

        # NOTE(review): `file` is forwarded as the second positional argument of
        # ResultWriter.__call__, i.e. it is used as the audio path — the name
        # and annotation are kept only for backward compatibility.
        def write_all(
            result: dict, file: TextIO, options: Optional[dict] = None, **kwargs
        ):
            for writer in all_writers:
                writer(result, file, options, **kwargs)

        return write_all

    return writers[output_format](output_dir)