├── .coveragerc ├── .gitattributes ├── .github ├── FUNDING.yml ├── ISSUE_TEMPLATE │ ├── bug_report.md │ └── synchronization-problem.md └── workflows │ └── ci.yml ├── .gitignore ├── .gitmodules ├── .readthedocs.yml ├── .travis.yml ├── CODE_OF_CONDUCT.md ├── HISTORY.rst ├── LICENSE ├── MANIFEST.in ├── Makefile ├── README.md ├── docs ├── .gitignore ├── Makefile ├── _static │ └── .keep ├── _templates │ └── .keep ├── conf.py ├── index.rst ├── make.bat └── requirements-docs.txt ├── ffsubsync ├── __init__.py ├── _version.py ├── aligners.py ├── constants.py ├── ffmpeg_utils.py ├── ffsubsync.py ├── ffsubsync_gui.py ├── file_utils.py ├── generic_subtitles.py ├── golden_section_search.py ├── sklearn_shim.py ├── speech_transformers.py ├── subtitle_parser.py ├── subtitle_transformers.py └── version.py ├── gui ├── .gitignore ├── Makefile ├── README.md ├── build-macos.sh ├── build-windows.sh ├── build.spec ├── entrypoint-windows.sh ├── ffsubsync-gui.py ├── hooks │ └── hook-webrtcvad.py ├── package-macos.sh └── requirements.txt ├── pyproject.toml ├── pytest.ini ├── requirements-dev.txt ├── requirements.txt ├── resources ├── img │ ├── config_icon.png │ ├── program_icon.icns │ ├── program_icon.png │ ├── subsync.png │ ├── tearing-me-apart-correct.gif │ └── tearing-me-apart-wrong.gif └── lib │ └── win64 │ └── VCRUNTIME140_1.dll ├── scripts ├── blacken.sh ├── bump-version.py ├── deploy.sh └── write-version.py ├── setup.cfg ├── setup.py ├── tests ├── test_alignment.py ├── test_integration.py ├── test_misc.py └── test_subtitles.py └── versioneer.py /.coveragerc: -------------------------------------------------------------------------------- 1 | [run] 2 | omit = ffsubsync/ffsubsync_gui.py, ffsubsync/_version.py, ffsubsync/version.py 3 | -------------------------------------------------------------------------------- /.gitattributes: -------------------------------------------------------------------------------- 1 | ffsubsync/_version.py export-subst 2 | 
-------------------------------------------------------------------------------- /.github/FUNDING.yml: -------------------------------------------------------------------------------- 1 | github: smacke 2 | -------------------------------------------------------------------------------- /.github/ISSUE_TEMPLATE/bug_report.md: -------------------------------------------------------------------------------- 1 | --- 2 | name: Bug report 3 | about: Create a report to help us improve 4 | title: '' 5 | labels: bug 6 | assignees: '' 7 | 8 | --- 9 | 10 | **Environment (please complete the following information):** 11 | - OS: [e.g. Windows 10, MacOS Mojave, etc.] 12 | - python version (`python --version`) 13 | - subsync version (`subsync --version`) 14 | 15 | **Describe the bug** 16 | A clear and concise description of what the bug is. 17 | 18 | **To Reproduce** 19 | How to reproduce the behavior. 20 | 21 | **Expected behavior** 22 | A clear and concise description of what you expected to happen. 23 | 24 | **Output** 25 | Copy+paste stdout from running the command here. 26 | 27 | **Test case** 28 | [Optional] You can bundle additional debugging information into a tar archive as follows: 29 | ``` 30 | subsync vid.mkv -i in.srt -o out.srt --make-test-case 31 | ``` 32 | This will create a file `vid.mkv.$timestamp.tar.gz` or similar a few KiB in size; you can attach it by clicking the "attach files" button below. 33 | 34 | **Additional context** 35 | Add any other context about the problem here. 36 | -------------------------------------------------------------------------------- /.github/ISSUE_TEMPLATE/synchronization-problem.md: -------------------------------------------------------------------------------- 1 | --- 2 | name: Synchronization problem 3 | about: Help us to improve syncing by reporting failed syncs 4 | title: output subtitles still out of sync 5 | labels: out-of-sync 6 | assignees: '' 7 | 8 | --- 9 | 10 | **Upload a tarball with debugging information** 11 | 1. 
Run the command that produces the out-of-sync subtitle output, but with the additional `--make-test-case` flag, i.e.: `subsync ref.mkv -i in.srt -o failed.srt --make-test-case` 12 | 2. This results in a file of the form `ref.mkv.$timestamp.tar.gz` or similar. 13 | 3. Please upload this file using the "attach files" button at the bottom of the text prompt. 14 | 15 | That's all! Thank you for contributing a test case; this helps me to continue improving the sync and to add additional integration tests once improvements have been made. 16 | 17 | **Additional context** 18 | Add any other context about the problem here that might be helpful. 19 | -------------------------------------------------------------------------------- /.github/workflows/ci.yml: -------------------------------------------------------------------------------- 1 | name: ffsubsync 2 | 3 | on: [push, pull_request] 4 | 5 | jobs: 6 | build: 7 | 8 | runs-on: ${{ matrix.os }} 9 | 10 | strategy: 11 | matrix: 12 | os: [ 'ubuntu-22.04', 'windows-latest' ] 13 | python-version: [ '3.7', '3.8', '3.9', '3.10', '3.11', '3.12', '3.13' ] 14 | include: 15 | - python-version: '3.7' 16 | os: 'macos-13' 17 | - python-version: '3.8' 18 | os: 'macos-13' 19 | - python-version: '3.9' 20 | os: 'macos-13' 21 | - python-version: '3.10' 22 | os: 'macos-latest' 23 | - python-version: '3.11' 24 | os: 'macos-latest' 25 | - python-version: '3.12' 26 | os: 'macos-latest' 27 | - python-version: '3.13' 28 | os: 'macos-latest' 29 | steps: 30 | - uses: actions/checkout@v4 31 | with: 32 | fetch-depth: 1 33 | - uses: smacke/submodule-checkout@v3 34 | if: ${{ matrix.os == 'ubuntu-22.04' && matrix.python-version != '3.10'}} 35 | with: 36 | ssh-key: '${{ secrets.TEST_DATA_SECRET }}' 37 | - name: Set up Python 38 | uses: actions/setup-python@v5 39 | with: 40 | python-version: ${{ matrix.python-version }} 41 | - name: Install dependencies 42 | run: | 43 | python -m pip install --upgrade pip 44 | pip install -r requirements.txt 45 | pip install -r requirements-dev.txt 46 | pip install -e . 47 | - name: Lint with flake8 48 | run: | 49 | pip install flake8 50 | # stop the build if there are Python syntax errors or undefined names 51 | #flake8 . --count --select=E9,F63,F7,F82 --show-source --statistics 52 | # exit-zero treats all errors as warnings. The GitHub editor is 127 chars wide 53 | #flake8 . --count --exit-zero --max-complexity=10 --max-line-length=127 --statistics 54 | flake8 . --exit-zero 55 | - name: Run unit tests with pytest (no coverage) 56 | if: matrix.os != 'ubuntu-22.04' 57 | run: | 58 | pytest --cov-config=.coveragerc --cov-report= --cov=ffsubsync -v -m 'not integration' tests/ 59 | - name: Run unit tests with pytest (with coverage) 60 | if: matrix.os == 'ubuntu-22.04' 61 | run: | 62 | pytest --cov-config=.coveragerc --cov-report=xml:cov.xml --cov=ffsubsync -v -m 'not integration' tests/ 63 | - name: Run integration tests with pytest 64 | if: ${{ matrix.os == 'ubuntu-22.04' && matrix.python-version != '3.10'}} 65 | run: | 66 | INTEGRATION=1 pytest --cov-config=.coveragerc --cov-report=xml:cov.xml --cov=ffsubsync -v -m 'integration' tests/ 67 | - name: Upload coverage report 68 | if: matrix.os == 'ubuntu-22.04' 69 | uses: codecov/codecov-action@v1 70 | with: 71 | token: '${{ secrets.CODECOV_TOKEN }}' 72 | files: ./cov.xml 73 | env_vars: PYTHON 74 | name: codecov-umbrella 75 | fail_ci_if_error: true 76 | verbose: true 77 | -------------------------------------------------------------------------------- /.gitignore: -------------------------------------------------------------------------------- 1 | scratch-notebooks/ 2 | **/__pycache__ 3 | build 4 | dist 5 | *.egg-info 6 | .vim 7 | __version__ 8 | .venv/ 9 | .coverage 10 | -------------------------------------------------------------------------------- /.gitmodules: -------------------------------------------------------------------------------- 1 | [submodule "test-data"] 2 | path = test-data 3 | url = 
git@github.com:smacke/subsync-data 4 | -------------------------------------------------------------------------------- /.readthedocs.yml: -------------------------------------------------------------------------------- 1 | # .readthedocs.yml 2 | # Read the Docs configuration file 3 | # See https://docs.readthedocs.io/en/stable/config-file/v2.html for details 4 | 5 | # Required 6 | version: 2 7 | 8 | # Build documentation in the docs/ directory with Sphinx 9 | sphinx: 10 | configuration: docs/conf.py 11 | 12 | # Optionally build your docs in additional formats such as PDF and ePub 13 | formats: [pdf] 14 | 15 | # Optionally set the version of Python and requirements required to build your docs 16 | python: 17 | version: 3.8 18 | install: 19 | - method: setuptools 20 | path: . 21 | - requirements: docs/requirements-docs.txt 22 | 23 | submodules: 24 | exclude: all 25 | -------------------------------------------------------------------------------- /.travis.yml: -------------------------------------------------------------------------------- 1 | language: python 2 | 3 | python: 4 | - "3.6" 5 | - "3.7" 6 | - "3.8" 7 | - "3.9" 8 | 9 | os: 10 | - linux 11 | # - osx 12 | 13 | dist: xenial 14 | 15 | git: 16 | submodules: false 17 | lfs_skip_smudge: true 18 | 19 | install: 20 | - pip install -r requirements.txt 21 | - pip install -r requirements-dev.txt 22 | - pip install -e . 23 | 24 | #addons: 25 | # apt: 26 | # update: true 27 | # packages: ffmpeg 28 | # homebrew: 29 | # packages: ffmpeg 30 | 31 | script: 32 | - pytest -v -m 'not integration' tests/ 33 | - flake8 . 
--exit-zero 34 | -------------------------------------------------------------------------------- /CODE_OF_CONDUCT.md: -------------------------------------------------------------------------------- 1 | # Contributor Covenant Code of Conduct 2 | 3 | ## Our Pledge 4 | 5 | In the interest of fostering an open and welcoming environment, we as 6 | contributors and maintainers pledge to making participation in our project and 7 | our community a harassment-free experience for everyone, regardless of age, body 8 | size, disability, ethnicity, sex characteristics, gender identity and expression, 9 | level of experience, education, socio-economic status, nationality, personal 10 | appearance, race, religion, or sexual identity and orientation. 11 | 12 | ## Our Standards 13 | 14 | Examples of behavior that contributes to creating a positive environment 15 | include: 16 | 17 | * Using welcoming and inclusive language 18 | * Being respectful of differing viewpoints and experiences 19 | * Gracefully accepting constructive criticism 20 | * Focusing on what is best for the community 21 | * Showing empathy towards other community members 22 | 23 | Examples of unacceptable behavior by participants include: 24 | 25 | * The use of sexualized language or imagery and unwelcome sexual attention or 26 | advances 27 | * Trolling, insulting/derogatory comments, and personal or political attacks 28 | * Public or private harassment 29 | * Publishing others' private information, such as a physical or electronic 30 | address, without explicit permission 31 | * Other conduct which could reasonably be considered inappropriate in a 32 | professional setting 33 | 34 | ## Our Responsibilities 35 | 36 | Project maintainers are responsible for clarifying the standards of acceptable 37 | behavior and are expected to take appropriate and fair corrective action in 38 | response to any instances of unacceptable behavior. 
39 | 40 | Project maintainers have the right and responsibility to remove, edit, or 41 | reject comments, commits, code, wiki edits, issues, and other contributions 42 | that are not aligned to this Code of Conduct, or to ban temporarily or 43 | permanently any contributor for other behaviors that they deem inappropriate, 44 | threatening, offensive, or harmful. 45 | 46 | ## Scope 47 | 48 | This Code of Conduct applies both within project spaces and in public spaces 49 | when an individual is representing the project or its community. Examples of 50 | representing a project or community include using an official project e-mail 51 | address, posting via an official social media account, or acting as an appointed 52 | representative at an online or offline event. Representation of a project may be 53 | further defined and clarified by project maintainers. 54 | 55 | ## Enforcement 56 | 57 | Instances of abusive, harassing, or otherwise unacceptable behavior may be 58 | reported by contacting the project team at stephen.macke@gmail.com. All 59 | complaints will be reviewed and investigated and will result in a response that 60 | is deemed necessary and appropriate to the circumstances. The project team is 61 | obligated to maintain confidentiality with regard to the reporter of an incident. 62 | Further details of specific enforcement policies may be posted separately. 63 | 64 | Project maintainers who do not follow or enforce the Code of Conduct in good 65 | faith may face temporary or permanent repercussions as determined by other 66 | members of the project's leadership. 
67 | 68 | ## Attribution 69 | 70 | This Code of Conduct is adapted from the [Contributor Covenant][homepage], version 1.4, 71 | available at https://www.contributor-covenant.org/version/1/4/code-of-conduct.html 72 | 73 | [homepage]: https://www.contributor-covenant.org 74 | 75 | For answers to common questions about this code of conduct, see 76 | https://www.contributor-covenant.org/faq 77 | -------------------------------------------------------------------------------- /HISTORY.rst: -------------------------------------------------------------------------------- 1 | History 2 | ======= 3 | 4 | 0.4.28 (2025-02-16) 5 | ------------------- 6 | * Add support for Python 3.13; 7 | 8 | 0.4.27 (2024-12-23) 9 | ------------------- 10 | * Add support for WebVTT by @GrahamDigital; 11 | * Make setuptools an explicit requirement to improve support for Python 3.12+; 12 | 13 | 0.4.26 (2024-10-15) 14 | ------------------- 15 | * Allow progress to work for multiple syncs even if alignment fails for a particular input; 16 | * Allow specifying ffmpeg exe path using --ffmpeg-path; 17 | * Updates for Python 3.12; 18 | * Don't report sync as successful if best score is in negatives (from @ajitid); 19 | * Turn on Audio Sync for audio extraction process (from @dvh312); 20 | 21 | 0.4.25 (2023-03-26) 22 | ------------------- 23 | * Replace unmaintained cchardet with faust-cchardet; 24 | 25 | 0.4.23 (2023-01-17) 26 | ------------------- 27 | * Bugfix for waitpid on Windows; 28 | 29 | 0.4.22 (2022-12-31) 30 | ------------------- 31 | * Misc maintenance / compatibility fixes; 32 | 33 | 0.4.19 (2022-01-07) 34 | ------------------- 35 | * Blacken code and get rid of future_annotations dependency; 36 | 37 | 0.4.18 (2021-11-07) 38 | ------------------- 39 | * Allow `--apply-offset-seconds` when only subtitles specified; 40 | * Make golden section search over scale factors option (`--gss`) available from help; 41 | * Use -inf as objective for invalid offsets; 42 | 43 | 0.4.17 (2021-10-03) 44 | 
------------------- 45 | * Don't remove log file if --log-dir-path explicitly requested; 46 | * Add --suppress-output-if-offset-less-than arg to suppress output for small syncs; 47 | 48 | 0.4.16 (2021-07-22) 49 | ------------------- 50 | * Fix a couple of validation bugs that prevented certain uncommon command line options from use; 51 | 52 | 0.4.15 (2021-05-25) 53 | ------------------- 54 | * Make typing_extensions a requirement 55 | 56 | 0.4.14 (2021-05-10) 57 | ------------------- 58 | * Hotfix for pysubs2 on Python 3.6; 59 | 60 | 0.4.13 (2021-05-10) 61 | ------------------- 62 | * Support SSA embedded fonts using new pysubs2 'opaque_fonts' metadata; 63 | * Set min required pysubs2 version to 1.2.0 to ensure the aforementioned functionality is available; 64 | 65 | 0.4.12 (2021-04-13) 66 | ------------------- 67 | * Pin auditok to 0.1.5 to avoid API-breaking change 68 | 69 | 0.4.11 (2021-01-29) 70 | ------------------- 71 | * Misc sync improvements: 72 | * Have webrtcvad use '0' as the non speech label instead of 0.5; 73 | * Allow the vad non speech label to be specified via the --non-speech-label command line parameter; 74 | * Don't try to infer framerate ratio based on length between first and last speech frames for non-subtitle speech detection; 75 | 76 | 0.4.10 (2021-01-18) 77 | ------------------- 78 | * Lots of improvements from PRs submitted by @alucryd (big thanks!): 79 | * Retain ASS styles; 80 | * Support syncing several subs against the same ref via --overwrite-input flag; 81 | * Add --apply-offset-seconds postprocess option to shift alignment by prespecified amount; 82 | * Filter out metadata in subtitles when extracting speech; 83 | * Add experimental --golden-section-search over framerate ratio (off by default); 84 | * Try to improve sync by inferring framerate ratio based on relative duration of synced vs unsynced; 85 | 86 | 0.4.9 (2020-10-11) 87 | ------------------ 88 | * Make default max offset seconds 60 and enforce during alignment as opposed 
to throwing away alignments with > max_offset_seconds; 89 | * Add experimental section for using golden section search to find framerate ratio; 90 | * Restore ability to read stdin and write stdout after buggy permissions check; 91 | * Exceptions that occur during syncing were mistakenly suppressed; this is now fixed; 92 | 93 | 0.4.8 (2020-09-22) 94 | ------------------ 95 | * Use webrtcvad-wheels on Windows to eliminate dependency on compiler; 96 | 97 | 0.4.7 (2020-09-05) 98 | ------------------ 99 | * Misc bugfixes and stability improvements; 100 | 101 | 0.4.6 (2020-06-10) 102 | ------------------ 103 | * Bugfix for writing subs to stdout; 104 | 105 | 0.4.5 (2020-06-09) 106 | ------------------ 107 | * Allow MicroDVD input format; 108 | * Use output extension to determine output format; 109 | 110 | 0.4.4 (2020-06-08) 111 | ------------------ 112 | * Use rich formatting for Python >= 3.6; 113 | * Use versioneer to manage versions; 114 | 115 | 0.4.3 (2020-06-07) 116 | ------------------ 117 | * Fix regression where stdout not used for default output; 118 | * Add ability to specify path to ffmpeg / ffprobe binaries; 119 | * Add ability to overwrite the input / unsynced srt with the --overwrite-input flag; 120 | 121 | 0.4.2 (2020-06-06) 122 | ------------------ 123 | * Fix Python 2 compatibility bug; 124 | 125 | 0.4.1 (2020-06-06) 126 | ------------------ 127 | * Add --reference-stream option for selecting the stream / track from the video reference to use for speech detection; 128 | 129 | 0.4.0 (2020-06-02) 130 | ------------------ 131 | * Remove dependency on scikit-learn; 132 | * Implement PyInstaller / Gooey build process for graphical application on MacOS and Windows; 133 | 134 | 0.3.7 (2020-05-11) 135 | ------------------ 136 | * Fix PyPI issues; 137 | 138 | 0.3.5 (2020-05-08) 139 | ------------------ 140 | * Fix corner case bug that occurred when multiple sync attempts were scored the same; 141 | 142 | 0.3.4 (2020-03-20) 143 | ------------------ 144 | * 
Attempt speech extraction from subtitle tracks embedded in video first before using VAD; 145 | 146 | 0.3.3 (2020-03-15) 147 | ------------------ 148 | * Hotfix for test archive creation bug; 149 | 150 | 0.3.2 (2020-03-13) 151 | ------------------ 152 | * Add ability to merge synced and reference subs into bilingual subs when reference is srt; 153 | 154 | 0.3.1 (2020-03-12) 155 | ------------------ 156 | * Fix bug when handling ass/ssa input, this format should work now; 157 | 158 | 0.3.0 (2020-03-11) 159 | ------------------ 160 | * Better detection of text file encodings; 161 | * ASS / SSA functionality (but currently untested); 162 | * Allow serialize speech with --serialize-speech flag; 163 | * Convenient --make-test-case flag to create test cases when filing sync-related bugs; 164 | * Use utf-8 as default output encoding (instead of using same encoding as input); 165 | * More robust test framework (integration tests!); 166 | 167 | 0.2.17 (2019-12-21) 168 | ------------------ 169 | * Try to correct for framerate differences by picking best framerate ratio; 170 | 171 | 0.2.16 (2019-12-04) 172 | ------------------ 173 | * Revert changes from 0.2.9 now that srt parses weird timestamps robustly; 174 | 175 | 0.2.15 (2019-10-11) 176 | ------------------ 177 | * Revert changes from 0.2.12 (caused regression on Windows); 178 | 179 | 0.2.14 (2019-10-07) 180 | ------------------ 181 | * Bump min required scikit-learn to 0.20.4; 182 | 183 | 0.2.12 (2019-10-06) 184 | ------------------ 185 | * Clear O_NONBLOCK flag on stdout stream in case it is set; 186 | 187 | 0.2.11 (2019-10-06) 188 | ------------------ 189 | * Quick and dirty fix to recover without progress info if `ffmpeg.probe` raises; 190 | 191 | 0.2.10 (2019-09-22) 192 | ------------------ 193 | * Specify utf-8 encoding at top of file for backcompat with Python2; 194 | 195 | 0.2.9 (2019-09-22) 196 | ------------------ 197 | * Quick and dirty fix to properly handle timestamp ms fields with >3 digits; 198 | 199 | 0.2.8 
(2019-06-15) 200 | ------------------ 201 | * Allow user to specify start time (in seconds) for processing; 202 | 203 | 0.2.7 (2019-05-28) 204 | ------------------ 205 | * Add utf-16 to list of encodings to try for inference purposes; 206 | 207 | 0.2.6 (2019-05-15) 208 | ------------------ 209 | * Fix argument parsing regression; 210 | 211 | 0.2.5 (2019-05-14) 212 | ------------------ 213 | * Clamp subtitles to maximum duration (default 10); 214 | 215 | 0.2.4 (2019-03-19) 216 | ------------------ 217 | * Add six to requirements.txt; 218 | * Set default encoding to utf8 to ensure non ascii filenames handled properly; 219 | 220 | 0.2.3 (2019-03-08) 221 | ------------------ 222 | * Minor change to subtitle speech extraction; 223 | 224 | 0.2.2 (2019-03-08) 225 | ------------------ 226 | * Allow reading input srt from stdin; 227 | * Allow specifying encodings for reference, input, and output srt; 228 | * Use the same encoding for both input srt and output srt by default; 229 | * Developer note: using sklearn-style data pipelines now; 230 | 231 | 0.2.1 (2019-03-07) 232 | ------------------ 233 | * Developer note: change progress-only to vlc-mode and remove from help docs; 234 | 235 | 0.2.0 (2019-03-06) 236 | ------------------ 237 | * Get rid of auditok (GPLv3, was hurting alignment algorithm); 238 | * Change to alignment algo: don't penalize matching video non-speech with subtitle speech; 239 | 240 | 0.1.7 (2019-03-05) 241 | ------------------ 242 | * Add Chinese to the list of encodings that can be inferred; 243 | * Make srt parsing more robust; 244 | 245 | 0.1.6 (2019-03-04) 246 | ------------------ 247 | * Misc bugfixes; 248 | * Proper logging; 249 | * Proper version handling; 250 | 251 | 0.1.0 (2019-02-24) 252 | ------------------ 253 | * Support srt format; 254 | * Support using srt as reference; 255 | * Support using video as reference (via ffmpeg); 256 | * Support writing to stdout or file (read from stdin not yet supported; can only read from file); 257 | 
-------------------------------------------------------------------------------- /LICENSE: -------------------------------------------------------------------------------- 1 | Copyright 2019 Stephen Macke 2 | 3 | Permission is hereby granted, free of charge, to any person obtaining a copy of this software and associated documentation files (the "Software"), to deal in the Software without restriction, including without limitation the rights to use, copy, modify, merge, publish, distribute, sublicense, and/or sell copies of the Software, and to permit persons to whom the Software is furnished to do so, subject to the following conditions: 4 | 5 | The above copyright notice and this permission notice shall be included in all copies or substantial portions of the Software. 6 | 7 | THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE. 
8 | -------------------------------------------------------------------------------- /MANIFEST.in: -------------------------------------------------------------------------------- 1 | include *.rst 2 | include versioneer.py 3 | include ffsubsync/_version.py 4 | -------------------------------------------------------------------------------- /Makefile: -------------------------------------------------------------------------------- 1 | # -*- coding: utf-8 -*- 2 | .PHONY: clean build bump deploy black blackcheck check test tests deps devdeps 3 | 4 | clean: 5 | rm -rf dist/ build/ *.egg-info/ 6 | 7 | build: clean 8 | python setup.py sdist bdist_wheel --universal 9 | 10 | bump: 11 | ./scripts/bump-version.py 12 | 13 | deploy: build 14 | ./scripts/deploy.sh 15 | 16 | black: 17 | ./scripts/blacken.sh 18 | 19 | blackcheck: 20 | ./scripts/blacken.sh --check 21 | 22 | lint: 23 | flake8 24 | 25 | typecheck: 26 | mypy ffsubsync 27 | 28 | check_no_typing: 29 | INTEGRATION=1 pytest --cov-config=.coveragerc --cov=ffsubsync 30 | 31 | check: blackcheck typecheck check_no_typing 32 | 33 | test: check 34 | tests: check 35 | 36 | deps: 37 | pip install -r requirements.txt 38 | 39 | devdeps: 40 | pip install -e . 
41 | pip install -r requirements-dev.txt 42 | 43 | -------------------------------------------------------------------------------- /README.md: -------------------------------------------------------------------------------- 1 | FFsubsync 2 | ======= 3 | 4 | [![CI Status](https://github.com/smacke/ffsubsync/workflows/ffsubsync/badge.svg)](https://github.com/smacke/ffsubsync/actions) 5 | [![Support Ukraine](https://badgen.net/badge/support/UKRAINE/?color=0057B8&labelColor=FFD700)](https://github.com/vshymanskyy/StandWithUkraine/blob/main/docs/README.md) 6 | [![Checked with mypy](http://www.mypy-lang.org/static/mypy_badge.svg)](http://mypy-lang.org/) 7 | [![Code style: black](https://img.shields.io/badge/code%20style-black-000000.svg)](https://github.com/psf/black) 8 | [![License: MIT](https://img.shields.io/badge/License-MIT-maroon.svg)](https://opensource.org/licenses/MIT) 9 | [![Python Versions](https://img.shields.io/pypi/pyversions/ffsubsync.svg)](https://pypi.org/project/ffsubsync) 10 | [![Documentation Status](https://readthedocs.org/projects/ffsubsync/badge/?version=latest)](https://ffsubsync.readthedocs.io/en/latest/?badge=latest) 11 | [![PyPI Version](https://img.shields.io/pypi/v/ffsubsync.svg)](https://pypi.org/project/ffsubsync) 12 | 13 | 14 | Language-agnostic automatic synchronization of subtitles with video, so that 15 | subtitles are aligned to the correct starting point within the video. 16 | 17 | Turn this: | Into this: 18 | :-------------------------------:|:-------------------------: 19 | ![](https://raw.githubusercontent.com/smacke/ffsubsync/master/resources/img/tearing-me-apart-wrong.gif) | ![](https://raw.githubusercontent.com/smacke/ffsubsync/master/resources/img/tearing-me-apart-correct.gif) 20 | 21 | Helping Development 22 | ------------------- 23 | Please consider [supporting Ukraine](https://github.com/vshymanskyy/StandWithUkraine/blob/main/docs/README.md) 24 | rather than donating directly to this project. 
That said, at the request of 25 | some, you can now help cover my coffee expenses using the Github Sponsors 26 | button at the top, or using the below Paypal Donate button: 27 | 28 | [![Donate](https://www.paypalobjects.com/en_US/i/btn/btn_donate_LG.gif)](https://www.paypal.com/cgi-bin/webscr?cmd=_s-xclick&hosted_button_id=XJC5ANLMYECJE) 29 | 30 | Install 31 | ------- 32 | First, make sure ffmpeg is installed. On MacOS, this looks like: 33 | ~~~ 34 | brew install ffmpeg 35 | ~~~ 36 | (Windows users: make sure `ffmpeg` is on your path and can be referenced 37 | from the command line!) 38 | 39 | Next, grab the package (compatible with Python >= 3.6): 40 | ~~~ 41 | pip install ffsubsync 42 | ~~~ 43 | If you want to live dangerously, you can grab the latest version as follows: 44 | ~~~ 45 | pip install git+https://github.com/smacke/ffsubsync@latest 46 | ~~~ 47 | 48 | Usage 49 | ----- 50 | `ffs`, `subsync` and `ffsubsync` all work as entrypoints: 51 | ~~~ 52 | ffs video.mp4 -i unsynchronized.srt -o synchronized.srt 53 | ~~~ 54 | 55 | There may be occasions where you have a correctly synchronized srt file in a 56 | language you are unfamiliar with, as well as an unsynchronized srt file in your 57 | native language. In this case, you can use the correctly synchronized srt file 58 | directly as a reference for synchronization, instead of using the video as the 59 | reference: 60 | 61 | ~~~ 62 | ffsubsync reference.srt -i unsynchronized.srt -o synchronized.srt 63 | ~~~ 64 | 65 | `ffsubsync` uses the file extension to decide whether to perform voice activity 66 | detection on the audio or to directly extract speech from an srt file. 
67 | 68 | Sync Issues 69 | ----------- 70 | If the sync fails, the following recourses are available: 71 | - Try to sync assuming identical video / subtitle framerates by passing 72 | `--no-fix-framerate`; 73 | - Try passing `--gss` to use [golden-section search](https://en.wikipedia.org/wiki/Golden-section_search) 74 | to find the optimal ratio between video and subtitle framerates (by default, 75 | only a few common ratios are evaluated); 76 | - Try a value of `--max-offset-seconds` greater than the default of 60, in the 77 | event that the subtitles are out of sync by more than 60 seconds (empirically 78 | unlikely in practice, but possible). 79 | - Try `--vad=auditok` since [auditok](https://github.com/amsehili/auditok) can 80 | sometimes work better in the case of low-quality audio than WebRTC's VAD. 81 | Auditok does not specifically detect voice, but instead detects all audio; 82 | this property can yield suboptimal syncing behavior when a proper VAD can 83 | work well, but can be effective in some cases. 84 | 85 | If the sync still fails, consider trying one of the following similar tools: 86 | - [sc0ty/subsync](https://github.com/sc0ty/subsync): does speech-to-text and looks for matching word morphemes 87 | - [kaegi/alass](https://github.com/kaegi/alass): rust-based subtitle synchronizer with a fancy dynamic programming algorithm 88 | - [tympanix/subsync](https://github.com/tympanix/subsync): neural net based approach that optimizes directly for alignment when performing speech detection 89 | - [oseiskar/autosubsync](https://github.com/oseiskar/autosubsync): performs speech detection with bespoke spectrogram + logistic regression 90 | - [pums974/srtsync](https://github.com/pums974/srtsync): similar approach to ffsubsync (WebRTC's VAD + FFT to maximize signal cross correlation) 91 | 92 | Speed 93 | ----- 94 | `ffsubsync` usually finishes in 20 to 30 seconds, depending on the length of 95 | the video. 
The most expensive step is actually extraction of raw audio. If you 96 | already have a correctly synchronized "reference" srt file (in which case audio 97 | extraction can be skipped), `ffsubsync` typically runs in less than a second. 98 | 99 | How It Works 100 | ------------ 101 | The synchronization algorithm operates in 3 steps: 102 | 1. Discretize both the video file's audio stream and the subtitles into 10ms 103 | windows. 104 | 2. For each 10ms window, determine whether that window contains speech. This 105 | is trivial to do for subtitles (we just determine whether any subtitle is 106 | "on" during each time window); for the audio stream, use an off-the-shelf 107 | voice activity detector (VAD) like 108 | the one built into [webrtc](https://webrtc.org/). 109 | 3. Now we have two binary strings: one for the subtitles, and one for the 110 | video. Try to align these strings by matching 0's with 0's and 1's with 111 | 1's. We score these alignments as (# video 1's matched w/ subtitle 1's) - (# 112 | video 1's matched with subtitle 0's). 113 | 114 | The best-scoring alignment from step 3 determines how to offset the subtitles 115 | in time so that they are properly synced with the video. Because the binary 116 | strings are fairly long (millions of digits for video longer than an hour), the 117 | naive O(n^2) strategy for scoring all alignments is unacceptable. Instead, we 118 | use the fact that "scoring all alignments" is a convolution operation and can 119 | be implemented with the Fast Fourier Transform (FFT), bringing the complexity 120 | down to O(n log n). 121 | 122 | Limitations 123 | ----------- 124 | In most cases, inconsistencies between video and subtitles occur when starting 125 | or ending segments present in video are not present in subtitles, or vice versa. 126 | This can occur, for example, when a TV episode recap in the subtitles was pruned 127 | from video. 
FFsubsync typically works well in these cases, and in my experience 128 | this covers >95% of use cases. Handling breaks and splits outside of the beginning 129 | and ending segments is left to future work (see below). 130 | 131 | Future Work 132 | ----------- 133 | Besides general stability and usability improvements, one line 134 | of work aims to extend the synchronization algorithm to handle splits 135 | / breaks in the middle of video not present in subtitles (or vice versa). 136 | Developing a robust solution will take some time (assuming one is possible). 137 | See [#10](https://github.com/smacke/ffsubsync/issues/10) for more details. 138 | 139 | History 140 | ------- 141 | The implementation for this project was started during HackIllinois 2019, for 142 | which it received an **_Honorable Mention_** (ranked in the top 5 projects, 143 | excluding projects that won company-specific prizes). 144 | 145 | Credits 146 | ------- 147 | This project would not be possible without the following libraries: 148 | - [ffmpeg](https://www.ffmpeg.org/) and the [ffmpeg-python](https://github.com/kkroening/ffmpeg-python) wrapper, for extracting raw audio from video 149 | - VAD from [webrtc](https://webrtc.org/) and the [py-webrtcvad](https://github.com/wiseman/py-webrtcvad) wrapper, for speech detection 150 | - [srt](https://pypi.org/project/srt/) for operating on [SRT files](https://en.wikipedia.org/wiki/SubRip#SubRip_text_file_format) 151 | - [numpy](http://www.numpy.org/) and, indirectly, [FFTPACK](https://www.netlib.org/fftpack/), which powers the FFT-based algorithm for fast scoring of alignments between subtitles (or subtitles and video) 152 | - Other excellent Python libraries like [argparse](https://docs.python.org/3/library/argparse.html), [rich](https://github.com/willmcgugan/rich), and [tqdm](https://tqdm.github.io/), not related to the core functionality, but which enable much better experiences for developers and users. 
153 | 154 | # License 155 | Code in this project is [MIT licensed](https://opensource.org/licenses/MIT). 156 | -------------------------------------------------------------------------------- /docs/.gitignore: -------------------------------------------------------------------------------- 1 | _build/ 2 | -------------------------------------------------------------------------------- /docs/Makefile: -------------------------------------------------------------------------------- 1 | # Minimal makefile for Sphinx documentation 2 | # 3 | 4 | # You can set these variables from the command line, and also 5 | # from the environment for the first two. 6 | SPHINXOPTS ?= 7 | SPHINXBUILD ?= sphinx-build 8 | SOURCEDIR = . 9 | BUILDDIR = _build 10 | 11 | # Put it first so that "make" without argument is like "make help". 12 | help: 13 | @$(SPHINXBUILD) -M help "$(SOURCEDIR)" "$(BUILDDIR)" $(SPHINXOPTS) $(O) 14 | 15 | .PHONY: help Makefile 16 | 17 | # Catch-all target: route all unknown targets to Sphinx using the new 18 | # "make mode" option. $(O) is meant as a shortcut for $(SPHINXOPTS). 
19 | %: Makefile 20 | @$(SPHINXBUILD) -M $@ "$(SOURCEDIR)" "$(BUILDDIR)" $(SPHINXOPTS) $(O) 21 | -------------------------------------------------------------------------------- /docs/_static/.keep: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/smacke/ffsubsync/4d275da8b446de4be582d44337e99b2f75b56ebe/docs/_static/.keep -------------------------------------------------------------------------------- /docs/_templates/.keep: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/smacke/ffsubsync/4d275da8b446de4be582d44337e99b2f75b56ebe/docs/_templates/.keep -------------------------------------------------------------------------------- /docs/conf.py: -------------------------------------------------------------------------------- 1 | # Configuration file for the Sphinx documentation builder. 2 | # 3 | # This file only contains a selection of the most common options. For a full 4 | # list see the documentation: 5 | # https://www.sphinx-doc.org/en/master/usage/configuration.html 6 | 7 | # -- Path setup -------------------------------------------------------------- 8 | 9 | # If extensions (or modules to document with autodoc) are in another directory, 10 | # add these directories to sys.path here. If the directory is relative to the 11 | # documentation root, use os.path.abspath to make it absolute, like shown here. 
12 | # 13 | # import os 14 | # import sys 15 | # sys.path.insert(0, os.path.abspath('.')) 16 | 17 | 18 | # -- Project information ----------------------------------------------------- 19 | 20 | project = 'ffsubsync' 21 | copyright = '2020, Stephen Macke' 22 | author = 'Stephen Macke' 23 | 24 | 25 | # -- General configuration --------------------------------------------------- 26 | 27 | # ref: https://stackoverflow.com/questions/56336234/build-fail-sphinx-error-contents-rst-not-found 28 | master_doc = 'index' 29 | 30 | # Add any Sphinx extension module names here, as strings. They can be 31 | # extensions coming with Sphinx (named 'sphinx.ext.*') or your custom 32 | # ones. 33 | extensions = [ 34 | 'sphinx.ext.autodoc', 35 | 'sphinxarg.ext', 36 | 'sphinx_rtd_theme', 37 | ] 38 | 39 | # Add any paths that contain templates here, relative to this directory. 40 | templates_path = ['_templates'] 41 | 42 | # List of patterns, relative to source directory, that match files and 43 | # directories to ignore when looking for source files. 44 | # This pattern also affects html_static_path and html_extra_path. 45 | exclude_patterns = ['_build', 'Thumbs.db', '.DS_Store'] 46 | 47 | 48 | # -- Options for HTML output ------------------------------------------------- 49 | 50 | # The theme to use for HTML and HTML Help pages. See the documentation for 51 | # a list of builtin themes. 52 | # 53 | html_theme = 'sphinx_rtd_theme' 54 | 55 | # Add any paths that contain custom static files (such as style sheets) here, 56 | # relative to this directory. They are copied after the builtin static files, 57 | # so a file named "default.css" will overwrite the builtin "default.css". 58 | html_static_path = ['_static'] 59 | -------------------------------------------------------------------------------- /docs/index.rst: -------------------------------------------------------------------------------- 1 | .. 
ffsubsync documentation master file, created by 2 | sphinx-quickstart on Mon Dec 2 17:06:18 2019. 3 | You can adapt this file completely to your liking, but it should at least 4 | contain the root `toctree` directive. 5 | 6 | Welcome to FFsubsync's documentation! 7 | ===================================== 8 | 9 | .. toctree:: 10 | :maxdepth: 2 11 | :caption: Contents: 12 | 13 | 14 | 15 | Indices and tables 16 | ================== 17 | 18 | * :ref:`genindex` 19 | * :ref:`modindex` 20 | * :ref:`search` 21 | 22 | .. argparse:: 23 | :module: ffsubsync.ffsubsync 24 | :func: make_parser 25 | :prog: ffsubsync 26 | -------------------------------------------------------------------------------- /docs/make.bat: -------------------------------------------------------------------------------- 1 | @ECHO OFF 2 | 3 | pushd %~dp0 4 | 5 | REM Command file for Sphinx documentation 6 | 7 | if "%SPHINXBUILD%" == "" ( 8 | set SPHINXBUILD=sphinx-build 9 | ) 10 | set SOURCEDIR=. 11 | set BUILDDIR=_build 12 | 13 | if "%1" == "" goto help 14 | 15 | %SPHINXBUILD% >NUL 2>NUL 16 | if errorlevel 9009 ( 17 | echo. 18 | echo.The 'sphinx-build' command was not found. Make sure you have Sphinx 19 | echo.installed, then set the SPHINXBUILD environment variable to point 20 | echo.to the full path of the 'sphinx-build' executable. Alternatively you 21 | echo.may add the Sphinx directory to PATH. 22 | echo. 
23 | echo.If you don't have Sphinx installed, grab it from 24 | echo.http://sphinx-doc.org/ 25 | exit /b 1 26 | ) 27 | 28 | %SPHINXBUILD% -M %1 %SOURCEDIR% %BUILDDIR% %SPHINXOPTS% %O% 29 | goto end 30 | 31 | :help 32 | %SPHINXBUILD% -M help %SOURCEDIR% %BUILDDIR% %SPHINXOPTS% %O% 33 | 34 | :end 35 | popd 36 | -------------------------------------------------------------------------------- /docs/requirements-docs.txt: -------------------------------------------------------------------------------- 1 | # docs 2 | autodoc 3 | docutils<0.18 # ref: https://github.com/sphinx-doc/sphinx/issues/9788 4 | sphinx-argparse 5 | sphinx-rtd-theme 6 | -------------------------------------------------------------------------------- /ffsubsync/__init__.py: -------------------------------------------------------------------------------- 1 | # -*- coding: utf-8 -*- 2 | import logging 3 | import sys 4 | 5 | try: 6 | from rich.console import Console 7 | from rich.logging import RichHandler 8 | 9 | # configure logging here because some other later imported library does it first otherwise 10 | # TODO: use a fileconfig 11 | logging.basicConfig( 12 | level=logging.INFO, 13 | format="%(message)s", 14 | datefmt="[%X]", 15 | handlers=[RichHandler(console=Console(file=sys.stderr))], 16 | ) 17 | except: # noqa: E722 18 | logging.basicConfig(stream=sys.stderr, level=logging.INFO) 19 | 20 | from .version import __version__ # noqa 21 | from .ffsubsync import main # noqa 22 | -------------------------------------------------------------------------------- /ffsubsync/_version.py: -------------------------------------------------------------------------------- 1 | 2 | # This file helps to compute a version number in source trees obtained from 3 | # git-archive tarball (such as those provided by githubs download-from-tag 4 | # feature). 
Distribution tarballs (built by setup.py sdist) and build 5 | # directories (produced by setup.py build) will contain a much shorter file 6 | # that just contains the computed version number. 7 | 8 | # This file is released into the public domain. Generated by 9 | # versioneer-0.18 (https://github.com/warner/python-versioneer) 10 | 11 | """Git implementation of _version.py.""" 12 | 13 | import errno 14 | import os 15 | import re 16 | import subprocess 17 | import sys 18 | 19 | 20 | def get_keywords(): 21 | """Get the keywords needed to look up the version information.""" 22 | # these strings will be replaced by git during git-archive. 23 | # setup.py/versioneer.py will grep for the variable names, so they must 24 | # each be defined on a line of their own. _version.py will just call 25 | # get_keywords(). 26 | git_refnames = " (HEAD -> master)" 27 | git_full = "4d275da8b446de4be582d44337e99b2f75b56ebe" 28 | git_date = "2025-02-18 20:04:07 -0800" 29 | keywords = {"refnames": git_refnames, "full": git_full, "date": git_date} 30 | return keywords 31 | 32 | 33 | class VersioneerConfig: 34 | """Container for Versioneer configuration parameters.""" 35 | 36 | 37 | def get_config(): 38 | """Create, populate and return the VersioneerConfig() object.""" 39 | # these strings are filled in when 'setup.py versioneer' creates 40 | # _version.py 41 | cfg = VersioneerConfig() 42 | cfg.VCS = "git" 43 | cfg.style = "pep440-pre" 44 | cfg.tag_prefix = "" 45 | cfg.parentdir_prefix = "ffsubsync-" 46 | cfg.versionfile_source = "ffsubsync/_version.py" 47 | cfg.verbose = False 48 | return cfg 49 | 50 | 51 | class NotThisMethod(Exception): 52 | """Exception raised if a method is not valid for the current scenario.""" 53 | 54 | 55 | LONG_VERSION_PY = {} 56 | HANDLERS = {} 57 | 58 | 59 | def register_vcs_handler(vcs, method): # decorator 60 | """Decorator to mark a method as the handler for a particular VCS.""" 61 | def decorate(f): 62 | """Store f in HANDLERS[vcs][method].""" 63 | if vcs not 
in HANDLERS: 64 | HANDLERS[vcs] = {} 65 | HANDLERS[vcs][method] = f 66 | return f 67 | return decorate 68 | 69 | 70 | def run_command(commands, args, cwd=None, verbose=False, hide_stderr=False, 71 | env=None): 72 | """Call the given command(s).""" 73 | assert isinstance(commands, list) 74 | p = None 75 | for c in commands: 76 | try: 77 | dispcmd = str([c] + args) 78 | # remember shell=False, so use git.cmd on windows, not just git 79 | p = subprocess.Popen([c] + args, cwd=cwd, env=env, 80 | stdout=subprocess.PIPE, 81 | stderr=(subprocess.PIPE if hide_stderr 82 | else None)) 83 | break 84 | except EnvironmentError: 85 | e = sys.exc_info()[1] 86 | if e.errno == errno.ENOENT: 87 | continue 88 | if verbose: 89 | print("unable to run %s" % dispcmd) 90 | print(e) 91 | return None, None 92 | else: 93 | if verbose: 94 | print("unable to find command, tried %s" % (commands,)) 95 | return None, None 96 | stdout = p.communicate()[0].strip() 97 | if sys.version_info[0] >= 3: 98 | stdout = stdout.decode() 99 | if p.returncode != 0: 100 | if verbose: 101 | print("unable to run %s (error)" % dispcmd) 102 | print("stdout was %s" % stdout) 103 | return None, p.returncode 104 | return stdout, p.returncode 105 | 106 | 107 | def versions_from_parentdir(parentdir_prefix, root, verbose): 108 | """Try to determine the version from the parent directory name. 109 | 110 | Source tarballs conventionally unpack into a directory that includes both 111 | the project name and a version string. 
We will also support searching up 112 | two directory levels for an appropriately named parent directory 113 | """ 114 | rootdirs = [] 115 | 116 | for i in range(3): 117 | dirname = os.path.basename(root) 118 | if dirname.startswith(parentdir_prefix): 119 | return {"version": dirname[len(parentdir_prefix):], 120 | "full-revisionid": None, 121 | "dirty": False, "error": None, "date": None} 122 | else: 123 | rootdirs.append(root) 124 | root = os.path.dirname(root) # up a level 125 | 126 | if verbose: 127 | print("Tried directories %s but none started with prefix %s" % 128 | (str(rootdirs), parentdir_prefix)) 129 | raise NotThisMethod("rootdir doesn't start with parentdir_prefix") 130 | 131 | 132 | @register_vcs_handler("git", "get_keywords") 133 | def git_get_keywords(versionfile_abs): 134 | """Extract version information from the given file.""" 135 | # the code embedded in _version.py can just fetch the value of these 136 | # keywords. When used from setup.py, we don't want to import _version.py, 137 | # so we do it with a regexp instead. This function is not used from 138 | # _version.py. 
139 | keywords = {} 140 | try: 141 | f = open(versionfile_abs, "r") 142 | for line in f.readlines(): 143 | if line.strip().startswith("git_refnames ="): 144 | mo = re.search(r'=\s*"(.*)"', line) 145 | if mo: 146 | keywords["refnames"] = mo.group(1) 147 | if line.strip().startswith("git_full ="): 148 | mo = re.search(r'=\s*"(.*)"', line) 149 | if mo: 150 | keywords["full"] = mo.group(1) 151 | if line.strip().startswith("git_date ="): 152 | mo = re.search(r'=\s*"(.*)"', line) 153 | if mo: 154 | keywords["date"] = mo.group(1) 155 | f.close() 156 | except EnvironmentError: 157 | pass 158 | return keywords 159 | 160 | 161 | @register_vcs_handler("git", "keywords") 162 | def git_versions_from_keywords(keywords, tag_prefix, verbose): 163 | """Get version information from git keywords.""" 164 | if not keywords: 165 | raise NotThisMethod("no keywords at all, weird") 166 | date = keywords.get("date") 167 | if date is not None: 168 | # git-2.2.0 added "%cI", which expands to an ISO-8601 -compliant 169 | # datestamp. However we prefer "%ci" (which expands to an "ISO-8601 170 | # -like" string, which we must then edit to make compliant), because 171 | # it's been around since git-1.5.3, and it's too difficult to 172 | # discover which version we're using, or to work around using an 173 | # older one. 174 | date = date.strip().replace(" ", "T", 1).replace(" ", "", 1) 175 | refnames = keywords["refnames"].strip() 176 | if refnames.startswith("$Format"): 177 | if verbose: 178 | print("keywords are unexpanded, not using") 179 | raise NotThisMethod("unexpanded keywords, not a git-archive tarball") 180 | refs = set([r.strip() for r in refnames.strip("()").split(",")]) 181 | # starting in git-1.8.3, tags are listed as "tag: foo-1.0" instead of 182 | # just "foo-1.0". If we see a "tag: " prefix, prefer those. 183 | TAG = "tag: " 184 | tags = set([r[len(TAG):] for r in refs if r.startswith(TAG)]) 185 | if not tags: 186 | # Either we're using git < 1.8.3, or there really are no tags. 
We use 187 | # a heuristic: assume all version tags have a digit. The old git %d 188 | # expansion behaves like git log --decorate=short and strips out the 189 | # refs/heads/ and refs/tags/ prefixes that would let us distinguish 190 | # between branches and tags. By ignoring refnames without digits, we 191 | # filter out many common branch names like "release" and 192 | # "stabilization", as well as "HEAD" and "master". 193 | tags = set([r for r in refs if re.search(r'\d', r)]) 194 | if verbose: 195 | print("discarding '%s', no digits" % ",".join(refs - tags)) 196 | if verbose: 197 | print("likely tags: %s" % ",".join(sorted(tags))) 198 | for ref in sorted(tags): 199 | # sorting will prefer e.g. "2.0" over "2.0rc1" 200 | if ref.startswith(tag_prefix): 201 | r = ref[len(tag_prefix):] 202 | if verbose: 203 | print("picking %s" % r) 204 | return {"version": r, 205 | "full-revisionid": keywords["full"].strip(), 206 | "dirty": False, "error": None, 207 | "date": date} 208 | # no suitable tags, so version is "0+unknown", but full hex is still there 209 | if verbose: 210 | print("no suitable tags, using unknown + full revision id") 211 | return {"version": "0+unknown", 212 | "full-revisionid": keywords["full"].strip(), 213 | "dirty": False, "error": "no suitable tags", "date": None} 214 | 215 | 216 | @register_vcs_handler("git", "pieces_from_vcs") 217 | def git_pieces_from_vcs(tag_prefix, root, verbose, run_command=run_command): 218 | """Get version from 'git describe' in the root of the source tree. 219 | 220 | This only gets called if the git-archive 'subst' keywords were *not* 221 | expanded, and _version.py hasn't already been rewritten with a short 222 | version string, meaning we're inside a checked out source tree. 
223 | """ 224 | GITS = ["git"] 225 | if sys.platform == "win32": 226 | GITS = ["git.cmd", "git.exe"] 227 | 228 | out, rc = run_command(GITS, ["rev-parse", "--git-dir"], cwd=root, 229 | hide_stderr=True) 230 | if rc != 0: 231 | if verbose: 232 | print("Directory %s not under git control" % root) 233 | raise NotThisMethod("'git rev-parse --git-dir' returned error") 234 | 235 | # if there is a tag matching tag_prefix, this yields TAG-NUM-gHEX[-dirty] 236 | # if there isn't one, this yields HEX[-dirty] (no NUM) 237 | describe_out, rc = run_command(GITS, ["describe", "--tags", "--dirty", 238 | "--always", "--long", 239 | "--match", "%s*" % tag_prefix], 240 | cwd=root) 241 | # --long was added in git-1.5.5 242 | if describe_out is None: 243 | raise NotThisMethod("'git describe' failed") 244 | describe_out = describe_out.strip() 245 | full_out, rc = run_command(GITS, ["rev-parse", "HEAD"], cwd=root) 246 | if full_out is None: 247 | raise NotThisMethod("'git rev-parse' failed") 248 | full_out = full_out.strip() 249 | 250 | pieces = {} 251 | pieces["long"] = full_out 252 | pieces["short"] = full_out[:7] # maybe improved later 253 | pieces["error"] = None 254 | 255 | # parse describe_out. It will be like TAG-NUM-gHEX[-dirty] or HEX[-dirty] 256 | # TAG might have hyphens. 257 | git_describe = describe_out 258 | 259 | # look for -dirty suffix 260 | dirty = git_describe.endswith("-dirty") 261 | pieces["dirty"] = dirty 262 | if dirty: 263 | git_describe = git_describe[:git_describe.rindex("-dirty")] 264 | 265 | # now we have TAG-NUM-gHEX or HEX 266 | 267 | if "-" in git_describe: 268 | # TAG-NUM-gHEX 269 | mo = re.search(r'^(.+)-(\d+)-g([0-9a-f]+)$', git_describe) 270 | if not mo: 271 | # unparseable. Maybe git-describe is misbehaving? 
272 | pieces["error"] = ("unable to parse git-describe output: '%s'" 273 | % describe_out) 274 | return pieces 275 | 276 | # tag 277 | full_tag = mo.group(1) 278 | if not full_tag.startswith(tag_prefix): 279 | if verbose: 280 | fmt = "tag '%s' doesn't start with prefix '%s'" 281 | print(fmt % (full_tag, tag_prefix)) 282 | pieces["error"] = ("tag '%s' doesn't start with prefix '%s'" 283 | % (full_tag, tag_prefix)) 284 | return pieces 285 | pieces["closest-tag"] = full_tag[len(tag_prefix):] 286 | 287 | # distance: number of commits since tag 288 | pieces["distance"] = int(mo.group(2)) 289 | 290 | # commit: short hex revision ID 291 | pieces["short"] = mo.group(3) 292 | 293 | else: 294 | # HEX: no tags 295 | pieces["closest-tag"] = None 296 | count_out, rc = run_command(GITS, ["rev-list", "HEAD", "--count"], 297 | cwd=root) 298 | pieces["distance"] = int(count_out) # total number of commits 299 | 300 | # commit date: see ISO-8601 comment in git_versions_from_keywords() 301 | date = run_command(GITS, ["show", "-s", "--format=%ci", "HEAD"], 302 | cwd=root)[0].strip() 303 | pieces["date"] = date.strip().replace(" ", "T", 1).replace(" ", "", 1) 304 | 305 | return pieces 306 | 307 | 308 | def plus_or_dot(pieces): 309 | """Return a + if we don't already have one, else return a .""" 310 | if "+" in pieces.get("closest-tag", ""): 311 | return "." 312 | return "+" 313 | 314 | 315 | def render_pep440(pieces): 316 | """Build up version string, with post-release "local version identifier". 317 | 318 | Our goal: TAG[+DISTANCE.gHEX[.dirty]] . Note that if you 319 | get a tagged build and then dirty it, you'll get TAG+0.gHEX.dirty 320 | 321 | Exceptions: 322 | 1: no tags. git_describe was just HEX. 
0+untagged.DISTANCE.gHEX[.dirty] 323 | """ 324 | if pieces["closest-tag"]: 325 | rendered = pieces["closest-tag"] 326 | if pieces["distance"] or pieces["dirty"]: 327 | rendered += plus_or_dot(pieces) 328 | rendered += "%d.g%s" % (pieces["distance"], pieces["short"]) 329 | if pieces["dirty"]: 330 | rendered += ".dirty" 331 | else: 332 | # exception #1 333 | rendered = "0+untagged.%d.g%s" % (pieces["distance"], 334 | pieces["short"]) 335 | if pieces["dirty"]: 336 | rendered += ".dirty" 337 | return rendered 338 | 339 | 340 | def render_pep440_pre(pieces): 341 | """TAG[.post.devDISTANCE] -- No -dirty. 342 | 343 | Exceptions: 344 | 1: no tags. 0.post.devDISTANCE 345 | """ 346 | if pieces["closest-tag"]: 347 | rendered = pieces["closest-tag"] 348 | if pieces["distance"]: 349 | rendered += ".post.dev%d" % pieces["distance"] 350 | else: 351 | # exception #1 352 | rendered = "0.post.dev%d" % pieces["distance"] 353 | return rendered 354 | 355 | 356 | def render_pep440_post(pieces): 357 | """TAG[.postDISTANCE[.dev0]+gHEX] . 358 | 359 | The ".dev0" means dirty. Note that .dev0 sorts backwards 360 | (a dirty tree will appear "older" than the corresponding clean one), 361 | but you shouldn't be releasing software with -dirty anyways. 362 | 363 | Exceptions: 364 | 1: no tags. 0.postDISTANCE[.dev0] 365 | """ 366 | if pieces["closest-tag"]: 367 | rendered = pieces["closest-tag"] 368 | if pieces["distance"] or pieces["dirty"]: 369 | rendered += ".post%d" % pieces["distance"] 370 | if pieces["dirty"]: 371 | rendered += ".dev0" 372 | rendered += plus_or_dot(pieces) 373 | rendered += "g%s" % pieces["short"] 374 | else: 375 | # exception #1 376 | rendered = "0.post%d" % pieces["distance"] 377 | if pieces["dirty"]: 378 | rendered += ".dev0" 379 | rendered += "+g%s" % pieces["short"] 380 | return rendered 381 | 382 | 383 | def render_pep440_old(pieces): 384 | """TAG[.postDISTANCE[.dev0]] . 385 | 386 | The ".dev0" means dirty. 387 | 388 | Eexceptions: 389 | 1: no tags. 
0.postDISTANCE[.dev0] 390 | """ 391 | if pieces["closest-tag"]: 392 | rendered = pieces["closest-tag"] 393 | if pieces["distance"] or pieces["dirty"]: 394 | rendered += ".post%d" % pieces["distance"] 395 | if pieces["dirty"]: 396 | rendered += ".dev0" 397 | else: 398 | # exception #1 399 | rendered = "0.post%d" % pieces["distance"] 400 | if pieces["dirty"]: 401 | rendered += ".dev0" 402 | return rendered 403 | 404 | 405 | def render_git_describe(pieces): 406 | """TAG[-DISTANCE-gHEX][-dirty]. 407 | 408 | Like 'git describe --tags --dirty --always'. 409 | 410 | Exceptions: 411 | 1: no tags. HEX[-dirty] (note: no 'g' prefix) 412 | """ 413 | if pieces["closest-tag"]: 414 | rendered = pieces["closest-tag"] 415 | if pieces["distance"]: 416 | rendered += "-%d-g%s" % (pieces["distance"], pieces["short"]) 417 | else: 418 | # exception #1 419 | rendered = pieces["short"] 420 | if pieces["dirty"]: 421 | rendered += "-dirty" 422 | return rendered 423 | 424 | 425 | def render_git_describe_long(pieces): 426 | """TAG-DISTANCE-gHEX[-dirty]. 427 | 428 | Like 'git describe --tags --dirty --always -long'. 429 | The distance/hash is unconditional. 430 | 431 | Exceptions: 432 | 1: no tags. 
HEX[-dirty] (note: no 'g' prefix) 433 | """ 434 | if pieces["closest-tag"]: 435 | rendered = pieces["closest-tag"] 436 | rendered += "-%d-g%s" % (pieces["distance"], pieces["short"]) 437 | else: 438 | # exception #1 439 | rendered = pieces["short"] 440 | if pieces["dirty"]: 441 | rendered += "-dirty" 442 | return rendered 443 | 444 | 445 | def render(pieces, style): 446 | """Render the given version pieces into the requested style.""" 447 | if pieces["error"]: 448 | return {"version": "unknown", 449 | "full-revisionid": pieces.get("long"), 450 | "dirty": None, 451 | "error": pieces["error"], 452 | "date": None} 453 | 454 | if not style or style == "default": 455 | style = "pep440" # the default 456 | 457 | if style == "pep440": 458 | rendered = render_pep440(pieces) 459 | elif style == "pep440-pre": 460 | rendered = render_pep440_pre(pieces) 461 | elif style == "pep440-post": 462 | rendered = render_pep440_post(pieces) 463 | elif style == "pep440-old": 464 | rendered = render_pep440_old(pieces) 465 | elif style == "git-describe": 466 | rendered = render_git_describe(pieces) 467 | elif style == "git-describe-long": 468 | rendered = render_git_describe_long(pieces) 469 | else: 470 | raise ValueError("unknown style '%s'" % style) 471 | 472 | return {"version": rendered, "full-revisionid": pieces["long"], 473 | "dirty": pieces["dirty"], "error": None, 474 | "date": pieces.get("date")} 475 | 476 | 477 | def get_versions(): 478 | """Get version information or return default if unable to do so.""" 479 | # I am in _version.py, which lives at ROOT/VERSIONFILE_SOURCE. If we have 480 | # __file__, we can work backwards from there to the root. Some 481 | # py2exe/bbfreeze/non-CPython implementations don't do __file__, in which 482 | # case we can only use expanded keywords. 
483 | 484 | cfg = get_config() 485 | verbose = cfg.verbose 486 | 487 | try: 488 | return git_versions_from_keywords(get_keywords(), cfg.tag_prefix, 489 | verbose) 490 | except NotThisMethod: 491 | pass 492 | 493 | try: 494 | root = os.path.realpath(__file__) 495 | # versionfile_source is the relative path from the top of the source 496 | # tree (where the .git directory might live) to this file. Invert 497 | # this to find the root from __file__. 498 | for i in cfg.versionfile_source.split('/'): 499 | root = os.path.dirname(root) 500 | except NameError: 501 | return {"version": "0+unknown", "full-revisionid": None, 502 | "dirty": None, 503 | "error": "unable to find root of source tree", 504 | "date": None} 505 | 506 | try: 507 | pieces = git_pieces_from_vcs(cfg.tag_prefix, root, verbose) 508 | return render(pieces, cfg.style) 509 | except NotThisMethod: 510 | pass 511 | 512 | try: 513 | if cfg.parentdir_prefix: 514 | return versions_from_parentdir(cfg.parentdir_prefix, root, verbose) 515 | except NotThisMethod: 516 | pass 517 | 518 | return {"version": "0+unknown", "full-revisionid": None, 519 | "dirty": None, 520 | "error": "unable to compute version", "date": None} 521 | -------------------------------------------------------------------------------- /ffsubsync/aligners.py: -------------------------------------------------------------------------------- 1 | # -*- coding: utf-8 -*- 2 | import logging 3 | import math 4 | from typing import List, Optional, Tuple, Type, Union 5 | 6 | import numpy as np 7 | 8 | from ffsubsync.golden_section_search import gss 9 | from ffsubsync.sklearn_shim import Pipeline, TransformerMixin 10 | 11 | 12 | logging.basicConfig(level=logging.INFO) 13 | logger: logging.Logger = logging.getLogger(__name__) 14 | 15 | 16 | MIN_FRAMERATE_RATIO = 0.9 17 | MAX_FRAMERATE_RATIO = 1.1 18 | 19 | 20 | class FailedToFindAlignmentException(Exception): 21 | pass 22 | 23 | 24 | class FFTAligner(TransformerMixin): 25 | def __init__(self, 
max_offset_samples: Optional[int] = None) -> None: 26 | self.max_offset_samples: Optional[int] = max_offset_samples 27 | self.best_offset_: Optional[int] = None 28 | self.best_score_: Optional[float] = None 29 | self.get_score_: bool = False 30 | 31 | def _eliminate_extreme_offsets_from_solutions( 32 | self, convolve: np.ndarray, substring: np.ndarray 33 | ) -> np.ndarray: 34 | convolve = np.copy(convolve) 35 | if self.max_offset_samples is None: 36 | return convolve 37 | 38 | def _offset_to_index(offset): 39 | return len(convolve) - 1 + offset - len(substring) 40 | 41 | convolve[: _offset_to_index(-self.max_offset_samples)] = float("-inf") 42 | convolve[_offset_to_index(self.max_offset_samples) :] = float("-inf") 43 | return convolve 44 | 45 | def _compute_argmax(self, convolve: np.ndarray, substring: np.ndarray) -> None: 46 | best_idx = int(np.argmax(convolve)) 47 | self.best_offset_ = len(convolve) - 1 - best_idx - len(substring) 48 | self.best_score_ = convolve[best_idx] 49 | 50 | def fit(self, refstring, substring, get_score: bool = False) -> "FFTAligner": 51 | refstring, substring = [ 52 | list(map(int, s)) if isinstance(s, str) else s 53 | for s in [refstring, substring] 54 | ] 55 | refstring, substring = map( 56 | lambda s: 2 * np.array(s).astype(float) - 1, [refstring, substring] 57 | ) 58 | total_bits = math.log(len(substring) + len(refstring), 2) 59 | total_length = int(2 ** math.ceil(total_bits)) 60 | extra_zeros = total_length - len(substring) - len(refstring) 61 | subft = np.fft.fft(np.append(np.zeros(extra_zeros + len(refstring)), substring)) 62 | refft = np.fft.fft( 63 | np.flip(np.append(refstring, np.zeros(len(substring) + extra_zeros)), 0) 64 | ) 65 | convolve = np.real(np.fft.ifft(subft * refft)) 66 | self._compute_argmax( 67 | self._eliminate_extreme_offsets_from_solutions(convolve, substring), 68 | substring, 69 | ) 70 | self.get_score_ = get_score 71 | return self 72 | 73 | def transform(self, *_) -> Union[int, Tuple[float, int]]: 74 | if 
class MaxScoreAligner(TransformerMixin):
    """Runs a base aligner over several candidate subtitle pipelines and keeps
    the (score, offset, pipeline) triple with the highest alignment score."""

    def __init__(
        self,
        base_aligner: Union[FFTAligner, Type[FFTAligner]],
        srtin: Optional[str] = None,
        sample_rate=None,
        max_offset_seconds=None,
    ) -> None:
        # Path to the input subtitle file (None when reading from stdin).
        self.srtin: Optional[str] = srtin
        # The offset cap is only meaningful when both the rate and the cap in
        # seconds are known; otherwise offsets are unconstrained.
        if sample_rate is None or max_offset_seconds is None:
            self.max_offset_samples: Optional[int] = None
        else:
            self.max_offset_samples = abs(int(max_offset_seconds * sample_rate))
        # Accept either an aligner class (instantiated here with the offset
        # cap) or a ready-made instance.
        if isinstance(base_aligner, type):
            self.base_aligner: FFTAligner = base_aligner(
                max_offset_samples=self.max_offset_samples
            )
        else:
            self.base_aligner = base_aligner
        self.max_offset_seconds: Optional[int] = max_offset_seconds
        # Accumulates ((score, offset), pipeline) candidates across fits.
        self._scores: List[Tuple[Tuple[float, int], Pipeline]] = []

    def fit_gss(self, refstring, subpipe_maker):
        """Golden-section search over the framerate ratio, scoring each
        candidate ratio with the base aligner; only the final iteration's
        result is recorded as a candidate."""

        def opt_func(framerate_ratio, is_last_iter):
            subpipe = subpipe_maker(framerate_ratio)
            substring = subpipe.fit_transform(self.srtin)
            score = self.base_aligner.fit_transform(
                refstring, substring, get_score=True
            )
            logger.info(
                "got score %.0f (offset %d) for ratio %.3f",
                score[0],
                score[1],
                framerate_ratio,
            )
            if is_last_iter:
                self._scores.append((score, subpipe))
            # gss minimizes, so negate the score.
            return -score[0]

        gss(opt_func, MIN_FRAMERATE_RATIO, MAX_FRAMERATE_RATIO)
        return self

    def fit(
        self, refstring, subpipes: Union[Pipeline, List[Pipeline]]
    ) -> "MaxScoreAligner":
        """Score every candidate pipeline (or callable pipeline maker, which
        triggers a golden-section search) against the reference."""
        if not isinstance(subpipes, list):
            subpipes = [subpipes]
        for subpipe in subpipes:
            if callable(subpipe):
                # A callable candidate is a pipeline factory keyed on
                # framerate ratio; search for the best ratio.
                self.fit_gss(refstring, subpipe)
                continue
            elif hasattr(subpipe, "transform"):
                substring = subpipe.transform(self.srtin)
            else:
                # Already-materialized speech signal.
                substring = subpipe
            self._scores.append(
                (
                    self.base_aligner.fit_transform(
                        refstring, substring, get_score=True
                    ),
                    subpipe,
                )
            )
        return self

    def transform(self, *_) -> Tuple[Tuple[float, float], Pipeline]:
        """Return the best ((score, offset), pipeline); raises
        FailedToFindAlignmentException when every candidate exceeded the
        allowed offset."""
        scores = self._scores
        if self.max_offset_samples is not None:
            scores = list(
                filter(lambda s: abs(s[0][1]) <= self.max_offset_samples, scores)
            )
        if len(scores) == 0:
            raise FailedToFindAlignmentException(
                "Synchronization failed; consider passing "
                "--max-offset-seconds with a number larger than "
                "{}".format(self.max_offset_seconds)
            )
        (score, offset), subpipe = max(scores, key=lambda x: x[0][0])
        return (score, offset), subpipe
# -*- coding: utf-8 -*-
from typing import List, Tuple


# Magic environment variable used to locate bundled resources (PyInstaller).
SUBSYNC_RESOURCES_ENV_MAGIC: str = "ffsubsync_resources_xj48gjdkl340"

# Speech signals are discretized at this many samples per second.
SAMPLE_RATE: int = 100

# Common video/subtitle framerate mismatches to try correcting for.
FRAMERATE_RATIOS: List[float] = [24.0 / 23.976, 25.0 / 23.976, 25.0 / 24.0]

# --- Defaults for CLI options -------------------------------------------------
DEFAULT_FRAME_RATE: int = 48000
DEFAULT_NON_SPEECH_LABEL: float = 0.0
DEFAULT_ENCODING: str = "infer"
DEFAULT_MAX_SUBTITLE_SECONDS: int = 10
DEFAULT_START_SECONDS: int = 0
DEFAULT_SCALE_FACTOR: float = 1
DEFAULT_VAD: str = "subs_then_webrtc"
DEFAULT_MAX_OFFSET_SECONDS: int = 60
DEFAULT_APPLY_OFFSET_SECONDS: int = 0

# Subtitle file extensions recognized by the tool.
SUBTITLE_EXTENSIONS: Tuple[str, ...] = ("srt", "ass", "ssa", "sub")

# --- Project metadata ---------------------------------------------------------
GITHUB_DEV_USER: str = "smacke"
PROJECT_NAME: str = "FFsubsync"
PROJECT_LICENSE: str = "MIT"
COPYRIGHT_YEAR: str = "2019"
GITHUB_REPO: str = "ffsubsync"
DESCRIPTION: str = "Synchronize subtitles with video."
LONG_DESCRIPTION: str = (
    "Automatic and language-agnostic synchronization of subtitles with video."
)
WEBSITE: str = "https://github.com/{}/{}/".format(GITHUB_DEV_USER, GITHUB_REPO)
DEV_WEBSITE: str = "https://smacke.net/"

# No trailing slash important for this one...
API_RELEASE_URL: str = "https://api.github.com/repos/{}/{}/releases/latest".format(
    GITHUB_DEV_USER, GITHUB_REPO
)
RELEASE_URL: str = "https://github.com/{}/{}/releases/latest/".format(
    GITHUB_DEV_USER, GITHUB_REPO
)
def subprocess_args(include_stdout=True):
    """Build keyword arguments for ``subprocess`` calls that work both from
    source and from a PyInstaller ``--noconsole`` bundle on Windows.

    On Windows, suppresses the console window that would otherwise pop up and
    passes the environment explicitly so the PATH is searched. All standard
    handles are redirected to pipes to avoid "[Error 6] the handle is invalid"
    under ``--noconsole``. ``stdout`` is only included when requested, since
    ``subprocess.check_output`` forbids overriding it.
    """
    startupinfo = None
    env = None
    # subprocess.STARTUPINFO only exists on Windows.
    if hasattr(subprocess, "STARTUPINFO"):
        startupinfo = subprocess.STARTUPINFO()
        startupinfo.dwFlags |= subprocess.STARTF_USESHOWWINDOW
        env = os.environ

    popen_kwargs = {"stdout": subprocess.PIPE} if include_stdout else {}
    popen_kwargs.update(
        {
            "stdin": subprocess.PIPE,
            "stderr": subprocess.PIPE,
            "startupinfo": startupinfo,
            "env": env,
        }
    )
    return popen_kwargs


def ffmpeg_bin_path(bin_name, gui_mode, ffmpeg_resources_path=None):
    """Resolve the path to an ffmpeg-family binary (``ffmpeg``/``ffprobe``).

    Resolution order: an explicit resources path (directory, or a direct path
    to the ffmpeg binary), then the bundled-resources env var, then the bare
    binary name (searched on the system PATH).
    """
    if platform.system() == "Windows":
        bin_name = "{}.exe".format(bin_name)
    if ffmpeg_resources_path is not None:
        if os.path.isdir(ffmpeg_resources_path):
            return os.path.join(ffmpeg_resources_path, bin_name)
        # Not a directory: treat it as a direct path to the ffmpeg binary
        # itself, or derive a sibling path for other binaries.
        if bin_name.lower().startswith("ffmpeg"):
            return ffmpeg_resources_path
        return os.path.join(os.path.dirname(ffmpeg_resources_path), bin_name)
    try:
        resource_path = os.environ[SUBSYNC_RESOURCES_ENV_MAGIC]
        if len(resource_path) > 0:
            return os.path.join(resource_path, "ffmpeg-bin", bin_name)
    except KeyError:
        if gui_mode:
            logger.info(
                "Couldn't find resource path; falling back to searching system path"
            )
    return bin_name
-------------------------------------------------------------------------------- 1 | #!/usr/bin/env python 2 | # -*- coding: utf-8 -*- 3 | import argparse 4 | from datetime import datetime 5 | import logging 6 | import os 7 | import shutil 8 | import subprocess 9 | import sys 10 | from typing import cast, Any, Callable, Dict, List, Optional, Tuple, Union 11 | 12 | import numpy as np 13 | 14 | from ffsubsync.aligners import FFTAligner, MaxScoreAligner 15 | from ffsubsync.constants import ( 16 | DEFAULT_APPLY_OFFSET_SECONDS, 17 | DEFAULT_FRAME_RATE, 18 | DEFAULT_MAX_OFFSET_SECONDS, 19 | DEFAULT_MAX_SUBTITLE_SECONDS, 20 | DEFAULT_NON_SPEECH_LABEL, 21 | DEFAULT_START_SECONDS, 22 | DEFAULT_VAD, 23 | DEFAULT_ENCODING, 24 | FRAMERATE_RATIOS, 25 | SAMPLE_RATE, 26 | SUBTITLE_EXTENSIONS, 27 | ) 28 | from ffsubsync.ffmpeg_utils import ffmpeg_bin_path 29 | from ffsubsync.sklearn_shim import Pipeline, TransformerMixin 30 | from ffsubsync.speech_transformers import ( 31 | VideoSpeechTransformer, 32 | DeserializeSpeechTransformer, 33 | make_subtitle_speech_pipeline, 34 | ) 35 | from ffsubsync.subtitle_parser import make_subtitle_parser 36 | from ffsubsync.subtitle_transformers import SubtitleMerger, SubtitleShifter 37 | from ffsubsync.version import get_version 38 | 39 | 40 | logger: logging.Logger = logging.getLogger(__name__) 41 | 42 | 43 | def override(args: argparse.Namespace, **kwargs: Any) -> Dict[str, Any]: 44 | args_dict = dict(args.__dict__) 45 | args_dict.update(kwargs) 46 | return args_dict 47 | 48 | 49 | def _ref_format(ref_fname: Optional[str]) -> Optional[str]: 50 | if ref_fname is None: 51 | return None 52 | return ref_fname[-3:] 53 | 54 | 55 | def make_test_case( 56 | args: argparse.Namespace, npy_savename: Optional[str], sync_was_successful: bool 57 | ) -> int: 58 | if npy_savename is None: 59 | raise ValueError("need non-null npy_savename") 60 | tar_dir = "{}.{}".format( 61 | args.reference, datetime.now().strftime("%Y-%m-%d-%H-%M-%S") 62 | ) 63 | 
def make_test_case(
    args: argparse.Namespace, npy_savename: Optional[str], sync_was_successful: bool
) -> int:
    """Bundle logs, input/output subtitles, and serialized reference speech
    into an archive for debugging. Returns 0 on success, 1 on failure."""
    if npy_savename is None:
        raise ValueError("need non-null npy_savename")
    # Staging directory name includes a timestamp to avoid collisions.
    tar_dir = "{}.{}".format(
        args.reference, datetime.now().strftime("%Y-%m-%d-%H-%M-%S")
    )
    logger.info("creating test archive {}.tar.gz...".format(tar_dir))
    os.mkdir(tar_dir)
    try:
        log_path = "ffsubsync.log"
        if args.log_dir_path is not None and os.path.isdir(args.log_dir_path):
            log_path = os.path.join(args.log_dir_path, log_path)
        shutil.copy(log_path, tar_dir)
        shutil.copy(args.srtin[0], tar_dir)
        if sync_was_successful:
            shutil.move(args.srtout, tar_dir)
        if _ref_format(args.reference) in SUBTITLE_EXTENSIONS:
            shutil.copy(args.reference, tar_dir)
        elif args.serialize_speech or args.reference == npy_savename:
            # The .npz is wanted elsewhere too, so copy rather than move.
            shutil.copy(npy_savename, tar_dir)
        else:
            shutil.move(npy_savename, tar_dir)
        # Pick the first archive format this platform supports, by preference.
        supported_formats = set(list(zip(*shutil.get_archive_formats()))[0])
        preferred_formats = ["gztar", "bztar", "xztar", "zip", "tar"]
        for archive_format in preferred_formats:
            if archive_format in supported_formats:
                shutil.make_archive(tar_dir, archive_format, os.curdir, tar_dir)
                break
        else:
            logger.error(
                "failed to create test archive; no formats supported "
                "(this should not happen)"
            )
            return 1
        logger.info("...done")
    finally:
        # Always remove the staging directory, archived or not.
        shutil.rmtree(tar_dir)
    return 0


def get_srt_pipe_maker(
    args: argparse.Namespace, srtin: Optional[str]
) -> Callable[[Optional[float]], Union[Pipeline, Callable[[float], Pipeline]]]:
    """Return a factory that builds a subtitle speech pipeline for a given
    framerate scale factor, sharing one (caching) parser across candidates."""
    if srtin is None:
        # stdin input: assume srt format.
        srtin_format = "srt"
    else:
        srtin_format = os.path.splitext(srtin)[-1][1:]
    parser = make_subtitle_parser(fmt=srtin_format, caching=True, **args.__dict__)
    return lambda scale_factor: make_subtitle_speech_pipeline(
        **override(args, scale_factor=scale_factor, parser=parser)
    )
def get_framerate_ratios_to_try(args: argparse.Namespace) -> List[Optional[float]]:
    """Candidate framerate correction ratios: each known ratio and its
    inverse, plus ``None`` (meaning "search via gss") when requested.
    Empty when framerate fixing is disabled."""
    if args.no_fix_framerate:
        return []
    ratios = np.array(FRAMERATE_RATIOS)
    candidates: List[Optional[float]] = list(np.concatenate([ratios, 1.0 / ratios]))
    if args.gss:
        # None is a sentinel telling downstream code to run
        # golden-section search for the optimal ratio.
        candidates.append(None)
    return candidates
def try_sync(
    args: argparse.Namespace, reference_pipe: Optional[Pipeline], result: Dict[str, Any]
) -> bool:
    """Synchronize each input subtitle file against the (already fitted)
    reference pipeline, writing outputs and recording offset/scale into
    ``result``. Returns True iff every file synced successfully."""
    result["sync_was_successful"] = False
    sync_was_successful = True
    logger.info(
        "extracting speech segments from %s...",
        "stdin" if not args.srtin else "subtitles file(s) {}".format(args.srtin),
    )
    if not args.srtin:
        # None element means "read subtitles from stdin".
        args.srtin = [None]
    for srtin in args.srtin:
        try:
            # With no reference we can only apply a fixed offset, not sync.
            skip_sync = args.skip_sync or reference_pipe is None
            skip_infer_framerate_ratio = (
                args.skip_infer_framerate_ratio or reference_pipe is None
            )
            srtout = srtin if args.overwrite_input else args.srtout
            srt_pipe_maker = get_srt_pipe_maker(args, srtin)
            framerate_ratios = get_framerate_ratios_to_try(args)
            # Always include the identity ratio as the first candidate.
            srt_pipes = [srt_pipe_maker(1.0)] + [
                srt_pipe_maker(rat) for rat in framerate_ratios
            ]
            for srt_pipe in srt_pipes:
                if callable(srt_pipe):
                    # Callable candidates (gss) are fitted lazily by the aligner.
                    continue
                else:
                    srt_pipe.fit(srtin)
            # Optionally add a candidate whose ratio is inferred from the
            # duration ratio of reference vs. subtitles.
            if not skip_infer_framerate_ratio and hasattr(
                reference_pipe[-1], "num_frames"
            ):
                inferred_framerate_ratio_from_length = (
                    float(reference_pipe[-1].num_frames)
                    / cast(Pipeline, srt_pipes[0])[-1].num_frames
                )
                logger.info(
                    "inferred frameratio ratio: %.3f"
                    % inferred_framerate_ratio_from_length
                )
                srt_pipes.append(
                    cast(
                        Pipeline, srt_pipe_maker(inferred_framerate_ratio_from_length)
                    ).fit(srtin)
                )
            logger.info("...done")
            logger.info("computing alignments...")
            if skip_sync:
                best_score = 0.0
                best_srt_pipe = cast(Pipeline, srt_pipes[0])
                offset_samples = 0
            else:
                (best_score, offset_samples), best_srt_pipe = MaxScoreAligner(
                    FFTAligner, srtin, SAMPLE_RATE, args.max_offset_seconds
                ).fit_transform(
                    reference_pipe.transform(args.reference),
                    srt_pipes,
                )
                # A negative best score means even the best alignment was
                # worse than chance; treat the sync as failed.
                if best_score < 0:
                    sync_was_successful = False
            logger.info("...done")
            offset_seconds = (
                offset_samples / float(SAMPLE_RATE) + args.apply_offset_seconds
            )
            scale_step = best_srt_pipe.named_steps["scale"]
            logger.info("score: %.3f", best_score)
            logger.info("offset seconds: %.3f", offset_seconds)
            logger.info("framerate scale factor: %.3f", scale_step.scale_factor)
            output_steps: List[Tuple[str, TransformerMixin]] = [
                ("shift", SubtitleShifter(offset_seconds))
            ]
            if args.merge_with_reference:
                output_steps.append(
                    ("merge", SubtitleMerger(reference_pipe.named_steps["parse"].subs_))
                )
            output_pipe = Pipeline(output_steps)
            out_subs = output_pipe.fit_transform(scale_step.subs_)
            if args.output_encoding != "same":
                out_subs = out_subs.set_encoding(args.output_encoding)
            suppress_output_thresh = args.suppress_output_if_offset_less_than
            # A None threshold becomes -inf, i.e. never suppress.
            if offset_seconds >= (suppress_output_thresh or float("-inf")):
                logger.info("writing output to {}".format(srtout or "stdout"))
                out_subs.write_file(srtout)
            else:
                logger.warning(
                    "suppressing output because offset %s was less than suppression threshold %s",
                    offset_seconds,
                    args.suppress_output_if_offset_less_than,
                )
        except Exception:
            # One bad file shouldn't abort the remaining inputs.
            sync_was_successful = False
            logger.exception("failed to sync %s", srtin)
        else:
            result["offset_seconds"] = offset_seconds
            result["framerate_scale_factor"] = scale_step.scale_factor
    result["sync_was_successful"] = sync_was_successful
    return sync_was_successful
def make_reference_pipe(args: argparse.Namespace) -> Pipeline:
    """Build the pipeline that turns the reference (subtitles, serialized
    speech, or a video file) into a discretized speech signal."""
    ref_format = _ref_format(args.reference)
    if ref_format in SUBTITLE_EXTENSIONS:
        # Reference is itself a subtitle file; VAD is irrelevant.
        if args.vad is not None:
            logger.warning("Vad specified, but reference was not a movie")
        return cast(
            Pipeline,
            make_subtitle_speech_pipeline(
                fmt=ref_format,
                **override(args, encoding=args.reference_encoding or DEFAULT_ENCODING),
            ),
        )
    elif ref_format in ("npy", "npz"):
        # Previously serialized speech signal.
        if args.vad is not None:
            logger.warning("Vad specified, but reference was not a movie")
        return Pipeline(
            [("deserialize", DeserializeSpeechTransformer(args.non_speech_label))]
        )
    else:
        # Assume the reference is a video/audio file; extract speech via VAD.
        vad = args.vad or DEFAULT_VAD
        if args.reference_encoding is not None:
            logger.warning(
                "Reference srt encoding specified, but reference was a video file"
            )
        ref_stream = args.reference_stream
        # Normalize stream specifiers like "a:3" to ffmpeg's "0:a:3".
        if ref_stream is not None and not ref_stream.startswith("0:"):
            ref_stream = "0:" + ref_stream
        return Pipeline(
            [
                (
                    "speech_extract",
                    VideoSpeechTransformer(
                        vad=vad,
                        sample_rate=SAMPLE_RATE,
                        frame_rate=args.frame_rate,
                        non_speech_label=args.non_speech_label,
                        start_seconds=args.start_seconds,
                        ffmpeg_path=args.ffmpeg_path,
                        ref_stream=ref_stream,
                        vlc_mode=args.vlc_mode,
                        gui_mode=args.gui_mode,
                    ),
                ),
            ]
        )
293 | "srt", 294 | ] 295 | ) 296 | if args.srtout is None: 297 | ffmpeg_args.append("-") 298 | else: 299 | ffmpeg_args.append(args.srtout) 300 | logger.info( 301 | "attempting to extract subtitles to {} ...".format( 302 | "stdout" if args.srtout is None else args.srtout 303 | ) 304 | ) 305 | retcode = subprocess.call(ffmpeg_args) 306 | if retcode == 0: 307 | logger.info("...done") 308 | else: 309 | logger.error( 310 | "ffmpeg unable to extract subtitles from reference; return code %d", retcode 311 | ) 312 | return retcode 313 | 314 | 315 | def validate_args(args: argparse.Namespace) -> None: 316 | if args.vlc_mode: 317 | logger.setLevel(logging.CRITICAL) 318 | if args.reference is None: 319 | if args.apply_offset_seconds == 0 or not args.srtin: 320 | raise ValueError( 321 | "`reference` required unless `--apply-offset-seconds` specified" 322 | ) 323 | if args.apply_offset_seconds != 0: 324 | if not args.srtin: 325 | args.srtin = [args.reference] 326 | if not args.srtin: 327 | raise ValueError( 328 | "at least one of `srtin` or `reference` must be specified to apply offset seconds" 329 | ) 330 | if args.srtin: 331 | if len(args.srtin) > 1 and not args.overwrite_input: 332 | raise ValueError( 333 | "cannot specify multiple input srt files without overwriting" 334 | ) 335 | if len(args.srtin) > 1 and args.make_test_case: 336 | raise ValueError("cannot specify multiple input srt files for test cases") 337 | if len(args.srtin) > 1 and args.gui_mode: 338 | raise ValueError("cannot specify multiple input srt files in GUI mode") 339 | if ( 340 | args.make_test_case and not args.gui_mode 341 | ): # this validation not necessary for gui mode 342 | if not args.srtin or args.srtout is None: 343 | raise ValueError( 344 | "need to specify input and output srt files for test cases" 345 | ) 346 | if args.overwrite_input: 347 | if args.extract_subs_from_stream is not None: 348 | raise ValueError( 349 | "input overwriting not allowed for extracting subtitles from reference" 350 | ) 
def validate_args(args: argparse.Namespace) -> None:
    """Raise ``ValueError`` for any inconsistent combination of CLI options.

    May mutate ``args`` (fills ``srtin`` from ``reference`` when only an
    offset is being applied).
    """
    if args.vlc_mode:
        # VLC drives the tool itself; silence our logging.
        logger.setLevel(logging.CRITICAL)
    if args.reference is None and (args.apply_offset_seconds == 0 or not args.srtin):
        raise ValueError(
            "`reference` required unless `--apply-offset-seconds` specified"
        )
    if args.apply_offset_seconds != 0:
        if not args.srtin:
            # Allow shifting the "reference" subtitles in place.
            args.srtin = [args.reference]
        if not args.srtin:
            raise ValueError(
                "at least one of `srtin` or `reference` must be specified to apply offset seconds"
            )
    if args.srtin and len(args.srtin) > 1:
        if not args.overwrite_input:
            raise ValueError(
                "cannot specify multiple input srt files without overwriting"
            )
        if args.make_test_case:
            raise ValueError("cannot specify multiple input srt files for test cases")
        if args.gui_mode:
            raise ValueError("cannot specify multiple input srt files in GUI mode")
    # This validation is not necessary for gui mode.
    if args.make_test_case and not args.gui_mode:
        if not args.srtin or args.srtout is None:
            raise ValueError(
                "need to specify input and output srt files for test cases"
            )
    if args.overwrite_input:
        if args.extract_subs_from_stream is not None:
            raise ValueError(
                "input overwriting not allowed for extracting subtitles from reference"
            )
        if not args.srtin:
            raise ValueError(
                "need to specify input srt if --overwrite-input "
                "is specified since we cannot overwrite stdin"
            )
        if args.srtout is not None:
            raise ValueError(
                "overwrite input set but output file specified; "
                "refusing to run in case this was not intended"
            )
    if args.extract_subs_from_stream is not None:
        if args.make_test_case:
            raise ValueError("test case is for sync and not subtitle extraction")
        if args.srtin:
            raise ValueError(
                "stream specified for reference subtitle extraction; "
                "-i flag for sync input not allowed"
            )


def validate_file_permissions(args: argparse.Namespace) -> None:
    """Raise ``ValueError`` when any referenced file is missing or lacks the
    required read/write permissions."""
    error_string_template = (
        "unable to {action} {file}; "
        "try ensuring file exists and has correct permissions"
    )
    if args.reference is not None and not os.access(args.reference, os.R_OK):
        raise ValueError(
            error_string_template.format(action="read reference", file=args.reference)
        )
    for srtin in args.srtin or []:
        if srtin is not None and not os.access(srtin, os.R_OK):
            raise ValueError(
                error_string_template.format(
                    action="read input subtitles", file=srtin
                )
            )
    if (
        args.srtout is not None
        and os.path.exists(args.srtout)
        and not os.access(args.srtout, os.W_OK)
    ):
        raise ValueError(
            error_string_template.format(
                action="write output subtitles", file=args.srtout
            )
        )
    if args.make_test_case or args.serialize_speech:
        npy_savename = os.path.splitext(args.reference)[0] + ".npz"
        if os.path.exists(npy_savename) and not os.access(npy_savename, os.W_OK):
            raise ValueError(
                "unable to write test case file archive %s (try checking permissions)"
                % npy_savename
            )
Tuple[Optional[str], Optional[logging.FileHandler]]: 410 | log_handler = None 411 | log_path = None 412 | if args.make_test_case or args.log_dir_path is not None: 413 | log_path = "ffsubsync.log" 414 | if args.log_dir_path is not None and os.path.isdir(args.log_dir_path): 415 | log_path = os.path.join(args.log_dir_path, log_path) 416 | log_handler = logging.FileHandler(log_path) 417 | logger.addHandler(log_handler) 418 | logger.info("this log will be written to %s", os.path.abspath(log_path)) 419 | return log_path, log_handler 420 | 421 | 422 | def _npy_savename(args: argparse.Namespace) -> str: 423 | return os.path.splitext(args.reference)[0] + ".npz" 424 | 425 | 426 | def _run_impl(args: argparse.Namespace, result: Dict[str, Any]) -> bool: 427 | if args.extract_subs_from_stream is not None: 428 | result["retval"] = extract_subtitles_from_reference(args) 429 | return True 430 | if args.srtin is not None and ( 431 | args.reference is None 432 | or (len(args.srtin) == 1 and args.srtin[0] == args.reference) 433 | ): 434 | return try_sync(args, None, result) 435 | reference_pipe = make_reference_pipe(args) 436 | logger.info("extracting speech segments from reference '%s'...", args.reference) 437 | reference_pipe.fit(args.reference) 438 | logger.info("...done") 439 | if args.make_test_case or args.serialize_speech: 440 | logger.info("serializing speech...") 441 | np.savez_compressed( 442 | _npy_savename(args), speech=reference_pipe.transform(args.reference) 443 | ) 444 | logger.info("...done") 445 | if not args.srtin: 446 | logger.info( 447 | "unsynchronized subtitle file not specified; skipping synchronization" 448 | ) 449 | return False 450 | return try_sync(args, reference_pipe, result) 451 | 452 | 453 | def validate_and_transform_args( 454 | parser_or_args: Union[argparse.ArgumentParser, argparse.Namespace] 455 | ) -> Optional[argparse.Namespace]: 456 | if isinstance(parser_or_args, argparse.Namespace): 457 | parser = None 458 | args = parser_or_args 459 | else: 
def validate_and_transform_args(
    parser_or_args: Union[argparse.ArgumentParser, argparse.Namespace]
) -> Optional[argparse.Namespace]:
    """Parse (if needed) and validate arguments; returns the namespace, or
    None when validation fails (errors are logged rather than raised)."""
    if isinstance(parser_or_args, argparse.Namespace):
        parser = None
        args = parser_or_args
    else:
        parser = parser_or_args
        args = parser.parse_args()
    try:
        validate_args(args)
    except ValueError as e:
        logger.error(e)
        if parser is not None:
            parser.print_usage()
        return None
    # GUI mode derives a default output name from the input file.
    if args.gui_mode and args.srtout is None:
        args.srtout = "{}.synced.srt".format(os.path.splitext(args.srtin[0])[0])
    try:
        validate_file_permissions(args)
    except ValueError as e:
        logger.error(e)
        return None
    ref_format = _ref_format(args.reference)
    if args.merge_with_reference and ref_format not in SUBTITLE_EXTENSIONS:
        logger.error(
            "merging synced output with reference only valid "
            "when reference composed of subtitles"
        )
        return None
    return args


def run(
    parser_or_args: Union[argparse.ArgumentParser, argparse.Namespace]
) -> Dict[str, Any]:
    """Top-level entry: validate args, run the sync, and return a result dict
    with keys retval / offset_seconds / framerate_scale_factor /
    sync_was_successful."""
    sync_was_successful = False
    result = {
        "retval": 0,
        "offset_seconds": None,
        "framerate_scale_factor": None,
    }
    args = validate_and_transform_args(parser_or_args)
    if args is None:
        result["retval"] = 1
        return result
    log_path, log_handler = _setup_logging(args)
    try:
        sync_was_successful = _run_impl(args, result)
        result["sync_was_successful"] = sync_was_successful
        return result
    finally:
        # NOTE: this runs after the `return` above; mutating `result` here
        # still affects the returned dict (same object).
        if log_handler is not None and log_path is not None:
            log_handler.close()
            logger.removeHandler(log_handler)
            if args.make_test_case:
                result["retval"] += make_test_case(
                    args, _npy_savename(args), sync_was_successful
                )
            # Remove the temp log unless the user asked for it in a real dir.
            if args.log_dir_path is None or not os.path.isdir(args.log_dir_path):
                os.remove(log_path)
def add_main_args_for_cli(parser: argparse.ArgumentParser) -> None:
    """Register the primary arguments shared by the CLI (reference, input and
    output subtitles, merge/test-case flags, and reference stream selector)."""
    parser.add_argument(
        "reference",
        nargs="?",
        help=(
            "Reference (video, subtitles, or a numpy array with VAD speech) "
            "to which to synchronize input subtitles."
        ),
    )
    # Multiple inputs are permitted (validated later against --overwrite-input).
    parser.add_argument(
        "-i", "--srtin", nargs="*", help="Input subtitles file (default=stdin)."
    )
    parser.add_argument(
        "-o", "--srtout", help="Output subtitles file (default=stdout)."
    )
    parser.add_argument(
        "--merge-with-reference",
        "--merge",
        action="store_true",
        help="Merge reference subtitles with synced output subtitles.",
    )
    parser.add_argument(
        "--make-test-case",
        "--create-test-case",
        action="store_true",
        help="If specified, serialize reference speech to a numpy array, "
        "and create an archive with input/output subtitles "
        "and serialized speech.",
    )
    parser.add_argument(
        "--reference-stream",
        "--refstream",
        "--reference-track",
        "--reftrack",
        default=None,
        help=(
            "Which stream/track in the video file to use as reference, "
            "formatted according to ffmpeg conventions. For example, 0:s:0 "
            "uses the first subtitle track; 0:a:3 would use the third audio track. "
            "You can also drop the leading `0:`; i.e. use s:0 or a:3, respectively. "
            "Example: `ffs ref.mkv -i in.srt -o out.srt --reference-stream s:2`"
        ),
    )
def add_cli_only_args(parser: argparse.ArgumentParser) -> None:
    """Register the options available only on the command line (not the GUI):
    encodings, timing knobs, VAD selection, extraction, and hidden flags."""
    parser.add_argument(
        "-v",
        "--version",
        action="version",
        version="{package} {version}".format(
            package=__package__, version=get_version()
        ),
    )
    parser.add_argument(
        "--overwrite-input",
        action="store_true",
        help=(
            "If specified, will overwrite the input srt "
            "instead of writing the output to a new file."
        ),
    )
    parser.add_argument(
        "--encoding",
        default=DEFAULT_ENCODING,
        help="What encoding to use for reading input subtitles "
        "(default=%s)." % DEFAULT_ENCODING,
    )
    parser.add_argument(
        "--max-subtitle-seconds",
        type=float,
        default=DEFAULT_MAX_SUBTITLE_SECONDS,
        help="Maximum duration for a subtitle to appear on-screen "
        "(default=%.3f seconds)." % DEFAULT_MAX_SUBTITLE_SECONDS,
    )
    parser.add_argument(
        "--start-seconds",
        type=int,
        default=DEFAULT_START_SECONDS,
        help="Start time for processing "
        "(default=%d seconds)." % DEFAULT_START_SECONDS,
    )
    parser.add_argument(
        "--max-offset-seconds",
        type=float,
        default=DEFAULT_MAX_OFFSET_SECONDS,
        help="The max allowed offset seconds for any subtitle segment "
        "(default=%d seconds)." % DEFAULT_MAX_OFFSET_SECONDS,
    )
    parser.add_argument(
        "--apply-offset-seconds",
        type=float,
        default=DEFAULT_APPLY_OFFSET_SECONDS,
        help="Apply a predefined offset in seconds to all subtitle segments "
        "(default=%d seconds)." % DEFAULT_APPLY_OFFSET_SECONDS,
    )
    parser.add_argument(
        "--frame-rate",
        type=int,
        default=DEFAULT_FRAME_RATE,
        help="Frame rate for audio extraction (default=%d)." % DEFAULT_FRAME_RATE,
    )
    parser.add_argument(
        "--skip-infer-framerate-ratio",
        action="store_true",
        help="If set, do not try to infer framerate ratio based on duration ratio.",
    )
    parser.add_argument(
        "--non-speech-label",
        type=float,
        default=DEFAULT_NON_SPEECH_LABEL,
        help="Label to use for frames detected as non-speech (default=%f)"
        % DEFAULT_NON_SPEECH_LABEL,
    )
    parser.add_argument(
        "--output-encoding",
        default="utf-8",
        help="What encoding to use for writing output subtitles "
        '(default=utf-8). Can indicate "same" to use same '
        "encoding as that of the input.",
    )
    parser.add_argument(
        "--reference-encoding",
        help="What encoding to use for reading / writing reference subtitles "
        "(if applicable, default=infer).",
    )
    parser.add_argument(
        "--vad",
        choices=[
            "subs_then_webrtc",
            "webrtc",
            "subs_then_auditok",
            "auditok",
            "subs_then_silero",
            "silero",
        ],
        default=None,
        help="Which voice activity detector to use for speech extraction "
        "(if using video / audio as a reference, default={}).".format(DEFAULT_VAD),
    )
    parser.add_argument(
        "--no-fix-framerate",
        action="store_true",
        help="If specified, subsync will not attempt to correct a framerate "
        "mismatch between reference and subtitles.",
    )
    parser.add_argument(
        "--serialize-speech",
        action="store_true",
        help="If specified, serialize reference speech to a numpy array.",
    )
    parser.add_argument(
        "--extract-subs-from-stream",
        "--extract-subtitles-from-stream",
        default=None,
        help="If specified, do not attempt sync; instead, just extract subtitles"
        " from the specified stream using the reference.",
    )
    parser.add_argument(
        "--suppress-output-if-offset-less-than",
        type=float,
        default=None,
        help="If specified, do not produce output if offset below provided threshold.",
    )
    parser.add_argument(
        "--ffmpeg-path",
        "--ffmpegpath",
        default=None,
        help="Where to look for ffmpeg and ffprobe. Uses the system PATH by default.",
    )
    parser.add_argument(
        "--log-dir-path",
        default=None,
        help=(
            "If provided, will save log file ffsubsync.log to this path "
            "(must be an existing directory)."
        ),
    )
    parser.add_argument(
        "--gss",
        action="store_true",
        help="If specified, use golden-section search to try to find"
        "the optimal framerate ratio between video and subtitles.",
    )
    parser.add_argument(
        "--strict",
        action="store_true",
        help="If specified, refuse to parse srt files with formatting issues.",
    )
    # Hidden flags used by integrations (VLC plugin, GUI wrapper, tests).
    parser.add_argument("--vlc-mode", action="store_true", help=argparse.SUPPRESS)
    parser.add_argument("--gui-mode", action="store_true", help=argparse.SUPPRESS)
    parser.add_argument("--skip-sync", action="store_true", help=argparse.SUPPRESS)
def make_parser() -> argparse.ArgumentParser:
    """Construct the full CLI argument parser (main + CLI-only options)."""
    parser = argparse.ArgumentParser(description="Synchronize subtitles with video.")
    add_main_args_for_cli(parser)
    add_cli_only_args(parser)
    return parser


def main() -> int:
    """CLI entry point: parse arguments, run the sync, return exit status."""
    return run(make_parser())["retval"]


if __name__ == "__main__":
    sys.exit(main())
logging.basicConfig(level=logging.INFO)
logger = logging.getLogger(__name__)


# Menu definition consumed by Gooey's @Gooey decorator below.
_menu = [
    {
        "name": "File",
        "items": [
            {
                "type": "AboutDialog",
                "menuTitle": "About",
                "name": PROJECT_NAME,
                "description": LONG_DESCRIPTION,
                "version": get_version(),
                "copyright": COPYRIGHT_YEAR,
                "website": WEBSITE,
                "developer": DEV_WEBSITE,
                "license": PROJECT_LICENSE,
            },
            {
                "type": "Link",
                "menuTitle": "Download latest release",
                "url": RELEASE_URL,
            },
        ],
    }
]


@Gooey(
    program_name=PROJECT_NAME,
    image_dir=os.path.join(os.environ[SUBSYNC_RESOURCES_ENV_MAGIC], "img"),
    menu=_menu,
    tabbed_groups=True,
    progress_regex=r"(\d+)%",
    hide_progress_msg=True,
)
def make_parser():
    """Build the Gooey (GUI) parser with Basic and Advanced tabs."""
    description = DESCRIPTION
    if update_available():
        # Nudge the user toward the download link in the File menu.
        description += (
            "\nUpdate available! Please go to "
            '"File" -> "Download latest release"'
            " to update FFsubsync."
        )
    parser = GooeyParser(description=description)
    main_group = parser.add_argument_group("Basic")
    main_group.add_argument(
        "reference",
        help="Reference (video or subtitles file) to which to synchronize input subtitles.",
        widget="FileChooser",
    )
    main_group.add_argument("srtin", help="Input subtitles file", widget="FileChooser")
    main_group.add_argument(
        "-o",
        "--srtout",
        help="Output subtitles file (default=${srtin}.synced.srt).",
        widget="FileSaver",
    )
    advanced_group = parser.add_argument_group("Advanced")

    # TODO: these are shared between gui and cli; don't duplicate this code
    advanced_group.add_argument(
        "--merge-with-reference",
        "--merge",
        action="store_true",
        help="Merge reference subtitles with synced output subtitles.",
    )
    advanced_group.add_argument(
        "--make-test-case",
        "--create-test-case",
        action="store_true",
        help="If specified, create a test archive a few KiB in size "
        "to send to the developer as a debugging aid.",
    )
    advanced_group.add_argument(
        "--reference-stream",
        "--refstream",
        "--reference-track",
        "--reftrack",
        default=None,
        help="Which stream/track in the video file to use as reference, "
        "formatted according to ffmpeg conventions. For example, s:0 "
        "uses the first subtitle track; a:3 would use the fourth audio track.",
    )
    return parser


def main():
    """GUI entry point: show the simple form first, then re-parse with the
    full CLI-only option set and run the sync in GUI mode."""
    parser = make_parser()
    _ = parser.parse_args()  # Fool Gooey into presenting the simpler menu
    add_cli_only_args(parser)
    args = parser.parse_args()
    args.gui_mode = True
    return run(args)
class open_file:
    """
    Context manager that opens a filename and closes it on exit, but does
    nothing for file-like objects.

    Parameters
    ----------
    filename : str, None, or file-like
        Path to open, ``None`` to use stdin/stdout (chosen by mode), or an
        already-open file-like object that is passed through unchanged.
    closing : bool, optional keyword (popped from ``kwargs``)
        If true, close the handle on exit even when it was supplied as a
        file-like object. Paths opened here are always closed on exit.
    """

    def __init__(self, filename, *args, **kwargs) -> None:
        self.closing = kwargs.pop("closing", False)
        if filename is None:
            # Pick stdout for writing/appending modes, stdin otherwise.
            # BUG FIX: the previous check was `"w" in args`, which only
            # matched the exact mode string "w" — modes such as "wb", "a",
            # or "r+" incorrectly mapped to stdin. Inspect the mode string
            # (positional or keyword) instead.
            mode = args[0] if args else kwargs.get("mode", "r")
            if any(c in mode for c in "wax+"):
                stream = sys.stdout
            else:
                stream = sys.stdin
            self.fh = open(stream.fileno(), *args, **kwargs)
        elif isinstance(filename, str):
            self.fh = open(filename, *args, **kwargs)
            # We opened it, so we are responsible for closing it.
            self.closing = True
        else:
            # Assume an already-open file-like object; pass it through.
            self.fh = filename

    def __enter__(self):
        return self.fh

    def __exit__(self, exc_type, exc_val, exc_tb):
        if self.closing:
            self.fh.close()

        # Never suppress exceptions raised inside the with-block.
        return False
class GenericSubtitle:
    """Uniform wrapper around a single subtitle event.

    Holds start/end timestamps alongside the original event object
    (``inner``), which is either an ``srt.Subtitle`` or a
    ``pysubs2.SSAEvent``.
    """

    def __init__(self, start, end, inner):
        self.start = start
        self.end = end
        self.inner = inner

    def __eq__(self, other: object) -> bool:
        if not isinstance(other, GenericSubtitle):
            return False
        return (
            self.start == other.start
            and self.end == other.end
            and self.inner == other.inner
        )

    @property
    def content(self) -> str:
        """Text payload of the wrapped subtitle event."""
        if isinstance(self.inner, srt.Subtitle):
            return self.inner.content
        if isinstance(self.inner, pysubs2.SSAEvent):
            return self.inner.text
        raise NotImplementedError(
            "unsupported subtitle type: %s" % type(self.inner)
        )

    def resolve_inner_timestamps(self):
        """Return a copy of ``inner`` carrying this wrapper's timestamps."""
        resolved = copy.deepcopy(self.inner)
        if isinstance(self.inner, srt.Subtitle):
            resolved.start = self.start
            resolved.end = self.end
        elif isinstance(self.inner, pysubs2.SSAEvent):
            # pysubs2 stores times in milliseconds rather than timedeltas.
            resolved.start = pysubs2.make_time(s=self.start.total_seconds())
            resolved.end = pysubs2.make_time(s=self.end.total_seconds())
        else:
            raise NotImplementedError(
                "unsupported subtitle type: %s" % type(self.inner)
            )
        return resolved

    def merge_with(self, other):
        """Return a new subtitle with ``other``'s content appended.

        Keeps this subtitle's timestamps; only srt events are supported.
        """
        assert isinstance(self.inner, type(other.inner))
        merged_inner = copy.deepcopy(self.inner)
        if not isinstance(self.inner, srt.Subtitle):
            raise NotImplementedError(
                "unsupported subtitle type: %s" % type(self.inner)
            )
        merged_inner.content = "{}\n{}".format(
            merged_inner.content, other.inner.content
        )
        return self.__class__(self.start, self.end, merged_inner)

    @classmethod
    def wrap_inner_subtitle(cls, sub) -> "GenericSubtitle":
        """Wrap a raw srt/pysubs2 event in a GenericSubtitle."""
        if isinstance(sub, srt.Subtitle):
            return cls(sub.start, sub.end, sub)
        if isinstance(sub, pysubs2.SSAEvent):
            return cls(
                timedelta(milliseconds=sub.start), timedelta(milliseconds=sub.end), sub
            )
        raise NotImplementedError("unsupported subtitle type: %s" % type(sub))
class GenericSubtitlesFile:
    """An in-memory subtitles file: a list of ``GenericSubtitle`` objects
    plus the format, encoding, and SSA/ASS metadata needed to serialize
    them back out."""

    def __init__(self, subs: List[GenericSubtitle], *_, **kwargs: Any):
        # sub_format and encoding are required keyword arguments.
        sub_format: str = cast(str, kwargs.pop("sub_format", None))
        if sub_format is None:
            raise ValueError("format must be specified")
        encoding: str = cast(str, kwargs.pop("encoding", None))
        if encoding is None:
            raise ValueError("encoding must be specified")
        self.subs_: List[GenericSubtitle] = subs
        self._sub_format: str = sub_format
        self._encoding: str = encoding
        # Optional metadata carried through from a parsed ssa/ass file so
        # that styling survives a round trip.
        self._styles: Optional[Dict[str, pysubs2.SSAStyle]] = kwargs.pop("styles", None)
        self._fonts_opaque: Optional[Dict[str, Any]] = kwargs.pop("fonts_opaque", None)
        self._info: Optional[Dict[str, str]] = kwargs.pop("info", None)

    def set_encoding(self, encoding: str) -> "GenericSubtitlesFile":
        # "same" is a sentinel meaning "keep the encoding detected on input".
        if encoding != "same":
            self._encoding = encoding
        return self

    def __len__(self) -> int:
        return len(self.subs_)

    def __getitem__(self, item: int) -> GenericSubtitle:
        return self.subs_[item]

    def __iter__(self) -> Iterator[GenericSubtitle]:
        return iter(self.subs_)

    def clone_props_for_subs(
        self, new_subs: List[GenericSubtitle]
    ) -> "GenericSubtitlesFile":
        """Return a new file object with the same metadata but new subs."""
        return GenericSubtitlesFile(
            new_subs,
            sub_format=self._sub_format,
            encoding=self._encoding,
            styles=self._styles,
            fonts_opaque=self._fonts_opaque,
            info=self._info,
        )

    def gen_raw_resolved_subs(self):
        """Yield inner subtitle objects with this file's timestamps applied."""
        for sub in self.subs_:
            yield sub.resolve_inner_timestamps()

    def offset(self, td: timedelta) -> "GenericSubtitlesFile":
        """Return a copy with every subtitle shifted by ``td``."""
        offset_subs = []
        for sub in self.subs_:
            offset_subs.append(GenericSubtitle(sub.start + td, sub.end + td, sub.inner))
        return self.clone_props_for_subs(offset_subs)

    def write_file(self, fname: str) -> None:
        """Serialize to ``fname`` (stdout when falsy), converting between
        srt and ssa/ass/vtt when the output extension differs from the
        stored format."""
        # TODO: converter to go between self.subs_format and out_format
        if fname is None:
            out_format = self._sub_format
        else:
            # Infer the output format from the file extension.
            out_format = os.path.splitext(fname)[-1][1:]
        subs = list(self.gen_raw_resolved_subs())
        if self._sub_format in ("ssa", "ass", "vtt"):
            ssaf = pysubs2.SSAFile()
            ssaf.events = subs
            if self._styles is not None:
                ssaf.styles = self._styles
            if self._info is not None:
                ssaf.info = self._info
            if self._fonts_opaque is not None:
                ssaf.fonts_opaque = self._fonts_opaque
            to_write = ssaf.to_string(out_format)
        elif self._sub_format == "srt" and out_format in ("ssa", "ass", "vtt"):
            # srt -> ssa/ass/vtt: go through pysubs2 for the conversion.
            to_write = pysubs2.SSAFile.from_string(srt.compose(subs)).to_string(
                out_format
            )
        elif out_format == "srt":
            to_write = srt.compose(subs)
        else:
            raise NotImplementedError("unsupported output format: %s" % out_format)

        # Binary mode so we control the encoding explicitly.
        with open(fname or sys.stdout.fileno(), "wb") as f:
            f.write(to_write.encode(self._encoding))
class SubsMixin:
    """Mixin for transformer classes that carry a parsed subtitles file."""

    def __init__(self, subs: Optional["GenericSubtitlesFile"] = None) -> None:
        # Parsed subtitles; populated during parsing/fitting elsewhere.
        self.subs_: Optional["GenericSubtitlesFile"] = subs

    def set_encoding(self, encoding: str) -> "SubsMixin":
        """Forward an output-encoding change to the wrapped subtitles file.

        Returns self to allow chaining.
        """
        self.subs_.set_encoding(encoding)
        return self
import logging
import math

logging.basicConfig(level=logging.INFO)
logger = logging.getLogger(__name__)


invphi = (math.sqrt(5) - 1) / 2  # 1 / phi
invphi2 = (3 - math.sqrt(5)) / 2  # 1 / phi^2


def gss(f, a, b, tol=1e-4):
    """Golden-section search.

    Given a function f with a single local minimum in
    the interval [a,b], gss returns a subset interval
    [c,d] that contains the minimum with d-c <= tol.

    ``f`` may take either a single argument ``f(x)`` or two arguments
    ``f(x, is_last_iter)``, where the second flag tells the callback
    whether this is the final evaluation.

    Example:
    >>> f = lambda x: (x-2)**2
    >>> a = 1
    >>> b = 5
    >>> tol = 1e-5
    >>> (c,d) = gss(f, a, b, tol)
    >>> print(c, d)
    1.9999959837979107 2.0000050911830893
    """

    (a, b) = (min(a, b), max(a, b))
    h = b - a
    if h <= tol:
        return a, b

    # Required steps to achieve tolerance
    n = int(math.ceil(math.log(tol / h) / math.log(invphi)))
    logger.info(
        "About to perform %d iterations of golden section search to find the best framerate",
        n,
    )

    def f_wrapped(x, is_last_iter):
        # Support both f(x, is_last_iter) and plain f(x) callbacks.
        try:
            return f(x, is_last_iter)
        except TypeError:
            return f(x)

    c = a + invphi2 * h
    d = a + invphi * h
    yc = f_wrapped(c, n == 1)
    yd = f_wrapped(d, n == 1)

    for k in range(n - 1):
        if yc < yd:
            b = d
            d = c
            yd = yc
            h = invphi * h
            c = a + invphi2 * h
            yc = f_wrapped(c, k == n - 2)
        else:
            a = c
            c = d
            yc = yd
            h = invphi * h
            d = a + invphi * h
            # BUG FIX: this previously called `f` directly instead of
            # `f_wrapped`, raising TypeError for single-argument callbacks
            # (including this docstring's own example) whenever this
            # branch executed.
            yd = f_wrapped(d, k == n - 2)

    if yc < yd:
        return a, d
    else:
        return c, b
`Pipeline` from `sklearn.pipeline` and 4 | `TransformerMixin` from `sklearn.base` in the scikit-learn framework 5 | (commit hash d205638475ca542dc46862652e3bb0be663a8eac) to be precise). 6 | Both are BSD licensed and allow for this sort of thing; attribution 7 | is given as a comment above each class. License reproduced below: 8 | 9 | BSD 3-Clause License 10 | 11 | Copyright (c) 2007-2022 The scikit-learn developers. 12 | All rights reserved. 13 | 14 | Redistribution and use in source and binary forms, with or without 15 | modification, are permitted provided that the following conditions are met: 16 | 17 | * Redistributions of source code must retain the above copyright notice, this 18 | list of conditions and the following disclaimer. 19 | 20 | * Redistributions in binary form must reproduce the above copyright notice, 21 | this list of conditions and the following disclaimer in the documentation 22 | and/or other materials provided with the distribution. 23 | 24 | * Neither the name of the copyright holder nor the names of its 25 | contributors may be used to endorse or promote products derived from 26 | this software without specific prior written permission. 27 | 28 | THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" 29 | AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE 30 | IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE 31 | DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT HOLDER OR CONTRIBUTORS BE LIABLE 32 | FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL 33 | DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR 34 | SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER 35 | CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, 36 | OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE 37 | OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. 
class TransformerProtocol(Protocol):
    """Structural type for sklearn-style objects with fit/transform."""

    fit: Callable[..., "TransformerProtocol"]
    transform: Callable[[Any], Any]


# Author: Gael Varoquaux
# License: BSD 3 clause
class TransformerMixin(TransformerProtocol):
    """Mixin class for all transformers."""

    def fit_transform(self, X: Any, y: Optional[Any] = None, **fit_params: Any) -> Any:
        """Fit to data, then transform it.

        Fits the transformer to ``X`` (and ``y`` when given) with optional
        ``fit_params``, then returns the transformed version of ``X``.

        Parameters
        ----------
        X : ndarray of shape (n_samples, n_features)
            Training set.
        y : ndarray of shape (n_samples,), default=None
            Target values.
        **fit_params : dict
            Additional fit parameters.

        Returns
        -------
        X_new : ndarray array of shape (n_samples, n_features_new)
            Transformed array.
        """
        # Non-optimized default implementation; subclasses may override.
        # Dispatch on whether targets were supplied: unsupervised
        # transformers are fit with X alone, supervised ones with (X, y).
        if y is None:
            return self.fit(X, **fit_params).transform(X)
        return self.fit(X, y, **fit_params).transform(X)
# Author: Edouard Duchesnay
#         Gael Varoquaux
#         Virgile Fritsch
#         Alexandre Gramfort
#         Lars Buitinck
# License: BSD
class Pipeline:
    """Sequentially apply a list of transforms and a final estimator.

    Lightweight stand-in for ``sklearn.pipeline.Pipeline``: ``steps`` is a
    list of ``(name, transformer)`` pairs where intermediate steps expose
    fit/transform and the last step exposes fit (or is ``None`` /
    ``"passthrough"`` for the identity).
    """

    def __init__(self, steps, verbose=False):
        self.steps = steps
        self.verbose = verbose
        self._validate_steps()

    def _validate_steps(self):
        names, estimators = zip(*self.steps)

        # validate estimators
        transformers = estimators[:-1]
        estimator = estimators[-1]

        for t in transformers:
            if t is None or t == "passthrough":
                continue
            if not (hasattr(t, "fit") or hasattr(t, "fit_transform")) or not hasattr(
                t, "transform"
            ):
                raise TypeError(
                    "All intermediate steps should be "
                    "transformers and implement fit and transform "
                    "or be the string 'passthrough' "
                    "'%s' (type %s) doesn't" % (t, type(t))
                )

        # We allow last estimator to be None as an identity transformation
        if (
            estimator is not None
            and estimator != "passthrough"
            and not hasattr(estimator, "fit")
        ):
            raise TypeError(
                "Last step of Pipeline should implement fit "
                "or be the string 'passthrough'. "
                "'%s' (type %s) doesn't" % (estimator, type(estimator))
            )

    def _iter(self, with_final=True, filter_passthrough=True):
        """
        Generate (idx, (name, trans)) tuples from self.steps

        When filter_passthrough is True, 'passthrough' and None transformers
        are filtered out.
        """
        stop = len(self.steps)
        if not with_final:
            stop -= 1

        for idx, (name, trans) in enumerate(islice(self.steps, 0, stop)):
            if not filter_passthrough:
                yield idx, name, trans
            elif trans is not None and trans != "passthrough":
                yield idx, name, trans

    def __len__(self) -> int:
        """
        Returns the length of the Pipeline
        """
        return len(self.steps)

    def __getitem__(self, ind):
        """Returns a sub-pipeline or a single estimator in the pipeline

        Indexing with an integer will return an estimator; using a slice
        returns another Pipeline instance which copies a slice of this
        Pipeline. This copy is shallow: modifying (or fitting) estimators in
        the sub-pipeline will affect the larger pipeline and vice-versa.
        However, replacing a value in `step` will not affect a copy.
        """
        if isinstance(ind, slice):
            if ind.step not in (1, None):
                raise ValueError("Pipeline slicing only supports a step of 1")
            return self.__class__(self.steps[ind])
        try:
            name, est = self.steps[ind]
        except TypeError:
            # Not an int, try get step by name
            return self.named_steps[ind]
        return est

    @property
    def _estimator_type(self):
        # Delegate to the final estimator.
        return self.steps[-1][1]._estimator_type

    @property
    def named_steps(self):
        # Mapping of step name -> step object.
        return dict(self.steps)

    @property
    def _final_estimator(self):
        # Normalize a trailing None to the "passthrough" sentinel.
        estimator = self.steps[-1][1]
        return "passthrough" if estimator is None else estimator

    def _log_message(self, step_idx):
        # Progress message used when verbose=True; None suppresses logging.
        if not self.verbose:
            return None
        name, step = self.steps[step_idx]

        return "(step %d of %d) Processing %s" % (step_idx + 1, len(self.steps), name)

    # Estimator interface

    def _fit(self, X, y=None, **fit_params):
        """Fit all but the final step, threading X through each transformer.

        Returns the transformed X and the fit params destined for the
        final estimator.
        """
        # shallow copy of steps - this should really be steps_
        self.steps = list(self.steps)
        self._validate_steps()

        # Route "stepname__param" fit params to their respective steps.
        fit_params_steps = {name: {} for name, step in self.steps if step is not None}
        for pname, pval in fit_params.items():
            if "__" not in pname:
                raise ValueError(
                    "Pipeline.fit does not accept the {} parameter. "
                    "You can pass parameters to specific steps of your "
                    "pipeline using the stepname__parameter format, e.g. "
                    "`Pipeline.fit(X, y, logisticregression__sample_weight"
                    "=sample_weight)`.".format(pname)
                )
            step, param = pname.split("__", 1)
            fit_params_steps[step][param] = pval
        for step_idx, name, transformer in self._iter(
            with_final=False, filter_passthrough=False
        ):
            if transformer is None or transformer == "passthrough":
                continue

            # Fit or load from cache the current transformer
            X, fitted_transformer = _fit_transform_one(
                transformer, X, y, None, **fit_params_steps[name]
            )
            # Replace the transformer of the step with the fitted
            # transformer. This is necessary when loading the transformer
            # from the cache.
            self.steps[step_idx] = (name, fitted_transformer)
        if self._final_estimator == "passthrough":
            return X, {}
        return X, fit_params_steps[self.steps[-1][0]]

    def fit(self, X, y=None, **fit_params):
        """Fit the model

        Fit all the transforms one after the other and transform the
        data, then fit the transformed data using the final estimator.

        Parameters
        ----------
        X : iterable
            Training data. Must fulfill input requirements of first step of the
            pipeline.

        y : iterable, default=None
            Training targets. Must fulfill label requirements for all steps of
            the pipeline.

        **fit_params : dict of string -> object
            Parameters passed to the ``fit`` method of each step, where
            each parameter name is prefixed such that parameter ``p`` for step
            ``s`` has key ``s__p``.

        Returns
        -------
        self : Pipeline
            This estimator
        """
        Xt, fit_params = self._fit(X, y, **fit_params)
        if self._final_estimator != "passthrough":
            self._final_estimator.fit(Xt, y, **fit_params)
        return self

    def fit_transform(self, X, y=None, **fit_params):
        """Fit the model and transform with the final estimator

        Fits all the transforms one after the other and transforms the
        data, then uses fit_transform on transformed data with the final
        estimator.

        Parameters
        ----------
        X : iterable
            Training data. Must fulfill input requirements of first step of the
            pipeline.

        y : iterable, default=None
            Training targets. Must fulfill label requirements for all steps of
            the pipeline.

        **fit_params : dict of string -> object
            Parameters passed to the ``fit`` method of each step, where
            each parameter name is prefixed such that parameter ``p`` for step
            ``s`` has key ``s__p``.

        Returns
        -------
        Xt : array-like of shape (n_samples, n_transformed_features)
            Transformed samples
        """
        last_step = self._final_estimator
        Xt, fit_params = self._fit(X, y, **fit_params)
        if last_step == "passthrough":
            return Xt
        if hasattr(last_step, "fit_transform"):
            return last_step.fit_transform(Xt, y, **fit_params)
        else:
            return last_step.fit(Xt, y, **fit_params).transform(Xt)

    @property
    def transform(self):
        """Apply transforms, and transform with the final estimator

        This also works where final estimator is ``None``: all prior
        transformations are applied.

        Parameters
        ----------
        X : iterable
            Data to transform. Must fulfill input requirements of first step
            of the pipeline.

        Returns
        -------
        Xt : array-like of shape (n_samples, n_transformed_features)
        """
        # _final_estimator is None or has transform, otherwise attribute error
        # XXX: Handling the None case means we can't use if_delegate_has_method
        if self._final_estimator != "passthrough":
            self._final_estimator.transform
        return self._transform

    def _transform(self, X):
        # Thread X through every non-passthrough step's transform.
        Xt = X
        for _, _, transform in self._iter():
            Xt = transform.transform(Xt)
        return Xt

    @property
    def classes_(self):
        # Delegate to the final estimator.
        return self.steps[-1][-1].classes_

    @property
    def _pairwise(self):
        # check if first estimator expects pairwise input
        return getattr(self.steps[0][1], "_pairwise", False)

    @property
    def n_features_in_(self):
        # delegate to first step (which will call _check_is_fitted)
        return self.steps[0][1].n_features_in_


def _name_estimators(estimators):
    """Generate names for estimators."""

    # Default name is the lowercased class name (strings pass through).
    names = [
        estimator if isinstance(estimator, str) else type(estimator).__name__.lower()
        for estimator in estimators
    ]
    namecount = defaultdict(int)
    for est, name in zip(estimators, names):
        namecount[name] += 1

    # Keep only names that collide; unique names need no suffix.
    for k, v in list(namecount.items()):
        if v == 1:
            del namecount[k]

    # Disambiguate duplicates with "-N" suffixes, numbered left to right.
    for i in reversed(range(len(estimators))):
        name = names[i]
        if name in namecount:
            names[i] += "-%d" % namecount[name]
            namecount[name] -= 1

    return list(zip(names, estimators))
364 | 365 | This is a shorthand for the Pipeline constructor; it does not require, and 366 | does not permit, naming the estimators. Instead, their names will be set 367 | to the lowercase of their types automatically. 368 | 369 | Parameters 370 | ---------- 371 | *steps : list of estimators. 372 | 373 | verbose : bool, default=False 374 | If True, the time elapsed while fitting each step will be printed as it 375 | is completed. 376 | 377 | Returns 378 | ------- 379 | p : Pipeline 380 | """ 381 | verbose = kwargs.pop("verbose", False) 382 | if kwargs: 383 | raise TypeError( 384 | 'Unknown keyword arguments: "{}"'.format(list(kwargs.keys())[0]) 385 | ) 386 | return Pipeline(_name_estimators(steps), verbose=verbose) 387 | 388 | 389 | def _transform_one(transformer, X, y, weight, **fit_params): 390 | res = transformer.transform(X) 391 | # if we have a weight for this transformer, multiply output 392 | if weight is None: 393 | return res 394 | return res * weight 395 | 396 | 397 | def _fit_transform_one(transformer, X, y, weight, **fit_params): 398 | """ 399 | Fits ``transformer`` to ``X`` and ``y``. The transformed result is returned 400 | with the fitted transformer. If ``weight`` is not ``None``, the result will 401 | be multiplied by ``weight``. 
402 | """ 403 | if hasattr(transformer, "fit_transform"): 404 | res = transformer.fit_transform(X, y, **fit_params) 405 | else: 406 | res = transformer.fit(X, y, **fit_params).transform(X) 407 | 408 | if weight is None: 409 | return res, transformer 410 | return res * weight, transformer 411 | -------------------------------------------------------------------------------- /ffsubsync/speech_transformers.py: -------------------------------------------------------------------------------- 1 | # -*- coding: utf-8 -*- 2 | import os 3 | from contextlib import contextmanager 4 | import logging 5 | import io 6 | import subprocess 7 | import sys 8 | from datetime import timedelta 9 | from typing import cast, Callable, Dict, List, Optional, Union 10 | 11 | import ffmpeg 12 | import numpy as np 13 | import tqdm 14 | 15 | from ffsubsync.constants import ( 16 | DEFAULT_ENCODING, 17 | DEFAULT_MAX_SUBTITLE_SECONDS, 18 | DEFAULT_SCALE_FACTOR, 19 | DEFAULT_START_SECONDS, 20 | SAMPLE_RATE, 21 | ) 22 | from ffsubsync.ffmpeg_utils import ffmpeg_bin_path, subprocess_args 23 | from ffsubsync.generic_subtitles import GenericSubtitle 24 | from ffsubsync.sklearn_shim import TransformerMixin 25 | from ffsubsync.sklearn_shim import Pipeline 26 | from ffsubsync.subtitle_parser import make_subtitle_parser 27 | from ffsubsync.subtitle_transformers import SubtitleScaler 28 | 29 | 30 | logging.basicConfig(level=logging.INFO) 31 | logger: logging.Logger = logging.getLogger(__name__) 32 | 33 | 34 | def make_subtitle_speech_pipeline( 35 | fmt: str = "srt", 36 | encoding: str = DEFAULT_ENCODING, 37 | caching: bool = False, 38 | max_subtitle_seconds: int = DEFAULT_MAX_SUBTITLE_SECONDS, 39 | start_seconds: int = DEFAULT_START_SECONDS, 40 | scale_factor: float = DEFAULT_SCALE_FACTOR, 41 | parser=None, 42 | **kwargs, 43 | ) -> Union[Pipeline, Callable[[float], Pipeline]]: 44 | if parser is None: 45 | parser = make_subtitle_parser( 46 | fmt, 47 | encoding=encoding, 48 | caching=caching, 49 | 
def make_subtitle_speech_pipeline(
    fmt: str = "srt",
    encoding: str = DEFAULT_ENCODING,
    caching: bool = False,
    max_subtitle_seconds: int = DEFAULT_MAX_SUBTITLE_SECONDS,
    start_seconds: int = DEFAULT_START_SECONDS,
    scale_factor: Optional[float] = DEFAULT_SCALE_FACTOR,
    parser=None,
    **kwargs,
) -> Union[Pipeline, Callable[[float], Pipeline]]:
    """Build a parse -> scale -> speech-extract pipeline for subtitles.

    Returns a ready ``Pipeline`` when ``scale_factor`` is given, or a
    factory taking a framerate ratio and returning such a pipeline when
    ``scale_factor`` is None.
    """
    if parser is None:
        parser = make_subtitle_parser(
            fmt,
            encoding=encoding,
            caching=caching,
            max_subtitle_seconds=max_subtitle_seconds,
            start_seconds=start_seconds,
            **kwargs,
        )
    # A caller-supplied parser must agree with the other arguments.
    assert parser.encoding == encoding
    assert parser.max_subtitle_seconds == max_subtitle_seconds
    assert parser.start_seconds == start_seconds

    def subpipe_maker(framerate_ratio):
        # Parse subtitles, rescale timestamps by the framerate ratio, then
        # convert them to a discretized speech-presence signal.
        return Pipeline(
            [
                ("parse", parser),
                ("scale", SubtitleScaler(framerate_ratio)),
                (
                    "speech_extract",
                    SubtitleSpeechTransformer(
                        sample_rate=SAMPLE_RATE,
                        start_seconds=start_seconds,
                        framerate_ratio=framerate_ratio,
                    ),
                ),
            ]
        )

    if scale_factor is None:
        return subpipe_maker
    else:
        return subpipe_maker(scale_factor)
def _make_auditok_detector(
    sample_rate: int, frame_rate: int, non_speech_label: float
) -> Callable[[bytes], np.ndarray]:
    """Build a speech detector backed by the (GPLv3) auditok library.

    The returned callable maps raw 16-bit mono PCM bytes to a per-window
    numpy array of speech labels in [0, 1].
    """
    try:
        from auditok import (
            BufferAudioSource,
            ADSFactory,
            AudioEnergyValidator,
            StreamTokenizer,
        )
    except ImportError as e:
        logger.error(
            """Error: auditok not installed!
        Consider installing it with `pip install auditok`. Note that auditok
        is GPLv3 licensed, which means that successfully importing it at
        runtime creates a derivative work that is GPLv3 licensed. For personal
        use this is fine, but note that any commercial use that relies on
        auditok must be open source as per the GPLv3!*
        *Not legal advice. Consult with a lawyer.
        """
        )
        raise e
    bytes_per_frame = 2  # 16-bit samples
    frames_per_window = frame_rate // sample_rate
    validator = AudioEnergyValidator(sample_width=bytes_per_frame, energy_threshold=50)
    tokenizer = StreamTokenizer(
        validator=validator,
        min_length=0.2 * sample_rate,
        max_length=int(5 * sample_rate),
        max_continuous_silence=0.25 * sample_rate,
    )

    def _detect(asegment: bytes) -> np.ndarray:
        asource = BufferAudioSource(
            data_buffer=asegment,
            sampling_rate=frame_rate,
            sample_width=bytes_per_frame,
            channels=1,
        )
        ads = ADSFactory.ads(audio_source=asource, block_dur=1.0 / sample_rate)
        ads.open()
        tokens = tokenizer.tokenize(ads)
        # Number of analysis windows covering the segment (ceiling division).
        length = (
            len(asegment) // bytes_per_frame + frames_per_window - 1
        ) // frames_per_window
        # Mark a +1 step at each token's start index and a down-step just
        # past its end, then integrate: cumsum + clip yields 1.0 inside
        # detected speech and non_speech_label elsewhere.
        # NOTE(review): assumes tokens are (data, start, end) triples —
        # confirm against the auditok StreamTokenizer docs.
        media_bstring = np.zeros(length + 1)
        for token in tokens:
            media_bstring[token[1]] = 1.0
            media_bstring[token[2] + 1] = non_speech_label - 1.0
        return np.clip(np.cumsum(media_bstring)[:-1], 0.0, 1.0)

    return _detect
def _make_silero_detector(
    sample_rate: int, frame_rate: int, non_speech_label: float
) -> Callable[[bytes], np.ndarray]:
    """Build a speech detector backed by the Silero VAD torch-hub model.

    The returned callable maps raw 16-bit mono PCM bytes to a per-window
    numpy array of speech probabilities blended with ``non_speech_label``.
    """
    import torch

    window_duration = 1.0 / sample_rate  # duration in seconds
    frames_per_window = int(window_duration * frame_rate + 0.5)
    # NOTE(review): 1 (not 2) because indexing below operates on the
    # already-decoded sample array rather than raw bytes — confirm.
    bytes_per_frame = 1

    # Downloads/caches the model on first use; requires network access then.
    model, _ = torch.hub.load(
        repo_or_dir="snakers4/silero-vad",
        model="silero_vad",
        force_reload=False,
        onnx=False,
    )

    exception_logged = False

    def _detect(asegment) -> np.ndarray:
        # Convert 16-bit PCM bytes to float32 in [-1, 1).
        asegment = np.frombuffer(asegment, np.int16).astype(np.float32) / (1 << 15)
        asegment = torch.FloatTensor(asegment)
        media_bstring = []
        failures = 0
        for start in range(0, len(asegment) // bytes_per_frame, frames_per_window):
            stop = min(start + frames_per_window, len(asegment))
            try:
                speech_prob = model(
                    asegment[start * bytes_per_frame : stop * bytes_per_frame],
                    frame_rate,
                ).item()
            except Exception:
                nonlocal exception_logged
                # Log the first failure with a traceback; count the rest
                # silently to avoid log spam.
                if not exception_logged:
                    exception_logged = True
                    logger.exception("exception occurred during speech detection")
                speech_prob = 0.0
                failures += 1
            # Blend the model probability toward non_speech_label so that
            # non-speech windows read as "not sure" rather than hard zeros.
            media_bstring.append(1.0 - (1.0 - speech_prob) * (1.0 - non_speech_label))
        return np.array(media_bstring)

    return _detect
self.start_frame_ 217 | 218 | def fit_boundaries( 219 | self, speech_frames: np.ndarray 220 | ) -> "ComputeSpeechFrameBoundariesMixin": 221 | nz = np.nonzero(speech_frames > 0.5)[0] 222 | if len(nz) > 0: 223 | self.start_frame_ = int(np.min(nz)) 224 | self.end_frame_ = int(np.max(nz)) 225 | return self 226 | 227 | 228 | class VideoSpeechTransformer(TransformerMixin): 229 | def __init__( 230 | self, 231 | vad: str, 232 | sample_rate: int, 233 | frame_rate: int, 234 | non_speech_label: float, 235 | start_seconds: int = 0, 236 | ffmpeg_path: Optional[str] = None, 237 | ref_stream: Optional[str] = None, 238 | vlc_mode: bool = False, 239 | gui_mode: bool = False, 240 | ) -> None: 241 | super(VideoSpeechTransformer, self).__init__() 242 | self.vad: str = vad 243 | self.sample_rate: int = sample_rate 244 | self.frame_rate: int = frame_rate 245 | self._non_speech_label: float = non_speech_label 246 | self.start_seconds: int = start_seconds 247 | self.ffmpeg_path: Optional[str] = ffmpeg_path 248 | self.ref_stream: Optional[str] = ref_stream 249 | self.vlc_mode: bool = vlc_mode 250 | self.gui_mode: bool = gui_mode 251 | self.video_speech_results_: Optional[np.ndarray] = None 252 | 253 | def try_fit_using_embedded_subs(self, fname: str) -> None: 254 | embedded_subs = [] 255 | embedded_subs_times = [] 256 | if self.ref_stream is None: 257 | # check first 5; should cover 99% of movies 258 | streams_to_try: List[str] = list(map("0:s:{}".format, range(5))) 259 | else: 260 | streams_to_try = [self.ref_stream] 261 | for stream in streams_to_try: 262 | ffmpeg_args = [ 263 | ffmpeg_bin_path( 264 | "ffmpeg", self.gui_mode, ffmpeg_resources_path=self.ffmpeg_path 265 | ) 266 | ] 267 | ffmpeg_args.extend( 268 | [ 269 | "-loglevel", 270 | "fatal", 271 | "-nostdin", 272 | "-i", 273 | fname, 274 | "-map", 275 | "{}".format(stream), 276 | "-f", 277 | "srt", 278 | "-", 279 | ] 280 | ) 281 | process = subprocess.Popen( 282 | ffmpeg_args, **subprocess_args(include_stdout=True) 283 | ) 284 | 
output = io.BytesIO(process.communicate()[0]) 285 | if process.returncode != 0: 286 | break 287 | pipe = cast( 288 | Pipeline, 289 | make_subtitle_speech_pipeline(start_seconds=self.start_seconds), 290 | ).fit(output) 291 | speech_step = pipe.steps[-1][1] 292 | embedded_subs.append(speech_step) 293 | embedded_subs_times.append(speech_step.max_time_) 294 | if len(embedded_subs) == 0: 295 | if self.ref_stream is None: 296 | error_msg = "Video file appears to lack subtitle stream" 297 | else: 298 | error_msg = "Stream {} not found".format(self.ref_stream) 299 | raise ValueError(error_msg) 300 | # use longest set of embedded subs 301 | subs_to_use = embedded_subs[int(np.argmax(embedded_subs_times))] 302 | self.video_speech_results_ = subs_to_use.subtitle_speech_results_ 303 | 304 | def fit(self, fname: str, *_) -> "VideoSpeechTransformer": 305 | if "subs" in self.vad and ( 306 | self.ref_stream is None or self.ref_stream.startswith("0:s:") 307 | ): 308 | try: 309 | logger.info("Checking video for subtitles stream...") 310 | self.try_fit_using_embedded_subs(fname) 311 | logger.info("...success!") 312 | return self 313 | except Exception as e: 314 | logger.info(e) 315 | try: 316 | total_duration = ( 317 | float( 318 | ffmpeg.probe( 319 | fname, 320 | cmd=ffmpeg_bin_path( 321 | "ffprobe", 322 | self.gui_mode, 323 | ffmpeg_resources_path=self.ffmpeg_path, 324 | ), 325 | )["format"]["duration"] 326 | ) 327 | - self.start_seconds 328 | ) 329 | except Exception as e: 330 | logger.warning(e) 331 | total_duration = None 332 | if "webrtc" in self.vad: 333 | detector = _make_webrtcvad_detector( 334 | self.sample_rate, self.frame_rate, self._non_speech_label 335 | ) 336 | elif "auditok" in self.vad: 337 | detector = _make_auditok_detector( 338 | self.sample_rate, self.frame_rate, self._non_speech_label 339 | ) 340 | elif "silero" in self.vad: 341 | detector = _make_silero_detector( 342 | self.sample_rate, self.frame_rate, self._non_speech_label 343 | ) 344 | else: 345 | raise 
ValueError("unknown vad: %s" % self.vad) 346 | media_bstring: List[np.ndarray] = [] 347 | ffmpeg_args = [ 348 | ffmpeg_bin_path( 349 | "ffmpeg", self.gui_mode, ffmpeg_resources_path=self.ffmpeg_path 350 | ) 351 | ] 352 | if self.start_seconds > 0: 353 | ffmpeg_args.extend( 354 | [ 355 | "-ss", 356 | str(timedelta(seconds=self.start_seconds)), 357 | ] 358 | ) 359 | ffmpeg_args.extend(["-loglevel", "fatal", "-nostdin", "-i", fname]) 360 | if self.ref_stream is not None and self.ref_stream.startswith("0:a:"): 361 | ffmpeg_args.extend(["-map", self.ref_stream]) 362 | ffmpeg_args.extend( 363 | [ 364 | "-f", 365 | "s16le", 366 | "-ac", 367 | "1", 368 | "-acodec", 369 | "pcm_s16le", 370 | "-af", 371 | "aresample=async=1", 372 | "-ar", 373 | str(self.frame_rate), 374 | "-", 375 | ] 376 | ) 377 | process = subprocess.Popen(ffmpeg_args, **subprocess_args(include_stdout=True)) 378 | bytes_per_frame = 2 379 | frames_per_window = bytes_per_frame * self.frame_rate // self.sample_rate 380 | windows_per_buffer = 10000 381 | simple_progress = 0.0 382 | 383 | redirect_stderr = None 384 | tqdm_extra_args = {} 385 | should_print_redirected_stderr = self.gui_mode 386 | if self.gui_mode: 387 | try: 388 | from contextlib import redirect_stderr # type: ignore 389 | 390 | tqdm_extra_args["file"] = sys.stdout 391 | except ImportError: 392 | should_print_redirected_stderr = False 393 | if redirect_stderr is None: 394 | 395 | @contextmanager 396 | def redirect_stderr(enter_result=None): 397 | yield enter_result 398 | 399 | assert redirect_stderr is not None 400 | pbar_output = io.StringIO() 401 | with redirect_stderr(pbar_output): 402 | with tqdm.tqdm( 403 | total=total_duration, disable=self.vlc_mode, **tqdm_extra_args 404 | ) as pbar: 405 | while True: 406 | in_bytes = process.stdout.read( 407 | frames_per_window * windows_per_buffer 408 | ) 409 | if not in_bytes: 410 | break 411 | newstuff = len(in_bytes) / float(bytes_per_frame) / self.frame_rate 412 | if ( 413 | total_duration is not 
None 414 | and simple_progress + newstuff > total_duration 415 | ): 416 | newstuff = total_duration - simple_progress 417 | simple_progress += newstuff 418 | pbar.update(newstuff) 419 | if self.vlc_mode and total_duration is not None: 420 | print("%d" % int(simple_progress * 100.0 / total_duration)) 421 | sys.stdout.flush() 422 | if should_print_redirected_stderr: 423 | assert self.gui_mode 424 | # no need to flush since we pass -u to do unbuffered output for gui mode 425 | print(pbar_output.read()) 426 | if "silero" not in self.vad: 427 | in_bytes = np.frombuffer(in_bytes, np.uint8) 428 | media_bstring.append(detector(in_bytes)) 429 | process.wait() 430 | if len(media_bstring) == 0: 431 | raise ValueError( 432 | "Unable to detect speech. " 433 | "Perhaps try specifying a different stream / track, or a different vad." 434 | ) 435 | self.video_speech_results_ = np.concatenate(media_bstring) 436 | logger.info("total of speech segments: %s", np.sum(self.video_speech_results_)) 437 | return self 438 | 439 | def transform(self, *_) -> np.ndarray: 440 | return self.video_speech_results_ 441 | 442 | 443 | _PAIRED_NESTER: Dict[str, str] = { 444 | "(": ")", 445 | "{": "}", 446 | "[": "]", 447 | # FIXME: False positive sometimes when there are html tags, e.g. Hello? 
448 | # '<': '>', 449 | } 450 | 451 | 452 | # TODO: need way better metadata detector 453 | def _is_metadata(content: str, is_beginning_or_end: bool) -> bool: 454 | content = content.strip() 455 | if len(content) == 0: 456 | return True 457 | if ( 458 | content[0] in _PAIRED_NESTER.keys() 459 | and content[-1] == _PAIRED_NESTER[content[0]] 460 | ): 461 | return True 462 | if is_beginning_or_end: 463 | if "english" in content.lower(): 464 | return True 465 | if " - " in content: 466 | return True 467 | return False 468 | 469 | 470 | class SubtitleSpeechTransformer(TransformerMixin, ComputeSpeechFrameBoundariesMixin): 471 | def __init__( 472 | self, sample_rate: int, start_seconds: int = 0, framerate_ratio: float = 1.0 473 | ) -> None: 474 | super(SubtitleSpeechTransformer, self).__init__() 475 | self.sample_rate: int = sample_rate 476 | self.start_seconds: int = start_seconds 477 | self.framerate_ratio: float = framerate_ratio 478 | self.subtitle_speech_results_: Optional[np.ndarray] = None 479 | self.max_time_: Optional[int] = None 480 | 481 | def fit(self, subs: List[GenericSubtitle], *_) -> "SubtitleSpeechTransformer": 482 | max_time = 0 483 | for sub in subs: 484 | max_time = max(max_time, sub.end.total_seconds()) 485 | self.max_time_ = max_time - self.start_seconds 486 | samples = np.zeros(int(max_time * self.sample_rate) + 2, dtype=float) 487 | start_frame = float("inf") 488 | end_frame = 0 489 | for i, sub in enumerate(subs): 490 | if _is_metadata(sub.content, i == 0 or i + 1 == len(subs)): 491 | continue 492 | start = int( 493 | round( 494 | (sub.start.total_seconds() - self.start_seconds) * self.sample_rate 495 | ) 496 | ) 497 | start_frame = min(start_frame, start) 498 | duration = sub.end.total_seconds() - sub.start.total_seconds() 499 | end = start + int(round(duration * self.sample_rate)) 500 | end_frame = max(end_frame, end) 501 | samples[start:end] = min(1.0 / self.framerate_ratio, 1.0) 502 | self.subtitle_speech_results_ = samples 503 | 
self.fit_boundaries(self.subtitle_speech_results_) 504 | return self 505 | 506 | def transform(self, *_) -> np.ndarray: 507 | assert self.subtitle_speech_results_ is not None 508 | return self.subtitle_speech_results_ 509 | 510 | 511 | class DeserializeSpeechTransformer(TransformerMixin): 512 | def __init__(self, non_speech_label: float) -> None: 513 | super(DeserializeSpeechTransformer, self).__init__() 514 | self._non_speech_label: float = non_speech_label 515 | self.deserialized_speech_results_: Optional[np.ndarray] = None 516 | 517 | def fit(self, fname, *_) -> "DeserializeSpeechTransformer": 518 | speech = np.load(fname) 519 | if hasattr(speech, "files"): 520 | if "speech" in speech.files: 521 | speech = speech["speech"] 522 | else: 523 | raise ValueError( 524 | 'could not find "speech" array in ' 525 | "serialized file; only contains: %s" % speech.files 526 | ) 527 | speech[speech < 1.0] = self._non_speech_label 528 | self.deserialized_speech_results_ = speech 529 | return self 530 | 531 | def transform(self, *_) -> np.ndarray: 532 | assert self.deserialized_speech_results_ is not None 533 | return self.deserialized_speech_results_ 534 | -------------------------------------------------------------------------------- /ffsubsync/subtitle_parser.py: -------------------------------------------------------------------------------- 1 | # -*- coding: utf-8 -*- 2 | from datetime import timedelta 3 | import logging 4 | from typing import Any, cast, List, Optional 5 | 6 | try: 7 | import cchardet 8 | except: # noqa: E722 9 | cchardet = None 10 | try: 11 | import chardet 12 | except: # noqa: E722 13 | chardet = None 14 | try: 15 | import charset_normalizer 16 | except: # noqa: E722 17 | charset_normalizer = None 18 | import pysubs2 19 | from ffsubsync.sklearn_shim import TransformerMixin 20 | import srt 21 | 22 | from ffsubsync.constants import ( 23 | DEFAULT_ENCODING, 24 | DEFAULT_MAX_SUBTITLE_SECONDS, 25 | DEFAULT_START_SECONDS, 26 | ) 27 | from ffsubsync.file_utils 
import open_file 28 | from ffsubsync.generic_subtitles import GenericSubtitle, GenericSubtitlesFile, SubsMixin 29 | 30 | logging.basicConfig(level=logging.INFO) 31 | logger: logging.Logger = logging.getLogger(__name__) 32 | 33 | 34 | def _preprocess_subs( 35 | subs, 36 | max_subtitle_seconds: Optional[int] = None, 37 | start_seconds: int = 0, 38 | tolerant: bool = True, 39 | ) -> List[GenericSubtitle]: 40 | subs_list = [] 41 | start_time = timedelta(seconds=start_seconds) 42 | max_duration = timedelta(days=1) 43 | if max_subtitle_seconds is not None: 44 | max_duration = timedelta(seconds=max_subtitle_seconds) 45 | subs = iter(subs) 46 | while True: 47 | try: 48 | next_sub = GenericSubtitle.wrap_inner_subtitle(next(subs)) 49 | if next_sub.start < start_time: 50 | continue 51 | next_sub.end = min(next_sub.end, next_sub.start + max_duration) 52 | subs_list.append(next_sub) 53 | # We don't catch SRTParseError here b/c that is typically raised when we 54 | # are trying to parse with the wrong encoding, in which case we might 55 | # be able to try another one on the *entire* set of subtitles elsewhere. 
56 | except ValueError as e: 57 | if tolerant: 58 | logger.warning(e) 59 | continue 60 | else: 61 | raise 62 | except StopIteration: 63 | break 64 | return subs_list 65 | 66 | 67 | class GenericSubtitleParser(SubsMixin, TransformerMixin): 68 | def __init__( 69 | self, 70 | fmt: str = "srt", 71 | encoding: str = "infer", 72 | caching: bool = False, 73 | max_subtitle_seconds: Optional[int] = None, 74 | start_seconds: int = 0, 75 | skip_ssa_info: bool = False, 76 | strict: bool = False, 77 | ) -> None: 78 | super(self.__class__, self).__init__() 79 | self.sub_format: str = fmt 80 | self.encoding: str = encoding 81 | self.caching: bool = caching 82 | self.fit_fname: Optional[str] = None 83 | self.detected_encoding_: Optional[str] = None 84 | self.max_subtitle_seconds: Optional[int] = max_subtitle_seconds 85 | self.start_seconds: int = start_seconds 86 | # FIXME: hack to get tests to pass; remove 87 | self._skip_ssa_info: bool = skip_ssa_info 88 | self._strict: bool = strict 89 | 90 | def fit(self, fname: str, *_) -> "GenericSubtitleParser": 91 | if self.caching and self.fit_fname == ("" if fname is None else fname): 92 | return self 93 | encodings_to_try = (self.encoding,) 94 | with open_file(fname, "rb") as f: 95 | subs = f.read() 96 | if self.encoding == "infer": 97 | for chardet_lib in (cchardet, charset_normalizer, chardet): 98 | if chardet_lib is not None: 99 | try: 100 | detected_encoding = cast( 101 | Optional[str], chardet_lib.detect(subs)["encoding"] 102 | ) 103 | except: # noqa: E722 104 | continue 105 | if detected_encoding is not None: 106 | self.detected_encoding_ = detected_encoding 107 | encodings_to_try = (detected_encoding,) 108 | break 109 | assert self.detected_encoding_ is not None 110 | logger.info("detected encoding: %s" % self.detected_encoding_) 111 | exc = None 112 | for encoding in encodings_to_try: 113 | try: 114 | decoded_subs = subs.decode(encoding, errors="replace").strip() 115 | if self.sub_format == "srt": 116 | parsed_subs = srt.parse( 
117 | decoded_subs, ignore_errors=not self._strict 118 | ) 119 | elif self.sub_format in ("ass", "ssa", "sub", "vtt"): 120 | parsed_subs = pysubs2.SSAFile.from_string(decoded_subs) 121 | else: 122 | raise NotImplementedError( 123 | "unsupported format: %s" % self.sub_format 124 | ) 125 | extra_generic_subtitle_file_kwargs = {} 126 | if isinstance(parsed_subs, pysubs2.SSAFile): 127 | extra_generic_subtitle_file_kwargs.update( 128 | dict( 129 | styles=parsed_subs.styles, 130 | # pysubs2 on Python >= 3.6 doesn't support this 131 | fonts_opaque=getattr(parsed_subs, "fonts_opaque", None), 132 | info=parsed_subs.info if not self._skip_ssa_info else None, 133 | ) 134 | ) 135 | self.subs_ = GenericSubtitlesFile( 136 | _preprocess_subs( 137 | parsed_subs, 138 | max_subtitle_seconds=self.max_subtitle_seconds, 139 | start_seconds=self.start_seconds, 140 | ), 141 | sub_format=self.sub_format, 142 | encoding=encoding, 143 | **extra_generic_subtitle_file_kwargs, 144 | ) 145 | self.fit_fname = "" if fname is None else fname 146 | if len(encodings_to_try) > 1: 147 | self.detected_encoding_ = encoding 148 | logger.info("detected encoding: %s" % self.detected_encoding_) 149 | return self 150 | except Exception as e: 151 | exc = e 152 | continue 153 | raise exc 154 | 155 | def transform(self, *_) -> GenericSubtitlesFile: 156 | return self.subs_ 157 | 158 | 159 | def make_subtitle_parser( 160 | fmt: str, 161 | encoding: str = DEFAULT_ENCODING, 162 | caching: bool = False, 163 | max_subtitle_seconds: int = DEFAULT_MAX_SUBTITLE_SECONDS, 164 | start_seconds: int = DEFAULT_START_SECONDS, 165 | **kwargs: Any, 166 | ) -> GenericSubtitleParser: 167 | return GenericSubtitleParser( 168 | fmt=fmt, 169 | encoding=encoding, 170 | caching=caching, 171 | max_subtitle_seconds=max_subtitle_seconds, 172 | start_seconds=start_seconds, 173 | skip_ssa_info=kwargs.get("skip_ssa_info", False), 174 | strict=kwargs.get("strict", False), 175 | ) 176 | 
-------------------------------------------------------------------------------- /ffsubsync/subtitle_transformers.py: -------------------------------------------------------------------------------- 1 | # -*- coding: utf-8 -*- 2 | from datetime import timedelta 3 | import logging 4 | import numbers 5 | 6 | from ffsubsync.generic_subtitles import GenericSubtitle, GenericSubtitlesFile, SubsMixin 7 | from ffsubsync.sklearn_shim import TransformerMixin 8 | 9 | logging.basicConfig(level=logging.INFO) 10 | logger: logging.Logger = logging.getLogger(__name__) 11 | 12 | 13 | class SubtitleShifter(SubsMixin, TransformerMixin): 14 | def __init__(self, td_seconds): 15 | super(SubsMixin, self).__init__() 16 | if not isinstance(td_seconds, timedelta): 17 | self.td_seconds = timedelta(seconds=td_seconds) 18 | else: 19 | self.td_seconds = td_seconds 20 | 21 | def fit(self, subs: GenericSubtitlesFile, *_): 22 | self.subs_ = subs.offset(self.td_seconds) 23 | return self 24 | 25 | def transform(self, *_): 26 | return self.subs_ 27 | 28 | 29 | class SubtitleScaler(SubsMixin, TransformerMixin): 30 | def __init__(self, scale_factor): 31 | assert isinstance(scale_factor, numbers.Number) 32 | super(SubsMixin, self).__init__() 33 | self.scale_factor = scale_factor 34 | 35 | def fit(self, subs: GenericSubtitlesFile, *_): 36 | scaled_subs = [] 37 | for sub in subs: 38 | scaled_subs.append( 39 | GenericSubtitle( 40 | # py2 doesn't support direct multiplication of timedelta w/ float 41 | timedelta(seconds=sub.start.total_seconds() * self.scale_factor), 42 | timedelta(seconds=sub.end.total_seconds() * self.scale_factor), 43 | sub.inner, 44 | ) 45 | ) 46 | self.subs_ = subs.clone_props_for_subs(scaled_subs) 47 | return self 48 | 49 | def transform(self, *_): 50 | return self.subs_ 51 | 52 | 53 | class SubtitleMerger(SubsMixin, TransformerMixin): 54 | def __init__(self, reference_subs, first="reference"): 55 | assert first in ("reference", "output") 56 | super(SubsMixin, self).__init__() 57 | 
self.reference_subs = reference_subs 58 | self.first = first 59 | 60 | def fit(self, output_subs: GenericSubtitlesFile, *_): 61 | def _merger_gen(a, b): 62 | ita, itb = iter(a), iter(b) 63 | cur_a = next(ita, None) 64 | cur_b = next(itb, None) 65 | while True: 66 | if cur_a is None and cur_b is None: 67 | return 68 | elif cur_a is None: 69 | while cur_b is not None: 70 | yield cur_b 71 | cur_b = next(itb, None) 72 | return 73 | elif cur_b is None: 74 | while cur_a is not None: 75 | yield cur_a 76 | cur_a = next(ita, None) 77 | return 78 | # else: neither are None 79 | if cur_a.start < cur_b.start: 80 | swapped = False 81 | else: 82 | swapped = True 83 | cur_a, cur_b = cur_b, cur_a 84 | ita, itb = itb, ita 85 | prev_a = cur_a 86 | while prev_a is not None and cur_a.start < cur_b.start: 87 | cur_a = next(ita, None) 88 | if cur_a is None or cur_a.start < cur_b.start: 89 | yield prev_a 90 | prev_a = cur_a 91 | if prev_a is None: 92 | while cur_b is not None: 93 | yield cur_b 94 | cur_b = next(itb, None) 95 | return 96 | if cur_b.start - prev_a.start < cur_a.start - cur_b.start: 97 | if swapped: 98 | yield cur_b.merge_with(prev_a) 99 | ita, itb = itb, ita 100 | cur_a, cur_b = cur_b, cur_a 101 | cur_a = next(ita, None) 102 | else: 103 | yield prev_a.merge_with(cur_b) 104 | cur_b = next(itb, None) 105 | else: 106 | if swapped: 107 | yield cur_b.merge_with(cur_a) 108 | ita, itb = itb, ita 109 | else: 110 | yield cur_a.merge_with(cur_b) 111 | cur_a = next(ita, None) 112 | cur_b = next(itb, None) 113 | 114 | merged_subs = [] 115 | if self.first == "reference": 116 | first, second = self.reference_subs, output_subs 117 | else: 118 | first, second = output_subs, self.reference_subs 119 | for merged in _merger_gen(first, second): 120 | merged_subs.append(merged) 121 | self.subs_ = output_subs.clone_props_for_subs(merged_subs) 122 | return self 123 | 124 | def transform(self, *_): 125 | return self.subs_ 126 | 
-------------------------------------------------------------------------------- /ffsubsync/version.py: -------------------------------------------------------------------------------- 1 | # -*- coding: utf-8 -*- 2 | import os 3 | from ffsubsync.constants import SUBSYNC_RESOURCES_ENV_MAGIC 4 | from ffsubsync._version import get_versions 5 | 6 | __version__ = get_versions()["version"] 7 | del get_versions 8 | 9 | 10 | def get_version(): 11 | if "unknown" in __version__.lower(): 12 | with open( 13 | os.path.join(os.environ[SUBSYNC_RESOURCES_ENV_MAGIC], "__version__") 14 | ) as f: 15 | return f.read().strip() 16 | else: 17 | return __version__ 18 | 19 | 20 | def make_version_tuple(vstr=None): 21 | if vstr is None: 22 | vstr = __version__ 23 | if vstr[0] == "v": 24 | vstr = vstr[1:] 25 | components = [] 26 | for component in vstr.split("+")[0].split("."): 27 | try: 28 | components.append(int(component)) 29 | except ValueError: 30 | break 31 | return tuple(components) 32 | 33 | 34 | def update_available(): 35 | import requests 36 | from requests.exceptions import Timeout 37 | from .constants import API_RELEASE_URL 38 | 39 | try: 40 | resp = requests.get(API_RELEASE_URL, timeout=1) 41 | latest_vstr = resp.json()["tag_name"] 42 | except Timeout: 43 | return False 44 | except KeyError: 45 | return False 46 | if not resp.ok: 47 | return False 48 | return make_version_tuple(get_version()) < make_version_tuple(latest_vstr) 49 | -------------------------------------------------------------------------------- /gui/.gitignore: -------------------------------------------------------------------------------- 1 | build 2 | dist 3 | -------------------------------------------------------------------------------- /gui/Makefile: -------------------------------------------------------------------------------- 1 | .PHONY: macos 2 | 3 | macos: clean app pkg 4 | 5 | app: 6 | ./build-macos.sh 7 | 8 | pkg: 9 | ./package-macos.sh 10 | 11 | clean: 12 | rm -r dist/ build/ 13 | 
-------------------------------------------------------------------------------- /gui/README.md: -------------------------------------------------------------------------------- 1 | == Note on platform-specific PyInstaller version in requirements.txt == 2 | 3 | PyInstaller>=3.6 introduces a webrtcvad hook that seems to not play nicely 4 | with the webrtcvad-wheels package. This package contains prebuilt wheels 5 | and is needed for Windows (unless I can get a working C compiler in my 6 | Windows build environment, which is doubtful). For MacOS this isn't a 7 | problem since I can use the vanilla webrtcvad package and leverage the 8 | preexisting hook in PyInstaller>=3.6, but for Windows I need to use the 9 | old version of PyInstaller without the hook and introduce my own (in the 10 | 'hooks' directory). 11 | 12 | == Note on Scikit-Learn == 13 | There is some DLL that wasn't getting bundled in the Windows PyInstaller 14 | build and causing the built exe to complain. My solution was to remove 15 | the dependency and include a shim for the Pipeline / Transformer fuctionality. 
16 | -------------------------------------------------------------------------------- /gui/build-macos.sh: -------------------------------------------------------------------------------- 1 | #!/usr/bin/env bash 2 | python3 -m PyInstaller --clean -y --dist ./dist/macos build.spec 3 | # ref: https://github.com/chriskiehl/Gooey/issues/259#issuecomment-522432026 4 | mkdir -p ./dist/macos/Contents 5 | -------------------------------------------------------------------------------- /gui/build-windows.sh: -------------------------------------------------------------------------------- 1 | #!/usr/bin/env bash 2 | nbits=${1:-64} 3 | tag="python3" 4 | if [[ "$nbits" == 32 ]]; then 5 | tag="${tag}-32bit" 6 | fi 7 | docker run -v "$(pwd):/src/" -v "$(pwd)/..:/ffsubsync/" --entrypoint /bin/sh "cdrx/pyinstaller-windows:${tag}" -c "pip install -e /ffsubsync && /ffsubsync/gui/entrypoint-windows.sh" 8 | rm -r "./dist/win${nbits}" 9 | mv ./dist/windows "./dist/win${nbits}" 10 | -------------------------------------------------------------------------------- /gui/build.spec: -------------------------------------------------------------------------------- 1 | # -*- mode: python -*- 2 | 3 | import os 4 | import platform 5 | import gooey 6 | 7 | 8 | root = '..' 
9 | hookspath = None 10 | if platform.system() == 'Windows': 11 | root = '/ffsubsync' 12 | hookspath = [os.path.join(os.curdir, 'hooks')] 13 | 14 | ffmpeg_bin = os.path.join(root, 'resources/ffmpeg-bin') 15 | datas = [(os.path.join(root, 'resources/img/program_icon.png'), './img')] 16 | datas.append((os.path.join(root, 'resources/img/config_icon.png'), './img')) 17 | datas.append((os.path.join(root, '__version__'), '.')) 18 | if platform.system() == 'Darwin': 19 | ffmpeg_bin = os.path.join(ffmpeg_bin, 'macos') 20 | elif platform.system() == 'Windows': 21 | arch_bits = int(platform.architecture()[0][:2]) 22 | ffmpeg_bin = os.path.join(ffmpeg_bin, 'win{}'.format(arch_bits)) 23 | if arch_bits == 64: 24 | datas.append((os.path.join(root, 'resources/lib/win64/VCRUNTIME140_1.dll'), '.')) 25 | else: 26 | raise Exception('ffmpeg not available for {}'.format(platform.system())) 27 | 28 | gooey_root = os.path.dirname(gooey.__file__) 29 | gooey_languages = Tree(os.path.join(gooey_root, 'languages'), prefix = 'gooey/languages') 30 | gooey_images = Tree(os.path.join(gooey_root, 'images'), prefix = 'gooey/images') 31 | a = Analysis([os.path.join(os.curdir, 'ffsubsync-gui.py')], 32 | datas=datas, 33 | hiddenimports=['pkg_resources.py2_warn'], # ref: https://github.com/pypa/setuptools/issues/1963 34 | hookspath=hookspath, 35 | runtime_hooks=None, 36 | binaries=[(ffmpeg_bin, 'ffmpeg-bin')], 37 | ) 38 | pyz = PYZ(a.pure) 39 | 40 | # runtime options to pass to interpreter -- '-u' is for unbuffered io 41 | options = [('u', None, 'OPTION')] 42 | 43 | exe = EXE(pyz, 44 | a.scripts, 45 | a.binaries, 46 | a.zipfiles, 47 | a.datas, 48 | options, 49 | gooey_languages, # Add them in to collected files 50 | gooey_images, # Same here. 
51 | name='FFsubsync', 52 | debug=False, 53 | strip=None, 54 | upx=True, 55 | console=False, 56 | windowed=True, 57 | icon=os.path.join(root, 'resources', 'img', 'program_icon.ico') 58 | ) 59 | 60 | 61 | if platform.system() == 'Darwin': 62 | # info_plist = {'addition_prop': 'additional_value'} 63 | info_plist = {} 64 | app = BUNDLE(exe, 65 | icon=os.path.join(root, 'resources', 'img', 'program_icon.icns'), 66 | name='FFsubsync.app', 67 | bundle_identifier=None, 68 | info_plist=info_plist 69 | ) 70 | -------------------------------------------------------------------------------- /gui/entrypoint-windows.sh: -------------------------------------------------------------------------------- 1 | #!/bin/bash 2 | 3 | # Fail on errors. 4 | set -e 5 | 6 | # Make sure .bashrc is sourced 7 | . /root/.bashrc 8 | 9 | # Allow the workdir to be set using an env var. 10 | # Useful for CI pipiles which use docker for their build steps 11 | # and don't allow that much flexibility to mount volumes 12 | WORKDIR=${SRCDIR:-/src} 13 | 14 | # 15 | # In case the user specified a custom URL for PYPI, then use 16 | # that one, instead of the default one. 17 | # 18 | if [[ "$PYPI_URL" != "https://pypi.python.org/" ]] || \ 19 | [[ "$PYPI_INDEX_URL" != "https://pypi.python.org/simple" ]]; then 20 | # the funky looking regexp just extracts the hostname, excluding port 21 | # to be used as a trusted-host. 
22 | mkdir -p /wine/drive_c/users/root/pip 23 | echo "[global]" > /wine/drive_c/users/root/pip/pip.ini 24 | echo "index = $PYPI_URL" >> /wine/drive_c/users/root/pip/pip.ini 25 | echo "index-url = $PYPI_INDEX_URL" >> /wine/drive_c/users/root/pip/pip.ini 26 | echo "trusted-host = $(echo $PYPI_URL | perl -pe 's|^.*?://(.*?)(:.*?)?/.*$|$1|')" >> /wine/drive_c/users/root/pip/pip.ini 27 | 28 | echo "Using custom pip.ini: " 29 | cat /wine/drive_c/users/root/pip/pip.ini 30 | fi 31 | 32 | cd $WORKDIR 33 | 34 | if [ -f requirements.txt ]; then 35 | pip install -r requirements.txt 36 | fi # [ -f requirements.txt ] 37 | 38 | rm /wine/drive_c/Python37/Lib/site-packages/PyInstaller/hooks/hook-webrtcvad.py 39 | 40 | echo "$@" 41 | 42 | if [[ "$@" == "" ]]; then 43 | pyinstaller --clean -y --dist ./dist/windows --workpath /tmp *.spec 44 | chown -R --reference=. ./dist/windows 45 | else 46 | sh -c "$@" 47 | fi # [[ "$@" == "" ]] 48 | -------------------------------------------------------------------------------- /gui/ffsubsync-gui.py: -------------------------------------------------------------------------------- 1 | from ffsubsync.ffsubsync_gui import main 2 | 3 | 4 | if __name__ == '__main__': 5 | main() 6 | -------------------------------------------------------------------------------- /gui/hooks/hook-webrtcvad.py: -------------------------------------------------------------------------------- 1 | from PyInstaller.utils.hooks import copy_metadata 2 | 3 | datas = copy_metadata('webrtcvad-wheels') 4 | -------------------------------------------------------------------------------- /gui/package-macos.sh: -------------------------------------------------------------------------------- 1 | #!/usr/bin/env bash 2 | 3 | set -Eeuxo pipefail 4 | 5 | BASE=. 
6 | DIST="$BASE/dist" 7 | BUILD="$BASE/build/dmg" 8 | VERSION=$(python3 -c "from subsync.version import __version__; print(__version__)") 9 | APP="Subsync.app" 10 | TARGET="$DIST/subsync-${VERSION}-mac-x86_64.dmg" 11 | 12 | test -e "$BUILD" && rm -rf "$BUILD" 13 | test -e "$TARGET" && rm -f "$TARGET" 14 | mkdir -p "$BUILD" 15 | cp -r "$DIST/$APP" "$BUILD" 16 | 17 | create-dmg \ 18 | --volname "subsync installer" \ 19 | `#--volicon "icon.icns"` \ 20 | --window-pos 300 200 \ 21 | --window-size 700 500 \ 22 | --icon-size 150 \ 23 | --icon "$APP" 200 200 \ 24 | --hide-extension "$APP" \ 25 | --app-drop-link 450 200 \ 26 | --no-internet-enable \ 27 | "$TARGET" "$BUILD" 28 | -------------------------------------------------------------------------------- /gui/requirements.txt: -------------------------------------------------------------------------------- 1 | gooey 2 | pyinstaller>=3.6 3 | requests 4 | -------------------------------------------------------------------------------- /pyproject.toml: -------------------------------------------------------------------------------- 1 | [tool.black] 2 | line-length = 88 3 | target-version = ['py39'] 4 | extend-exclude = '(^/versioneer|_version)\.py' 5 | -------------------------------------------------------------------------------- /pytest.ini: -------------------------------------------------------------------------------- 1 | [pytest] 2 | markers = 3 | integration: mark a test as an integration test. 
4 | #filterwarnings = 5 | # ignore::DeprecationWarning 6 | -------------------------------------------------------------------------------- /requirements-dev.txt: -------------------------------------------------------------------------------- 1 | black 2 | flake8 3 | mypy 4 | pytest 5 | pytest-cov 6 | pyyaml 7 | twine 8 | types-requests 9 | versioneer 10 | -------------------------------------------------------------------------------- /requirements.txt: -------------------------------------------------------------------------------- 1 | auditok==0.1.5 2 | chardet;python_version>='3.7' 3 | charset_normalizer 4 | faust-cchardet 5 | ffmpeg-python 6 | numpy>=1.12.0 7 | pysubs2;python_version<'3.7' 8 | pysubs2>=1.2.0;python_version>='3.7' 9 | rich 10 | setuptools 11 | srt>=3.0.0 12 | tqdm 13 | typing_extensions 14 | webrtcvad;platform_system!='Windows' 15 | webrtcvad-wheels;platform_system=='Windows' 16 | -------------------------------------------------------------------------------- /resources/img/config_icon.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/smacke/ffsubsync/4d275da8b446de4be582d44337e99b2f75b56ebe/resources/img/config_icon.png -------------------------------------------------------------------------------- /resources/img/program_icon.icns: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/smacke/ffsubsync/4d275da8b446de4be582d44337e99b2f75b56ebe/resources/img/program_icon.icns -------------------------------------------------------------------------------- /resources/img/program_icon.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/smacke/ffsubsync/4d275da8b446de4be582d44337e99b2f75b56ebe/resources/img/program_icon.png -------------------------------------------------------------------------------- /resources/img/subsync.png: 
-------------------------------------------------------------------------------- https://raw.githubusercontent.com/smacke/ffsubsync/4d275da8b446de4be582d44337e99b2f75b56ebe/resources/img/subsync.png -------------------------------------------------------------------------------- /resources/img/tearing-me-apart-correct.gif: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/smacke/ffsubsync/4d275da8b446de4be582d44337e99b2f75b56ebe/resources/img/tearing-me-apart-correct.gif -------------------------------------------------------------------------------- /resources/img/tearing-me-apart-wrong.gif: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/smacke/ffsubsync/4d275da8b446de4be582d44337e99b2f75b56ebe/resources/img/tearing-me-apart-wrong.gif -------------------------------------------------------------------------------- /resources/lib/win64/VCRUNTIME140_1.dll: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/smacke/ffsubsync/4d275da8b446de4be582d44337e99b2f75b56ebe/resources/lib/win64/VCRUNTIME140_1.dll -------------------------------------------------------------------------------- /scripts/blacken.sh: -------------------------------------------------------------------------------- 1 | #!/usr/bin/env bash 2 | 3 | # ref: https://vaneyckt.io/posts/safer_bash_scripts_with_set_euxo_pipefail/ 4 | set -euxo pipefail 5 | 6 | DIRS="./ffsubsync ./tests" 7 | black $DIRS $@ 8 | -------------------------------------------------------------------------------- /scripts/bump-version.py: -------------------------------------------------------------------------------- 1 | #!/usr/bin/env python 2 | # -*- coding: utf-8 -*- 3 | import argparse 4 | import subprocess 5 | import sys 6 | 7 | from ffsubsync.version import make_version_tuple 8 | 9 | 10 | def main(*_): 11 | components = 
list(make_version_tuple()) 12 | components[-1] += 1 13 | version = '.'.join(str(c) for c in components) 14 | subprocess.check_output(['git', 'tag', version]) 15 | return 0 16 | 17 | 18 | if __name__ == '__main__': 19 | parser = argparse.ArgumentParser(description='Bump version and create git tag.') 20 | args = parser.parse_args() 21 | sys.exit(main(args)) 22 | -------------------------------------------------------------------------------- /scripts/deploy.sh: -------------------------------------------------------------------------------- 1 | #!/usr/bin/env bash 2 | 3 | # ref: https://vaneyckt.io/posts/safer_bash_scripts_with_set_euxo_pipefail/ 4 | set -euxo pipefail 5 | 6 | if ! git diff-index --quiet HEAD --; then 7 | echo "dirty working tree; please clean or commit changes" 8 | exit 1 9 | fi 10 | 11 | if ! git describe --exact-match --tags HEAD > /dev/null; then 12 | echo "current revision not tagged; please deploy from a tagged revision" 13 | exit 1 14 | fi 15 | 16 | current="$(python -c 'import versioneer; print(versioneer.get_version())')" 17 | [[ $? -eq 1 ]] && exit 1 18 | 19 | latest="$(git describe --tags $(git rev-list --tags --max-count=1))" 20 | [[ $? 
-eq 1 ]] && exit 1 21 | 22 | if [[ "$current" != "$latest" ]]; then 23 | echo "current revision is not the latest version; please deploy from latest version" 24 | exit 1 25 | fi 26 | 27 | expect <= 0.99 64 | 65 | 66 | def detected_encoding(fname): 67 | parser = GenericSubtitleParser(skip_ssa_info=True) 68 | parser.fit(fname) 69 | return parser.detected_encoding_ 70 | 71 | 72 | @pytest.mark.integration 73 | @pytest.mark.parametrize("args,truth,should_detect_encoding", gen_synctest_configs()) 74 | def test_sync_matches_ground_truth(args, truth, should_detect_encoding): 75 | # context manager TemporaryDirectory not available on py2 76 | dirpath = tempfile.mkdtemp() 77 | try: 78 | args.srtout = os.path.join( 79 | dirpath, "test" + os.path.splitext(args.srtin[0])[-1] 80 | ) 81 | args.skip_ssa_info = True 82 | assert ffsubsync.run(args)["retval"] == 0 83 | assert timestamps_roughly_match(args.srtout, truth) 84 | if should_detect_encoding is not None: 85 | assert detected_encoding(args.srtin[0]) == should_detect_encoding 86 | finally: 87 | shutil.rmtree(dirpath) 88 | -------------------------------------------------------------------------------- /tests/test_misc.py: -------------------------------------------------------------------------------- 1 | # -*- coding: utf-8 -*- 2 | import pytest 3 | from ffsubsync.version import make_version_tuple 4 | 5 | 6 | @pytest.mark.parametrize( 7 | "vstr, expected", 8 | [("v0.1.1", (0, 1, 1)), ("v1.2.3", (1, 2, 3)), ("4.5.6.1", (4, 5, 6, 1))], 9 | ) 10 | def test_version_tuple_from_string(vstr, expected): 11 | assert make_version_tuple(vstr) == expected 12 | -------------------------------------------------------------------------------- /tests/test_subtitles.py: -------------------------------------------------------------------------------- 1 | # -*- coding: utf-8 -*- 2 | import itertools 3 | from io import BytesIO 4 | from datetime import timedelta 5 | 6 | import pytest 7 | import numpy as np 8 | 9 | from ffsubsync.sklearn_shim 
import make_pipeline 10 | from ffsubsync.speech_transformers import SubtitleSpeechTransformer 11 | from ffsubsync.subtitle_parser import GenericSubtitleParser 12 | from ffsubsync.subtitle_transformers import SubtitleShifter 13 | 14 | fake_srt = b"""1 15 | 00:00:00,178 --> 00:00:01,1416 16 | Previously on "Your favorite TV show..." 17 | 18 | 2 19 | 00:00:01,1828 --> 00:00:04,549 20 | Oh hi, Mark. 21 | 22 | 3 23 | 00:00:04,653 --> 00:00:03,3062 24 | You are tearing me apart, Lisa! 25 | """ 26 | 27 | # Occasionally some srt files have timestamps whose 'milliseconds' 28 | # field has more than 3 digits... Ideally we should test that these 29 | # are handled properly with dedicated tests, but in the interest of 30 | # development speed I've opted to sprinkle in a few >3 digit 31 | # millisecond fields into the dummy string above in order to exercise 32 | # this case integration-test style in the below unit tests. 33 | 34 | 35 | @pytest.mark.parametrize("start_seconds", [0, 2, 4, 6]) 36 | def test_start_seconds(start_seconds): 37 | parser_zero = GenericSubtitleParser(start_seconds=0) 38 | parser_zero.fit(BytesIO(fake_srt)) 39 | parser = GenericSubtitleParser(start_seconds=start_seconds) 40 | parser.fit(BytesIO(fake_srt)) 41 | expected = [ 42 | sub 43 | for sub in parser_zero.subs_ 44 | if sub.start >= timedelta(seconds=start_seconds) 45 | ] 46 | assert all(esub == psub for esub, psub in zip(expected, parser.subs_)) 47 | 48 | 49 | @pytest.mark.parametrize("max_seconds", [1, 1.5, 2.0, 2.5]) 50 | def test_max_seconds(max_seconds): 51 | parser = GenericSubtitleParser(max_subtitle_seconds=max_seconds) 52 | parser.fit(BytesIO(fake_srt)) 53 | assert max(sub.end - sub.start for sub in parser.subs_) <= timedelta( 54 | seconds=max_seconds 55 | ) 56 | 57 | 58 | @pytest.mark.parametrize("encoding", ["utf-8", "ascii", "latin-1"]) 59 | def test_same_encoding(encoding): 60 | parser = GenericSubtitleParser(encoding=encoding) 61 | offseter = SubtitleShifter(1) 62 | pipe = 
make_pipeline(parser, offseter) 63 | pipe.fit(BytesIO(fake_srt)) 64 | assert parser.subs_._encoding == encoding 65 | assert offseter.subs_._encoding == parser.subs_._encoding 66 | assert offseter.subs_.set_encoding("same")._encoding == encoding 67 | assert offseter.subs_.set_encoding("utf-8")._encoding == "utf-8" 68 | 69 | 70 | @pytest.mark.parametrize("offset", [1, 1.5, -2.3]) 71 | def test_offset(offset): 72 | parser = GenericSubtitleParser() 73 | offseter = SubtitleShifter(offset) 74 | pipe = make_pipeline(parser, offseter) 75 | pipe.fit(BytesIO(fake_srt)) 76 | for sub_orig, sub_offset in zip(parser.subs_, offseter.subs_): 77 | assert ( 78 | abs( 79 | sub_offset.start.total_seconds() 80 | - sub_orig.start.total_seconds() 81 | - offset 82 | ) 83 | < 1e-6 84 | ) 85 | assert ( 86 | abs(sub_offset.end.total_seconds() - sub_orig.end.total_seconds() - offset) 87 | < 1e-6 88 | ) 89 | 90 | 91 | @pytest.mark.parametrize( 92 | "sample_rate,start_seconds", itertools.product([10, 20, 100, 300], [0, 2, 4, 6]) 93 | ) 94 | def test_speech_extraction(sample_rate, start_seconds): 95 | parser = GenericSubtitleParser(start_seconds=start_seconds) 96 | extractor = SubtitleSpeechTransformer( 97 | sample_rate=sample_rate, start_seconds=start_seconds 98 | ) 99 | pipe = make_pipeline(parser, extractor) 100 | bitstring = pipe.fit_transform(BytesIO(fake_srt)).astype(bool) 101 | bitstring_shifted_left = np.append(bitstring[1:], [False]) 102 | bitstring_shifted_right = np.append([False], bitstring[:-1]) 103 | bitstring_cumsum = np.cumsum(bitstring) 104 | consec_ones_end_pos = np.nonzero( 105 | bitstring_cumsum 106 | * (bitstring ^ bitstring_shifted_left) 107 | * (bitstring_cumsum != np.cumsum(bitstring_shifted_right)) 108 | )[0] 109 | prev = 0 110 | for pos, sub in zip(consec_ones_end_pos, parser.subs_): 111 | start = int(round(sub.start.total_seconds() * sample_rate)) 112 | duration = sub.end.total_seconds() - sub.start.total_seconds() 113 | stop = start + int(round(duration * 
sample_rate)) 114 | assert bitstring_cumsum[pos] - prev == stop - start 115 | prev = bitstring_cumsum[pos] 116 | 117 | 118 | def test_max_time_found(): 119 | parser = GenericSubtitleParser() 120 | extractor = SubtitleSpeechTransformer(sample_rate=100) 121 | pipe = make_pipeline(parser, extractor) 122 | pipe.fit(BytesIO(fake_srt)) 123 | assert extractor.max_time_ == 6.062 124 | --------------------------------------------------------------------------------