├── .coveragerc ├── .gitattributes ├── .github ├── FUNDING.yml ├── ISSUE_TEMPLATE │ ├── bug_report.md │ └── synchronization-problem.md └── workflows │ └── ci.yml ├── .gitignore ├── .gitmodules ├── .readthedocs.yml ├── .travis.yml ├── CODE_OF_CONDUCT.md ├── HISTORY.rst ├── LICENSE ├── MANIFEST.in ├── Makefile ├── README.md ├── docs ├── .gitignore ├── Makefile ├── _static │ └── .keep ├── _templates │ └── .keep ├── conf.py ├── index.rst ├── make.bat └── requirements-docs.txt ├── ffsubsync ├── __init__.py ├── _version.py ├── aligners.py ├── constants.py ├── ffmpeg_utils.py ├── ffsubsync.py ├── ffsubsync_gui.py ├── file_utils.py ├── generic_subtitles.py ├── golden_section_search.py ├── sklearn_shim.py ├── speech_transformers.py ├── subtitle_parser.py ├── subtitle_transformers.py └── version.py ├── gui ├── .gitignore ├── Makefile ├── README.md ├── build-macos.sh ├── build-windows.sh ├── build.spec ├── entrypoint-windows.sh ├── ffsubsync-gui.py ├── hooks │ └── hook-webrtcvad.py ├── package-macos.sh └── requirements.txt ├── pyproject.toml ├── pytest.ini ├── requirements-dev.txt ├── requirements.txt ├── resources ├── img │ ├── config_icon.png │ ├── program_icon.icns │ ├── program_icon.png │ ├── subsync.png │ ├── tearing-me-apart-correct.gif │ └── tearing-me-apart-wrong.gif └── lib │ └── win64 │ └── VCRUNTIME140_1.dll ├── scripts ├── blacken.sh ├── bump-version.py ├── deploy.sh └── write-version.py ├── setup.cfg ├── setup.py ├── tests ├── test_alignment.py ├── test_integration.py ├── test_misc.py └── test_subtitles.py └── versioneer.py /.coveragerc: -------------------------------------------------------------------------------- 1 | [run] 2 | omit = ffsubsync/ffsubsync_gui.py, ffsubsync/_version.py, ffsubsync/version.py 3 | -------------------------------------------------------------------------------- /.gitattributes: -------------------------------------------------------------------------------- 1 | ffsubsync/_version.py export-subst 2 | 
-------------------------------------------------------------------------------- /.github/FUNDING.yml: -------------------------------------------------------------------------------- 1 | github: smacke 2 | -------------------------------------------------------------------------------- /.github/ISSUE_TEMPLATE/bug_report.md: -------------------------------------------------------------------------------- 1 | --- 2 | name: Bug report 3 | about: Create a report to help us improve 4 | title: '' 5 | labels: bug 6 | assignees: '' 7 | 8 | --- 9 | 10 | **Environment (please complete the following information):** 11 | - OS: [e.g. Windows 10, MacOS Mojave, etc.] 12 | - python version (`python --version`) 13 | - subsync version (`subsync --version`) 14 | 15 | **Describe the bug** 16 | A clear and concise description of what the bug is. 17 | 18 | **To Reproduce** 19 | How to reproduce the behavior. 20 | 21 | **Expected behavior** 22 | A clear and concise description of what you expected to happen. 23 | 24 | **Output** 25 | Copy+paste stdout from running the command here. 26 | 27 | **Test case** 28 | [Optional] You can bundle additional debugging information into a tar archive as follows: 29 | ``` 30 | subsync vid.mkv -i in.srt -o out.srt --make-test-case 31 | ``` 32 | This will create a file `vid.mkv.$timestamp.tar.gz` or similar a few KiB in size; you can attach it by clicking the "attach files" button below. 33 | 34 | **Additional context** 35 | Add any other context about the problem here. 36 | -------------------------------------------------------------------------------- /.github/ISSUE_TEMPLATE/synchronization-problem.md: -------------------------------------------------------------------------------- 1 | --- 2 | name: Synchronization problem 3 | about: Help us to improve syncing by reporting failed syncs 4 | title: output subtitles still out of sync 5 | labels: out-of-sync 6 | assignees: '' 7 | 8 | --- 9 | 10 | **Upload a tarball with debugging information** 11 | 1. 
Run the command that produces the out-of-sync subtitle output, but with the additional `--make-test-case` flag, i.e.: `subsync ref.mkv -i in.srt -o failed.srt --make-test-case` 12 | 2. This results in a file of the form `ref.mkv.$timestamp.tar.gz` or similar. 13 | 3. Please upload this file using the "attach files" button at the bottom of the text prompt. 14 | 15 | That's all! Thank you for contributing a test case; this helps me to continue improving the sync and to add additional integration tests once improvements have been made. 16 | 17 | **Additional context** 18 | Add any other context about the problem here that might be helpful. 19 | -------------------------------------------------------------------------------- /.github/workflows/ci.yml: -------------------------------------------------------------------------------- 1 | name: ffsubsync 2 | 3 | on: [push, pull_request] 4 | 5 | jobs: 6 | build: 7 | 8 | runs-on: ${{ matrix.os }} 9 | 10 | strategy: 11 | matrix: 12 | os: [ 'ubuntu-22.04', 'windows-latest' ] 13 | python-version: [ '3.7', '3.8', '3.9', '3.10', '3.11', '3.12', '3.13' ] 14 | include: 15 | - python-version: '3.7' 16 | os: 'macos-13' 17 | - python-version: '3.8' 18 | os: 'macos-13' 19 | - python-version: '3.9' 20 | os: 'macos-13' 21 | - python-version: '3.10' 22 | os: 'macos-latest' 23 | - python-version: '3.11' 24 | os: 'macos-latest' 25 | - python-version: '3.12' 26 | os: 'macos-latest' 27 | - python-version: '3.13' 28 | os: 'macos-latest' 29 | steps: 30 | - uses: actions/checkout@v4 31 | with: 32 | fetch-depth: 1 33 | - uses: smacke/submodule-checkout@v3 34 | if: ${{ matrix.os == 'ubuntu-22.04' && matrix.python-version != '3.10'}} 35 | with: 36 | ssh-key: '${{ secrets.TEST_DATA_SECRET }}' 37 | - name: Set up Python 38 | uses: actions/setup-python@v5 39 | with: 40 | python-version: ${{ matrix.python-version }} 41 | - name: Install dependencies 42 | run: | 43 | python -m pip install --upgrade pip 44 | pip install -r requirements.txt 45 | pip install -r requirements-dev.txt 46 | pip install -e . 47 | - name: Lint with flake8 48 | run: | 49 | pip install flake8 50 | # stop the build if there are Python syntax errors or undefined names 51 | #flake8 . --count --select=E9,F63,F7,F82 --show-source --statistics 52 | # exit-zero treats all errors as warnings. The GitHub editor is 127 chars wide 53 | #flake8 . --count --exit-zero --max-complexity=10 --max-line-length=127 --statistics 54 | flake8 . --exit-zero 55 | - name: Run unit tests with pytest (no coverage) 56 | if: matrix.os != 'ubuntu-22.04' 57 | run: | 58 | pytest --cov-config=.coveragerc --cov-report= --cov=ffsubsync -v -m 'not integration' tests/ 59 | - name: Run unit tests with pytest (with coverage) 60 | if: matrix.os == 'ubuntu-22.04' 61 | run: | 62 | pytest --cov-config=.coveragerc --cov-report=xml:cov.xml --cov=ffsubsync -v -m 'not integration' tests/ 63 | - name: Run integration tests with pytest 64 | if: ${{ matrix.os == 'ubuntu-22.04' && matrix.python-version != '3.10'}} 65 | run: | 66 | INTEGRATION=1 pytest --cov-config=.coveragerc --cov-report=xml:cov.xml --cov=ffsubsync -v -m 'integration' tests/ 67 | - name: Upload coverage report 68 | if: matrix.os == 'ubuntu-22.04' 69 | uses: codecov/codecov-action@v1 70 | with: 71 | token: '${{ secrets.CODECOV_TOKEN }}' 72 | files: ./cov.xml 73 | env_vars: PYTHON 74 | name: codecov-umbrella 75 | fail_ci_if_error: true 76 | verbose: true 77 | -------------------------------------------------------------------------------- /.gitignore: -------------------------------------------------------------------------------- 1 | scratch-notebooks/ 2 | **/__pycache__ 3 | build 4 | dist 5 | *.egg-info 6 | .vim 7 | __version__ 8 | .venv/ 9 | .coverage 10 | -------------------------------------------------------------------------------- /.gitmodules: -------------------------------------------------------------------------------- 1 | [submodule "test-data"] 2 | path = test-data 3 | url = 
git@github.com:smacke/subsync-data 4 | -------------------------------------------------------------------------------- /.readthedocs.yml: -------------------------------------------------------------------------------- 1 | # .readthedocs.yml 2 | # Read the Docs configuration file 3 | # See https://docs.readthedocs.io/en/stable/config-file/v2.html for details 4 | 5 | # Required 6 | version: 2 7 | 8 | # Build documentation in the docs/ directory with Sphinx 9 | sphinx: 10 | configuration: docs/conf.py 11 | 12 | # Optionally build your docs in additional formats such as PDF and ePub 13 | formats: [pdf] 14 | 15 | # Optionally set the version of Python and requirements required to build your docs 16 | python: 17 | version: 3.8 18 | install: 19 | - method: setuptools 20 | path: . 21 | - requirements: docs/requirements-docs.txt 22 | 23 | submodules: 24 | exclude: all 25 | -------------------------------------------------------------------------------- /.travis.yml: -------------------------------------------------------------------------------- 1 | language: python 2 | 3 | python: 4 | - "3.6" 5 | - "3.7" 6 | - "3.8" 7 | - "3.9" 8 | 9 | os: 10 | - linux 11 | # - osx 12 | 13 | dist: xenial 14 | 15 | git: 16 | submodules: false 17 | lfs_skip_smudge: true 18 | 19 | install: 20 | - pip install -r requirements.txt 21 | - pip install -r requirements-dev.txt 22 | - pip install -e . 23 | 24 | #addons: 25 | # apt: 26 | # update: true 27 | # packages: ffmpeg 28 | # homebrew: 29 | # packages: ffmpeg 30 | 31 | script: 32 | - pytest -v -m 'not integration' tests/ 33 | - flake8 . 
--exit-zero 34 | -------------------------------------------------------------------------------- /CODE_OF_CONDUCT.md: -------------------------------------------------------------------------------- 1 | # Contributor Covenant Code of Conduct 2 | 3 | ## Our Pledge 4 | 5 | In the interest of fostering an open and welcoming environment, we as 6 | contributors and maintainers pledge to making participation in our project and 7 | our community a harassment-free experience for everyone, regardless of age, body 8 | size, disability, ethnicity, sex characteristics, gender identity and expression, 9 | level of experience, education, socio-economic status, nationality, personal 10 | appearance, race, religion, or sexual identity and orientation. 11 | 12 | ## Our Standards 13 | 14 | Examples of behavior that contributes to creating a positive environment 15 | include: 16 | 17 | * Using welcoming and inclusive language 18 | * Being respectful of differing viewpoints and experiences 19 | * Gracefully accepting constructive criticism 20 | * Focusing on what is best for the community 21 | * Showing empathy towards other community members 22 | 23 | Examples of unacceptable behavior by participants include: 24 | 25 | * The use of sexualized language or imagery and unwelcome sexual attention or 26 | advances 27 | * Trolling, insulting/derogatory comments, and personal or political attacks 28 | * Public or private harassment 29 | * Publishing others' private information, such as a physical or electronic 30 | address, without explicit permission 31 | * Other conduct which could reasonably be considered inappropriate in a 32 | professional setting 33 | 34 | ## Our Responsibilities 35 | 36 | Project maintainers are responsible for clarifying the standards of acceptable 37 | behavior and are expected to take appropriate and fair corrective action in 38 | response to any instances of unacceptable behavior. 
39 | 40 | Project maintainers have the right and responsibility to remove, edit, or 41 | reject comments, commits, code, wiki edits, issues, and other contributions 42 | that are not aligned to this Code of Conduct, or to ban temporarily or 43 | permanently any contributor for other behaviors that they deem inappropriate, 44 | threatening, offensive, or harmful. 45 | 46 | ## Scope 47 | 48 | This Code of Conduct applies both within project spaces and in public spaces 49 | when an individual is representing the project or its community. Examples of 50 | representing a project or community include using an official project e-mail 51 | address, posting via an official social media account, or acting as an appointed 52 | representative at an online or offline event. Representation of a project may be 53 | further defined and clarified by project maintainers. 54 | 55 | ## Enforcement 56 | 57 | Instances of abusive, harassing, or otherwise unacceptable behavior may be 58 | reported by contacting the project team at stephen.macke@gmail.com. All 59 | complaints will be reviewed and investigated and will result in a response that 60 | is deemed necessary and appropriate to the circumstances. The project team is 61 | obligated to maintain confidentiality with regard to the reporter of an incident. 62 | Further details of specific enforcement policies may be posted separately. 63 | 64 | Project maintainers who do not follow or enforce the Code of Conduct in good 65 | faith may face temporary or permanent repercussions as determined by other 66 | members of the project's leadership. 
67 | 68 | ## Attribution 69 | 70 | This Code of Conduct is adapted from the [Contributor Covenant][homepage], version 1.4, 71 | available at https://www.contributor-covenant.org/version/1/4/code-of-conduct.html 72 | 73 | [homepage]: https://www.contributor-covenant.org 74 | 75 | For answers to common questions about this code of conduct, see 76 | https://www.contributor-covenant.org/faq 77 | -------------------------------------------------------------------------------- /HISTORY.rst: -------------------------------------------------------------------------------- 1 | History 2 | ======= 3 | 4 | 0.4.28 (2025-02-16) 5 | ------------------- 6 | * Add support for Python 3.13; 7 | 8 | 0.4.27 (2024-12-23) 9 | ------------------- 10 | * Add support for WebVTT by @GrahamDigital; 11 | * Make setuptools an explicit requirement to improve support for Python 3.12+; 12 | 13 | 0.4.26 (2024-10-15) 14 | ------------------- 15 | * Allow progress to work for multiple syncs even if alignment fails for a particular input; 16 | * Allow specifying ffmpeg exe path using --ffmpeg-path; 17 | * Updates for Python 3.12; 18 | * Don't report sync as successful if best score is in negatives (from @ajitid); 19 | * Turn on Audio Sync for audio extraction process (from @dvh312); 20 | 21 | 0.4.25 (2023-03-26) 22 | ------------------- 23 | * Replace unmaintained cchardet with faust-cchardet; 24 | 25 | 0.4.23 (2023-01-17) 26 | ------------------- 27 | * Bugfix for waitpid on Windows; 28 | 29 | 0.4.22 (2022-12-31) 30 | ------------------- 31 | * Misc maintenance / compatibility fixes; 32 | 33 | 0.4.19 (2022-01-07) 34 | ------------------- 35 | * Blacken code and get rid of future_annotations dependency; 36 | 37 | 0.4.18 (2021-11-07) 38 | ------------------- 39 | * Allow `--apply-offset-seconds` when only subtitles specified; 40 | * Make golden section search over scale factors option (`--gss`) available from help; 41 | * Use -inf as objective for invalid offsets; 42 | 43 | 0.4.17 (2021-10-03) 44 | 
------------------- 45 | * Don't remove log file if --log-dir-path explicitly requested; 46 | * Add --suppress-output-if-offset-less-than arg to suppress output for small syncs; 47 | 48 | 0.4.16 (2021-07-22) 49 | ------------------- 50 | * Fix a couple of validation bugs that prevented certain uncommon command line options from use; 51 | 52 | 0.4.15 (2021-05-25) 53 | ------------------- 54 | * Make typing_extensions a requirement 55 | 56 | 0.4.14 (2021-05-10) 57 | ------------------- 58 | * Hotfix for pysubs2 on Python 3.6; 59 | 60 | 0.4.13 (2021-05-10) 61 | ------------------- 62 | * Support SSA embedded fonts using new pysubs2 'opaque_fonts' metadata; 63 | * Set min required pysubs2 version to 1.2.0 to ensure the aforementioned functionality is available; 64 | 65 | 0.4.12 (2021-04-13) 66 | ------------------- 67 | * Pin auditok to 0.1.5 to avoid API-breaking change 68 | 69 | 0.4.11 (2021-01-29) 70 | ------------------- 71 | * Misc sync improvements: 72 | * Have webrtcvad use '0' as the non speech label instead of 0.5; 73 | * Allow the vad non speech label to be specified via the --non-speech-label command line parameter; 74 | * Don't try to infer framerate ratio based on length between first and last speech frames for non-subtitle speech detection; 75 | 76 | 0.4.10 (2021-01-18) 77 | ------------------- 78 | * Lots of improvements from PRs submitted by @alucryd (big thanks!): 79 | * Retain ASS styles; 80 | * Support syncing several subs against the same ref via --overwrite-input flag; 81 | * Add --apply-offset-seconds postprocess option to shift alignment by prespecified amount; 82 | * Filter out metadata in subtitles when extracting speech; 83 | * Add experimental --golden-section-search over framerate ratio (off by default); 84 | * Try to improve sync by inferring framerate ratio based on relative duration of synced vs unsynced; 85 | 86 | 0.4.9 (2020-10-11) 87 | ------------------ 88 | * Make default max offset seconds 60 and enforce during alignment as opposed 
to throwing away alignments with > max_offset_seconds; 89 | * Add experimental section for using golden section search to find framerate ratio; 90 | * Restore ability to read stdin and write stdout after buggy permissions check; 91 | * Exceptions that occur during syncing were mistakenly suppressed; this is now fixed; 92 | 93 | 0.4.8 (2020-09-22) 94 | ------------------ 95 | * Use webrtcvad-wheels on Windows to eliminate dependency on compiler; 96 | 97 | 0.4.7 (2020-09-05) 98 | ------------------ 99 | * Misc bugfixes and stability improvements; 100 | 101 | 0.4.6 (2020-06-10) 102 | ------------------ 103 | * Bugfix for writing subs to stdout; 104 | 105 | 0.4.5 (2020-06-09) 106 | ------------------ 107 | * Allow MicroDVD input format; 108 | * Use output extension to determine output format; 109 | 110 | 0.4.4 (2020-06-08) 111 | ------------------ 112 | * Use rich formatting for Python >= 3.6; 113 | * Use versioneer to manage versions; 114 | 115 | 0.4.3 (2020-06-07) 116 | ------------------ 117 | * Fix regression where stdout not used for default output; 118 | * Add ability to specify path to ffmpeg / ffprobe binaries; 119 | * Add ability to overwrite the input / unsynced srt with the --overwrite-input flag; 120 | 121 | 0.4.2 (2020-06-06) 122 | ------------------ 123 | * Fix Python 2 compatibility bug; 124 | 125 | 0.4.1 (2020-06-06) 126 | ------------------ 127 | * Add --reference-stream option for selecting the stream / track from the video reference to use for speech detection; 128 | 129 | 0.4.0 (2020-06-02) 130 | ------------------ 131 | * Remove dependency on scikit-learn; 132 | * Implement PyInstaller / Gooey build process for graphical application on MacOS and Windows; 133 | 134 | 0.3.7 (2020-05-11) 135 | ------------------ 136 | * Fix PyPI issues; 137 | 138 | 0.3.5 (2020-05-08) 139 | ------------------ 140 | * Fix corner case bug that occurred when multiple sync attempts were scored the same; 141 | 142 | 0.3.4 (2020-03-20) 143 | ------------------ 144 | * 
Attempt speech extraction from subtitle tracks embedded in video first before using VAD; 145 | 146 | 0.3.3 (2020-03-15) 147 | ------------------ 148 | * Hotfix for test archive creation bug; 149 | 150 | 0.3.2 (2020-03-13) 151 | ------------------ 152 | * Add ability to merge synced and reference subs into bilingual subs when reference is srt; 153 | 154 | 0.3.1 (2020-03-12) 155 | ------------------ 156 | * Fix bug when handling ass/ssa input, this format should work now; 157 | 158 | 0.3.0 (2020-03-11) 159 | ------------------ 160 | * Better detection of text file encodings; 161 | * ASS / SSA functionality (but currently untested); 162 | * Allow serialize speech with --serialize-speech flag; 163 | * Convenient --make-test-case flag to create test cases when filing sync-related bugs; 164 | * Use utf-8 as default output encoding (instead of using same encoding as input); 165 | * More robust test framework (integration tests!); 166 | 167 | 0.2.17 (2019-12-21) 168 | ------------------ 169 | * Try to correct for framerate differences by picking best framerate ratio; 170 | 171 | 0.2.16 (2019-12-04) 172 | ------------------ 173 | * Revert changes from 0.2.9 now that srt parses weird timestamps robustly; 174 | 175 | 0.2.15 (2019-10-11) 176 | ------------------ 177 | * Revert changes from 0.2.12 (caused regression on Windows); 178 | 179 | 0.2.14 (2019-10-07) 180 | ------------------ 181 | * Bump min required scikit-learn to 0.20.4; 182 | 183 | 0.2.12 (2019-10-06) 184 | ------------------ 185 | * Clear O_NONBLOCK flag on stdout stream in case it is set; 186 | 187 | 0.2.11 (2019-10-06) 188 | ------------------ 189 | * Quick and dirty fix to recover without progress info if `ffmpeg.probe` raises; 190 | 191 | 0.2.10 (2019-09-22) 192 | ------------------ 193 | * Specify utf-8 encoding at top of file for backcompat with Python2; 194 | 195 | 0.2.9 (2019-09-22) 196 | ------------------ 197 | * Quick and dirty fix to properly handle timestamp ms fields with >3 digits; 198 | 199 | 0.2.8 
(2019-06-15) 200 | ------------------ 201 | * Allow user to specify start time (in seconds) for processing; 202 | 203 | 0.2.7 (2019-05-28) 204 | ------------------ 205 | * Add utf-16 to list of encodings to try for inference purposes; 206 | 207 | 0.2.6 (2019-05-15) 208 | ------------------ 209 | * Fix argument parsing regression; 210 | 211 | 0.2.5 (2019-05-14) 212 | ------------------ 213 | * Clamp subtitles to maximum duration (default 10); 214 | 215 | 0.2.4 (2019-03-19) 216 | ------------------ 217 | * Add six to requirements.txt; 218 | * Set default encoding to utf8 to ensure non ascii filenames handled properly; 219 | 220 | 0.2.3 (2019-03-08) 221 | ------------------ 222 | * Minor change to subtitle speech extraction; 223 | 224 | 0.2.2 (2019-03-08) 225 | ------------------ 226 | * Allow reading input srt from stdin; 227 | * Allow specifying encodings for reference, input, and output srt; 228 | * Use the same encoding for both input srt and output srt by default; 229 | * Developer note: using sklearn-style data pipelines now; 230 | 231 | 0.2.1 (2019-03-07) 232 | ------------------ 233 | * Developer note: change progress-only to vlc-mode and remove from help docs; 234 | 235 | 0.2.0 (2019-03-06) 236 | ------------------ 237 | * Get rid of auditok (GPLv3, was hurting alignment algorithm); 238 | * Change to alignment algo: don't penalize matching video non-speech with subtitle speech; 239 | 240 | 0.1.7 (2019-03-05) 241 | ------------------ 242 | * Add Chinese to the list of encodings that can be inferred; 243 | * Make srt parsing more robust; 244 | 245 | 0.1.6 (2019-03-04) 246 | ------------------ 247 | * Misc bugfixes; 248 | * Proper logging; 249 | * Proper version handling; 250 | 251 | 0.1.0 (2019-02-24) 252 | ------------------ 253 | * Support srt format; 254 | * Support using srt as reference; 255 | * Support using video as reference (via ffmpeg); 256 | * Support writing to stdout or file (read from stdin not yet supported; can only read from file); 257 | 
-------------------------------------------------------------------------------- /LICENSE: -------------------------------------------------------------------------------- 1 | Copyright 2019 Stephen Macke 2 | 3 | Permission is hereby granted, free of charge, to any person obtaining a copy of this software and associated documentation files (the "Software"), to deal in the Software without restriction, including without limitation the rights to use, copy, modify, merge, publish, distribute, sublicense, and/or sell copies of the Software, and to permit persons to whom the Software is furnished to do so, subject to the following conditions: 4 | 5 | The above copyright notice and this permission notice shall be included in all copies or substantial portions of the Software. 6 | 7 | THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE. 
8 | -------------------------------------------------------------------------------- /MANIFEST.in: -------------------------------------------------------------------------------- 1 | include *.rst 2 | include versioneer.py 3 | include ffsubsync/_version.py 4 | -------------------------------------------------------------------------------- /Makefile: -------------------------------------------------------------------------------- 1 | # -*- coding: utf-8 -*- 2 | .PHONY: clean build bump deploy black blackcheck check test tests deps devdeps 3 | 4 | clean: 5 | rm -rf dist/ build/ *.egg-info/ 6 | 7 | build: clean 8 | python setup.py sdist bdist_wheel --universal 9 | 10 | bump: 11 | ./scripts/bump-version.py 12 | 13 | deploy: build 14 | ./scripts/deploy.sh 15 | 16 | black: 17 | ./scripts/blacken.sh 18 | 19 | blackcheck: 20 | ./scripts/blacken.sh --check 21 | 22 | lint: 23 | flake8 24 | 25 | typecheck: 26 | mypy ffsubsync 27 | 28 | check_no_typing: 29 | INTEGRATION=1 pytest --cov-config=.coveragerc --cov=ffsubsync 30 | 31 | check: blackcheck typecheck check_no_typing 32 | 33 | test: check 34 | tests: check 35 | 36 | deps: 37 | pip install -r requirements.txt 38 | 39 | devdeps: 40 | pip install -e . 
41 | pip install -r requirements-dev.txt 42 | 43 | -------------------------------------------------------------------------------- /README.md: -------------------------------------------------------------------------------- 1 | FFsubsync 2 | ======= 3 | 4 | [![CI Status](https://github.com/smacke/ffsubsync/workflows/ffsubsync/badge.svg)](https://github.com/smacke/ffsubsync/actions) 5 | [![Support Ukraine](https://badgen.net/badge/support/UKRAINE/?color=0057B8&labelColor=FFD700)](https://github.com/vshymanskyy/StandWithUkraine/blob/main/docs/README.md) 6 | [![Checked with mypy](http://www.mypy-lang.org/static/mypy_badge.svg)](http://mypy-lang.org/) 7 | [![Code style: black](https://img.shields.io/badge/code%20style-black-000000.svg)](https://github.com/psf/black) 8 | [![License: MIT](https://img.shields.io/badge/License-MIT-maroon.svg)](https://opensource.org/licenses/MIT) 9 | [![Python Versions](https://img.shields.io/pypi/pyversions/ffsubsync.svg)](https://pypi.org/project/ffsubsync) 10 | [![Documentation Status](https://readthedocs.org/projects/ffsubsync/badge/?version=latest)](https://ffsubsync.readthedocs.io/en/latest/?badge=latest) 11 | [![PyPI Version](https://img.shields.io/pypi/v/ffsubsync.svg)](https://pypi.org/project/ffsubsync) 12 | 13 | 14 | Language-agnostic automatic synchronization of subtitles with video, so that 15 | subtitles are aligned to the correct starting point within the video. 16 | 17 | Turn this: | Into this: 18 | :-------------------------------:|:-------------------------: 19 | ![](https://raw.githubusercontent.com/smacke/ffsubsync/master/resources/img/tearing-me-apart-wrong.gif) | ![](https://raw.githubusercontent.com/smacke/ffsubsync/master/resources/img/tearing-me-apart-correct.gif) 20 | 21 | Helping Development 22 | ------------------- 23 | Please consider [supporting Ukraine](https://github.com/vshymanskyy/StandWithUkraine/blob/main/docs/README.md) 24 | rather than donating directly to this project. 
That said, at the request of 25 | some, you can now help cover my coffee expenses using the Github Sponsors 26 | button at the top, or using the below Paypal Donate button: 27 | 28 | [![Donate](https://www.paypalobjects.com/en_US/i/btn/btn_donate_LG.gif)](https://www.paypal.com/cgi-bin/webscr?cmd=_s-xclick&hosted_button_id=XJC5ANLMYECJE) 29 | 30 | Install 31 | ------- 32 | First, make sure ffmpeg is installed. On MacOS, this looks like: 33 | ~~~ 34 | brew install ffmpeg 35 | ~~~ 36 | (Windows users: make sure `ffmpeg` is on your path and can be referenced 37 | from the command line!) 38 | 39 | Next, grab the package (compatible with Python >= 3.6): 40 | ~~~ 41 | pip install ffsubsync 42 | ~~~ 43 | If you want to live dangerously, you can grab the latest version as follows: 44 | ~~~ 45 | pip install git+https://github.com/smacke/ffsubsync@latest 46 | ~~~ 47 | 48 | Usage 49 | ----- 50 | `ffs`, `subsync` and `ffsubsync` all work as entrypoints: 51 | ~~~ 52 | ffs video.mp4 -i unsynchronized.srt -o synchronized.srt 53 | ~~~ 54 | 55 | There may be occasions where you have a correctly synchronized srt file in a 56 | language you are unfamiliar with, as well as an unsynchronized srt file in your 57 | native language. In this case, you can use the correctly synchronized srt file 58 | directly as a reference for synchronization, instead of using the video as the 59 | reference: 60 | 61 | ~~~ 62 | ffsubsync reference.srt -i unsynchronized.srt -o synchronized.srt 63 | ~~~ 64 | 65 | `ffsubsync` uses the file extension to decide whether to perform voice activity 66 | detection on the audio or to directly extract speech from an srt file. 
67 | 68 | Sync Issues 69 | ----------- 70 | If the sync fails, the following recourses are available: 71 | - Try to sync assuming identical video / subtitle framerates by passing 72 | `--no-fix-framerate`; 73 | - Try passing `--gss` to use [golden-section search](https://en.wikipedia.org/wiki/Golden-section_search) 74 | to find the optimal ratio between video and subtitle framerates (by default, 75 | only a few common ratios are evaluated); 76 | - Try a value of `--max-offset-seconds` greater than the default of 60, in the 77 | event that the subtitles are out of sync by more than 60 seconds (empirically 78 | unlikely in practice, but possible). 79 | - Try `--vad=auditok` since [auditok](https://github.com/amsehili/auditok) can 80 | sometimes work better in the case of low-quality audio than WebRTC's VAD. 81 | Auditok does not specifically detect voice, but instead detects all audio; 82 | this property can yield suboptimal syncing behavior when a proper VAD can 83 | work well, but can be effective in some cases. 84 | 85 | If the sync still fails, consider trying one of the following similar tools: 86 | - [sc0ty/subsync](https://github.com/sc0ty/subsync): does speech-to-text and looks for matching word morphemes 87 | - [kaegi/alass](https://github.com/kaegi/alass): rust-based subtitle synchronizer with a fancy dynamic programming algorithm 88 | - [tympanix/subsync](https://github.com/tympanix/subsync): neural net based approach that optimizes directly for alignment when performing speech detection 89 | - [oseiskar/autosubsync](https://github.com/oseiskar/autosubsync): performs speech detection with bespoke spectrogram + logistic regression 90 | - [pums974/srtsync](https://github.com/pums974/srtsync): similar approach to ffsubsync (WebRTC's VAD + FFT to maximize signal cross correlation) 91 | 92 | Speed 93 | ----- 94 | `ffsubsync` usually finishes in 20 to 30 seconds, depending on the length of 95 | the video. 
The most expensive step is actually extraction of raw audio. If you 96 | already have a correctly synchronized "reference" srt file (in which case audio 97 | extraction can be skipped), `ffsubsync` typically runs in less than a second. 98 | 99 | How It Works 100 | ------------ 101 | The synchronization algorithm operates in 3 steps: 102 | 1. Discretize both the video file's audio stream and the subtitles into 10ms 103 | windows. 104 | 2. For each 10ms window, determine whether that window contains speech. This 105 | is trivial to do for subtitles (we just determine whether any subtitle is 106 | "on" during each time window); for the audio stream, use an off-the-shelf 107 | voice activity detector (VAD) like 108 | the one built into [webrtc](https://webrtc.org/). 109 | 3. Now we have two binary strings: one for the subtitles, and one for the 110 | video. Try to align these strings by matching 0's with 0's and 1's with 111 | 1's. We score these alignments as (# video 1's matched w/ subtitle 1's) - (# 112 | video 1's matched with subtitle 0's). 113 | 114 | The best-scoring alignment from step 3 determines how to offset the subtitles 115 | in time so that they are properly synced with the video. Because the binary 116 | strings are fairly long (millions of digits for video longer than an hour), the 117 | naive O(n^2) strategy for scoring all alignments is unacceptable. Instead, we 118 | use the fact that "scoring all alignments" is a convolution operation and can 119 | be implemented with the Fast Fourier Transform (FFT), bringing the complexity 120 | down to O(n log n). 121 | 122 | Limitations 123 | ----------- 124 | In most cases, inconsistencies between video and subtitles occur when starting 125 | or ending segments present in video are not present in subtitles, or vice versa. 126 | This can occur, for example, when a TV episode recap in the subtitles was pruned 127 | from video. 
FFsubsync typically works well in these cases, and in my experience 128 | this covers >95% of use cases. Handling breaks and splits outside of the beginning 129 | and ending segments is left to future work (see below). 130 | 131 | Future Work 132 | ----------- 133 | Besides general stability and usability improvements, one line 134 | of work aims to extend the synchronization algorithm to handle splits 135 | / breaks in the middle of video not present in subtitles (or vice versa). 136 | Developing a robust solution will take some time (assuming one is possible). 137 | See [#10](https://github.com/smacke/ffsubsync/issues/10) for more details. 138 | 139 | History 140 | ------- 141 | The implementation for this project was started during HackIllinois 2019, for 142 | which it received an **_Honorable Mention_** (ranked in the top 5 projects, 143 | excluding projects that won company-specific prizes). 144 | 145 | Credits 146 | ------- 147 | This project would not be possible without the following libraries: 148 | - [ffmpeg](https://www.ffmpeg.org/) and the [ffmpeg-python](https://github.com/kkroening/ffmpeg-python) wrapper, for extracting raw audio from video 149 | - VAD from [webrtc](https://webrtc.org/) and the [py-webrtcvad](https://github.com/wiseman/py-webrtcvad) wrapper, for speech detection 150 | - [srt](https://pypi.org/project/srt/) for operating on [SRT files](https://en.wikipedia.org/wiki/SubRip#SubRip_text_file_format) 151 | - [numpy](http://www.numpy.org/) and, indirectly, [FFTPACK](https://www.netlib.org/fftpack/), which powers the FFT-based algorithm for fast scoring of alignments between subtitles (or subtitles and video) 152 | - Other excellent Python libraries like [argparse](https://docs.python.org/3/library/argparse.html), [rich](https://github.com/willmcgugan/rich), and [tqdm](https://tqdm.github.io/), not related to the core functionality, but which enable much better experiences for developers and users. 
153 | 154 | # License 155 | Code in this project is [MIT licensed](https://opensource.org/licenses/MIT). 156 | -------------------------------------------------------------------------------- /docs/.gitignore: -------------------------------------------------------------------------------- 1 | _build/ 2 | -------------------------------------------------------------------------------- /docs/Makefile: -------------------------------------------------------------------------------- 1 | # Minimal makefile for Sphinx documentation 2 | # 3 | 4 | # You can set these variables from the command line, and also 5 | # from the environment for the first two. 6 | SPHINXOPTS ?= 7 | SPHINXBUILD ?= sphinx-build 8 | SOURCEDIR = . 9 | BUILDDIR = _build 10 | 11 | # Put it first so that "make" without argument is like "make help". 12 | help: 13 | @$(SPHINXBUILD) -M help "$(SOURCEDIR)" "$(BUILDDIR)" $(SPHINXOPTS) $(O) 14 | 15 | .PHONY: help Makefile 16 | 17 | # Catch-all target: route all unknown targets to Sphinx using the new 18 | # "make mode" option. $(O) is meant as a shortcut for $(SPHINXOPTS). 
19 | %: Makefile 20 | @$(SPHINXBUILD) -M $@ "$(SOURCEDIR)" "$(BUILDDIR)" $(SPHINXOPTS) $(O) 21 | -------------------------------------------------------------------------------- /docs/_static/.keep: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/smacke/ffsubsync/4d275da8b446de4be582d44337e99b2f75b56ebe/docs/_static/.keep -------------------------------------------------------------------------------- /docs/_templates/.keep: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/smacke/ffsubsync/4d275da8b446de4be582d44337e99b2f75b56ebe/docs/_templates/.keep -------------------------------------------------------------------------------- /docs/conf.py: -------------------------------------------------------------------------------- 1 | # Configuration file for the Sphinx documentation builder. 2 | # 3 | # This file only contains a selection of the most common options. For a full 4 | # list see the documentation: 5 | # https://www.sphinx-doc.org/en/master/usage/configuration.html 6 | 7 | # -- Path setup -------------------------------------------------------------- 8 | 9 | # If extensions (or modules to document with autodoc) are in another directory, 10 | # add these directories to sys.path here. If the directory is relative to the 11 | # documentation root, use os.path.abspath to make it absolute, like shown here. 
12 | # 13 | # import os 14 | # import sys 15 | # sys.path.insert(0, os.path.abspath('.')) 16 | 17 | 18 | # -- Project information ----------------------------------------------------- 19 | 20 | project = 'ffsubsync' 21 | copyright = '2020, Stephen Macke' 22 | author = 'Stephen Macke' 23 | 24 | 25 | # -- General configuration --------------------------------------------------- 26 | 27 | # ref: https://stackoverflow.com/questions/56336234/build-fail-sphinx-error-contents-rst-not-found 28 | master_doc = 'index' 29 | 30 | # Add any Sphinx extension module names here, as strings. They can be 31 | # extensions coming with Sphinx (named 'sphinx.ext.*') or your custom 32 | # ones. 33 | extensions = [ 34 | 'sphinx.ext.autodoc', 35 | 'sphinxarg.ext', 36 | 'sphinx_rtd_theme', 37 | ] 38 | 39 | # Add any paths that contain templates here, relative to this directory. 40 | templates_path = ['_templates'] 41 | 42 | # List of patterns, relative to source directory, that match files and 43 | # directories to ignore when looking for source files. 44 | # This pattern also affects html_static_path and html_extra_path. 45 | exclude_patterns = ['_build', 'Thumbs.db', '.DS_Store'] 46 | 47 | 48 | # -- Options for HTML output ------------------------------------------------- 49 | 50 | # The theme to use for HTML and HTML Help pages. See the documentation for 51 | # a list of builtin themes. 52 | # 53 | html_theme = 'sphinx_rtd_theme' 54 | 55 | # Add any paths that contain custom static files (such as style sheets) here, 56 | # relative to this directory. They are copied after the builtin static files, 57 | # so a file named "default.css" will overwrite the builtin "default.css". 58 | html_static_path = ['_static'] 59 | -------------------------------------------------------------------------------- /docs/index.rst: -------------------------------------------------------------------------------- 1 | .. 
ffsubsync documentation master file, created by 2 | sphinx-quickstart on Mon Dec 2 17:06:18 2019. 3 | You can adapt this file completely to your liking, but it should at least 4 | contain the root `toctree` directive. 5 | 6 | Welcome to FFsubsync's documentation! 7 | ===================================== 8 | 9 | .. toctree:: 10 | :maxdepth: 2 11 | :caption: Contents: 12 | 13 | 14 | 15 | Indices and tables 16 | ================== 17 | 18 | * :ref:`genindex` 19 | * :ref:`modindex` 20 | * :ref:`search` 21 | 22 | .. argparse:: 23 | :module: ffsubsync.ffsubsync 24 | :func: make_parser 25 | :prog: ffsubsync 26 | -------------------------------------------------------------------------------- /docs/make.bat: -------------------------------------------------------------------------------- 1 | @ECHO OFF 2 | 3 | pushd %~dp0 4 | 5 | REM Command file for Sphinx documentation 6 | 7 | if "%SPHINXBUILD%" == "" ( 8 | set SPHINXBUILD=sphinx-build 9 | ) 10 | set SOURCEDIR=. 11 | set BUILDDIR=_build 12 | 13 | if "%1" == "" goto help 14 | 15 | %SPHINXBUILD% >NUL 2>NUL 16 | if errorlevel 9009 ( 17 | echo. 18 | echo.The 'sphinx-build' command was not found. Make sure you have Sphinx 19 | echo.installed, then set the SPHINXBUILD environment variable to point 20 | echo.to the full path of the 'sphinx-build' executable. Alternatively you 21 | echo.may add the Sphinx directory to PATH. 22 | echo. 
23 | echo.If you don't have Sphinx installed, grab it from 24 | echo.http://sphinx-doc.org/ 25 | exit /b 1 26 | ) 27 | 28 | %SPHINXBUILD% -M %1 %SOURCEDIR% %BUILDDIR% %SPHINXOPTS% %O% 29 | goto end 30 | 31 | :help 32 | %SPHINXBUILD% -M help %SOURCEDIR% %BUILDDIR% %SPHINXOPTS% %O% 33 | 34 | :end 35 | popd 36 | -------------------------------------------------------------------------------- /docs/requirements-docs.txt: -------------------------------------------------------------------------------- 1 | # docs 2 | autodoc 3 | docutils<0.18 # ref: https://github.com/sphinx-doc/sphinx/issues/9788 4 | sphinx-argparse 5 | sphinx-rtd-theme 6 | -------------------------------------------------------------------------------- /ffsubsync/__init__.py: -------------------------------------------------------------------------------- 1 | # -*- coding: utf-8 -*- 2 | import logging 3 | import sys 4 | 5 | try: 6 | from rich.console import Console 7 | from rich.logging import RichHandler 8 | 9 | # configure logging here because some other later imported library does it first otherwise 10 | # TODO: use a fileconfig 11 | logging.basicConfig( 12 | level=logging.INFO, 13 | format="%(message)s", 14 | datefmt="[%X]", 15 | handlers=[RichHandler(console=Console(file=sys.stderr))], 16 | ) 17 | except: # noqa: E722 18 | logging.basicConfig(stream=sys.stderr, level=logging.INFO) 19 | 20 | from .version import __version__ # noqa 21 | from .ffsubsync import main # noqa 22 | -------------------------------------------------------------------------------- /ffsubsync/_version.py: -------------------------------------------------------------------------------- 1 | 2 | # This file helps to compute a version number in source trees obtained from 3 | # git-archive tarball (such as those provided by githubs download-from-tag 4 | # feature). 
Distribution tarballs (built by setup.py sdist) and build 5 | # directories (produced by setup.py build) will contain a much shorter file 6 | # that just contains the computed version number. 7 | 8 | # This file is released into the public domain. Generated by 9 | # versioneer-0.18 (https://github.com/warner/python-versioneer) 10 | 11 | """Git implementation of _version.py.""" 12 | 13 | import errno 14 | import os 15 | import re 16 | import subprocess 17 | import sys 18 | 19 | 20 | def get_keywords(): 21 | """Get the keywords needed to look up the version information.""" 22 | # these strings will be replaced by git during git-archive. 23 | # setup.py/versioneer.py will grep for the variable names, so they must 24 | # each be defined on a line of their own. _version.py will just call 25 | # get_keywords(). 26 | git_refnames = " (HEAD -> master)" 27 | git_full = "4d275da8b446de4be582d44337e99b2f75b56ebe" 28 | git_date = "2025-02-18 20:04:07 -0800" 29 | keywords = {"refnames": git_refnames, "full": git_full, "date": git_date} 30 | return keywords 31 | 32 | 33 | class VersioneerConfig: 34 | """Container for Versioneer configuration parameters.""" 35 | 36 | 37 | def get_config(): 38 | """Create, populate and return the VersioneerConfig() object.""" 39 | # these strings are filled in when 'setup.py versioneer' creates 40 | # _version.py 41 | cfg = VersioneerConfig() 42 | cfg.VCS = "git" 43 | cfg.style = "pep440-pre" 44 | cfg.tag_prefix = "" 45 | cfg.parentdir_prefix = "ffsubsync-" 46 | cfg.versionfile_source = "ffsubsync/_version.py" 47 | cfg.verbose = False 48 | return cfg 49 | 50 | 51 | class NotThisMethod(Exception): 52 | """Exception raised if a method is not valid for the current scenario.""" 53 | 54 | 55 | LONG_VERSION_PY = {} 56 | HANDLERS = {} 57 | 58 | 59 | def register_vcs_handler(vcs, method): # decorator 60 | """Decorator to mark a method as the handler for a particular VCS.""" 61 | def decorate(f): 62 | """Store f in HANDLERS[vcs][method].""" 63 | if vcs not 
in HANDLERS: 64 | HANDLERS[vcs] = {} 65 | HANDLERS[vcs][method] = f 66 | return f 67 | return decorate 68 | 69 | 70 | def run_command(commands, args, cwd=None, verbose=False, hide_stderr=False, 71 | env=None): 72 | """Call the given command(s).""" 73 | assert isinstance(commands, list) 74 | p = None 75 | for c in commands: 76 | try: 77 | dispcmd = str([c] + args) 78 | # remember shell=False, so use git.cmd on windows, not just git 79 | p = subprocess.Popen([c] + args, cwd=cwd, env=env, 80 | stdout=subprocess.PIPE, 81 | stderr=(subprocess.PIPE if hide_stderr 82 | else None)) 83 | break 84 | except EnvironmentError: 85 | e = sys.exc_info()[1] 86 | if e.errno == errno.ENOENT: 87 | continue 88 | if verbose: 89 | print("unable to run %s" % dispcmd) 90 | print(e) 91 | return None, None 92 | else: 93 | if verbose: 94 | print("unable to find command, tried %s" % (commands,)) 95 | return None, None 96 | stdout = p.communicate()[0].strip() 97 | if sys.version_info[0] >= 3: 98 | stdout = stdout.decode() 99 | if p.returncode != 0: 100 | if verbose: 101 | print("unable to run %s (error)" % dispcmd) 102 | print("stdout was %s" % stdout) 103 | return None, p.returncode 104 | return stdout, p.returncode 105 | 106 | 107 | def versions_from_parentdir(parentdir_prefix, root, verbose): 108 | """Try to determine the version from the parent directory name. 109 | 110 | Source tarballs conventionally unpack into a directory that includes both 111 | the project name and a version string. 
We will also support searching up 112 | two directory levels for an appropriately named parent directory 113 | """ 114 | rootdirs = [] 115 | 116 | for i in range(3): 117 | dirname = os.path.basename(root) 118 | if dirname.startswith(parentdir_prefix): 119 | return {"version": dirname[len(parentdir_prefix):], 120 | "full-revisionid": None, 121 | "dirty": False, "error": None, "date": None} 122 | else: 123 | rootdirs.append(root) 124 | root = os.path.dirname(root) # up a level 125 | 126 | if verbose: 127 | print("Tried directories %s but none started with prefix %s" % 128 | (str(rootdirs), parentdir_prefix)) 129 | raise NotThisMethod("rootdir doesn't start with parentdir_prefix") 130 | 131 | 132 | @register_vcs_handler("git", "get_keywords") 133 | def git_get_keywords(versionfile_abs): 134 | """Extract version information from the given file.""" 135 | # the code embedded in _version.py can just fetch the value of these 136 | # keywords. When used from setup.py, we don't want to import _version.py, 137 | # so we do it with a regexp instead. This function is not used from 138 | # _version.py. 
139 | keywords = {} 140 | try: 141 | f = open(versionfile_abs, "r") 142 | for line in f.readlines(): 143 | if line.strip().startswith("git_refnames ="): 144 | mo = re.search(r'=\s*"(.*)"', line) 145 | if mo: 146 | keywords["refnames"] = mo.group(1) 147 | if line.strip().startswith("git_full ="): 148 | mo = re.search(r'=\s*"(.*)"', line) 149 | if mo: 150 | keywords["full"] = mo.group(1) 151 | if line.strip().startswith("git_date ="): 152 | mo = re.search(r'=\s*"(.*)"', line) 153 | if mo: 154 | keywords["date"] = mo.group(1) 155 | f.close() 156 | except EnvironmentError: 157 | pass 158 | return keywords 159 | 160 | 161 | @register_vcs_handler("git", "keywords") 162 | def git_versions_from_keywords(keywords, tag_prefix, verbose): 163 | """Get version information from git keywords.""" 164 | if not keywords: 165 | raise NotThisMethod("no keywords at all, weird") 166 | date = keywords.get("date") 167 | if date is not None: 168 | # git-2.2.0 added "%cI", which expands to an ISO-8601 -compliant 169 | # datestamp. However we prefer "%ci" (which expands to an "ISO-8601 170 | # -like" string, which we must then edit to make compliant), because 171 | # it's been around since git-1.5.3, and it's too difficult to 172 | # discover which version we're using, or to work around using an 173 | # older one. 174 | date = date.strip().replace(" ", "T", 1).replace(" ", "", 1) 175 | refnames = keywords["refnames"].strip() 176 | if refnames.startswith("$Format"): 177 | if verbose: 178 | print("keywords are unexpanded, not using") 179 | raise NotThisMethod("unexpanded keywords, not a git-archive tarball") 180 | refs = set([r.strip() for r in refnames.strip("()").split(",")]) 181 | # starting in git-1.8.3, tags are listed as "tag: foo-1.0" instead of 182 | # just "foo-1.0". If we see a "tag: " prefix, prefer those. 183 | TAG = "tag: " 184 | tags = set([r[len(TAG):] for r in refs if r.startswith(TAG)]) 185 | if not tags: 186 | # Either we're using git < 1.8.3, or there really are no tags. 
We use 187 | # a heuristic: assume all version tags have a digit. The old git %d 188 | # expansion behaves like git log --decorate=short and strips out the 189 | # refs/heads/ and refs/tags/ prefixes that would let us distinguish 190 | # between branches and tags. By ignoring refnames without digits, we 191 | # filter out many common branch names like "release" and 192 | # "stabilization", as well as "HEAD" and "master". 193 | tags = set([r for r in refs if re.search(r'\d', r)]) 194 | if verbose: 195 | print("discarding '%s', no digits" % ",".join(refs - tags)) 196 | if verbose: 197 | print("likely tags: %s" % ",".join(sorted(tags))) 198 | for ref in sorted(tags): 199 | # sorting will prefer e.g. "2.0" over "2.0rc1" 200 | if ref.startswith(tag_prefix): 201 | r = ref[len(tag_prefix):] 202 | if verbose: 203 | print("picking %s" % r) 204 | return {"version": r, 205 | "full-revisionid": keywords["full"].strip(), 206 | "dirty": False, "error": None, 207 | "date": date} 208 | # no suitable tags, so version is "0+unknown", but full hex is still there 209 | if verbose: 210 | print("no suitable tags, using unknown + full revision id") 211 | return {"version": "0+unknown", 212 | "full-revisionid": keywords["full"].strip(), 213 | "dirty": False, "error": "no suitable tags", "date": None} 214 | 215 | 216 | @register_vcs_handler("git", "pieces_from_vcs") 217 | def git_pieces_from_vcs(tag_prefix, root, verbose, run_command=run_command): 218 | """Get version from 'git describe' in the root of the source tree. 219 | 220 | This only gets called if the git-archive 'subst' keywords were *not* 221 | expanded, and _version.py hasn't already been rewritten with a short 222 | version string, meaning we're inside a checked out source tree. 
223 | """ 224 | GITS = ["git"] 225 | if sys.platform == "win32": 226 | GITS = ["git.cmd", "git.exe"] 227 | 228 | out, rc = run_command(GITS, ["rev-parse", "--git-dir"], cwd=root, 229 | hide_stderr=True) 230 | if rc != 0: 231 | if verbose: 232 | print("Directory %s not under git control" % root) 233 | raise NotThisMethod("'git rev-parse --git-dir' returned error") 234 | 235 | # if there is a tag matching tag_prefix, this yields TAG-NUM-gHEX[-dirty] 236 | # if there isn't one, this yields HEX[-dirty] (no NUM) 237 | describe_out, rc = run_command(GITS, ["describe", "--tags", "--dirty", 238 | "--always", "--long", 239 | "--match", "%s*" % tag_prefix], 240 | cwd=root) 241 | # --long was added in git-1.5.5 242 | if describe_out is None: 243 | raise NotThisMethod("'git describe' failed") 244 | describe_out = describe_out.strip() 245 | full_out, rc = run_command(GITS, ["rev-parse", "HEAD"], cwd=root) 246 | if full_out is None: 247 | raise NotThisMethod("'git rev-parse' failed") 248 | full_out = full_out.strip() 249 | 250 | pieces = {} 251 | pieces["long"] = full_out 252 | pieces["short"] = full_out[:7] # maybe improved later 253 | pieces["error"] = None 254 | 255 | # parse describe_out. It will be like TAG-NUM-gHEX[-dirty] or HEX[-dirty] 256 | # TAG might have hyphens. 257 | git_describe = describe_out 258 | 259 | # look for -dirty suffix 260 | dirty = git_describe.endswith("-dirty") 261 | pieces["dirty"] = dirty 262 | if dirty: 263 | git_describe = git_describe[:git_describe.rindex("-dirty")] 264 | 265 | # now we have TAG-NUM-gHEX or HEX 266 | 267 | if "-" in git_describe: 268 | # TAG-NUM-gHEX 269 | mo = re.search(r'^(.+)-(\d+)-g([0-9a-f]+)$', git_describe) 270 | if not mo: 271 | # unparseable. Maybe git-describe is misbehaving? 
272 | pieces["error"] = ("unable to parse git-describe output: '%s'" 273 | % describe_out) 274 | return pieces 275 | 276 | # tag 277 | full_tag = mo.group(1) 278 | if not full_tag.startswith(tag_prefix): 279 | if verbose: 280 | fmt = "tag '%s' doesn't start with prefix '%s'" 281 | print(fmt % (full_tag, tag_prefix)) 282 | pieces["error"] = ("tag '%s' doesn't start with prefix '%s'" 283 | % (full_tag, tag_prefix)) 284 | return pieces 285 | pieces["closest-tag"] = full_tag[len(tag_prefix):] 286 | 287 | # distance: number of commits since tag 288 | pieces["distance"] = int(mo.group(2)) 289 | 290 | # commit: short hex revision ID 291 | pieces["short"] = mo.group(3) 292 | 293 | else: 294 | # HEX: no tags 295 | pieces["closest-tag"] = None 296 | count_out, rc = run_command(GITS, ["rev-list", "HEAD", "--count"], 297 | cwd=root) 298 | pieces["distance"] = int(count_out) # total number of commits 299 | 300 | # commit date: see ISO-8601 comment in git_versions_from_keywords() 301 | date = run_command(GITS, ["show", "-s", "--format=%ci", "HEAD"], 302 | cwd=root)[0].strip() 303 | pieces["date"] = date.strip().replace(" ", "T", 1).replace(" ", "", 1) 304 | 305 | return pieces 306 | 307 | 308 | def plus_or_dot(pieces): 309 | """Return a + if we don't already have one, else return a .""" 310 | if "+" in pieces.get("closest-tag", ""): 311 | return "." 312 | return "+" 313 | 314 | 315 | def render_pep440(pieces): 316 | """Build up version string, with post-release "local version identifier". 317 | 318 | Our goal: TAG[+DISTANCE.gHEX[.dirty]] . Note that if you 319 | get a tagged build and then dirty it, you'll get TAG+0.gHEX.dirty 320 | 321 | Exceptions: 322 | 1: no tags. git_describe was just HEX. 
0+untagged.DISTANCE.gHEX[.dirty] 323 | """ 324 | if pieces["closest-tag"]: 325 | rendered = pieces["closest-tag"] 326 | if pieces["distance"] or pieces["dirty"]: 327 | rendered += plus_or_dot(pieces) 328 | rendered += "%d.g%s" % (pieces["distance"], pieces["short"]) 329 | if pieces["dirty"]: 330 | rendered += ".dirty" 331 | else: 332 | # exception #1 333 | rendered = "0+untagged.%d.g%s" % (pieces["distance"], 334 | pieces["short"]) 335 | if pieces["dirty"]: 336 | rendered += ".dirty" 337 | return rendered 338 | 339 | 340 | def render_pep440_pre(pieces): 341 | """TAG[.post.devDISTANCE] -- No -dirty. 342 | 343 | Exceptions: 344 | 1: no tags. 0.post.devDISTANCE 345 | """ 346 | if pieces["closest-tag"]: 347 | rendered = pieces["closest-tag"] 348 | if pieces["distance"]: 349 | rendered += ".post.dev%d" % pieces["distance"] 350 | else: 351 | # exception #1 352 | rendered = "0.post.dev%d" % pieces["distance"] 353 | return rendered 354 | 355 | 356 | def render_pep440_post(pieces): 357 | """TAG[.postDISTANCE[.dev0]+gHEX] . 358 | 359 | The ".dev0" means dirty. Note that .dev0 sorts backwards 360 | (a dirty tree will appear "older" than the corresponding clean one), 361 | but you shouldn't be releasing software with -dirty anyways. 362 | 363 | Exceptions: 364 | 1: no tags. 0.postDISTANCE[.dev0] 365 | """ 366 | if pieces["closest-tag"]: 367 | rendered = pieces["closest-tag"] 368 | if pieces["distance"] or pieces["dirty"]: 369 | rendered += ".post%d" % pieces["distance"] 370 | if pieces["dirty"]: 371 | rendered += ".dev0" 372 | rendered += plus_or_dot(pieces) 373 | rendered += "g%s" % pieces["short"] 374 | else: 375 | # exception #1 376 | rendered = "0.post%d" % pieces["distance"] 377 | if pieces["dirty"]: 378 | rendered += ".dev0" 379 | rendered += "+g%s" % pieces["short"] 380 | return rendered 381 | 382 | 383 | def render_pep440_old(pieces): 384 | """TAG[.postDISTANCE[.dev0]] . 385 | 386 | The ".dev0" means dirty. 387 | 388 | Eexceptions: 389 | 1: no tags. 
0.postDISTANCE[.dev0] 390 | """ 391 | if pieces["closest-tag"]: 392 | rendered = pieces["closest-tag"] 393 | if pieces["distance"] or pieces["dirty"]: 394 | rendered += ".post%d" % pieces["distance"] 395 | if pieces["dirty"]: 396 | rendered += ".dev0" 397 | else: 398 | # exception #1 399 | rendered = "0.post%d" % pieces["distance"] 400 | if pieces["dirty"]: 401 | rendered += ".dev0" 402 | return rendered 403 | 404 | 405 | def render_git_describe(pieces): 406 | """TAG[-DISTANCE-gHEX][-dirty]. 407 | 408 | Like 'git describe --tags --dirty --always'. 409 | 410 | Exceptions: 411 | 1: no tags. HEX[-dirty] (note: no 'g' prefix) 412 | """ 413 | if pieces["closest-tag"]: 414 | rendered = pieces["closest-tag"] 415 | if pieces["distance"]: 416 | rendered += "-%d-g%s" % (pieces["distance"], pieces["short"]) 417 | else: 418 | # exception #1 419 | rendered = pieces["short"] 420 | if pieces["dirty"]: 421 | rendered += "-dirty" 422 | return rendered 423 | 424 | 425 | def render_git_describe_long(pieces): 426 | """TAG-DISTANCE-gHEX[-dirty]. 427 | 428 | Like 'git describe --tags --dirty --always -long'. 429 | The distance/hash is unconditional. 430 | 431 | Exceptions: 432 | 1: no tags. 
HEX[-dirty] (note: no 'g' prefix) 433 | """ 434 | if pieces["closest-tag"]: 435 | rendered = pieces["closest-tag"] 436 | rendered += "-%d-g%s" % (pieces["distance"], pieces["short"]) 437 | else: 438 | # exception #1 439 | rendered = pieces["short"] 440 | if pieces["dirty"]: 441 | rendered += "-dirty" 442 | return rendered 443 | 444 | 445 | def render(pieces, style): 446 | """Render the given version pieces into the requested style.""" 447 | if pieces["error"]: 448 | return {"version": "unknown", 449 | "full-revisionid": pieces.get("long"), 450 | "dirty": None, 451 | "error": pieces["error"], 452 | "date": None} 453 | 454 | if not style or style == "default": 455 | style = "pep440" # the default 456 | 457 | if style == "pep440": 458 | rendered = render_pep440(pieces) 459 | elif style == "pep440-pre": 460 | rendered = render_pep440_pre(pieces) 461 | elif style == "pep440-post": 462 | rendered = render_pep440_post(pieces) 463 | elif style == "pep440-old": 464 | rendered = render_pep440_old(pieces) 465 | elif style == "git-describe": 466 | rendered = render_git_describe(pieces) 467 | elif style == "git-describe-long": 468 | rendered = render_git_describe_long(pieces) 469 | else: 470 | raise ValueError("unknown style '%s'" % style) 471 | 472 | return {"version": rendered, "full-revisionid": pieces["long"], 473 | "dirty": pieces["dirty"], "error": None, 474 | "date": pieces.get("date")} 475 | 476 | 477 | def get_versions(): 478 | """Get version information or return default if unable to do so.""" 479 | # I am in _version.py, which lives at ROOT/VERSIONFILE_SOURCE. If we have 480 | # __file__, we can work backwards from there to the root. Some 481 | # py2exe/bbfreeze/non-CPython implementations don't do __file__, in which 482 | # case we can only use expanded keywords. 
483 | 484 | cfg = get_config() 485 | verbose = cfg.verbose 486 | 487 | try: 488 | return git_versions_from_keywords(get_keywords(), cfg.tag_prefix, 489 | verbose) 490 | except NotThisMethod: 491 | pass 492 | 493 | try: 494 | root = os.path.realpath(__file__) 495 | # versionfile_source is the relative path from the top of the source 496 | # tree (where the .git directory might live) to this file. Invert 497 | # this to find the root from __file__. 498 | for i in cfg.versionfile_source.split('/'): 499 | root = os.path.dirname(root) 500 | except NameError: 501 | return {"version": "0+unknown", "full-revisionid": None, 502 | "dirty": None, 503 | "error": "unable to find root of source tree", 504 | "date": None} 505 | 506 | try: 507 | pieces = git_pieces_from_vcs(cfg.tag_prefix, root, verbose) 508 | return render(pieces, cfg.style) 509 | except NotThisMethod: 510 | pass 511 | 512 | try: 513 | if cfg.parentdir_prefix: 514 | return versions_from_parentdir(cfg.parentdir_prefix, root, verbose) 515 | except NotThisMethod: 516 | pass 517 | 518 | return {"version": "0+unknown", "full-revisionid": None, 519 | "dirty": None, 520 | "error": "unable to compute version", "date": None} 521 | -------------------------------------------------------------------------------- /ffsubsync/aligners.py: -------------------------------------------------------------------------------- 1 | # -*- coding: utf-8 -*- 2 | import logging 3 | import math 4 | from typing import List, Optional, Tuple, Type, Union 5 | 6 | import numpy as np 7 | 8 | from ffsubsync.golden_section_search import gss 9 | from ffsubsync.sklearn_shim import Pipeline, TransformerMixin 10 | 11 | 12 | logging.basicConfig(level=logging.INFO) 13 | logger: logging.Logger = logging.getLogger(__name__) 14 | 15 | 16 | MIN_FRAMERATE_RATIO = 0.9 17 | MAX_FRAMERATE_RATIO = 1.1 18 | 19 | 20 | class FailedToFindAlignmentException(Exception): 21 | pass 22 | 23 | 24 | class FFTAligner(TransformerMixin): 25 | def __init__(self, 
max_offset_samples: Optional[int] = None) -> None: 26 | self.max_offset_samples: Optional[int] = max_offset_samples 27 | self.best_offset_: Optional[int] = None 28 | self.best_score_: Optional[float] = None 29 | self.get_score_: bool = False 30 | 31 | def _eliminate_extreme_offsets_from_solutions( 32 | self, convolve: np.ndarray, substring: np.ndarray 33 | ) -> np.ndarray: 34 | convolve = np.copy(convolve) 35 | if self.max_offset_samples is None: 36 | return convolve 37 | 38 | def _offset_to_index(offset): 39 | return len(convolve) - 1 + offset - len(substring) 40 | 41 | convolve[: _offset_to_index(-self.max_offset_samples)] = float("-inf") 42 | convolve[_offset_to_index(self.max_offset_samples) :] = float("-inf") 43 | return convolve 44 | 45 | def _compute_argmax(self, convolve: np.ndarray, substring: np.ndarray) -> None: 46 | best_idx = int(np.argmax(convolve)) 47 | self.best_offset_ = len(convolve) - 1 - best_idx - len(substring) 48 | self.best_score_ = convolve[best_idx] 49 | 50 | def fit(self, refstring, substring, get_score: bool = False) -> "FFTAligner": 51 | refstring, substring = [ 52 | list(map(int, s)) if isinstance(s, str) else s 53 | for s in [refstring, substring] 54 | ] 55 | refstring, substring = map( 56 | lambda s: 2 * np.array(s).astype(float) - 1, [refstring, substring] 57 | ) 58 | total_bits = math.log(len(substring) + len(refstring), 2) 59 | total_length = int(2 ** math.ceil(total_bits)) 60 | extra_zeros = total_length - len(substring) - len(refstring) 61 | subft = np.fft.fft(np.append(np.zeros(extra_zeros + len(refstring)), substring)) 62 | refft = np.fft.fft( 63 | np.flip(np.append(refstring, np.zeros(len(substring) + extra_zeros)), 0) 64 | ) 65 | convolve = np.real(np.fft.ifft(subft * refft)) 66 | self._compute_argmax( 67 | self._eliminate_extreme_offsets_from_solutions(convolve, substring), 68 | substring, 69 | ) 70 | self.get_score_ = get_score 71 | return self 72 | 73 | def transform(self, *_) -> Union[int, Tuple[float, int]]: 74 | if 
class MaxScoreAligner(TransformerMixin):
    """Runs a base aligner over several candidate subtitle pipelines and keeps
    the (score, offset, pipeline) triple with the highest alignment score."""

    def __init__(
        self,
        base_aligner: Union[FFTAligner, Type[FFTAligner]],
        srtin: Optional[str] = None,
        sample_rate=None,
        max_offset_seconds=None,
    ) -> None:
        # Path to the input subtitle file (None when reading from stdin).
        self.srtin: Optional[str] = srtin
        # The offset cap is only meaningful when both the rate and the cap in
        # seconds are known; otherwise offsets are unconstrained.
        if sample_rate is None or max_offset_seconds is None:
            self.max_offset_samples: Optional[int] = None
        else:
            self.max_offset_samples = abs(int(max_offset_seconds * sample_rate))
        # Accept either an aligner class (instantiated here with the offset
        # cap) or a ready-made instance.
        if isinstance(base_aligner, type):
            self.base_aligner: FFTAligner = base_aligner(
                max_offset_samples=self.max_offset_samples
            )
        else:
            self.base_aligner = base_aligner
        self.max_offset_seconds: Optional[int] = max_offset_seconds
        # Accumulates ((score, offset), pipeline) candidates across fits.
        self._scores: List[Tuple[Tuple[float, int], Pipeline]] = []

    def fit_gss(self, refstring, subpipe_maker):
        """Golden-section search over the framerate ratio, scoring each
        candidate ratio with the base aligner; only the final iteration's
        result is recorded as a candidate."""

        def opt_func(framerate_ratio, is_last_iter):
            subpipe = subpipe_maker(framerate_ratio)
            substring = subpipe.fit_transform(self.srtin)
            score = self.base_aligner.fit_transform(
                refstring, substring, get_score=True
            )
            logger.info(
                "got score %.0f (offset %d) for ratio %.3f",
                score[0],
                score[1],
                framerate_ratio,
            )
            if is_last_iter:
                self._scores.append((score, subpipe))
            # gss minimizes, so negate the score.
            return -score[0]

        gss(opt_func, MIN_FRAMERATE_RATIO, MAX_FRAMERATE_RATIO)
        return self

    def fit(
        self, refstring, subpipes: Union[Pipeline, List[Pipeline]]
    ) -> "MaxScoreAligner":
        """Score every candidate pipeline (or callable pipeline maker, which
        triggers a golden-section search) against the reference."""
        if not isinstance(subpipes, list):
            subpipes = [subpipes]
        for subpipe in subpipes:
            if callable(subpipe):
                # A callable candidate is a pipeline factory keyed on
                # framerate ratio; search for the best ratio.
                self.fit_gss(refstring, subpipe)
                continue
            elif hasattr(subpipe, "transform"):
                substring = subpipe.transform(self.srtin)
            else:
                # Already-materialized speech signal.
                substring = subpipe
            self._scores.append(
                (
                    self.base_aligner.fit_transform(
                        refstring, substring, get_score=True
                    ),
                    subpipe,
                )
            )
        return self

    def transform(self, *_) -> Tuple[Tuple[float, float], Pipeline]:
        """Return the best ((score, offset), pipeline); raises
        FailedToFindAlignmentException when every candidate exceeded the
        allowed offset."""
        scores = self._scores
        if self.max_offset_samples is not None:
            scores = list(
                filter(lambda s: abs(s[0][1]) <= self.max_offset_samples, scores)
            )
        if len(scores) == 0:
            raise FailedToFindAlignmentException(
                "Synchronization failed; consider passing "
                "--max-offset-seconds with a number larger than "
                "{}".format(self.max_offset_seconds)
            )
        (score, offset), subpipe = max(scores, key=lambda x: x[0][0])
        return (score, offset), subpipe
# -*- coding: utf-8 -*-
from typing import List, Tuple


# Magic environment variable used to locate bundled resources (PyInstaller).
SUBSYNC_RESOURCES_ENV_MAGIC: str = "ffsubsync_resources_xj48gjdkl340"

# Speech signals are discretized at this many samples per second.
SAMPLE_RATE: int = 100

# Common video/subtitle framerate mismatches to try correcting for.
FRAMERATE_RATIOS: List[float] = [24.0 / 23.976, 25.0 / 23.976, 25.0 / 24.0]

# --- Defaults for CLI options -------------------------------------------------
DEFAULT_FRAME_RATE: int = 48000
DEFAULT_NON_SPEECH_LABEL: float = 0.0
DEFAULT_ENCODING: str = "infer"
DEFAULT_MAX_SUBTITLE_SECONDS: int = 10
DEFAULT_START_SECONDS: int = 0
DEFAULT_SCALE_FACTOR: float = 1
DEFAULT_VAD: str = "subs_then_webrtc"
DEFAULT_MAX_OFFSET_SECONDS: int = 60
DEFAULT_APPLY_OFFSET_SECONDS: int = 0

# Subtitle file extensions recognized by the tool.
SUBTITLE_EXTENSIONS: Tuple[str, ...] = ("srt", "ass", "ssa", "sub")

# --- Project metadata ---------------------------------------------------------
GITHUB_DEV_USER: str = "smacke"
PROJECT_NAME: str = "FFsubsync"
PROJECT_LICENSE: str = "MIT"
COPYRIGHT_YEAR: str = "2019"
GITHUB_REPO: str = "ffsubsync"
DESCRIPTION: str = "Synchronize subtitles with video."
LONG_DESCRIPTION: str = (
    "Automatic and language-agnostic synchronization of subtitles with video."
)
WEBSITE: str = "https://github.com/{}/{}/".format(GITHUB_DEV_USER, GITHUB_REPO)
DEV_WEBSITE: str = "https://smacke.net/"

# No trailing slash important for this one...
API_RELEASE_URL: str = "https://api.github.com/repos/{}/{}/releases/latest".format(
    GITHUB_DEV_USER, GITHUB_REPO
)
RELEASE_URL: str = "https://github.com/{}/{}/releases/latest/".format(
    GITHUB_DEV_USER, GITHUB_REPO
)
def subprocess_args(include_stdout=True):
    """Build keyword arguments for ``subprocess`` calls that work both from
    source and from a PyInstaller ``--noconsole`` bundle on Windows.

    On Windows, suppresses the console window that would otherwise pop up and
    passes the environment explicitly so the PATH is searched. All standard
    handles are redirected to pipes to avoid "[Error 6] the handle is invalid"
    under ``--noconsole``. ``stdout`` is only included when requested, since
    ``subprocess.check_output`` forbids overriding it.
    """
    startupinfo = None
    env = None
    # subprocess.STARTUPINFO only exists on Windows.
    if hasattr(subprocess, "STARTUPINFO"):
        startupinfo = subprocess.STARTUPINFO()
        startupinfo.dwFlags |= subprocess.STARTF_USESHOWWINDOW
        env = os.environ

    popen_kwargs = {"stdout": subprocess.PIPE} if include_stdout else {}
    popen_kwargs.update(
        {
            "stdin": subprocess.PIPE,
            "stderr": subprocess.PIPE,
            "startupinfo": startupinfo,
            "env": env,
        }
    )
    return popen_kwargs


def ffmpeg_bin_path(bin_name, gui_mode, ffmpeg_resources_path=None):
    """Resolve the path to an ffmpeg-family binary (``ffmpeg``/``ffprobe``).

    Resolution order: an explicit resources path (directory, or a direct path
    to the ffmpeg binary), then the bundled-resources env var, then the bare
    binary name (searched on the system PATH).
    """
    if platform.system() == "Windows":
        bin_name = "{}.exe".format(bin_name)
    if ffmpeg_resources_path is not None:
        if os.path.isdir(ffmpeg_resources_path):
            return os.path.join(ffmpeg_resources_path, bin_name)
        # Not a directory: treat it as a direct path to the ffmpeg binary
        # itself, or derive a sibling path for other binaries.
        if bin_name.lower().startswith("ffmpeg"):
            return ffmpeg_resources_path
        return os.path.join(os.path.dirname(ffmpeg_resources_path), bin_name)
    try:
        resource_path = os.environ[SUBSYNC_RESOURCES_ENV_MAGIC]
        if len(resource_path) > 0:
            return os.path.join(resource_path, "ffmpeg-bin", bin_name)
    except KeyError:
        if gui_mode:
            logger.info(
                "Couldn't find resource path; falling back to searching system path"
            )
    return bin_name
-------------------------------------------------------------------------------- 1 | #!/usr/bin/env python 2 | # -*- coding: utf-8 -*- 3 | import argparse 4 | from datetime import datetime 5 | import logging 6 | import os 7 | import shutil 8 | import subprocess 9 | import sys 10 | from typing import cast, Any, Callable, Dict, List, Optional, Tuple, Union 11 | 12 | import numpy as np 13 | 14 | from ffsubsync.aligners import FFTAligner, MaxScoreAligner 15 | from ffsubsync.constants import ( 16 | DEFAULT_APPLY_OFFSET_SECONDS, 17 | DEFAULT_FRAME_RATE, 18 | DEFAULT_MAX_OFFSET_SECONDS, 19 | DEFAULT_MAX_SUBTITLE_SECONDS, 20 | DEFAULT_NON_SPEECH_LABEL, 21 | DEFAULT_START_SECONDS, 22 | DEFAULT_VAD, 23 | DEFAULT_ENCODING, 24 | FRAMERATE_RATIOS, 25 | SAMPLE_RATE, 26 | SUBTITLE_EXTENSIONS, 27 | ) 28 | from ffsubsync.ffmpeg_utils import ffmpeg_bin_path 29 | from ffsubsync.sklearn_shim import Pipeline, TransformerMixin 30 | from ffsubsync.speech_transformers import ( 31 | VideoSpeechTransformer, 32 | DeserializeSpeechTransformer, 33 | make_subtitle_speech_pipeline, 34 | ) 35 | from ffsubsync.subtitle_parser import make_subtitle_parser 36 | from ffsubsync.subtitle_transformers import SubtitleMerger, SubtitleShifter 37 | from ffsubsync.version import get_version 38 | 39 | 40 | logger: logging.Logger = logging.getLogger(__name__) 41 | 42 | 43 | def override(args: argparse.Namespace, **kwargs: Any) -> Dict[str, Any]: 44 | args_dict = dict(args.__dict__) 45 | args_dict.update(kwargs) 46 | return args_dict 47 | 48 | 49 | def _ref_format(ref_fname: Optional[str]) -> Optional[str]: 50 | if ref_fname is None: 51 | return None 52 | return ref_fname[-3:] 53 | 54 | 55 | def make_test_case( 56 | args: argparse.Namespace, npy_savename: Optional[str], sync_was_successful: bool 57 | ) -> int: 58 | if npy_savename is None: 59 | raise ValueError("need non-null npy_savename") 60 | tar_dir = "{}.{}".format( 61 | args.reference, datetime.now().strftime("%Y-%m-%d-%H-%M-%S") 62 | ) 63 | 
def make_test_case(
    args: argparse.Namespace, npy_savename: Optional[str], sync_was_successful: bool
) -> int:
    """Bundle logs, input/output subtitles, and serialized reference speech
    into an archive for debugging. Returns 0 on success, 1 on failure."""
    if npy_savename is None:
        raise ValueError("need non-null npy_savename")
    # Staging directory name includes a timestamp to avoid collisions.
    tar_dir = "{}.{}".format(
        args.reference, datetime.now().strftime("%Y-%m-%d-%H-%M-%S")
    )
    logger.info("creating test archive {}.tar.gz...".format(tar_dir))
    os.mkdir(tar_dir)
    try:
        log_path = "ffsubsync.log"
        if args.log_dir_path is not None and os.path.isdir(args.log_dir_path):
            log_path = os.path.join(args.log_dir_path, log_path)
        shutil.copy(log_path, tar_dir)
        shutil.copy(args.srtin[0], tar_dir)
        if sync_was_successful:
            shutil.move(args.srtout, tar_dir)
        if _ref_format(args.reference) in SUBTITLE_EXTENSIONS:
            shutil.copy(args.reference, tar_dir)
        elif args.serialize_speech or args.reference == npy_savename:
            # The .npz is wanted elsewhere too, so copy rather than move.
            shutil.copy(npy_savename, tar_dir)
        else:
            shutil.move(npy_savename, tar_dir)
        # Pick the first archive format this platform supports, by preference.
        supported_formats = set(list(zip(*shutil.get_archive_formats()))[0])
        preferred_formats = ["gztar", "bztar", "xztar", "zip", "tar"]
        for archive_format in preferred_formats:
            if archive_format in supported_formats:
                shutil.make_archive(tar_dir, archive_format, os.curdir, tar_dir)
                break
        else:
            logger.error(
                "failed to create test archive; no formats supported "
                "(this should not happen)"
            )
            return 1
        logger.info("...done")
    finally:
        # Always remove the staging directory, archived or not.
        shutil.rmtree(tar_dir)
    return 0


def get_srt_pipe_maker(
    args: argparse.Namespace, srtin: Optional[str]
) -> Callable[[Optional[float]], Union[Pipeline, Callable[[float], Pipeline]]]:
    """Return a factory that builds a subtitle speech pipeline for a given
    framerate scale factor, sharing one (caching) parser across candidates."""
    if srtin is None:
        # stdin input: assume srt format.
        srtin_format = "srt"
    else:
        srtin_format = os.path.splitext(srtin)[-1][1:]
    parser = make_subtitle_parser(fmt=srtin_format, caching=True, **args.__dict__)
    return lambda scale_factor: make_subtitle_speech_pipeline(
        **override(args, scale_factor=scale_factor, parser=parser)
    )
def get_framerate_ratios_to_try(args: argparse.Namespace) -> List[Optional[float]]:
    """Candidate framerate correction ratios: each known ratio and its
    inverse, plus ``None`` (meaning "search via gss") when requested.
    Empty when framerate fixing is disabled."""
    if args.no_fix_framerate:
        return []
    ratios = np.array(FRAMERATE_RATIOS)
    candidates: List[Optional[float]] = list(np.concatenate([ratios, 1.0 / ratios]))
    if args.gss:
        # None is a sentinel telling downstream code to run
        # golden-section search for the optimal ratio.
        candidates.append(None)
    return candidates
def try_sync(
    args: argparse.Namespace, reference_pipe: Optional[Pipeline], result: Dict[str, Any]
) -> bool:
    """Synchronize each input subtitle file against the (already fitted)
    reference pipeline, writing outputs and recording offset/scale into
    ``result``. Returns True iff every file synced successfully."""
    result["sync_was_successful"] = False
    sync_was_successful = True
    logger.info(
        "extracting speech segments from %s...",
        "stdin" if not args.srtin else "subtitles file(s) {}".format(args.srtin),
    )
    if not args.srtin:
        # None element means "read subtitles from stdin".
        args.srtin = [None]
    for srtin in args.srtin:
        try:
            # With no reference we can only apply a fixed offset, not sync.
            skip_sync = args.skip_sync or reference_pipe is None
            skip_infer_framerate_ratio = (
                args.skip_infer_framerate_ratio or reference_pipe is None
            )
            srtout = srtin if args.overwrite_input else args.srtout
            srt_pipe_maker = get_srt_pipe_maker(args, srtin)
            framerate_ratios = get_framerate_ratios_to_try(args)
            # Always include the identity ratio as the first candidate.
            srt_pipes = [srt_pipe_maker(1.0)] + [
                srt_pipe_maker(rat) for rat in framerate_ratios
            ]
            for srt_pipe in srt_pipes:
                if callable(srt_pipe):
                    # Callable candidates (gss) are fitted lazily by the aligner.
                    continue
                else:
                    srt_pipe.fit(srtin)
            # Optionally add a candidate whose ratio is inferred from the
            # duration ratio of reference vs. subtitles.
            if not skip_infer_framerate_ratio and hasattr(
                reference_pipe[-1], "num_frames"
            ):
                inferred_framerate_ratio_from_length = (
                    float(reference_pipe[-1].num_frames)
                    / cast(Pipeline, srt_pipes[0])[-1].num_frames
                )
                logger.info(
                    "inferred frameratio ratio: %.3f"
                    % inferred_framerate_ratio_from_length
                )
                srt_pipes.append(
                    cast(
                        Pipeline, srt_pipe_maker(inferred_framerate_ratio_from_length)
                    ).fit(srtin)
                )
            logger.info("...done")
            logger.info("computing alignments...")
            if skip_sync:
                best_score = 0.0
                best_srt_pipe = cast(Pipeline, srt_pipes[0])
                offset_samples = 0
            else:
                (best_score, offset_samples), best_srt_pipe = MaxScoreAligner(
                    FFTAligner, srtin, SAMPLE_RATE, args.max_offset_seconds
                ).fit_transform(
                    reference_pipe.transform(args.reference),
                    srt_pipes,
                )
                # A negative best score means even the best alignment was
                # worse than chance; treat the sync as failed.
                if best_score < 0:
                    sync_was_successful = False
            logger.info("...done")
            offset_seconds = (
                offset_samples / float(SAMPLE_RATE) + args.apply_offset_seconds
            )
            scale_step = best_srt_pipe.named_steps["scale"]
            logger.info("score: %.3f", best_score)
            logger.info("offset seconds: %.3f", offset_seconds)
            logger.info("framerate scale factor: %.3f", scale_step.scale_factor)
            output_steps: List[Tuple[str, TransformerMixin]] = [
                ("shift", SubtitleShifter(offset_seconds))
            ]
            if args.merge_with_reference:
                output_steps.append(
                    ("merge", SubtitleMerger(reference_pipe.named_steps["parse"].subs_))
                )
            output_pipe = Pipeline(output_steps)
            out_subs = output_pipe.fit_transform(scale_step.subs_)
            if args.output_encoding != "same":
                out_subs = out_subs.set_encoding(args.output_encoding)
            suppress_output_thresh = args.suppress_output_if_offset_less_than
            # A None threshold becomes -inf, i.e. never suppress.
            if offset_seconds >= (suppress_output_thresh or float("-inf")):
                logger.info("writing output to {}".format(srtout or "stdout"))
                out_subs.write_file(srtout)
            else:
                logger.warning(
                    "suppressing output because offset %s was less than suppression threshold %s",
                    offset_seconds,
                    args.suppress_output_if_offset_less_than,
                )
        except Exception:
            # One bad file shouldn't abort the remaining inputs.
            sync_was_successful = False
            logger.exception("failed to sync %s", srtin)
        else:
            result["offset_seconds"] = offset_seconds
            result["framerate_scale_factor"] = scale_step.scale_factor
    result["sync_was_successful"] = sync_was_successful
    return sync_was_successful
def make_reference_pipe(args: argparse.Namespace) -> Pipeline:
    """Build the pipeline that turns the reference (subtitles, serialized
    speech, or a video file) into a discretized speech signal."""
    ref_format = _ref_format(args.reference)
    if ref_format in SUBTITLE_EXTENSIONS:
        # Reference is itself a subtitle file; VAD is irrelevant.
        if args.vad is not None:
            logger.warning("Vad specified, but reference was not a movie")
        return cast(
            Pipeline,
            make_subtitle_speech_pipeline(
                fmt=ref_format,
                **override(args, encoding=args.reference_encoding or DEFAULT_ENCODING),
            ),
        )
    elif ref_format in ("npy", "npz"):
        # Previously serialized speech signal.
        if args.vad is not None:
            logger.warning("Vad specified, but reference was not a movie")
        return Pipeline(
            [("deserialize", DeserializeSpeechTransformer(args.non_speech_label))]
        )
    else:
        # Assume the reference is a video/audio file; extract speech via VAD.
        vad = args.vad or DEFAULT_VAD
        if args.reference_encoding is not None:
            logger.warning(
                "Reference srt encoding specified, but reference was a video file"
            )
        ref_stream = args.reference_stream
        # Normalize stream specifiers like "a:3" to ffmpeg's "0:a:3".
        if ref_stream is not None and not ref_stream.startswith("0:"):
            ref_stream = "0:" + ref_stream
        return Pipeline(
            [
                (
                    "speech_extract",
                    VideoSpeechTransformer(
                        vad=vad,
                        sample_rate=SAMPLE_RATE,
                        frame_rate=args.frame_rate,
                        non_speech_label=args.non_speech_label,
                        start_seconds=args.start_seconds,
                        ffmpeg_path=args.ffmpeg_path,
                        ref_stream=ref_stream,
                        vlc_mode=args.vlc_mode,
                        gui_mode=args.gui_mode,
                    ),
                ),
            ]
        )
293 | "srt", 294 | ] 295 | ) 296 | if args.srtout is None: 297 | ffmpeg_args.append("-") 298 | else: 299 | ffmpeg_args.append(args.srtout) 300 | logger.info( 301 | "attempting to extract subtitles to {} ...".format( 302 | "stdout" if args.srtout is None else args.srtout 303 | ) 304 | ) 305 | retcode = subprocess.call(ffmpeg_args) 306 | if retcode == 0: 307 | logger.info("...done") 308 | else: 309 | logger.error( 310 | "ffmpeg unable to extract subtitles from reference; return code %d", retcode 311 | ) 312 | return retcode 313 | 314 | 315 | def validate_args(args: argparse.Namespace) -> None: 316 | if args.vlc_mode: 317 | logger.setLevel(logging.CRITICAL) 318 | if args.reference is None: 319 | if args.apply_offset_seconds == 0 or not args.srtin: 320 | raise ValueError( 321 | "`reference` required unless `--apply-offset-seconds` specified" 322 | ) 323 | if args.apply_offset_seconds != 0: 324 | if not args.srtin: 325 | args.srtin = [args.reference] 326 | if not args.srtin: 327 | raise ValueError( 328 | "at least one of `srtin` or `reference` must be specified to apply offset seconds" 329 | ) 330 | if args.srtin: 331 | if len(args.srtin) > 1 and not args.overwrite_input: 332 | raise ValueError( 333 | "cannot specify multiple input srt files without overwriting" 334 | ) 335 | if len(args.srtin) > 1 and args.make_test_case: 336 | raise ValueError("cannot specify multiple input srt files for test cases") 337 | if len(args.srtin) > 1 and args.gui_mode: 338 | raise ValueError("cannot specify multiple input srt files in GUI mode") 339 | if ( 340 | args.make_test_case and not args.gui_mode 341 | ): # this validation not necessary for gui mode 342 | if not args.srtin or args.srtout is None: 343 | raise ValueError( 344 | "need to specify input and output srt files for test cases" 345 | ) 346 | if args.overwrite_input: 347 | if args.extract_subs_from_stream is not None: 348 | raise ValueError( 349 | "input overwriting not allowed for extracting subtitles from reference" 350 | ) 
def validate_args(args: argparse.Namespace) -> None:
    """Raise ``ValueError`` for any inconsistent combination of CLI options.

    May mutate ``args`` (fills ``srtin`` from ``reference`` when only an
    offset is being applied).
    """
    if args.vlc_mode:
        # VLC drives the tool itself; silence our logging.
        logger.setLevel(logging.CRITICAL)
    if args.reference is None and (args.apply_offset_seconds == 0 or not args.srtin):
        raise ValueError(
            "`reference` required unless `--apply-offset-seconds` specified"
        )
    if args.apply_offset_seconds != 0:
        if not args.srtin:
            # Allow shifting the "reference" subtitles in place.
            args.srtin = [args.reference]
        if not args.srtin:
            raise ValueError(
                "at least one of `srtin` or `reference` must be specified to apply offset seconds"
            )
    if args.srtin and len(args.srtin) > 1:
        if not args.overwrite_input:
            raise ValueError(
                "cannot specify multiple input srt files without overwriting"
            )
        if args.make_test_case:
            raise ValueError("cannot specify multiple input srt files for test cases")
        if args.gui_mode:
            raise ValueError("cannot specify multiple input srt files in GUI mode")
    # This validation is not necessary for gui mode.
    if args.make_test_case and not args.gui_mode:
        if not args.srtin or args.srtout is None:
            raise ValueError(
                "need to specify input and output srt files for test cases"
            )
    if args.overwrite_input:
        if args.extract_subs_from_stream is not None:
            raise ValueError(
                "input overwriting not allowed for extracting subtitles from reference"
            )
        if not args.srtin:
            raise ValueError(
                "need to specify input srt if --overwrite-input "
                "is specified since we cannot overwrite stdin"
            )
        if args.srtout is not None:
            raise ValueError(
                "overwrite input set but output file specified; "
                "refusing to run in case this was not intended"
            )
    if args.extract_subs_from_stream is not None:
        if args.make_test_case:
            raise ValueError("test case is for sync and not subtitle extraction")
        if args.srtin:
            raise ValueError(
                "stream specified for reference subtitle extraction; "
                "-i flag for sync input not allowed"
            )


def validate_file_permissions(args: argparse.Namespace) -> None:
    """Raise ``ValueError`` when any referenced file is missing or lacks the
    required read/write permissions."""
    error_string_template = (
        "unable to {action} {file}; "
        "try ensuring file exists and has correct permissions"
    )
    if args.reference is not None and not os.access(args.reference, os.R_OK):
        raise ValueError(
            error_string_template.format(action="read reference", file=args.reference)
        )
    for srtin in args.srtin or []:
        if srtin is not None and not os.access(srtin, os.R_OK):
            raise ValueError(
                error_string_template.format(
                    action="read input subtitles", file=srtin
                )
            )
    if (
        args.srtout is not None
        and os.path.exists(args.srtout)
        and not os.access(args.srtout, os.W_OK)
    ):
        raise ValueError(
            error_string_template.format(
                action="write output subtitles", file=args.srtout
            )
        )
    if args.make_test_case or args.serialize_speech:
        npy_savename = os.path.splitext(args.reference)[0] + ".npz"
        if os.path.exists(npy_savename) and not os.access(npy_savename, os.W_OK):
            raise ValueError(
                "unable to write test case file archive %s (try checking permissions)"
                % npy_savename
            )
Tuple[Optional[str], Optional[logging.FileHandler]]: 410 | log_handler = None 411 | log_path = None 412 | if args.make_test_case or args.log_dir_path is not None: 413 | log_path = "ffsubsync.log" 414 | if args.log_dir_path is not None and os.path.isdir(args.log_dir_path): 415 | log_path = os.path.join(args.log_dir_path, log_path) 416 | log_handler = logging.FileHandler(log_path) 417 | logger.addHandler(log_handler) 418 | logger.info("this log will be written to %s", os.path.abspath(log_path)) 419 | return log_path, log_handler 420 | 421 | 422 | def _npy_savename(args: argparse.Namespace) -> str: 423 | return os.path.splitext(args.reference)[0] + ".npz" 424 | 425 | 426 | def _run_impl(args: argparse.Namespace, result: Dict[str, Any]) -> bool: 427 | if args.extract_subs_from_stream is not None: 428 | result["retval"] = extract_subtitles_from_reference(args) 429 | return True 430 | if args.srtin is not None and ( 431 | args.reference is None 432 | or (len(args.srtin) == 1 and args.srtin[0] == args.reference) 433 | ): 434 | return try_sync(args, None, result) 435 | reference_pipe = make_reference_pipe(args) 436 | logger.info("extracting speech segments from reference '%s'...", args.reference) 437 | reference_pipe.fit(args.reference) 438 | logger.info("...done") 439 | if args.make_test_case or args.serialize_speech: 440 | logger.info("serializing speech...") 441 | np.savez_compressed( 442 | _npy_savename(args), speech=reference_pipe.transform(args.reference) 443 | ) 444 | logger.info("...done") 445 | if not args.srtin: 446 | logger.info( 447 | "unsynchronized subtitle file not specified; skipping synchronization" 448 | ) 449 | return False 450 | return try_sync(args, reference_pipe, result) 451 | 452 | 453 | def validate_and_transform_args( 454 | parser_or_args: Union[argparse.ArgumentParser, argparse.Namespace] 455 | ) -> Optional[argparse.Namespace]: 456 | if isinstance(parser_or_args, argparse.Namespace): 457 | parser = None 458 | args = parser_or_args 459 | else: 
def validate_and_transform_args(
    parser_or_args: Union[argparse.ArgumentParser, argparse.Namespace]
) -> Optional[argparse.Namespace]:
    """Parse (if needed) and validate arguments; returns the namespace, or
    None when validation fails (errors are logged rather than raised)."""
    if isinstance(parser_or_args, argparse.Namespace):
        parser = None
        args = parser_or_args
    else:
        parser = parser_or_args
        args = parser.parse_args()
    try:
        validate_args(args)
    except ValueError as e:
        logger.error(e)
        if parser is not None:
            parser.print_usage()
        return None
    # GUI mode derives a default output name from the input file.
    if args.gui_mode and args.srtout is None:
        args.srtout = "{}.synced.srt".format(os.path.splitext(args.srtin[0])[0])
    try:
        validate_file_permissions(args)
    except ValueError as e:
        logger.error(e)
        return None
    ref_format = _ref_format(args.reference)
    if args.merge_with_reference and ref_format not in SUBTITLE_EXTENSIONS:
        logger.error(
            "merging synced output with reference only valid "
            "when reference composed of subtitles"
        )
        return None
    return args


def run(
    parser_or_args: Union[argparse.ArgumentParser, argparse.Namespace]
) -> Dict[str, Any]:
    """Top-level entry: validate args, run the sync, and return a result dict
    with keys retval / offset_seconds / framerate_scale_factor /
    sync_was_successful."""
    sync_was_successful = False
    result = {
        "retval": 0,
        "offset_seconds": None,
        "framerate_scale_factor": None,
    }
    args = validate_and_transform_args(parser_or_args)
    if args is None:
        result["retval"] = 1
        return result
    log_path, log_handler = _setup_logging(args)
    try:
        sync_was_successful = _run_impl(args, result)
        result["sync_was_successful"] = sync_was_successful
        return result
    finally:
        # NOTE: this runs after the `return` above; mutating `result` here
        # still affects the returned dict (same object).
        if log_handler is not None and log_path is not None:
            log_handler.close()
            logger.removeHandler(log_handler)
            if args.make_test_case:
                result["retval"] += make_test_case(
                    args, _npy_savename(args), sync_was_successful
                )
            # Remove the temp log unless the user asked for it in a real dir.
            if args.log_dir_path is None or not os.path.isdir(args.log_dir_path):
                os.remove(log_path)
def add_main_args_for_cli(parser: argparse.ArgumentParser) -> None:
    """Register the primary arguments shared by the CLI (reference, input and
    output subtitles, merge/test-case flags, and reference stream selector)."""
    parser.add_argument(
        "reference",
        nargs="?",
        help=(
            "Reference (video, subtitles, or a numpy array with VAD speech) "
            "to which to synchronize input subtitles."
        ),
    )
    # Multiple inputs are permitted (validated later against --overwrite-input).
    parser.add_argument(
        "-i", "--srtin", nargs="*", help="Input subtitles file (default=stdin)."
    )
    parser.add_argument(
        "-o", "--srtout", help="Output subtitles file (default=stdout)."
    )
    parser.add_argument(
        "--merge-with-reference",
        "--merge",
        action="store_true",
        help="Merge reference subtitles with synced output subtitles.",
    )
    parser.add_argument(
        "--make-test-case",
        "--create-test-case",
        action="store_true",
        help="If specified, serialize reference speech to a numpy array, "
        "and create an archive with input/output subtitles "
        "and serialized speech.",
    )
    parser.add_argument(
        "--reference-stream",
        "--refstream",
        "--reference-track",
        "--reftrack",
        default=None,
        help=(
            "Which stream/track in the video file to use as reference, "
            "formatted according to ffmpeg conventions. For example, 0:s:0 "
            "uses the first subtitle track; 0:a:3 would use the third audio track. "
            "You can also drop the leading `0:`; i.e. use s:0 or a:3, respectively. "
            "Example: `ffs ref.mkv -i in.srt -o out.srt --reference-stream s:2`"
        ),
    )
def add_cli_only_args(parser: argparse.ArgumentParser) -> None:
    """Register the options available only on the command line (not the GUI):
    encodings, timing knobs, VAD selection, extraction, and hidden flags."""
    parser.add_argument(
        "-v",
        "--version",
        action="version",
        version="{package} {version}".format(
            package=__package__, version=get_version()
        ),
    )
    parser.add_argument(
        "--overwrite-input",
        action="store_true",
        help=(
            "If specified, will overwrite the input srt "
            "instead of writing the output to a new file."
        ),
    )
    parser.add_argument(
        "--encoding",
        default=DEFAULT_ENCODING,
        help="What encoding to use for reading input subtitles "
        "(default=%s)." % DEFAULT_ENCODING,
    )
    parser.add_argument(
        "--max-subtitle-seconds",
        type=float,
        default=DEFAULT_MAX_SUBTITLE_SECONDS,
        help="Maximum duration for a subtitle to appear on-screen "
        "(default=%.3f seconds)." % DEFAULT_MAX_SUBTITLE_SECONDS,
    )
    parser.add_argument(
        "--start-seconds",
        type=int,
        default=DEFAULT_START_SECONDS,
        help="Start time for processing "
        "(default=%d seconds)." % DEFAULT_START_SECONDS,
    )
    parser.add_argument(
        "--max-offset-seconds",
        type=float,
        default=DEFAULT_MAX_OFFSET_SECONDS,
        help="The max allowed offset seconds for any subtitle segment "
        "(default=%d seconds)." % DEFAULT_MAX_OFFSET_SECONDS,
    )
    parser.add_argument(
        "--apply-offset-seconds",
        type=float,
        default=DEFAULT_APPLY_OFFSET_SECONDS,
        help="Apply a predefined offset in seconds to all subtitle segments "
        "(default=%d seconds)." % DEFAULT_APPLY_OFFSET_SECONDS,
    )
    parser.add_argument(
        "--frame-rate",
        type=int,
        default=DEFAULT_FRAME_RATE,
        help="Frame rate for audio extraction (default=%d)." % DEFAULT_FRAME_RATE,
    )
    parser.add_argument(
        "--skip-infer-framerate-ratio",
        action="store_true",
        help="If set, do not try to infer framerate ratio based on duration ratio.",
    )
    parser.add_argument(
        "--non-speech-label",
        type=float,
        default=DEFAULT_NON_SPEECH_LABEL,
        help="Label to use for frames detected as non-speech (default=%f)"
        % DEFAULT_NON_SPEECH_LABEL,
    )
    parser.add_argument(
        "--output-encoding",
        default="utf-8",
        help="What encoding to use for writing output subtitles "
        '(default=utf-8). Can indicate "same" to use same '
        "encoding as that of the input.",
    )
    parser.add_argument(
        "--reference-encoding",
        help="What encoding to use for reading / writing reference subtitles "
        "(if applicable, default=infer).",
    )
    parser.add_argument(
        "--vad",
        choices=[
            "subs_then_webrtc",
            "webrtc",
            "subs_then_auditok",
            "auditok",
            "subs_then_silero",
            "silero",
        ],
        default=None,
        help="Which voice activity detector to use for speech extraction "
        "(if using video / audio as a reference, default={}).".format(DEFAULT_VAD),
    )
    parser.add_argument(
        "--no-fix-framerate",
        action="store_true",
        help="If specified, subsync will not attempt to correct a framerate "
        "mismatch between reference and subtitles.",
    )
    parser.add_argument(
        "--serialize-speech",
        action="store_true",
        help="If specified, serialize reference speech to a numpy array.",
    )
    parser.add_argument(
        "--extract-subs-from-stream",
        "--extract-subtitles-from-stream",
        default=None,
        help="If specified, do not attempt sync; instead, just extract subtitles"
        " from the specified stream using the reference.",
    )
    parser.add_argument(
        "--suppress-output-if-offset-less-than",
        type=float,
        default=None,
        help="If specified, do not produce output if offset below provided threshold.",
    )
    parser.add_argument(
        "--ffmpeg-path",
        "--ffmpegpath",
        default=None,
        help="Where to look for ffmpeg and ffprobe. Uses the system PATH by default.",
    )
    parser.add_argument(
        "--log-dir-path",
        default=None,
        help=(
            "If provided, will save log file ffsubsync.log to this path "
            "(must be an existing directory)."
        ),
    )
    parser.add_argument(
        "--gss",
        action="store_true",
        help="If specified, use golden-section search to try to find"
        "the optimal framerate ratio between video and subtitles.",
    )
    parser.add_argument(
        "--strict",
        action="store_true",
        help="If specified, refuse to parse srt files with formatting issues.",
    )
    # Hidden flags used by integrations (VLC plugin, GUI wrapper, tests).
    parser.add_argument("--vlc-mode", action="store_true", help=argparse.SUPPRESS)
    parser.add_argument("--gui-mode", action="store_true", help=argparse.SUPPRESS)
    parser.add_argument("--skip-sync", action="store_true", help=argparse.SUPPRESS)
def make_parser() -> argparse.ArgumentParser:
    """Construct the full CLI argument parser (main + CLI-only options)."""
    parser = argparse.ArgumentParser(description="Synchronize subtitles with video.")
    add_main_args_for_cli(parser)
    add_cli_only_args(parser)
    return parser


def main() -> int:
    """CLI entry point: parse arguments, run the sync, return exit status."""
    return run(make_parser())["retval"]


if __name__ == "__main__":
    sys.exit(main())
logging.basicConfig(level=logging.INFO)
logger = logging.getLogger(__name__)


# Menu definition consumed by Gooey's @Gooey decorator below.
_menu = [
    {
        "name": "File",
        "items": [
            {
                "type": "AboutDialog",
                "menuTitle": "About",
                "name": PROJECT_NAME,
                "description": LONG_DESCRIPTION,
                "version": get_version(),
                "copyright": COPYRIGHT_YEAR,
                "website": WEBSITE,
                "developer": DEV_WEBSITE,
                "license": PROJECT_LICENSE,
            },
            {
                "type": "Link",
                "menuTitle": "Download latest release",
                "url": RELEASE_URL,
            },
        ],
    }
]


@Gooey(
    program_name=PROJECT_NAME,
    image_dir=os.path.join(os.environ[SUBSYNC_RESOURCES_ENV_MAGIC], "img"),
    menu=_menu,
    tabbed_groups=True,
    progress_regex=r"(\d+)%",
    hide_progress_msg=True,
)
def make_parser():
    """Build the Gooey (GUI) parser with Basic and Advanced tabs."""
    description = DESCRIPTION
    if update_available():
        # Nudge the user toward the download link in the File menu.
        description += (
            "\nUpdate available! Please go to "
            '"File" -> "Download latest release"'
            " to update FFsubsync."
        )
    parser = GooeyParser(description=description)
    main_group = parser.add_argument_group("Basic")
    main_group.add_argument(
        "reference",
        help="Reference (video or subtitles file) to which to synchronize input subtitles.",
        widget="FileChooser",
    )
    main_group.add_argument("srtin", help="Input subtitles file", widget="FileChooser")
    main_group.add_argument(
        "-o",
        "--srtout",
        help="Output subtitles file (default=${srtin}.synced.srt).",
        widget="FileSaver",
    )
    advanced_group = parser.add_argument_group("Advanced")

    # TODO: these are shared between gui and cli; don't duplicate this code
    advanced_group.add_argument(
        "--merge-with-reference",
        "--merge",
        action="store_true",
        help="Merge reference subtitles with synced output subtitles.",
    )
    advanced_group.add_argument(
        "--make-test-case",
        "--create-test-case",
        action="store_true",
        help="If specified, create a test archive a few KiB in size "
        "to send to the developer as a debugging aid.",
    )
    advanced_group.add_argument(
        "--reference-stream",
        "--refstream",
        "--reference-track",
        "--reftrack",
        default=None,
        help="Which stream/track in the video file to use as reference, "
        "formatted according to ffmpeg conventions. For example, s:0 "
        "uses the first subtitle track; a:3 would use the fourth audio track.",
    )
    return parser


def main():
    """GUI entry point: show the simple form first, then re-parse with the
    full CLI-only option set and run the sync in GUI mode."""
    parser = make_parser()
    _ = parser.parse_args()  # Fool Gooey into presenting the simpler menu
    add_cli_only_args(parser)
    args = parser.parse_args()
    args.gui_mode = True
    return run(args)
class open_file:
    """
    Context manager that opens a filename and closes it on exit, but does
    nothing for file-like objects.

    Parameters
    ----------
    filename : str, None, or file-like
        Path to open, ``None`` to use stdin/stdout (chosen by mode), or an
        already-open file-like object that is passed through unchanged.
    closing : bool, optional keyword (popped from ``kwargs``)
        If true, close the handle on exit even when it was supplied as a
        file-like object. Paths opened here are always closed on exit.
    """

    def __init__(self, filename, *args, **kwargs) -> None:
        self.closing = kwargs.pop("closing", False)
        if filename is None:
            # Pick stdout for writing/appending modes, stdin otherwise.
            # BUG FIX: the previous check was `"w" in args`, which only
            # matched the exact mode string "w" — modes such as "wb", "a",
            # or "r+" incorrectly mapped to stdin. Inspect the mode string
            # (positional or keyword) instead.
            mode = args[0] if args else kwargs.get("mode", "r")
            if any(c in mode for c in "wax+"):
                stream = sys.stdout
            else:
                stream = sys.stdin
            self.fh = open(stream.fileno(), *args, **kwargs)
        elif isinstance(filename, str):
            self.fh = open(filename, *args, **kwargs)
            # We opened it, so we are responsible for closing it.
            self.closing = True
        else:
            # Assume an already-open file-like object; pass it through.
            self.fh = filename

    def __enter__(self):
        return self.fh

    def __exit__(self, exc_type, exc_val, exc_tb):
        if self.closing:
            self.fh.close()

        # Never suppress exceptions raised inside the with-block.
        return False
class GenericSubtitle:
    """Uniform wrapper around a single subtitle event.

    Holds start/end timestamps alongside the original event object
    (``inner``), which is either an ``srt.Subtitle`` or a
    ``pysubs2.SSAEvent``.
    """

    def __init__(self, start, end, inner):
        self.start = start
        self.end = end
        self.inner = inner

    def __eq__(self, other: object) -> bool:
        if not isinstance(other, GenericSubtitle):
            return False
        return (
            self.start == other.start
            and self.end == other.end
            and self.inner == other.inner
        )

    @property
    def content(self) -> str:
        """Text payload of the wrapped subtitle event."""
        if isinstance(self.inner, srt.Subtitle):
            return self.inner.content
        if isinstance(self.inner, pysubs2.SSAEvent):
            return self.inner.text
        raise NotImplementedError(
            "unsupported subtitle type: %s" % type(self.inner)
        )

    def resolve_inner_timestamps(self):
        """Return a copy of ``inner`` carrying this wrapper's timestamps."""
        resolved = copy.deepcopy(self.inner)
        if isinstance(self.inner, srt.Subtitle):
            resolved.start = self.start
            resolved.end = self.end
        elif isinstance(self.inner, pysubs2.SSAEvent):
            # pysubs2 stores times in milliseconds rather than timedeltas.
            resolved.start = pysubs2.make_time(s=self.start.total_seconds())
            resolved.end = pysubs2.make_time(s=self.end.total_seconds())
        else:
            raise NotImplementedError(
                "unsupported subtitle type: %s" % type(self.inner)
            )
        return resolved

    def merge_with(self, other):
        """Return a new subtitle with ``other``'s content appended.

        Keeps this subtitle's timestamps; only srt events are supported.
        """
        assert isinstance(self.inner, type(other.inner))
        merged_inner = copy.deepcopy(self.inner)
        if not isinstance(self.inner, srt.Subtitle):
            raise NotImplementedError(
                "unsupported subtitle type: %s" % type(self.inner)
            )
        merged_inner.content = "{}\n{}".format(
            merged_inner.content, other.inner.content
        )
        return self.__class__(self.start, self.end, merged_inner)

    @classmethod
    def wrap_inner_subtitle(cls, sub) -> "GenericSubtitle":
        """Wrap a raw srt/pysubs2 event in a GenericSubtitle."""
        if isinstance(sub, srt.Subtitle):
            return cls(sub.start, sub.end, sub)
        if isinstance(sub, pysubs2.SSAEvent):
            return cls(
                timedelta(milliseconds=sub.start), timedelta(milliseconds=sub.end), sub
            )
        raise NotImplementedError("unsupported subtitle type: %s" % type(sub))
class GenericSubtitlesFile:
    """An in-memory subtitles file: a list of ``GenericSubtitle`` objects
    plus the format, encoding, and SSA/ASS metadata needed to serialize
    them back out."""

    def __init__(self, subs: List[GenericSubtitle], *_, **kwargs: Any):
        # sub_format and encoding are required keyword arguments.
        sub_format: str = cast(str, kwargs.pop("sub_format", None))
        if sub_format is None:
            raise ValueError("format must be specified")
        encoding: str = cast(str, kwargs.pop("encoding", None))
        if encoding is None:
            raise ValueError("encoding must be specified")
        self.subs_: List[GenericSubtitle] = subs
        self._sub_format: str = sub_format
        self._encoding: str = encoding
        # Optional metadata carried through from a parsed ssa/ass file so
        # that styling survives a round trip.
        self._styles: Optional[Dict[str, pysubs2.SSAStyle]] = kwargs.pop("styles", None)
        self._fonts_opaque: Optional[Dict[str, Any]] = kwargs.pop("fonts_opaque", None)
        self._info: Optional[Dict[str, str]] = kwargs.pop("info", None)

    def set_encoding(self, encoding: str) -> "GenericSubtitlesFile":
        # "same" is a sentinel meaning "keep the encoding detected on input".
        if encoding != "same":
            self._encoding = encoding
        return self

    def __len__(self) -> int:
        return len(self.subs_)

    def __getitem__(self, item: int) -> GenericSubtitle:
        return self.subs_[item]

    def __iter__(self) -> Iterator[GenericSubtitle]:
        return iter(self.subs_)

    def clone_props_for_subs(
        self, new_subs: List[GenericSubtitle]
    ) -> "GenericSubtitlesFile":
        """Return a new file object with the same metadata but new subs."""
        return GenericSubtitlesFile(
            new_subs,
            sub_format=self._sub_format,
            encoding=self._encoding,
            styles=self._styles,
            fonts_opaque=self._fonts_opaque,
            info=self._info,
        )

    def gen_raw_resolved_subs(self):
        """Yield inner subtitle objects with this file's timestamps applied."""
        for sub in self.subs_:
            yield sub.resolve_inner_timestamps()

    def offset(self, td: timedelta) -> "GenericSubtitlesFile":
        """Return a copy with every subtitle shifted by ``td``."""
        offset_subs = []
        for sub in self.subs_:
            offset_subs.append(GenericSubtitle(sub.start + td, sub.end + td, sub.inner))
        return self.clone_props_for_subs(offset_subs)

    def write_file(self, fname: str) -> None:
        """Serialize to ``fname`` (stdout when falsy), converting between
        srt and ssa/ass/vtt when the output extension differs from the
        stored format."""
        # TODO: converter to go between self.subs_format and out_format
        if fname is None:
            out_format = self._sub_format
        else:
            # Infer the output format from the file extension.
            out_format = os.path.splitext(fname)[-1][1:]
        subs = list(self.gen_raw_resolved_subs())
        if self._sub_format in ("ssa", "ass", "vtt"):
            ssaf = pysubs2.SSAFile()
            ssaf.events = subs
            if self._styles is not None:
                ssaf.styles = self._styles
            if self._info is not None:
                ssaf.info = self._info
            if self._fonts_opaque is not None:
                ssaf.fonts_opaque = self._fonts_opaque
            to_write = ssaf.to_string(out_format)
        elif self._sub_format == "srt" and out_format in ("ssa", "ass", "vtt"):
            # srt -> ssa/ass/vtt: go through pysubs2 for the conversion.
            to_write = pysubs2.SSAFile.from_string(srt.compose(subs)).to_string(
                out_format
            )
        elif out_format == "srt":
            to_write = srt.compose(subs)
        else:
            raise NotImplementedError("unsupported output format: %s" % out_format)

        # Binary mode so we control the encoding explicitly.
        with open(fname or sys.stdout.fileno(), "wb") as f:
            f.write(to_write.encode(self._encoding))
class SubsMixin:
    """Mixin for transformer classes that carry a parsed subtitles file."""

    def __init__(self, subs: Optional["GenericSubtitlesFile"] = None) -> None:
        # Parsed subtitles; populated during parsing/fitting elsewhere.
        self.subs_: Optional["GenericSubtitlesFile"] = subs

    def set_encoding(self, encoding: str) -> "SubsMixin":
        """Forward an output-encoding change to the wrapped subtitles file.

        Returns self to allow chaining.
        """
        self.subs_.set_encoding(encoding)
        return self
import logging
import math

logging.basicConfig(level=logging.INFO)
logger = logging.getLogger(__name__)


invphi = (math.sqrt(5) - 1) / 2  # 1 / phi
invphi2 = (3 - math.sqrt(5)) / 2  # 1 / phi^2


def gss(f, a, b, tol=1e-4):
    """Golden-section search.

    Given a function f with a single local minimum in
    the interval [a,b], gss returns a subset interval
    [c,d] that contains the minimum with d-c <= tol.

    ``f`` may take either a single argument ``f(x)`` or two arguments
    ``f(x, is_last_iter)``, where the second flag tells the callback
    whether this is the final evaluation.

    Example:
    >>> f = lambda x: (x-2)**2
    >>> a = 1
    >>> b = 5
    >>> tol = 1e-5
    >>> (c,d) = gss(f, a, b, tol)
    >>> print(c, d)
    1.9999959837979107 2.0000050911830893
    """

    (a, b) = (min(a, b), max(a, b))
    h = b - a
    if h <= tol:
        return a, b

    # Required steps to achieve tolerance
    n = int(math.ceil(math.log(tol / h) / math.log(invphi)))
    logger.info(
        "About to perform %d iterations of golden section search to find the best framerate",
        n,
    )

    def f_wrapped(x, is_last_iter):
        # Support both f(x, is_last_iter) and plain f(x) callbacks.
        try:
            return f(x, is_last_iter)
        except TypeError:
            return f(x)

    c = a + invphi2 * h
    d = a + invphi * h
    yc = f_wrapped(c, n == 1)
    yd = f_wrapped(d, n == 1)

    for k in range(n - 1):
        if yc < yd:
            b = d
            d = c
            yd = yc
            h = invphi * h
            c = a + invphi2 * h
            yc = f_wrapped(c, k == n - 2)
        else:
            a = c
            c = d
            yc = yd
            h = invphi * h
            d = a + invphi * h
            # BUG FIX: this previously called `f` directly instead of
            # `f_wrapped`, raising TypeError for single-argument callbacks
            # (including this docstring's own example) whenever this
            # branch executed.
            yd = f_wrapped(d, k == n - 2)

    if yc < yd:
        return a, d
    else:
        return c, b
`Pipeline` from `sklearn.pipeline` and 4 | `TransformerMixin` from `sklearn.base` in the scikit-learn framework 5 | (commit hash d205638475ca542dc46862652e3bb0be663a8eac) to be precise). 6 | Both are BSD licensed and allow for this sort of thing; attribution 7 | is given as a comment above each class. License reproduced below: 8 | 9 | BSD 3-Clause License 10 | 11 | Copyright (c) 2007-2022 The scikit-learn developers. 12 | All rights reserved. 13 | 14 | Redistribution and use in source and binary forms, with or without 15 | modification, are permitted provided that the following conditions are met: 16 | 17 | * Redistributions of source code must retain the above copyright notice, this 18 | list of conditions and the following disclaimer. 19 | 20 | * Redistributions in binary form must reproduce the above copyright notice, 21 | this list of conditions and the following disclaimer in the documentation 22 | and/or other materials provided with the distribution. 23 | 24 | * Neither the name of the copyright holder nor the names of its 25 | contributors may be used to endorse or promote products derived from 26 | this software without specific prior written permission. 27 | 28 | THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" 29 | AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE 30 | IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE 31 | DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT HOLDER OR CONTRIBUTORS BE LIABLE 32 | FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL 33 | DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR 34 | SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER 35 | CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, 36 | OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE 37 | OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. 
class TransformerProtocol(Protocol):
    """Structural type for sklearn-style objects with fit/transform."""

    fit: Callable[..., "TransformerProtocol"]
    transform: Callable[[Any], Any]


# Author: Gael Varoquaux
# License: BSD 3 clause
class TransformerMixin(TransformerProtocol):
    """Mixin class for all transformers."""

    def fit_transform(self, X: Any, y: Optional[Any] = None, **fit_params: Any) -> Any:
        """Fit to data, then transform it.

        Fits the transformer to ``X`` (and ``y`` when given) with optional
        ``fit_params``, then returns the transformed version of ``X``.

        Parameters
        ----------
        X : ndarray of shape (n_samples, n_features)
            Training set.
        y : ndarray of shape (n_samples,), default=None
            Target values.
        **fit_params : dict
            Additional fit parameters.

        Returns
        -------
        X_new : ndarray array of shape (n_samples, n_features_new)
            Transformed array.
        """
        # Non-optimized default implementation; subclasses may override.
        # Dispatch on whether targets were supplied: unsupervised
        # transformers are fit with X alone, supervised ones with (X, y).
        if y is None:
            return self.fit(X, **fit_params).transform(X)
        return self.fit(X, y, **fit_params).transform(X)
# Author: Edouard Duchesnay
#         Gael Varoquaux
#         Virgile Fritsch
#         Alexandre Gramfort
#         Lars Buitinck
# License: BSD
class Pipeline:
    """Sequentially apply a list of transforms and a final estimator.

    Lightweight stand-in for ``sklearn.pipeline.Pipeline``: ``steps`` is a
    list of ``(name, transformer)`` pairs where intermediate steps expose
    fit/transform and the last step exposes fit (or is ``None`` /
    ``"passthrough"`` for the identity).
    """

    def __init__(self, steps, verbose=False):
        self.steps = steps
        self.verbose = verbose
        self._validate_steps()

    def _validate_steps(self):
        names, estimators = zip(*self.steps)

        # validate estimators
        transformers = estimators[:-1]
        estimator = estimators[-1]

        for t in transformers:
            if t is None or t == "passthrough":
                continue
            if not (hasattr(t, "fit") or hasattr(t, "fit_transform")) or not hasattr(
                t, "transform"
            ):
                raise TypeError(
                    "All intermediate steps should be "
                    "transformers and implement fit and transform "
                    "or be the string 'passthrough' "
                    "'%s' (type %s) doesn't" % (t, type(t))
                )

        # We allow last estimator to be None as an identity transformation
        if (
            estimator is not None
            and estimator != "passthrough"
            and not hasattr(estimator, "fit")
        ):
            raise TypeError(
                "Last step of Pipeline should implement fit "
                "or be the string 'passthrough'. "
                "'%s' (type %s) doesn't" % (estimator, type(estimator))
            )

    def _iter(self, with_final=True, filter_passthrough=True):
        """
        Generate (idx, (name, trans)) tuples from self.steps

        When filter_passthrough is True, 'passthrough' and None transformers
        are filtered out.
        """
        stop = len(self.steps)
        if not with_final:
            stop -= 1

        for idx, (name, trans) in enumerate(islice(self.steps, 0, stop)):
            if not filter_passthrough:
                yield idx, name, trans
            elif trans is not None and trans != "passthrough":
                yield idx, name, trans

    def __len__(self) -> int:
        """
        Returns the length of the Pipeline
        """
        return len(self.steps)

    def __getitem__(self, ind):
        """Returns a sub-pipeline or a single estimator in the pipeline

        Indexing with an integer will return an estimator; using a slice
        returns another Pipeline instance which copies a slice of this
        Pipeline. This copy is shallow: modifying (or fitting) estimators in
        the sub-pipeline will affect the larger pipeline and vice-versa.
        However, replacing a value in `step` will not affect a copy.
        """
        if isinstance(ind, slice):
            if ind.step not in (1, None):
                raise ValueError("Pipeline slicing only supports a step of 1")
            return self.__class__(self.steps[ind])
        try:
            name, est = self.steps[ind]
        except TypeError:
            # Not an int, try get step by name
            return self.named_steps[ind]
        return est

    @property
    def _estimator_type(self):
        # Delegate to the final estimator.
        return self.steps[-1][1]._estimator_type

    @property
    def named_steps(self):
        # Mapping of step name -> step object.
        return dict(self.steps)

    @property
    def _final_estimator(self):
        # Normalize a trailing None to the "passthrough" sentinel.
        estimator = self.steps[-1][1]
        return "passthrough" if estimator is None else estimator

    def _log_message(self, step_idx):
        # Progress message used when verbose=True; None suppresses logging.
        if not self.verbose:
            return None
        name, step = self.steps[step_idx]

        return "(step %d of %d) Processing %s" % (step_idx + 1, len(self.steps), name)

    # Estimator interface

    def _fit(self, X, y=None, **fit_params):
        """Fit all but the final step, threading X through each transformer.

        Returns the transformed X and the fit params destined for the
        final estimator.
        """
        # shallow copy of steps - this should really be steps_
        self.steps = list(self.steps)
        self._validate_steps()

        # Route "stepname__param" fit params to their respective steps.
        fit_params_steps = {name: {} for name, step in self.steps if step is not None}
        for pname, pval in fit_params.items():
            if "__" not in pname:
                raise ValueError(
                    "Pipeline.fit does not accept the {} parameter. "
                    "You can pass parameters to specific steps of your "
                    "pipeline using the stepname__parameter format, e.g. "
                    "`Pipeline.fit(X, y, logisticregression__sample_weight"
                    "=sample_weight)`.".format(pname)
                )
            step, param = pname.split("__", 1)
            fit_params_steps[step][param] = pval
        for step_idx, name, transformer in self._iter(
            with_final=False, filter_passthrough=False
        ):
            if transformer is None or transformer == "passthrough":
                continue

            # Fit or load from cache the current transformer
            X, fitted_transformer = _fit_transform_one(
                transformer, X, y, None, **fit_params_steps[name]
            )
            # Replace the transformer of the step with the fitted
            # transformer. This is necessary when loading the transformer
            # from the cache.
            self.steps[step_idx] = (name, fitted_transformer)
        if self._final_estimator == "passthrough":
            return X, {}
        return X, fit_params_steps[self.steps[-1][0]]

    def fit(self, X, y=None, **fit_params):
        """Fit the model

        Fit all the transforms one after the other and transform the
        data, then fit the transformed data using the final estimator.

        Parameters
        ----------
        X : iterable
            Training data. Must fulfill input requirements of first step of the
            pipeline.

        y : iterable, default=None
            Training targets. Must fulfill label requirements for all steps of
            the pipeline.

        **fit_params : dict of string -> object
            Parameters passed to the ``fit`` method of each step, where
            each parameter name is prefixed such that parameter ``p`` for step
            ``s`` has key ``s__p``.

        Returns
        -------
        self : Pipeline
            This estimator
        """
        Xt, fit_params = self._fit(X, y, **fit_params)
        if self._final_estimator != "passthrough":
            self._final_estimator.fit(Xt, y, **fit_params)
        return self

    def fit_transform(self, X, y=None, **fit_params):
        """Fit the model and transform with the final estimator

        Fits all the transforms one after the other and transforms the
        data, then uses fit_transform on transformed data with the final
        estimator.

        Parameters
        ----------
        X : iterable
            Training data. Must fulfill input requirements of first step of the
            pipeline.

        y : iterable, default=None
            Training targets. Must fulfill label requirements for all steps of
            the pipeline.

        **fit_params : dict of string -> object
            Parameters passed to the ``fit`` method of each step, where
            each parameter name is prefixed such that parameter ``p`` for step
            ``s`` has key ``s__p``.

        Returns
        -------
        Xt : array-like of shape (n_samples, n_transformed_features)
            Transformed samples
        """
        last_step = self._final_estimator
        Xt, fit_params = self._fit(X, y, **fit_params)
        if last_step == "passthrough":
            return Xt
        if hasattr(last_step, "fit_transform"):
            return last_step.fit_transform(Xt, y, **fit_params)
        else:
            return last_step.fit(Xt, y, **fit_params).transform(Xt)

    @property
    def transform(self):
        """Apply transforms, and transform with the final estimator

        This also works where final estimator is ``None``: all prior
        transformations are applied.

        Parameters
        ----------
        X : iterable
            Data to transform. Must fulfill input requirements of first step
            of the pipeline.

        Returns
        -------
        Xt : array-like of shape (n_samples, n_transformed_features)
        """
        # _final_estimator is None or has transform, otherwise attribute error
        # XXX: Handling the None case means we can't use if_delegate_has_method
        if self._final_estimator != "passthrough":
            self._final_estimator.transform
        return self._transform

    def _transform(self, X):
        # Thread X through every non-passthrough step's transform.
        Xt = X
        for _, _, transform in self._iter():
            Xt = transform.transform(Xt)
        return Xt

    @property
    def classes_(self):
        # Delegate to the final estimator.
        return self.steps[-1][-1].classes_

    @property
    def _pairwise(self):
        # check if first estimator expects pairwise input
        return getattr(self.steps[0][1], "_pairwise", False)

    @property
    def n_features_in_(self):
        # delegate to first step (which will call _check_is_fitted)
        return self.steps[0][1].n_features_in_


def _name_estimators(estimators):
    """Generate names for estimators."""

    # Default name is the lowercased class name (strings pass through).
    names = [
        estimator if isinstance(estimator, str) else type(estimator).__name__.lower()
        for estimator in estimators
    ]
    namecount = defaultdict(int)
    for est, name in zip(estimators, names):
        namecount[name] += 1

    # Keep only names that collide; unique names need no suffix.
    for k, v in list(namecount.items()):
        if v == 1:
            del namecount[k]

    # Disambiguate duplicates with "-N" suffixes, numbered left to right.
    for i in reversed(range(len(estimators))):
        name = names[i]
        if name in namecount:
            names[i] += "-%d" % namecount[name]
            namecount[name] -= 1

    return list(zip(names, estimators))
364 | 365 | This is a shorthand for the Pipeline constructor; it does not require, and 366 | does not permit, naming the estimators. Instead, their names will be set 367 | to the lowercase of their types automatically. 368 | 369 | Parameters 370 | ---------- 371 | *steps : list of estimators. 372 | 373 | verbose : bool, default=False 374 | If True, the time elapsed while fitting each step will be printed as it 375 | is completed. 376 | 377 | Returns 378 | ------- 379 | p : Pipeline 380 | """ 381 | verbose = kwargs.pop("verbose", False) 382 | if kwargs: 383 | raise TypeError( 384 | 'Unknown keyword arguments: "{}"'.format(list(kwargs.keys())[0]) 385 | ) 386 | return Pipeline(_name_estimators(steps), verbose=verbose) 387 | 388 | 389 | def _transform_one(transformer, X, y, weight, **fit_params): 390 | res = transformer.transform(X) 391 | # if we have a weight for this transformer, multiply output 392 | if weight is None: 393 | return res 394 | return res * weight 395 | 396 | 397 | def _fit_transform_one(transformer, X, y, weight, **fit_params): 398 | """ 399 | Fits ``transformer`` to ``X`` and ``y``. The transformed result is returned 400 | with the fitted transformer. If ``weight`` is not ``None``, the result will 401 | be multiplied by ``weight``. 
402 | """ 403 | if hasattr(transformer, "fit_transform"): 404 | res = transformer.fit_transform(X, y, **fit_params) 405 | else: 406 | res = transformer.fit(X, y, **fit_params).transform(X) 407 | 408 | if weight is None: 409 | return res, transformer 410 | return res * weight, transformer 411 | -------------------------------------------------------------------------------- /ffsubsync/speech_transformers.py: -------------------------------------------------------------------------------- 1 | # -*- coding: utf-8 -*- 2 | import os 3 | from contextlib import contextmanager 4 | import logging 5 | import io 6 | import subprocess 7 | import sys 8 | from datetime import timedelta 9 | from typing import cast, Callable, Dict, List, Optional, Union 10 | 11 | import ffmpeg 12 | import numpy as np 13 | import tqdm 14 | 15 | from ffsubsync.constants import ( 16 | DEFAULT_ENCODING, 17 | DEFAULT_MAX_SUBTITLE_SECONDS, 18 | DEFAULT_SCALE_FACTOR, 19 | DEFAULT_START_SECONDS, 20 | SAMPLE_RATE, 21 | ) 22 | from ffsubsync.ffmpeg_utils import ffmpeg_bin_path, subprocess_args 23 | from ffsubsync.generic_subtitles import GenericSubtitle 24 | from ffsubsync.sklearn_shim import TransformerMixin 25 | from ffsubsync.sklearn_shim import Pipeline 26 | from ffsubsync.subtitle_parser import make_subtitle_parser 27 | from ffsubsync.subtitle_transformers import SubtitleScaler 28 | 29 | 30 | logging.basicConfig(level=logging.INFO) 31 | logger: logging.Logger = logging.getLogger(__name__) 32 | 33 | 34 | def make_subtitle_speech_pipeline( 35 | fmt: str = "srt", 36 | encoding: str = DEFAULT_ENCODING, 37 | caching: bool = False, 38 | max_subtitle_seconds: int = DEFAULT_MAX_SUBTITLE_SECONDS, 39 | start_seconds: int = DEFAULT_START_SECONDS, 40 | scale_factor: float = DEFAULT_SCALE_FACTOR, 41 | parser=None, 42 | **kwargs, 43 | ) -> Union[Pipeline, Callable[[float], Pipeline]]: 44 | if parser is None: 45 | parser = make_subtitle_parser( 46 | fmt, 47 | encoding=encoding, 48 | caching=caching, 49 | 
def make_subtitle_speech_pipeline(
    fmt: str = "srt",
    encoding: str = DEFAULT_ENCODING,
    caching: bool = False,
    max_subtitle_seconds: int = DEFAULT_MAX_SUBTITLE_SECONDS,
    start_seconds: int = DEFAULT_START_SECONDS,
    scale_factor: Optional[float] = DEFAULT_SCALE_FACTOR,
    parser=None,
    **kwargs,
) -> Union[Pipeline, Callable[[float], Pipeline]]:
    """Build a parse -> scale -> speech-extract pipeline for subtitles.

    Returns a ready ``Pipeline`` when ``scale_factor`` is given, or a
    factory taking a framerate ratio and returning such a pipeline when
    ``scale_factor`` is None.
    """
    if parser is None:
        parser = make_subtitle_parser(
            fmt,
            encoding=encoding,
            caching=caching,
            max_subtitle_seconds=max_subtitle_seconds,
            start_seconds=start_seconds,
            **kwargs,
        )
    # A caller-supplied parser must agree with the other arguments.
    assert parser.encoding == encoding
    assert parser.max_subtitle_seconds == max_subtitle_seconds
    assert parser.start_seconds == start_seconds

    def subpipe_maker(framerate_ratio):
        # Parse subtitles, rescale timestamps by the framerate ratio, then
        # convert them to a discretized speech-presence signal.
        return Pipeline(
            [
                ("parse", parser),
                ("scale", SubtitleScaler(framerate_ratio)),
                (
                    "speech_extract",
                    SubtitleSpeechTransformer(
                        sample_rate=SAMPLE_RATE,
                        start_seconds=start_seconds,
                        framerate_ratio=framerate_ratio,
                    ),
                ),
            ]
        )

    if scale_factor is None:
        return subpipe_maker
    else:
        return subpipe_maker(scale_factor)
def _make_auditok_detector(
    sample_rate: int, frame_rate: int, non_speech_label: float
) -> Callable[[bytes], np.ndarray]:
    """Build a speech detector backed by the (GPLv3) auditok library.

    The returned callable maps raw 16-bit mono PCM bytes to a per-window
    numpy array of speech labels in [0, 1].
    """
    try:
        from auditok import (
            BufferAudioSource,
            ADSFactory,
            AudioEnergyValidator,
            StreamTokenizer,
        )
    except ImportError as e:
        logger.error(
            """Error: auditok not installed!
        Consider installing it with `pip install auditok`. Note that auditok
        is GPLv3 licensed, which means that successfully importing it at
        runtime creates a derivative work that is GPLv3 licensed. For personal
        use this is fine, but note that any commercial use that relies on
        auditok must be open source as per the GPLv3!*
        *Not legal advice. Consult with a lawyer.
        """
        )
        raise e
    bytes_per_frame = 2  # 16-bit samples
    frames_per_window = frame_rate // sample_rate
    validator = AudioEnergyValidator(sample_width=bytes_per_frame, energy_threshold=50)
    tokenizer = StreamTokenizer(
        validator=validator,
        min_length=0.2 * sample_rate,
        max_length=int(5 * sample_rate),
        max_continuous_silence=0.25 * sample_rate,
    )

    def _detect(asegment: bytes) -> np.ndarray:
        asource = BufferAudioSource(
            data_buffer=asegment,
            sampling_rate=frame_rate,
            sample_width=bytes_per_frame,
            channels=1,
        )
        ads = ADSFactory.ads(audio_source=asource, block_dur=1.0 / sample_rate)
        ads.open()
        tokens = tokenizer.tokenize(ads)
        # Number of analysis windows covering the segment (ceiling division).
        length = (
            len(asegment) // bytes_per_frame + frames_per_window - 1
        ) // frames_per_window
        # Mark a +1 step at each token's start index and a down-step just
        # past its end, then integrate: cumsum + clip yields 1.0 inside
        # detected speech and non_speech_label elsewhere.
        # NOTE(review): assumes tokens are (data, start, end) triples —
        # confirm against the auditok StreamTokenizer docs.
        media_bstring = np.zeros(length + 1)
        for token in tokens:
            media_bstring[token[1]] = 1.0
            media_bstring[token[2] + 1] = non_speech_label - 1.0
        return np.clip(np.cumsum(media_bstring)[:-1], 0.0, 1.0)

    return _detect
def _make_silero_detector(
    sample_rate: int, frame_rate: int, non_speech_label: float
) -> Callable[[bytes], np.ndarray]:
    """Build a speech detector backed by the Silero VAD torch-hub model.

    The returned callable maps raw 16-bit mono PCM bytes to a per-window
    numpy array of speech probabilities blended with ``non_speech_label``.
    """
    import torch

    window_duration = 1.0 / sample_rate  # duration in seconds
    frames_per_window = int(window_duration * frame_rate + 0.5)
    # NOTE(review): 1 (not 2) because indexing below operates on the
    # already-decoded sample array rather than raw bytes — confirm.
    bytes_per_frame = 1

    # Downloads/caches the model on first use; requires network access then.
    model, _ = torch.hub.load(
        repo_or_dir="snakers4/silero-vad",
        model="silero_vad",
        force_reload=False,
        onnx=False,
    )

    exception_logged = False

    def _detect(asegment) -> np.ndarray:
        # Convert 16-bit PCM bytes to float32 in [-1, 1).
        asegment = np.frombuffer(asegment, np.int16).astype(np.float32) / (1 << 15)
        asegment = torch.FloatTensor(asegment)
        media_bstring = []
        failures = 0
        for start in range(0, len(asegment) // bytes_per_frame, frames_per_window):
            stop = min(start + frames_per_window, len(asegment))
            try:
                speech_prob = model(
                    asegment[start * bytes_per_frame : stop * bytes_per_frame],
                    frame_rate,
                ).item()
            except Exception:
                nonlocal exception_logged
                # Log the first failure with a traceback; count the rest
                # silently to avoid log spam.
                if not exception_logged:
                    exception_logged = True
                    logger.exception("exception occurred during speech detection")
                speech_prob = 0.0
                failures += 1
            # Blend the model probability toward non_speech_label so that
            # non-speech windows read as "not sure" rather than hard zeros.
            media_bstring.append(1.0 - (1.0 - speech_prob) * (1.0 - non_speech_label))
        return np.array(media_bstring)

    return _detect
self.start_frame_ 217 | 218 | def fit_boundaries( 219 | self, speech_frames: np.ndarray 220 | ) -> "ComputeSpeechFrameBoundariesMixin": 221 | nz = np.nonzero(speech_frames > 0.5)[0] 222 | if len(nz) > 0: 223 | self.start_frame_ = int(np.min(nz)) 224 | self.end_frame_ = int(np.max(nz)) 225 | return self 226 | 227 | 228 | class VideoSpeechTransformer(TransformerMixin): 229 | def __init__( 230 | self, 231 | vad: str, 232 | sample_rate: int, 233 | frame_rate: int, 234 | non_speech_label: float, 235 | start_seconds: int = 0, 236 | ffmpeg_path: Optional[str] = None, 237 | ref_stream: Optional[str] = None, 238 | vlc_mode: bool = False, 239 | gui_mode: bool = False, 240 | ) -> None: 241 | super(VideoSpeechTransformer, self).__init__() 242 | self.vad: str = vad 243 | self.sample_rate: int = sample_rate 244 | self.frame_rate: int = frame_rate 245 | self._non_speech_label: float = non_speech_label 246 | self.start_seconds: int = start_seconds 247 | self.ffmpeg_path: Optional[str] = ffmpeg_path 248 | self.ref_stream: Optional[str] = ref_stream 249 | self.vlc_mode: bool = vlc_mode 250 | self.gui_mode: bool = gui_mode 251 | self.video_speech_results_: Optional[np.ndarray] = None 252 | 253 | def try_fit_using_embedded_subs(self, fname: str) -> None: 254 | embedded_subs = [] 255 | embedded_subs_times = [] 256 | if self.ref_stream is None: 257 | # check first 5; should cover 99% of movies 258 | streams_to_try: List[str] = list(map("0:s:{}".format, range(5))) 259 | else: 260 | streams_to_try = [self.ref_stream] 261 | for stream in streams_to_try: 262 | ffmpeg_args = [ 263 | ffmpeg_bin_path( 264 | "ffmpeg", self.gui_mode, ffmpeg_resources_path=self.ffmpeg_path 265 | ) 266 | ] 267 | ffmpeg_args.extend( 268 | [ 269 | "-loglevel", 270 | "fatal", 271 | "-nostdin", 272 | "-i", 273 | fname, 274 | "-map", 275 | "{}".format(stream), 276 | "-f", 277 | "srt", 278 | "-", 279 | ] 280 | ) 281 | process = subprocess.Popen( 282 | ffmpeg_args, **subprocess_args(include_stdout=True) 283 | ) 284 | 
output = io.BytesIO(process.communicate()[0]) 285 | if process.returncode != 0: 286 | break 287 | pipe = cast( 288 | Pipeline, 289 | make_subtitle_speech_pipeline(start_seconds=self.start_seconds), 290 | ).fit(output) 291 | speech_step = pipe.steps[-1][1] 292 | embedded_subs.append(speech_step) 293 | embedded_subs_times.append(speech_step.max_time_) 294 | if len(embedded_subs) == 0: 295 | if self.ref_stream is None: 296 | error_msg = "Video file appears to lack subtitle stream" 297 | else: 298 | error_msg = "Stream {} not found".format(self.ref_stream) 299 | raise ValueError(error_msg) 300 | # use longest set of embedded subs 301 | subs_to_use = embedded_subs[int(np.argmax(embedded_subs_times))] 302 | self.video_speech_results_ = subs_to_use.subtitle_speech_results_ 303 | 304 | def fit(self, fname: str, *_) -> "VideoSpeechTransformer": 305 | if "subs" in self.vad and ( 306 | self.ref_stream is None or self.ref_stream.startswith("0:s:") 307 | ): 308 | try: 309 | logger.info("Checking video for subtitles stream...") 310 | self.try_fit_using_embedded_subs(fname) 311 | logger.info("...success!") 312 | return self 313 | except Exception as e: 314 | logger.info(e) 315 | try: 316 | total_duration = ( 317 | float( 318 | ffmpeg.probe( 319 | fname, 320 | cmd=ffmpeg_bin_path( 321 | "ffprobe", 322 | self.gui_mode, 323 | ffmpeg_resources_path=self.ffmpeg_path, 324 | ), 325 | )["format"]["duration"] 326 | ) 327 | - self.start_seconds 328 | ) 329 | except Exception as e: 330 | logger.warning(e) 331 | total_duration = None 332 | if "webrtc" in self.vad: 333 | detector = _make_webrtcvad_detector( 334 | self.sample_rate, self.frame_rate, self._non_speech_label 335 | ) 336 | elif "auditok" in self.vad: 337 | detector = _make_auditok_detector( 338 | self.sample_rate, self.frame_rate, self._non_speech_label 339 | ) 340 | elif "silero" in self.vad: 341 | detector = _make_silero_detector( 342 | self.sample_rate, self.frame_rate, self._non_speech_label 343 | ) 344 | else: 345 | raise 
ValueError("unknown vad: %s" % self.vad) 346 | media_bstring: List[np.ndarray] = [] 347 | ffmpeg_args = [ 348 | ffmpeg_bin_path( 349 | "ffmpeg", self.gui_mode, ffmpeg_resources_path=self.ffmpeg_path 350 | ) 351 | ] 352 | if self.start_seconds > 0: 353 | ffmpeg_args.extend( 354 | [ 355 | "-ss", 356 | str(timedelta(seconds=self.start_seconds)), 357 | ] 358 | ) 359 | ffmpeg_args.extend(["-loglevel", "fatal", "-nostdin", "-i", fname]) 360 | if self.ref_stream is not None and self.ref_stream.startswith("0:a:"): 361 | ffmpeg_args.extend(["-map", self.ref_stream]) 362 | ffmpeg_args.extend( 363 | [ 364 | "-f", 365 | "s16le", 366 | "-ac", 367 | "1", 368 | "-acodec", 369 | "pcm_s16le", 370 | "-af", 371 | "aresample=async=1", 372 | "-ar", 373 | str(self.frame_rate), 374 | "-", 375 | ] 376 | ) 377 | process = subprocess.Popen(ffmpeg_args, **subprocess_args(include_stdout=True)) 378 | bytes_per_frame = 2 379 | frames_per_window = bytes_per_frame * self.frame_rate // self.sample_rate 380 | windows_per_buffer = 10000 381 | simple_progress = 0.0 382 | 383 | redirect_stderr = None 384 | tqdm_extra_args = {} 385 | should_print_redirected_stderr = self.gui_mode 386 | if self.gui_mode: 387 | try: 388 | from contextlib import redirect_stderr # type: ignore 389 | 390 | tqdm_extra_args["file"] = sys.stdout 391 | except ImportError: 392 | should_print_redirected_stderr = False 393 | if redirect_stderr is None: 394 | 395 | @contextmanager 396 | def redirect_stderr(enter_result=None): 397 | yield enter_result 398 | 399 | assert redirect_stderr is not None 400 | pbar_output = io.StringIO() 401 | with redirect_stderr(pbar_output): 402 | with tqdm.tqdm( 403 | total=total_duration, disable=self.vlc_mode, **tqdm_extra_args 404 | ) as pbar: 405 | while True: 406 | in_bytes = process.stdout.read( 407 | frames_per_window * windows_per_buffer 408 | ) 409 | if not in_bytes: 410 | break 411 | newstuff = len(in_bytes) / float(bytes_per_frame) / self.frame_rate 412 | if ( 413 | total_duration is not 
None 414 | and simple_progress + newstuff > total_duration 415 | ): 416 | newstuff = total_duration - simple_progress 417 | simple_progress += newstuff 418 | pbar.update(newstuff) 419 | if self.vlc_mode and total_duration is not None: 420 | print("%d" % int(simple_progress * 100.0 / total_duration)) 421 | sys.stdout.flush() 422 | if should_print_redirected_stderr: 423 | assert self.gui_mode 424 | # no need to flush since we pass -u to do unbuffered output for gui mode 425 | print(pbar_output.read()) 426 | if "silero" not in self.vad: 427 | in_bytes = np.frombuffer(in_bytes, np.uint8) 428 | media_bstring.append(detector(in_bytes)) 429 | process.wait() 430 | if len(media_bstring) == 0: 431 | raise ValueError( 432 | "Unable to detect speech. " 433 | "Perhaps try specifying a different stream / track, or a different vad." 434 | ) 435 | self.video_speech_results_ = np.concatenate(media_bstring) 436 | logger.info("total of speech segments: %s", np.sum(self.video_speech_results_)) 437 | return self 438 | 439 | def transform(self, *_) -> np.ndarray: 440 | return self.video_speech_results_ 441 | 442 | 443 | _PAIRED_NESTER: Dict[str, str] = { 444 | "(": ")", 445 | "{": "}", 446 | "[": "]", 447 | # FIXME: False positive sometimes when there are html tags, e.g. Hello? 
448 | # '<': '>', 449 | } 450 | 451 | 452 | # TODO: need way better metadata detector 453 | def _is_metadata(content: str, is_beginning_or_end: bool) -> bool: 454 | content = content.strip() 455 | if len(content) == 0: 456 | return True 457 | if ( 458 | content[0] in _PAIRED_NESTER.keys() 459 | and content[-1] == _PAIRED_NESTER[content[0]] 460 | ): 461 | return True 462 | if is_beginning_or_end: 463 | if "english" in content.lower(): 464 | return True 465 | if " - " in content: 466 | return True 467 | return False 468 | 469 | 470 | class SubtitleSpeechTransformer(TransformerMixin, ComputeSpeechFrameBoundariesMixin): 471 | def __init__( 472 | self, sample_rate: int, start_seconds: int = 0, framerate_ratio: float = 1.0 473 | ) -> None: 474 | super(SubtitleSpeechTransformer, self).__init__() 475 | self.sample_rate: int = sample_rate 476 | self.start_seconds: int = start_seconds 477 | self.framerate_ratio: float = framerate_ratio 478 | self.subtitle_speech_results_: Optional[np.ndarray] = None 479 | self.max_time_: Optional[int] = None 480 | 481 | def fit(self, subs: List[GenericSubtitle], *_) -> "SubtitleSpeechTransformer": 482 | max_time = 0 483 | for sub in subs: 484 | max_time = max(max_time, sub.end.total_seconds()) 485 | self.max_time_ = max_time - self.start_seconds 486 | samples = np.zeros(int(max_time * self.sample_rate) + 2, dtype=float) 487 | start_frame = float("inf") 488 | end_frame = 0 489 | for i, sub in enumerate(subs): 490 | if _is_metadata(sub.content, i == 0 or i + 1 == len(subs)): 491 | continue 492 | start = int( 493 | round( 494 | (sub.start.total_seconds() - self.start_seconds) * self.sample_rate 495 | ) 496 | ) 497 | start_frame = min(start_frame, start) 498 | duration = sub.end.total_seconds() - sub.start.total_seconds() 499 | end = start + int(round(duration * self.sample_rate)) 500 | end_frame = max(end_frame, end) 501 | samples[start:end] = min(1.0 / self.framerate_ratio, 1.0) 502 | self.subtitle_speech_results_ = samples 503 | 
self.fit_boundaries(self.subtitle_speech_results_) 504 | return self 505 | 506 | def transform(self, *_) -> np.ndarray: 507 | assert self.subtitle_speech_results_ is not None 508 | return self.subtitle_speech_results_ 509 | 510 | 511 | class DeserializeSpeechTransformer(TransformerMixin): 512 | def __init__(self, non_speech_label: float) -> None: 513 | super(DeserializeSpeechTransformer, self).__init__() 514 | self._non_speech_label: float = non_speech_label 515 | self.deserialized_speech_results_: Optional[np.ndarray] = None 516 | 517 | def fit(self, fname, *_) -> "DeserializeSpeechTransformer": 518 | speech = np.load(fname) 519 | if hasattr(speech, "files"): 520 | if "speech" in speech.files: 521 | speech = speech["speech"] 522 | else: 523 | raise ValueError( 524 | 'could not find "speech" array in ' 525 | "serialized file; only contains: %s" % speech.files 526 | ) 527 | speech[speech < 1.0] = self._non_speech_label 528 | self.deserialized_speech_results_ = speech 529 | return self 530 | 531 | def transform(self, *_) -> np.ndarray: 532 | assert self.deserialized_speech_results_ is not None 533 | return self.deserialized_speech_results_ 534 | -------------------------------------------------------------------------------- /ffsubsync/subtitle_parser.py: -------------------------------------------------------------------------------- 1 | # -*- coding: utf-8 -*- 2 | from datetime import timedelta 3 | import logging 4 | from typing import Any, cast, List, Optional 5 | 6 | try: 7 | import cchardet 8 | except: # noqa: E722 9 | cchardet = None 10 | try: 11 | import chardet 12 | except: # noqa: E722 13 | chardet = None 14 | try: 15 | import charset_normalizer 16 | except: # noqa: E722 17 | charset_normalizer = None 18 | import pysubs2 19 | from ffsubsync.sklearn_shim import TransformerMixin 20 | import srt 21 | 22 | from ffsubsync.constants import ( 23 | DEFAULT_ENCODING, 24 | DEFAULT_MAX_SUBTITLE_SECONDS, 25 | DEFAULT_START_SECONDS, 26 | ) 27 | from ffsubsync.file_utils 
import open_file 28 | from ffsubsync.generic_subtitles import GenericSubtitle, GenericSubtitlesFile, SubsMixin 29 | 30 | logging.basicConfig(level=logging.INFO) 31 | logger: logging.Logger = logging.getLogger(__name__) 32 | 33 | 34 | def _preprocess_subs( 35 | subs, 36 | max_subtitle_seconds: Optional[int] = None, 37 | start_seconds: int = 0, 38 | tolerant: bool = True, 39 | ) -> List[GenericSubtitle]: 40 | subs_list = [] 41 | start_time = timedelta(seconds=start_seconds) 42 | max_duration = timedelta(days=1) 43 | if max_subtitle_seconds is not None: 44 | max_duration = timedelta(seconds=max_subtitle_seconds) 45 | subs = iter(subs) 46 | while True: 47 | try: 48 | next_sub = GenericSubtitle.wrap_inner_subtitle(next(subs)) 49 | if next_sub.start < start_time: 50 | continue 51 | next_sub.end = min(next_sub.end, next_sub.start + max_duration) 52 | subs_list.append(next_sub) 53 | # We don't catch SRTParseError here b/c that is typically raised when we 54 | # are trying to parse with the wrong encoding, in which case we might 55 | # be able to try another one on the *entire* set of subtitles elsewhere. 
56 | except ValueError as e: 57 | if tolerant: 58 | logger.warning(e) 59 | continue 60 | else: 61 | raise 62 | except StopIteration: 63 | break 64 | return subs_list 65 | 66 | 67 | class GenericSubtitleParser(SubsMixin, TransformerMixin): 68 | def __init__( 69 | self, 70 | fmt: str = "srt", 71 | encoding: str = "infer", 72 | caching: bool = False, 73 | max_subtitle_seconds: Optional[int] = None, 74 | start_seconds: int = 0, 75 | skip_ssa_info: bool = False, 76 | strict: bool = False, 77 | ) -> None: 78 | super(self.__class__, self).__init__() 79 | self.sub_format: str = fmt 80 | self.encoding: str = encoding 81 | self.caching: bool = caching 82 | self.fit_fname: Optional[str] = None 83 | self.detected_encoding_: Optional[str] = None 84 | self.max_subtitle_seconds: Optional[int] = max_subtitle_seconds 85 | self.start_seconds: int = start_seconds 86 | # FIXME: hack to get tests to pass; remove 87 | self._skip_ssa_info: bool = skip_ssa_info 88 | self._strict: bool = strict 89 | 90 | def fit(self, fname: str, *_) -> "GenericSubtitleParser": 91 | if self.caching and self.fit_fname == ("" if fname is None else fname): 92 | return self 93 | encodings_to_try = (self.encoding,) 94 | with open_file(fname, "rb") as f: 95 | subs = f.read() 96 | if self.encoding == "infer": 97 | for chardet_lib in (cchardet, charset_normalizer, chardet): 98 | if chardet_lib is not None: 99 | try: 100 | detected_encoding = cast( 101 | Optional[str], chardet_lib.detect(subs)["encoding"] 102 | ) 103 | except: # noqa: E722 104 | continue 105 | if detected_encoding is not None: 106 | self.detected_encoding_ = detected_encoding 107 | encodings_to_try = (detected_encoding,) 108 | break 109 | assert self.detected_encoding_ is not None 110 | logger.info("detected encoding: %s" % self.detected_encoding_) 111 | exc = None 112 | for encoding in encodings_to_try: 113 | try: 114 | decoded_subs = subs.decode(encoding, errors="replace").strip() 115 | if self.sub_format == "srt": 116 | parsed_subs = srt.parse( 
117 | decoded_subs, ignore_errors=not self._strict 118 | ) 119 | elif self.sub_format in ("ass", "ssa", "sub", "vtt"): 120 | parsed_subs = pysubs2.SSAFile.from_string(decoded_subs) 121 | else: 122 | raise NotImplementedError( 123 | "unsupported format: %s" % self.sub_format 124 | ) 125 | extra_generic_subtitle_file_kwargs = {} 126 | if isinstance(parsed_subs, pysubs2.SSAFile): 127 | extra_generic_subtitle_file_kwargs.update( 128 | dict( 129 | styles=parsed_subs.styles, 130 | # pysubs2 on Python >= 3.6 doesn't support this 131 | fonts_opaque=getattr(parsed_subs, "fonts_opaque", None), 132 | info=parsed_subs.info if not self._skip_ssa_info else None, 133 | ) 134 | ) 135 | self.subs_ = GenericSubtitlesFile( 136 | _preprocess_subs( 137 | parsed_subs, 138 | max_subtitle_seconds=self.max_subtitle_seconds, 139 | start_seconds=self.start_seconds, 140 | ), 141 | sub_format=self.sub_format, 142 | encoding=encoding, 143 | **extra_generic_subtitle_file_kwargs, 144 | ) 145 | self.fit_fname = "" if fname is None else fname 146 | if len(encodings_to_try) > 1: 147 | self.detected_encoding_ = encoding 148 | logger.info("detected encoding: %s" % self.detected_encoding_) 149 | return self 150 | except Exception as e: 151 | exc = e 152 | continue 153 | raise exc 154 | 155 | def transform(self, *_) -> GenericSubtitlesFile: 156 | return self.subs_ 157 | 158 | 159 | def make_subtitle_parser( 160 | fmt: str, 161 | encoding: str = DEFAULT_ENCODING, 162 | caching: bool = False, 163 | max_subtitle_seconds: int = DEFAULT_MAX_SUBTITLE_SECONDS, 164 | start_seconds: int = DEFAULT_START_SECONDS, 165 | **kwargs: Any, 166 | ) -> GenericSubtitleParser: 167 | return GenericSubtitleParser( 168 | fmt=fmt, 169 | encoding=encoding, 170 | caching=caching, 171 | max_subtitle_seconds=max_subtitle_seconds, 172 | start_seconds=start_seconds, 173 | skip_ssa_info=kwargs.get("skip_ssa_info", False), 174 | strict=kwargs.get("strict", False), 175 | ) 176 | 
-------------------------------------------------------------------------------- /ffsubsync/subtitle_transformers.py: -------------------------------------------------------------------------------- 1 | # -*- coding: utf-8 -*- 2 | from datetime import timedelta 3 | import logging 4 | import numbers 5 | 6 | from ffsubsync.generic_subtitles import GenericSubtitle, GenericSubtitlesFile, SubsMixin 7 | from ffsubsync.sklearn_shim import TransformerMixin 8 | 9 | logging.basicConfig(level=logging.INFO) 10 | logger: logging.Logger = logging.getLogger(__name__) 11 | 12 | 13 | class SubtitleShifter(SubsMixin, TransformerMixin): 14 | def __init__(self, td_seconds): 15 | super(SubsMixin, self).__init__() 16 | if not isinstance(td_seconds, timedelta): 17 | self.td_seconds = timedelta(seconds=td_seconds) 18 | else: 19 | self.td_seconds = td_seconds 20 | 21 | def fit(self, subs: GenericSubtitlesFile, *_): 22 | self.subs_ = subs.offset(self.td_seconds) 23 | return self 24 | 25 | def transform(self, *_): 26 | return self.subs_ 27 | 28 | 29 | class SubtitleScaler(SubsMixin, TransformerMixin): 30 | def __init__(self, scale_factor): 31 | assert isinstance(scale_factor, numbers.Number) 32 | super(SubsMixin, self).__init__() 33 | self.scale_factor = scale_factor 34 | 35 | def fit(self, subs: GenericSubtitlesFile, *_): 36 | scaled_subs = [] 37 | for sub in subs: 38 | scaled_subs.append( 39 | GenericSubtitle( 40 | # py2 doesn't support direct multiplication of timedelta w/ float 41 | timedelta(seconds=sub.start.total_seconds() * self.scale_factor), 42 | timedelta(seconds=sub.end.total_seconds() * self.scale_factor), 43 | sub.inner, 44 | ) 45 | ) 46 | self.subs_ = subs.clone_props_for_subs(scaled_subs) 47 | return self 48 | 49 | def transform(self, *_): 50 | return self.subs_ 51 | 52 | 53 | class SubtitleMerger(SubsMixin, TransformerMixin): 54 | def __init__(self, reference_subs, first="reference"): 55 | assert first in ("reference", "output") 56 | super(SubsMixin, self).__init__() 57 | 
self.reference_subs = reference_subs 58 | self.first = first 59 | 60 | def fit(self, output_subs: GenericSubtitlesFile, *_): 61 | def _merger_gen(a, b): 62 | ita, itb = iter(a), iter(b) 63 | cur_a = next(ita, None) 64 | cur_b = next(itb, None) 65 | while True: 66 | if cur_a is None and cur_b is None: 67 | return 68 | elif cur_a is None: 69 | while cur_b is not None: 70 | yield cur_b 71 | cur_b = next(itb, None) 72 | return 73 | elif cur_b is None: 74 | while cur_a is not None: 75 | yield cur_a 76 | cur_a = next(ita, None) 77 | return 78 | # else: neither are None 79 | if cur_a.start < cur_b.start: 80 | swapped = False 81 | else: 82 | swapped = True 83 | cur_a, cur_b = cur_b, cur_a 84 | ita, itb = itb, ita 85 | prev_a = cur_a 86 | while prev_a is not None and cur_a.start < cur_b.start: 87 | cur_a = next(ita, None) 88 | if cur_a is None or cur_a.start < cur_b.start: 89 | yield prev_a 90 | prev_a = cur_a 91 | if prev_a is None: 92 | while cur_b is not None: 93 | yield cur_b 94 | cur_b = next(itb, None) 95 | return 96 | if cur_b.start - prev_a.start < cur_a.start - cur_b.start: 97 | if swapped: 98 | yield cur_b.merge_with(prev_a) 99 | ita, itb = itb, ita 100 | cur_a, cur_b = cur_b, cur_a 101 | cur_a = next(ita, None) 102 | else: 103 | yield prev_a.merge_with(cur_b) 104 | cur_b = next(itb, None) 105 | else: 106 | if swapped: 107 | yield cur_b.merge_with(cur_a) 108 | ita, itb = itb, ita 109 | else: 110 | yield cur_a.merge_with(cur_b) 111 | cur_a = next(ita, None) 112 | cur_b = next(itb, None) 113 | 114 | merged_subs = [] 115 | if self.first == "reference": 116 | first, second = self.reference_subs, output_subs 117 | else: 118 | first, second = output_subs, self.reference_subs 119 | for merged in _merger_gen(first, second): 120 | merged_subs.append(merged) 121 | self.subs_ = output_subs.clone_props_for_subs(merged_subs) 122 | return self 123 | 124 | def transform(self, *_): 125 | return self.subs_ 126 | 
-------------------------------------------------------------------------------- /ffsubsync/version.py: -------------------------------------------------------------------------------- 1 | # -*- coding: utf-8 -*- 2 | import os 3 | from ffsubsync.constants import SUBSYNC_RESOURCES_ENV_MAGIC 4 | from ffsubsync._version import get_versions 5 | 6 | __version__ = get_versions()["version"] 7 | del get_versions 8 | 9 | 10 | def get_version(): 11 | if "unknown" in __version__.lower(): 12 | with open( 13 | os.path.join(os.environ[SUBSYNC_RESOURCES_ENV_MAGIC], "__version__") 14 | ) as f: 15 | return f.read().strip() 16 | else: 17 | return __version__ 18 | 19 | 20 | def make_version_tuple(vstr=None): 21 | if vstr is None: 22 | vstr = __version__ 23 | if vstr[0] == "v": 24 | vstr = vstr[1:] 25 | components = [] 26 | for component in vstr.split("+")[0].split("."): 27 | try: 28 | components.append(int(component)) 29 | except ValueError: 30 | break 31 | return tuple(components) 32 | 33 | 34 | def update_available(): 35 | import requests 36 | from requests.exceptions import Timeout 37 | from .constants import API_RELEASE_URL 38 | 39 | try: 40 | resp = requests.get(API_RELEASE_URL, timeout=1) 41 | latest_vstr = resp.json()["tag_name"] 42 | except Timeout: 43 | return False 44 | except KeyError: 45 | return False 46 | if not resp.ok: 47 | return False 48 | return make_version_tuple(get_version()) < make_version_tuple(latest_vstr) 49 | -------------------------------------------------------------------------------- /gui/.gitignore: -------------------------------------------------------------------------------- 1 | build 2 | dist 3 | -------------------------------------------------------------------------------- /gui/Makefile: -------------------------------------------------------------------------------- 1 | .PHONY: macos 2 | 3 | macos: clean app pkg 4 | 5 | app: 6 | ./build-macos.sh 7 | 8 | pkg: 9 | ./package-macos.sh 10 | 11 | clean: 12 | rm -r dist/ build/ 13 | 
-------------------------------------------------------------------------------- /gui/README.md: -------------------------------------------------------------------------------- 1 | == Note on platform-specific PyInstaller version in requirements.txt == 2 | 3 | PyInstaller>=3.6 introduces a webrtcvad hook that seems to not play nicely 4 | with the webrtcvad-wheels package. This package contains prebuilt wheels 5 | and is needed for Windows (unless I can get a working C compiler in my 6 | Windows build environment, which is doubtful). For MacOS this isn't a 7 | problem since I can use the vanilla webrtcvad package and leverage the 8 | preexisting hook in PyInstaller>=3.6, but for Windows I need to use the 9 | old version of PyInstaller without the hook and introduce my own (in the 10 | 'hooks' directory). 11 | 12 | == Note on Scikit-Learn == 13 | There is some DLL that wasn't getting bundled in the Windows PyInstaller 14 | build and causing the built exe to complain. My solution was to remove 15 | the dependency and include a shim for the Pipeline / Transformer fuctionality. 
16 | -------------------------------------------------------------------------------- /gui/build-macos.sh: -------------------------------------------------------------------------------- 1 | #!/usr/bin/env bash 2 | python3 -m PyInstaller --clean -y --dist ./dist/macos build.spec 3 | # ref: https://github.com/chriskiehl/Gooey/issues/259#issuecomment-522432026 4 | mkdir -p ./dist/macos/Contents 5 | -------------------------------------------------------------------------------- /gui/build-windows.sh: -------------------------------------------------------------------------------- 1 | #!/usr/bin/env bash 2 | nbits=${1:-64} 3 | tag="python3" 4 | if [[ "$nbits" == 32 ]]; then 5 | tag="${tag}-32bit" 6 | fi 7 | docker run -v "$(pwd):/src/" -v "$(pwd)/..:/ffsubsync/" --entrypoint /bin/sh "cdrx/pyinstaller-windows:${tag}" -c "pip install -e /ffsubsync && /ffsubsync/gui/entrypoint-windows.sh" 8 | rm -r "./dist/win${nbits}" 9 | mv ./dist/windows "./dist/win${nbits}" 10 | -------------------------------------------------------------------------------- /gui/build.spec: -------------------------------------------------------------------------------- 1 | # -*- mode: python -*- 2 | 3 | import os 4 | import platform 5 | import gooey 6 | 7 | 8 | root = '..' 
9 | hookspath = None 10 | if platform.system() == 'Windows': 11 | root = '/ffsubsync' 12 | hookspath = [os.path.join(os.curdir, 'hooks')] 13 | 14 | ffmpeg_bin = os.path.join(root, 'resources/ffmpeg-bin') 15 | datas = [(os.path.join(root, 'resources/img/program_icon.png'), './img')] 16 | datas.append((os.path.join(root, 'resources/img/config_icon.png'), './img')) 17 | datas.append((os.path.join(root, '__version__'), '.')) 18 | if platform.system() == 'Darwin': 19 | ffmpeg_bin = os.path.join(ffmpeg_bin, 'macos') 20 | elif platform.system() == 'Windows': 21 | arch_bits = int(platform.architecture()[0][:2]) 22 | ffmpeg_bin = os.path.join(ffmpeg_bin, 'win{}'.format(arch_bits)) 23 | if arch_bits == 64: 24 | datas.append((os.path.join(root, 'resources/lib/win64/VCRUNTIME140_1.dll'), '.')) 25 | else: 26 | raise Exception('ffmpeg not available for {}'.format(platform.system())) 27 | 28 | gooey_root = os.path.dirname(gooey.__file__) 29 | gooey_languages = Tree(os.path.join(gooey_root, 'languages'), prefix = 'gooey/languages') 30 | gooey_images = Tree(os.path.join(gooey_root, 'images'), prefix = 'gooey/images') 31 | a = Analysis([os.path.join(os.curdir, 'ffsubsync-gui.py')], 32 | datas=datas, 33 | hiddenimports=['pkg_resources.py2_warn'], # ref: https://github.com/pypa/setuptools/issues/1963 34 | hookspath=hookspath, 35 | runtime_hooks=None, 36 | binaries=[(ffmpeg_bin, 'ffmpeg-bin')], 37 | ) 38 | pyz = PYZ(a.pure) 39 | 40 | # runtime options to pass to interpreter -- '-u' is for unbuffered io 41 | options = [('u', None, 'OPTION')] 42 | 43 | exe = EXE(pyz, 44 | a.scripts, 45 | a.binaries, 46 | a.zipfiles, 47 | a.datas, 48 | options, 49 | gooey_languages, # Add them in to collected files 50 | gooey_images, # Same here. 
51 | name='FFsubsync', 52 | debug=False, 53 | strip=None, 54 | upx=True, 55 | console=False, 56 | windowed=True, 57 | icon=os.path.join(root, 'resources', 'img', 'program_icon.ico') 58 | ) 59 | 60 | 61 | if platform.system() == 'Darwin': 62 | # info_plist = {'addition_prop': 'additional_value'} 63 | info_plist = {} 64 | app = BUNDLE(exe, 65 | icon=os.path.join(root, 'resources', 'img', 'program_icon.icns'), 66 | name='FFsubsync.app', 67 | bundle_identifier=None, 68 | info_plist=info_plist 69 | ) 70 | -------------------------------------------------------------------------------- /gui/entrypoint-windows.sh: -------------------------------------------------------------------------------- 1 | #!/bin/bash 2 | 3 | # Fail on errors. 4 | set -e 5 | 6 | # Make sure .bashrc is sourced 7 | . /root/.bashrc 8 | 9 | # Allow the workdir to be set using an env var. 10 | # Useful for CI pipiles which use docker for their build steps 11 | # and don't allow that much flexibility to mount volumes 12 | WORKDIR=${SRCDIR:-/src} 13 | 14 | # 15 | # In case the user specified a custom URL for PYPI, then use 16 | # that one, instead of the default one. 17 | # 18 | if [[ "$PYPI_URL" != "https://pypi.python.org/" ]] || \ 19 | [[ "$PYPI_INDEX_URL" != "https://pypi.python.org/simple" ]]; then 20 | # the funky looking regexp just extracts the hostname, excluding port 21 | # to be used as a trusted-host. 
22 | mkdir -p /wine/drive_c/users/root/pip 23 | echo "[global]" > /wine/drive_c/users/root/pip/pip.ini 24 | echo "index = $PYPI_URL" >> /wine/drive_c/users/root/pip/pip.ini 25 | echo "index-url = $PYPI_INDEX_URL" >> /wine/drive_c/users/root/pip/pip.ini 26 | echo "trusted-host = $(echo $PYPI_URL | perl -pe 's|^.*?://(.*?)(:.*?)?/.*$|$1|')" >> /wine/drive_c/users/root/pip/pip.ini 27 | 28 | echo "Using custom pip.ini: " 29 | cat /wine/drive_c/users/root/pip/pip.ini 30 | fi 31 | 32 | cd $WORKDIR 33 | 34 | if [ -f requirements.txt ]; then 35 | pip install -r requirements.txt 36 | fi # [ -f requirements.txt ] 37 | 38 | rm /wine/drive_c/Python37/Lib/site-packages/PyInstaller/hooks/hook-webrtcvad.py 39 | 40 | echo "$@" 41 | 42 | if [[ "$@" == "" ]]; then 43 | pyinstaller --clean -y --dist ./dist/windows --workpath /tmp *.spec 44 | chown -R --reference=. ./dist/windows 45 | else 46 | sh -c "$@" 47 | fi # [[ "$@" == "" ]] 48 | -------------------------------------------------------------------------------- /gui/ffsubsync-gui.py: -------------------------------------------------------------------------------- 1 | from ffsubsync.ffsubsync_gui import main 2 | 3 | 4 | if __name__ == '__main__': 5 | main() 6 | -------------------------------------------------------------------------------- /gui/hooks/hook-webrtcvad.py: -------------------------------------------------------------------------------- 1 | from PyInstaller.utils.hooks import copy_metadata 2 | 3 | datas = copy_metadata('webrtcvad-wheels') 4 | -------------------------------------------------------------------------------- /gui/package-macos.sh: -------------------------------------------------------------------------------- 1 | #!/usr/bin/env bash 2 | 3 | set -Eeuxo pipefail 4 | 5 | BASE=. 
6 | DIST="$BASE/dist" 7 | BUILD="$BASE/build/dmg" 8 | VERSION=$(python3 -c "from subsync.version import __version__; print(__version__)") 9 | APP="Subsync.app" 10 | TARGET="$DIST/subsync-${VERSION}-mac-x86_64.dmg" 11 | 12 | test -e "$BUILD" && rm -rf "$BUILD" 13 | test -e "$TARGET" && rm -f "$TARGET" 14 | mkdir -p "$BUILD" 15 | cp -r "$DIST/$APP" "$BUILD" 16 | 17 | create-dmg \ 18 | --volname "subsync installer" \ 19 | `#--volicon "icon.icns"` \ 20 | --window-pos 300 200 \ 21 | --window-size 700 500 \ 22 | --icon-size 150 \ 23 | --icon "$APP" 200 200 \ 24 | --hide-extension "$APP" \ 25 | --app-drop-link 450 200 \ 26 | --no-internet-enable \ 27 | "$TARGET" "$BUILD" 28 | -------------------------------------------------------------------------------- /gui/requirements.txt: -------------------------------------------------------------------------------- 1 | gooey 2 | pyinstaller>=3.6 3 | requests 4 | -------------------------------------------------------------------------------- /pyproject.toml: -------------------------------------------------------------------------------- 1 | [tool.black] 2 | line-length = 88 3 | target-version = ['py39'] 4 | extend-exclude = '(^/versioneer|_version)\.py' 5 | -------------------------------------------------------------------------------- /pytest.ini: -------------------------------------------------------------------------------- 1 | [pytest] 2 | markers = 3 | integration: mark a test as an integration test. 
4 | #filterwarnings = 5 | # ignore::DeprecationWarning 6 | -------------------------------------------------------------------------------- /requirements-dev.txt: -------------------------------------------------------------------------------- 1 | black 2 | flake8 3 | mypy 4 | pytest 5 | pytest-cov 6 | pyyaml 7 | twine 8 | types-requests 9 | versioneer 10 | -------------------------------------------------------------------------------- /requirements.txt: -------------------------------------------------------------------------------- 1 | auditok==0.1.5 2 | chardet;python_version>='3.7' 3 | charset_normalizer 4 | faust-cchardet 5 | ffmpeg-python 6 | numpy>=1.12.0 7 | pysubs2;python_version<'3.7' 8 | pysubs2>=1.2.0;python_version>='3.7' 9 | rich 10 | setuptools 11 | srt>=3.0.0 12 | tqdm 13 | typing_extensions 14 | webrtcvad;platform_system!='Windows' 15 | webrtcvad-wheels;platform_system=='Windows' 16 | -------------------------------------------------------------------------------- /resources/img/config_icon.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/smacke/ffsubsync/4d275da8b446de4be582d44337e99b2f75b56ebe/resources/img/config_icon.png -------------------------------------------------------------------------------- /resources/img/program_icon.icns: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/smacke/ffsubsync/4d275da8b446de4be582d44337e99b2f75b56ebe/resources/img/program_icon.icns -------------------------------------------------------------------------------- /resources/img/program_icon.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/smacke/ffsubsync/4d275da8b446de4be582d44337e99b2f75b56ebe/resources/img/program_icon.png -------------------------------------------------------------------------------- /resources/img/subsync.png: 
-------------------------------------------------------------------------------- https://raw.githubusercontent.com/smacke/ffsubsync/4d275da8b446de4be582d44337e99b2f75b56ebe/resources/img/subsync.png -------------------------------------------------------------------------------- /resources/img/tearing-me-apart-correct.gif: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/smacke/ffsubsync/4d275da8b446de4be582d44337e99b2f75b56ebe/resources/img/tearing-me-apart-correct.gif -------------------------------------------------------------------------------- /resources/img/tearing-me-apart-wrong.gif: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/smacke/ffsubsync/4d275da8b446de4be582d44337e99b2f75b56ebe/resources/img/tearing-me-apart-wrong.gif -------------------------------------------------------------------------------- /resources/lib/win64/VCRUNTIME140_1.dll: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/smacke/ffsubsync/4d275da8b446de4be582d44337e99b2f75b56ebe/resources/lib/win64/VCRUNTIME140_1.dll -------------------------------------------------------------------------------- /scripts/blacken.sh: -------------------------------------------------------------------------------- 1 | #!/usr/bin/env bash 2 | 3 | # ref: https://vaneyckt.io/posts/safer_bash_scripts_with_set_euxo_pipefail/ 4 | set -euxo pipefail 5 | 6 | DIRS="./ffsubsync ./tests" 7 | black $DIRS $@ 8 | -------------------------------------------------------------------------------- /scripts/bump-version.py: -------------------------------------------------------------------------------- 1 | #!/usr/bin/env python 2 | # -*- coding: utf-8 -*- 3 | import argparse 4 | import subprocess 5 | import sys 6 | 7 | from ffsubsync.version import make_version_tuple 8 | 9 | 10 | def main(*_): 11 | components = 
list(make_version_tuple()) 12 | components[-1] += 1 13 | version = '.'.join(str(c) for c in components) 14 | subprocess.check_output(['git', 'tag', version]) 15 | return 0 16 | 17 | 18 | if __name__ == '__main__': 19 | parser = argparse.ArgumentParser(description='Bump version and create git tag.') 20 | args = parser.parse_args() 21 | sys.exit(main(args)) 22 | -------------------------------------------------------------------------------- /scripts/deploy.sh: -------------------------------------------------------------------------------- 1 | #!/usr/bin/env bash 2 | 3 | # ref: https://vaneyckt.io/posts/safer_bash_scripts_with_set_euxo_pipefail/ 4 | set -euxo pipefail 5 | 6 | if ! git diff-index --quiet HEAD --; then 7 | echo "dirty working tree; please clean or commit changes" 8 | exit 1 9 | fi 10 | 11 | if ! git describe --exact-match --tags HEAD > /dev/null; then 12 | echo "current revision not tagged; please deploy from a tagged revision" 13 | exit 1 14 | fi 15 | 16 | current="$(python -c 'import versioneer; print(versioneer.get_version())')" 17 | [[ $? -eq 1 ]] && exit 1 18 | 19 | latest="$(git describe --tags $(git rev-list --tags --max-count=1))" 20 | [[ $? 
-eq 1 ]] && exit 1 21 | 22 | if [[ "$current" != "$latest" ]]; then 23 | echo "current revision is not the latest version; please deploy from latest version" 24 | exit 1 25 | fi 26 | 27 | expect <= 0.99 64 | 65 | 66 | def detected_encoding(fname): 67 | parser = GenericSubtitleParser(skip_ssa_info=True) 68 | parser.fit(fname) 69 | return parser.detected_encoding_ 70 | 71 | 72 | @pytest.mark.integration 73 | @pytest.mark.parametrize("args,truth,should_detect_encoding", gen_synctest_configs()) 74 | def test_sync_matches_ground_truth(args, truth, should_detect_encoding): 75 | # context manager TemporaryDirectory not available on py2 76 | dirpath = tempfile.mkdtemp() 77 | try: 78 | args.srtout = os.path.join( 79 | dirpath, "test" + os.path.splitext(args.srtin[0])[-1] 80 | ) 81 | args.skip_ssa_info = True 82 | assert ffsubsync.run(args)["retval"] == 0 83 | assert timestamps_roughly_match(args.srtout, truth) 84 | if should_detect_encoding is not None: 85 | assert detected_encoding(args.srtin[0]) == should_detect_encoding 86 | finally: 87 | shutil.rmtree(dirpath) 88 | -------------------------------------------------------------------------------- /tests/test_misc.py: -------------------------------------------------------------------------------- 1 | # -*- coding: utf-8 -*- 2 | import pytest 3 | from ffsubsync.version import make_version_tuple 4 | 5 | 6 | @pytest.mark.parametrize( 7 | "vstr, expected", 8 | [("v0.1.1", (0, 1, 1)), ("v1.2.3", (1, 2, 3)), ("4.5.6.1", (4, 5, 6, 1))], 9 | ) 10 | def test_version_tuple_from_string(vstr, expected): 11 | assert make_version_tuple(vstr) == expected 12 | -------------------------------------------------------------------------------- /tests/test_subtitles.py: -------------------------------------------------------------------------------- 1 | # -*- coding: utf-8 -*- 2 | import itertools 3 | from io import BytesIO 4 | from datetime import timedelta 5 | 6 | import pytest 7 | import numpy as np 8 | 9 | from ffsubsync.sklearn_shim 
import make_pipeline 10 | from ffsubsync.speech_transformers import SubtitleSpeechTransformer 11 | from ffsubsync.subtitle_parser import GenericSubtitleParser 12 | from ffsubsync.subtitle_transformers import SubtitleShifter 13 | 14 | fake_srt = b"""1 15 | 00:00:00,178 --> 00:00:01,1416 16 | Previously on "Your favorite TV show..." 17 | 18 | 2 19 | 00:00:01,1828 --> 00:00:04,549 20 | Oh hi, Mark. 21 | 22 | 3 23 | 00:00:04,653 --> 00:00:03,3062 24 | You are tearing me apart, Lisa! 25 | """ 26 | 27 | # Occasionally some srt files have timestamps whose 'milliseconds' 28 | # field has more than 3 digits... Ideally we should test that these 29 | # are handled properly with dedicated tests, but in the interest of 30 | # development speed I've opted to sprinkle in a few >3 digit 31 | # millisecond fields into the dummy string above in order to exercise 32 | # this case integration-test style in the below unit tests. 33 | 34 | 35 | @pytest.mark.parametrize("start_seconds", [0, 2, 4, 6]) 36 | def test_start_seconds(start_seconds): 37 | parser_zero = GenericSubtitleParser(start_seconds=0) 38 | parser_zero.fit(BytesIO(fake_srt)) 39 | parser = GenericSubtitleParser(start_seconds=start_seconds) 40 | parser.fit(BytesIO(fake_srt)) 41 | expected = [ 42 | sub 43 | for sub in parser_zero.subs_ 44 | if sub.start >= timedelta(seconds=start_seconds) 45 | ] 46 | assert all(esub == psub for esub, psub in zip(expected, parser.subs_)) 47 | 48 | 49 | @pytest.mark.parametrize("max_seconds", [1, 1.5, 2.0, 2.5]) 50 | def test_max_seconds(max_seconds): 51 | parser = GenericSubtitleParser(max_subtitle_seconds=max_seconds) 52 | parser.fit(BytesIO(fake_srt)) 53 | assert max(sub.end - sub.start for sub in parser.subs_) <= timedelta( 54 | seconds=max_seconds 55 | ) 56 | 57 | 58 | @pytest.mark.parametrize("encoding", ["utf-8", "ascii", "latin-1"]) 59 | def test_same_encoding(encoding): 60 | parser = GenericSubtitleParser(encoding=encoding) 61 | offseter = SubtitleShifter(1) 62 | pipe = 
make_pipeline(parser, offseter) 63 | pipe.fit(BytesIO(fake_srt)) 64 | assert parser.subs_._encoding == encoding 65 | assert offseter.subs_._encoding == parser.subs_._encoding 66 | assert offseter.subs_.set_encoding("same")._encoding == encoding 67 | assert offseter.subs_.set_encoding("utf-8")._encoding == "utf-8" 68 | 69 | 70 | @pytest.mark.parametrize("offset", [1, 1.5, -2.3]) 71 | def test_offset(offset): 72 | parser = GenericSubtitleParser() 73 | offseter = SubtitleShifter(offset) 74 | pipe = make_pipeline(parser, offseter) 75 | pipe.fit(BytesIO(fake_srt)) 76 | for sub_orig, sub_offset in zip(parser.subs_, offseter.subs_): 77 | assert ( 78 | abs( 79 | sub_offset.start.total_seconds() 80 | - sub_orig.start.total_seconds() 81 | - offset 82 | ) 83 | < 1e-6 84 | ) 85 | assert ( 86 | abs(sub_offset.end.total_seconds() - sub_orig.end.total_seconds() - offset) 87 | < 1e-6 88 | ) 89 | 90 | 91 | @pytest.mark.parametrize( 92 | "sample_rate,start_seconds", itertools.product([10, 20, 100, 300], [0, 2, 4, 6]) 93 | ) 94 | def test_speech_extraction(sample_rate, start_seconds): 95 | parser = GenericSubtitleParser(start_seconds=start_seconds) 96 | extractor = SubtitleSpeechTransformer( 97 | sample_rate=sample_rate, start_seconds=start_seconds 98 | ) 99 | pipe = make_pipeline(parser, extractor) 100 | bitstring = pipe.fit_transform(BytesIO(fake_srt)).astype(bool) 101 | bitstring_shifted_left = np.append(bitstring[1:], [False]) 102 | bitstring_shifted_right = np.append([False], bitstring[:-1]) 103 | bitstring_cumsum = np.cumsum(bitstring) 104 | consec_ones_end_pos = np.nonzero( 105 | bitstring_cumsum 106 | * (bitstring ^ bitstring_shifted_left) 107 | * (bitstring_cumsum != np.cumsum(bitstring_shifted_right)) 108 | )[0] 109 | prev = 0 110 | for pos, sub in zip(consec_ones_end_pos, parser.subs_): 111 | start = int(round(sub.start.total_seconds() * sample_rate)) 112 | duration = sub.end.total_seconds() - sub.start.total_seconds() 113 | stop = start + int(round(duration * 
sample_rate)) 114 | assert bitstring_cumsum[pos] - prev == stop - start 115 | prev = bitstring_cumsum[pos] 116 | 117 | 118 | def test_max_time_found(): 119 | parser = GenericSubtitleParser() 120 | extractor = SubtitleSpeechTransformer(sample_rate=100) 121 | pipe = make_pipeline(parser, extractor) 122 | pipe.fit(BytesIO(fake_srt)) 123 | assert extractor.max_time_ == 6.062 124 | --------------------------------------------------------------------------------