├── .github
    ├── CONTRIBUTING.md
    └── workflows
    │   └── python-package.yml
├── .gitignore
├── LICENSE
├── MANIFEST.in
├── README.rst
├── docs
    ├── _autosummary
    │   ├── srt.tools.add.rst
    │   ├── srt.tools.deduplicate.rst
    │   ├── srt.tools.find.rst
    │   ├── srt.tools.fixed_timeshift.rst
    │   ├── srt.tools.linear_timeshift.rst
    │   ├── srt.tools.match.rst
    │   ├── srt.tools.mux.rst
    │   ├── srt.tools.normalize.rst
    │   └── srt.tools.paste.rst
    ├── _templates
    │   └── tool.rst
    ├── api.rst
    ├── conf.py
    ├── index.rst
    ├── quickstart.rst
    ├── requirements.txt
    └── tools.rst
├── setup.py
├── srt
    ├── __init__.py
    ├── srt.py
    └── tools
    │   ├── README.rst
    │   ├── __init__.py
    │   ├── _cli.py
    │   ├── _srt.py
    │   ├── _utils.py
    │   ├── add.py
    │   ├── deduplicate.py
    │   ├── find.py
    │   ├── fixed_timeshift.py
    │   ├── linear_timeshift.py
    │   ├── match.py
    │   ├── mux.py
    │   ├── normalize.py
    │   ├── paste.py
    │   └── split.py
├── tests
    ├── files
    │   ├── ascii.srt
    │   └── gb2312.srt
    ├── requirements.txt
    ├── test_srt.py
    ├── test_tools.py
    └── tools
    │   ├── __init__.py
    │   ├── test_add.py
    │   ├── test_find.py
    │   ├── test_import.py
    │   ├── test_paste.py
    │   └── test_split.py
└── tox.ini


/.github/CONTRIBUTING.md:
--------------------------------------------------------------------------------
 1 | # Contributing
 2 | 
 3 | ## License
 4 | 
 5 | By contributing code to [srt3](https://github.com/switchupcb/srt), you agree to license your contribution under the [MIT License](https://github.com/switchupcb/srt/blob/develop/LICENSE).
 6 | 
 7 | ## Documentation
 8 | 
 9 | [Detailed API documentation](https://srt3.readthedocs.io/en/latest/api.html) is available here. Documentation is auto-generated from comments using [sphinx](https://www.sphinx-doc.org/en/master/).
10 | 
11 | ## Pull Requests
12 | 
13 | The **stable** branch will always be stable. Submit against the current [**develop**](https://github.com/switchupcb/srt/tree/develop) branch.
14 | 
15 | ### Process
16 | 
17 | When you create a pull request, be sure to include its goal along with a detailed description of the code involved. A pull request can be merged by a contributor once two other developers (contributor or otherwise) have reviewed the pull request's code.
18 | 
19 | ### Tools
20 | 
21 | If you are adding an srt tool, you can use the following commit as a guide: [srt tools: add srt add](https://github.com/switchupcb/srt3/commit/1ee1f649c9a09acc649bd48c076ec6f92e8ce78e)
22 | 
23 | This library uses [argparse](https://docs.python.org/3/library/argparse) to program its Command Line Interface (CLI) and follows [UNIX Program Argument Syntax Conventions](https://www.gnu.org/software/libc/manual/html_node/Argument-Syntax.html). View the [tools table](https://github.com/switchupcb/srt/tree/develop/srt/tools) to see existing default arguments that should **NOT** be used in new tools.
24 | 
25 | ### Style
26 | 
27 | This library uses the [black](https://black.readthedocs.io/en/stable/) code style.
28 | 
29 | ```
30 | pip install black
31 | cd srt3
32 | cd ..
33 | black srt3
34 | ```
35 | 
36 | ### Testing
37 | 
38 | You are required to test your code using [tox](https://tox.readthedocs.org). You can view the `tox.ini` file for each test that is used (via _[testenv: test]_).
39 | 
40 | ```
41 | pip install tox
42 | cd srt3
43 | tox
44 | ```
45 | 
46 | _Tests use the [pytest](https://docs.pytest.org/en/6.2.x/contents.html) framework._
47 | 
48 | ### Checklist
49 | 
50 | Before a **pull request**:
51 | 
52 | 1. Ensure any unnecessary files, code and/or dependencies are removed.
53 | 2. Adhere to the style guide.
54 | 3. Adhere to the testing protocol.
55 | 4. Add comments for the auto-generated documentation.
56 | 5. Update the README where necessary.
57 | 
58 | ## Roadmap
59 | 
60 | While srt3 does not have an official roadmap, here are cool features you could add or improve:
61 | 
62 | **srt3/# -** Simplify code and implement commented features remnant from legacy srt.
63 | 
64 | **srt3/coverage -** Legacy tools from srt1 don't actually have 100% coverage and don't fully support pytype.
65 | 
66 | **srt3/srt/tools -** Tools that can be used in the Command Line Interface and srt3 library.
67 | 
68 | **srt3/srt/tools/tool/transcribe -** A transcription tool that automates audio-video transcription.
69 | 
70 | **srt3/gui -** A graphical user interface that allows users to use the tools present in the library but **not** in the installed package.
71 | 


--------------------------------------------------------------------------------
/.github/workflows/python-package.yml:
--------------------------------------------------------------------------------
  1 | # This workflow will install Python dependencies, run tests and lint with a variety of Python versions
  2 | # For more information see: https://help.github.com/actions/language-and-framework-guides/using-python-with-github-actions
  3 | name: build
  4 | 
  5 | on:
  6 |   push:
  7 |     branches: [develop]
  8 |   pull_request:
  9 |     branches: [develop]
 10 | 
 11 | jobs:
 12 |   build-linux:
 13 |     runs-on: ubuntu-latest
 14 |     strategy:
 15 |       matrix:
 16 |         # https://devguide.python.org/#status-of-python-branches
 17 |         # 3.8 ran on alternate builds
 18 |         python-version: [3.6, 3.7, 3.9]
 19 | 
 20 |     # All build steps are the same
 21 |     steps:
 22 |       - uses: actions/checkout@v2
 23 |       - name: Set up Python ${{ matrix.python-version }}
 24 |         uses: actions/setup-python@v2
 25 |         with:
 26 |           python-version: ${{ matrix.python-version }}
 27 |       - name: Install dependencies
 28 |         run: |
 29 |           python -m pip install --upgrade pip
 30 |           python -m pip install --upgrade tox
 31 |       - name: Test with tox
 32 |         run: |
 33 |           tox
 34 | 
 35 |   build-mac:
 36 |     runs-on: macos-latest
 37 |     strategy:
 38 |       matrix:
 39 |         python-version: [3.8]
 40 | 
 41 |     # All build steps are the same
 42 |     steps:
 43 |       - uses: actions/checkout@v2
 44 |       - name: Set up Python ${{ matrix.python-version }}
 45 |         uses: actions/setup-python@v2
 46 |         with:
 47 |           python-version: ${{ matrix.python-version }}
 48 |       - name: Install dependencies
 49 |         run: |
 50 |           python -m pip install --upgrade pip
 51 |           python -m pip install --upgrade tox
 52 |       - name: Test with tox
 53 |         run: |
 54 |           tox
 55 | 
 56 |   build-win:
 57 |     runs-on: windows-latest
 58 |     strategy:
 59 |       matrix:
 60 |         python-version: [3.8]
 61 | 
 62 |     # All build steps are the same
 63 |     steps:
 64 |       - uses: actions/checkout@v2
 65 |       - name: Set up Python ${{ matrix.python-version }}
 66 |         uses: actions/setup-python@v2
 67 |         with:
 68 |           python-version: ${{ matrix.python-version }}
 69 |       - name: Install dependencies
 70 |         run: |
 71 |           python -m pip install --upgrade pip
 72 |           python -m pip install --upgrade tox
 73 |       - name: Test with tox
 74 |         run: |
 75 |           tox
 76 | 
 77 |   full-build:
 78 |     runs-on: ubuntu-latest
 79 |     strategy:
 80 |       matrix:
 81 |         python-version: [3.8]
 82 | 
 83 |     # All build steps are the same
 84 |     steps:
 85 |       - uses: actions/checkout@v2
 86 |       - name: Set up Python ${{ matrix.python-version }}
 87 |         uses: actions/setup-python@v2
 88 |         with:
 89 |           python-version: ${{ matrix.python-version }}
 90 |       - name: Install dependencies
 91 |         run: |
 92 |           python -m pip install --upgrade pip
 93 |           python -m pip install --upgrade tox
 94 |       - name: Test with tox
 95 |         run: |
 96 |           tox
 97 |         env:
 98 |           TOXENV: doctest,black,pylint,bandit,coverage,py-release
 99 |           CODECOV_TOKEN: ${{ secrets.CODECOV_TOKEN }}
100 | 


--------------------------------------------------------------------------------
/.gitignore:
--------------------------------------------------------------------------------
1 | *.pyc
2 | .hypothesis
3 | .tox
4 | build
5 | dist
6 | 


--------------------------------------------------------------------------------
/LICENSE:
--------------------------------------------------------------------------------
 1 | The MIT License
 2 | 
 3 | Copyright (c) 2014-2021 Christopher Down
 4 | Copyright (c) 2021-present SwitchUpCB
 5 | 
 6 | Permission is hereby granted, free of charge, to any person obtaining a copy
 7 | of this software and associated documentation files (the "Software"), to deal
 8 | in the Software without restriction, including without limitation the rights
 9 | to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
10 | copies of the Software, and to permit persons to whom the Software is
11 | furnished to do so, subject to the following conditions:
12 | 
13 | The above copyright notice and this permission notice shall be included in
14 | all copies or substantial portions of the Software.
15 | 
16 | THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
17 | IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
18 | FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
19 | AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
20 | LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
21 | OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN
22 | THE SOFTWARE.
23 | 


--------------------------------------------------------------------------------
/MANIFEST.in:
--------------------------------------------------------------------------------
1 | recursive-include docs *
2 | 
3 | recursive-exclude tests *
4 | recursive-exclude * *.py[co]
5 | recursive-exclude * __pycache__
6 | 


--------------------------------------------------------------------------------
/README.rst:
--------------------------------------------------------------------------------
  1 | |ghactions| |lgtm| |codecov| |libraries|
  2 | 
  3 | .. |ghactions| image:: https://github.com/switchupcb/srt3/actions/workflows/python-package.yml/badge.svg
  4 |   :target: https://github.com/switchupcb/srt3/actions
  5 |   :alt: Github Actions
  6 | 
  7 | .. |lgtm| image:: https://img.shields.io/lgtm/grade/python/github/switchupcb/srt3.svg?label=code%20quality
  8 |   :target: https://lgtm.com/projects/g/switchupcb/srt3/overview/
  9 |   :alt: LGTM
 10 | 
 11 | .. |codecov| image:: https://codecov.io/gh/switchupcb/srt3/branch/develop/graph/badge.svg?token=YINLIN42N1
 12 |   :target: https://codecov.io/gh/switchupcb/srt3
 13 |   :alt: Coverage
 14 | 
 15 | .. |libraries| image:: https://img.shields.io/librariesio/github/switchupcb/srt3.svg?label=dependencies
 16 |   :target: https://libraries.io/github/switchupcb/srt3
 17 |   :alt: Dependencies
 18 | 
 19 | srt3 is a simple yet featureful Python library for parsing, modifying, and
 20 | composing `SRT files`_. Take a look at the quickstart_ for a basic overview of
 21 | the library. `Detailed API documentation`_ is also available.
 22 | 
 23 | Want to see some examples of its use? Take a look at the `tools shipped with
 24 | the library`_.
 25 | 
 26 | Why choose this library?
 27 | ------------------------
 28 | 
 29 | - Parses broken SRT files other libraries can't and fixes them
 30 | - Support for Asian-style SRT formats (i.e. "fullwidth" SRT format)
 31 | - Extremely lightweight with a `Well Documented API`_
 32 | - Includes tools that allow you to perform tasks using the library
 33 | - No Dependencies outside of the Standard Library
 34 | - High quality test suite using Hypothesis_
 35 | - `~30% faster than pysrt on typical workloads`_
 36 | - 100% Unicode Compliant
 37 | - Portable — runs on Windows, OSX, and Linux
 38 | - Released under a highly permissive license (MIT)
 39 | 
 40 | .. _quickstart: http://srt3.readthedocs.org/en/latest/quickstart.html
 41 | .. _`SRT files`: https://en.wikipedia.org/wiki/SubRip#SubRip_text_file_format
 42 | .. _Hypothesis: https://github.com/DRMacIver/hypothesis
 43 | .. _`Well Documented API`: http://srt3.readthedocs.org/en/latest/index.html
 44 | .. _`~30% faster than pysrt on typical workloads`: https://paste.pound-python.org/raw/8nQKbDW0ROWvS7bOeAb3/
 45 | 
 46 | Usage
 47 | -----
 48 | 
 49 | Tools
 50 | =====
 51 | 
 52 | There are a number of `tools shipped with the library`_ to manipulate, process,
 53 | and fix SRT files. Here's an example using `hanzidentifier`_ to strip out
 54 | non-Chinese lines:
 55 | 
 56 | .. code::
 57 | 
 58 |     $ cat pe.srt
 59 |     1
 60 |     00:00:33,843 --> 00:00:38,097
 61 |     Only 3% of the water on our planet is fresh.
 62 |     地球上只有3%的水是淡水
 63 | 
 64 |     2
 65 |     00:00:40,641 --> 00:00:44,687
 66 |     Yet, these precious waters are rich with surprise.
 67 |     可是这些珍贵的淡水中却充满了惊奇
 68 | 
 69 |     $ srt match -m hanzidentifier -fm hanzidentifier.has_chinese -i pe.srt
 70 |     1
 71 |     00:00:33,843 --> 00:00:38,097
 72 |     地球上只有3%的水是淡水
 73 | 
 74 |     2
 75 |     00:00:40,641 --> 00:00:44,687
 76 |     可是这些珍贵的淡水中却充满了惊奇
 77 | 
 78 | 
 79 | These tools are easy to chain together. For example, you have a subtitle
 80 | containing Chinese and English, and another containing French. You only want Chinese
 81 | and French. The Chinese and English subtitle is also 5 seconds late. That's easy enough
 82 | to sort out:
 83 | 
 84 | .. code::
 85 | 
 86 |    $ srt match -m hanzidentifier -fm hanzidentifier.has_chinese -i chs+eng.srt |
 87 |    >     srt fixed_timeshift --seconds -5 |
 88 |    >     srt mux --input - --input fra.srt
 89 | 
 90 | See the srt/tools/ directory for more information.
 91 | 
 92 | .. _hanzidentifier: https://github.com/tsroten/hanzidentifier
 93 | 
 94 | Library
 95 | =======
 96 | 
 97 | `Detailed API documentation`_ is available, but here are the basics:
 98 | 
 99 | .. code:: python
100 | 
101 |     >>> # list() is needed as srt.parse creates a generator
102 |     >>> subs = list(srt.parse('''\
103 |     ... 1
104 |     ... 00:00:33,843 --> 00:00:38,097
105 |     ... 地球上只有3%的水是淡水
106 |     ...
107 |     ... 2
108 |     ... 00:00:40,641 --> 00:00:44,687
109 |     ... 可是这些珍贵的淡水中却充满了惊奇
110 |     ...
111 |     ... 3
112 |     ... 00:00:57,908 --> 00:01:03,414
113 |     ... 所有陆地生命归根结底都依赖於淡水
114 |     ...
115 |     ... '''))
116 |     >>> subs
117 |     [Subtitle(index=1, start=datetime.timedelta(0, 33, 843000), end=datetime.timedelta(0, 38, 97000), content='地球上只有3%的水是淡水', proprietary=''),
118 |      Subtitle(index=2, start=datetime.timedelta(0, 40, 641000), end=datetime.timedelta(0, 44, 687000), content='可是这些珍贵的淡水中却充满了惊奇', proprietary=''),
119 |      Subtitle(index=3, start=datetime.timedelta(0, 57, 908000), end=datetime.timedelta(0, 63, 414000), content='所有陆地生命归根结底都依赖於淡水', proprietary='')]
120 |     >>> print(srt.compose(subs))
121 |     1
122 |     00:00:33,843 --> 00:00:38,097
123 |     地球上只有3%的水是淡水
124 | 
125 |     2
126 |     00:00:40,641 --> 00:00:44,687
127 |     可是这些珍贵的淡水中却充满了惊奇
128 | 
129 |     3
130 |     00:00:57,908 --> 00:01:03,414
131 |     所有陆地生命归根结底都依赖於淡水
132 | 
133 | Installation
134 | ------------
135 | 
136 | To install the latest stable version from PyPI:
137 | 
138 | .. code::
139 | 
140 |     pip install -U srt3
141 | 
142 | To install the latest development version directly from GitHub:
143 | 
144 | .. code::
145 | 
146 |     pip install -U git+https://github.com/switchupcb/srt3.git@develop
147 | 
148 | Contribute
149 | ----------
150 | You can contribute to this repository using its `Contribution Guidelines`_.
151 | 
152 | .. _`Detailed API documentation`: http://srt3.readthedocs.org/en/latest
153 | .. _`tools shipped with the library`: https://github.com/switchupcb/srt3/tree/develop/srt/tools
154 | .. _`Contribution Guidelines`: https://github.com/switchupcb/srt3/blob/5011e36336134eedf281bbab60279c988b54e07f/.github/CONTRIBUTING.md
155 | 


--------------------------------------------------------------------------------
/docs/_autosummary/srt.tools.add.rst:
--------------------------------------------------------------------------------
1 | ﻿srt.tools.add
2 | ===============================================================================
3 | 
4 | .. automodule:: srt.tools.add
5 |    :members:
6 |    :exclude-members:


--------------------------------------------------------------------------------
/docs/_autosummary/srt.tools.deduplicate.rst:
--------------------------------------------------------------------------------
1 | ﻿srt.tools.deduplicate
2 | ===============================================================================
3 | 
4 | .. automodule:: srt.tools.deduplicate
5 |    :members:
6 |    :exclude-members:


--------------------------------------------------------------------------------
/docs/_autosummary/srt.tools.find.rst:
--------------------------------------------------------------------------------
1 | ﻿srt.tools.find
2 | ===============================================================================
3 | 
4 | .. automodule:: srt.tools.find
5 |    :members:
6 |    :exclude-members:


--------------------------------------------------------------------------------
/docs/_autosummary/srt.tools.fixed_timeshift.rst:
--------------------------------------------------------------------------------
1 | ﻿srt.tools.fixed_timeshift
2 | ===============================================================================
3 | 
4 | .. automodule:: srt.tools.fixed_timeshift
5 |    :members:
6 |    :exclude-members:


--------------------------------------------------------------------------------
/docs/_autosummary/srt.tools.linear_timeshift.rst:
--------------------------------------------------------------------------------
1 | ﻿srt.tools.linear_timeshift
2 | ===============================================================================
3 | 
4 | .. automodule:: srt.tools.linear_timeshift
5 |    :members:
6 |    :exclude-members:


--------------------------------------------------------------------------------
/docs/_autosummary/srt.tools.match.rst:
--------------------------------------------------------------------------------
1 | ﻿srt.tools.match
2 | ===============================================================================
3 | 
4 | .. automodule:: srt.tools.match
5 |    :members:
6 |    :exclude-members:


--------------------------------------------------------------------------------
/docs/_autosummary/srt.tools.mux.rst:
--------------------------------------------------------------------------------
1 | ﻿srt.tools.mux
2 | ===============================================================================
3 | 
4 | .. automodule:: srt.tools.mux
5 |    :members:
6 |    :exclude-members:


--------------------------------------------------------------------------------
/docs/_autosummary/srt.tools.normalize.rst:
--------------------------------------------------------------------------------
1 | ﻿srt.tools.normalize
2 | ===============================================================================
3 | 
4 | .. automodule:: srt.tools.normalize
5 |    :members:
6 |    :exclude-members:


--------------------------------------------------------------------------------
/docs/_autosummary/srt.tools.paste.rst:
--------------------------------------------------------------------------------
1 | ﻿srt.tools.paste
2 | ===============================================================================
3 | 
4 | .. automodule:: srt.tools.paste
5 |    :members:
6 |    :exclude-members:


--------------------------------------------------------------------------------
/docs/_templates/tool.rst:
--------------------------------------------------------------------------------
1 | {{ fullname }}
2 | ===============================================================================
3 | 
4 | .. automodule:: {{ fullname }}
5 |    :members:
6 |    :exclude-members:
7 | 


--------------------------------------------------------------------------------
/docs/api.rst:
--------------------------------------------------------------------------------
1 | API Documentation
2 | =================
3 | 
4 | .. automodule:: srt.srt
5 |    :members:
6 |    :exclude-members:
7 | 


--------------------------------------------------------------------------------
/docs/conf.py:
--------------------------------------------------------------------------------
 1 | import sys
 2 | import os
 3 | 
 4 | # srt.py is in the /srt directory
 5 | sys.path.insert(0, os.path.abspath("../"))
 6 | 
 7 | # Project Information
 8 | project = "srt3"
 9 | version = "1.0.0"
10 | release = version
11 | copyright = "SwitchUpCB"
12 | 
13 | # General Configuration
14 | extensions = [
15 |     "sphinx.ext.autodoc",
16 |     "sphinx.ext.autosummary",
17 |     "sphinx.ext.doctest",
18 |     "sphinx.ext.intersphinx",
19 | ]
20 | 
21 | intersphinx_mapping = {"python": ("https://docs.python.org/3.8", None)}
22 | autosummary_generate = True  # Turn on sphinx.ext.autosummary
23 | html_show_sourcelink = False  # Remove 'Page source' (html)
24 | add_module_names = False  # Remove namespaces.
25 | 
26 | # Exclusions
27 | exclude_patterns = ["_build"]
28 | 
29 | 
30 | def exclude_cli_methods(app, what, name, obj, skip, options):
31 |     return "main" == name or "set_args" == name or name.startswith("_")
32 | 
33 | 
34 | def setup(app):  # Sphinx calls a setup() defined in conf.py with the application object
35 |     app.connect("autodoc-skip-member", exclude_cli_methods)  # register exclude_cli_methods as the member filter
36 | 
37 | 
38 | # Theme
39 | html_theme = "sphinx_rtd_theme"
40 | pygments_style = "sphinx"
41 | 
42 | # Options
43 | root_doc = "index"
44 | source_suffix = ".rst"
45 | htmlhelp_basename = "srtdoc"
46 | templates_path = ["_templates"]
47 | html_static_path = ["_static"]
48 | 


--------------------------------------------------------------------------------
/docs/index.rst:
--------------------------------------------------------------------------------
 1 | `srt3`: Parse SubRip Files
 2 | ==========================
 3 | 
 4 | srt3_ is a simple Python library for parsing, modifying, and composing SRT files.
 5 | 
 6 | .. _srt3: https://github.com/switchupcb/srt3
 7 | 
 8 | Documentation
 9 | =============
10 | 
11 | .. toctree::
12 |    :maxdepth: 1
13 | 
14 |    quickstart
15 |    api
16 |    tools
17 | 
18 | Indices and Tables
19 | ==================
20 | 
21 | * :ref:`genindex`
22 | * :ref:`search`
23 | 


--------------------------------------------------------------------------------
/docs/quickstart.rst:
--------------------------------------------------------------------------------
 1 | Quickstart
 2 | ==========
 3 | 
 4 | Parse an SRT to Python objects
 5 | ------------------------------
 6 | 
 7 | .. code:: python
 8 | 
 9 |     >>> import srt
10 |     >>> subtitle_generator = srt.parse('''\
11 |     ... 1
12 |     ... 00:31:37,894 --> 00:31:39,928
13 |     ... OK, look, I think I have a plan here.
14 |     ...
15 |     ... 2
16 |     ... 00:31:39,931 --> 00:31:41,931
17 |     ... Using mainly spoons,
18 |     ...
19 |     ... 3
20 |     ... 00:31:41,933 --> 00:31:43,435
21 |     ... we dig a tunnel under the city and release it into the wild.
22 |     ...
23 |     ... ''')
24 |     >>> subtitles = list(subtitle_generator)
25 |     >>>
26 |     >>> subtitles[0].start
27 |     datetime.timedelta(0, 1897, 894000)
28 |     >>> subtitles[1].content
29 |     'Using mainly spoons,'
30 | 
31 | Compose an SRT from Python objects
32 | ----------------------------------
33 | 
34 | .. code:: python
35 | 
36 |     >>> print(srt.compose(subtitles))
37 |     1
38 |     00:31:37,894 --> 00:31:39,928
39 |     OK, look, I think I have a plan here.
40 |     <BLANKLINE>
41 |     2
42 |     00:31:39,931 --> 00:31:41,931
43 |     Using mainly spoons,
44 |     <BLANKLINE>
45 |     3
46 |     00:31:41,933 --> 00:31:43,435
47 |     we dig a tunnel under the city and release it into the wild.
48 |     <BLANKLINE>
49 | 
50 | Import Guide
51 | ------------
52 | 
53 | .. code:: python
54 | 
55 |     ### Use srt via srt.func()
56 |     # import the whole srt package (including tools)
57 |     import srt
58 | 
59 |     # only imports the srt.py module
60 |     from srt import srt
61 | 
62 |     ### Use srt tools
63 |     import srt
64 |     # srt.tools.tool.func()
65 |     srt.tools.find.find_by_timestamp()
66 | 
67 |     from srt import tools
68 |     # tools.tool.func()
69 |     tools.find.find_by_timestamp()
70 | 
71 |     # import all members from a tool module.
72 |     from srt.tools.find import *
73 |     find_by_timestamp()
74 | 


--------------------------------------------------------------------------------
/docs/requirements.txt:
--------------------------------------------------------------------------------
1 | sphinx==4.*
2 | sphinx-rtd-theme==0.*
3 | 


--------------------------------------------------------------------------------
/docs/tools.rst:
--------------------------------------------------------------------------------
 1 | Tools Documentation
 2 | ===================
 3 | 
 4 | .. automodule:: srt.tools
 5 | 
 6 | .. rubric:: Modules
 7 | 
 8 | .. autosummary::
 9 |    :toctree: _autosummary
10 |    :template: tool.rst
11 |    :recursive:
12 | 
13 |    srt.tools.add
14 |    srt.tools.deduplicate
15 |    srt.tools.find
16 |    srt.tools.fixed_timeshift
17 |    srt.tools.linear_timeshift
18 |    srt.tools.match
19 |    srt.tools.mux
20 |    srt.tools.normalize
21 |    srt.tools.paste
22 | 


--------------------------------------------------------------------------------
/setup.py:
--------------------------------------------------------------------------------
 1 | #!/usr/bin/python3
 2 | 
 3 | import codecs
 4 | from setuptools import setup, find_packages
 5 | 
 6 | 
 7 | with codecs.open("README.rst", encoding="utf8") as readme_f:
 8 |     README = readme_f.read()
 9 | 
10 | setup(
11 |     name="srt3",
12 |     version="1.0.1",
13 |     python_requires=">=3.3",
14 |     description="A simple library for parsing, modifying, and composing SRT files.",
15 |     long_description=README,
16 |     long_description_content_type="text/x-rst",
17 |     author="SwitchUpCB",
18 |     url="https://github.com/switchupcb/srt3",
19 |     packages=find_packages(include=["srt", "srt.*"]),
20 |     license="MIT",
21 |     license_files=("LICENSE"),
22 |     keywords=["srt", "srt3"],
23 |     classifiers=[
24 |         "Development Status :: 5 - Production/Stable",
25 |         "Intended Audience :: Developers",
26 |         "License :: OSI Approved :: MIT License",
27 |         "Operating System :: OS Independent",
28 |         "Programming Language :: Python :: 3",
29 |         "Programming Language :: Python :: 3.6",
30 |         "Programming Language :: Python :: 3.7",
31 |         "Programming Language :: Python :: 3.8",
32 |         "Programming Language :: Python :: 3.9",
33 |         "Topic :: Multimedia :: Video",
34 |         "Topic :: Software Development :: Libraries",
35 |         "Topic :: Text Processing",
36 |     ],
37 |     entry_points={
38 |         "console_scripts": [
39 |             "srt = srt.tools._srt:main",
40 |         ],
41 |     },
42 | )
43 | 


--------------------------------------------------------------------------------
/srt/__init__.py:
--------------------------------------------------------------------------------
1 | """A simple library for parsing, modifying, and composing SRT files."""
2 | from srt import tools
3 | from .srt import *
4 | 


--------------------------------------------------------------------------------
/srt/srt.py:
--------------------------------------------------------------------------------
  1 | #!/usr/bin/python3
  2 | 
  3 | """A simple library for parsing, modifying, and composing SRT files."""
  4 | 
  5 | import functools
  6 | import re
  7 | from datetime import timedelta
  8 | import logging
  9 | import io
 10 | 
 11 | 
 12 | LOG = logging.getLogger(__name__)
 13 | 
 14 | # "." is not technically valid as a delimiter, but many editors create SRT
 15 | # files with this delimiter for whatever reason. Many editors and players
 16 | # accept it, so we do too.
 17 | RGX_TIMESTAMP_MAGNITUDE_DELIM = r"[,.:，．。：]"  # one delimiter char: ASCII , . : or a fullwidth/ideographic equivalent
 18 | RGX_TIMESTAMP_FIELD = r"[0-9]+"  # one or more ASCII digits
 19 | RGX_TIMESTAMP = RGX_TIMESTAMP_MAGNITUDE_DELIM.join([RGX_TIMESTAMP_FIELD] * 4)  # four digit fields joined by a delimiter, no capture groups
 20 | RGX_TIMESTAMP_PARSEABLE = r"^{}$".format(  # anchored variant of RGX_TIMESTAMP
 21 |     RGX_TIMESTAMP_MAGNITUDE_DELIM.join(["(" + RGX_TIMESTAMP_FIELD + ")"] * 4)  # each of the four fields is captured
 22 | )
 23 | RGX_INDEX = r"-?[0-9]+\.?[0-9]*"  # optional minus, digits, then an optional "." and further digits
 24 | RGX_PROPRIETARY = r"[^\r\n]*"  # anything up to (not including) the end of the line
 25 | RGX_CONTENT = r".*?"  # lazy match; spans newlines because SRT_REGEX is compiled with re.DOTALL
 26 | RGX_POSSIBLE_CRLF = r"\r?\n"  # "\n" optionally preceded by "\r"
 27 | 
 28 | TS_REGEX = re.compile(RGX_TIMESTAMP_PARSEABLE)
 29 | MULTI_WS_REGEX = re.compile(r"\n\n+")
 30 | SRT_REGEX = re.compile(
 31 |     r"\s*({idx})\s*{eof}({ts}) *-[ -] *> *({ts}) ?({proprietary})(?:{eof}|\Z)({content})"
 32 |     # Many sub editors don't add a blank line to the end, and many editors and
 33 |     # players accept that. We allow it to be missing in input.
 34 |     #
 35 |     # We also allow subs that are missing a double blank newline. This often
 36 |     # happens on subs which were first created as a mixed language subtitle,
 37 |     # for example chs/eng, and then were stripped using naive methods (such as
 38 |     # ed/sed) that don't understand newline preservation rules in SRT files.
 39 |     #
 40 |     # This means that when you are, say, only keeping chs, and the line only
 41 |     # contains english, you end up with not only no content, but also all of
 42 |     # the content lines are stripped instead of retaining a newline.
 43 |     r"(?:{eof}|\Z)(?:{eof}|\Z|(?=(?:{idx}\s*{eof}{ts})))"
 44 |     # Some SRT blocks, while this is technically invalid, have blank lines
 45 |     # inside the subtitle content. We look ahead a little to check that the
 46 |     # next lines look like an index and a timestamp as a best-effort
 47 |     # solution to work around these.
 48 |     r"(?=(?:{idx}\s*{eof}{ts}|\Z))".format(
 49 |         idx=RGX_INDEX,
 50 |         ts=RGX_TIMESTAMP,
 51 |         proprietary=RGX_PROPRIETARY,
 52 |         content=RGX_CONTENT,
 53 |         eof=RGX_POSSIBLE_CRLF,
 54 |     ),
 55 |     re.DOTALL,
 56 | )
 57 | 
 58 | ZERO_TIMEDELTA = timedelta(0)
 59 | 
 60 | # Info message if truthy return -> Function taking a Subtitle, skip if True
 61 | SUBTITLE_SKIP_CONDITIONS = (
 62 |     ("No content", lambda sub: not sub.content.strip()),
 63 |     ("Start time < 0 seconds", lambda sub: sub.start < ZERO_TIMEDELTA),
 64 |     ("Subtitle start time >= end time", lambda sub: sub.start >= sub.end),
 65 | )
 66 | 
 67 | SECONDS_IN_HOUR = 3600
 68 | SECONDS_IN_MINUTE = 60
 69 | HOURS_IN_DAY = 24
 70 | MICROSECONDS_IN_MILLISECOND = 1000
 71 | FILE_TYPES = (io.IOBase,)
 72 | 
 73 | 
 74 | @functools.total_ordering
 75 | class Subtitle:
 76 |     r"""
 77 |     The metadata relating to a single subtitle. Subtitles are sorted by start
 78 |     time by default.
 79 | 
 80 |     :param int index: The SRT index for this subtitle
 81 |     :param start: The time that the subtitle should start being shown
 82 |     :type start: :py:class:`datetime.timedelta`
 83 |     :param end: The time that the subtitle should stop being shown
 84 |     :type end: :py:class:`datetime.timedelta`
 85 |     :param str proprietary: Proprietary metadata for this subtitle
 86 |     :param str content: The subtitle content. Should not contain OS-specific
 87 |                         line separators, only \\n. This is taken care of
 88 |                         already if you use :py:func:`srt.parse` to generate
 89 |                         Subtitle objects.
 90 |     """
 91 | 
 92 |     # pylint: disable=R0913
 93 |     def __init__(self, index, start, end, content, proprietary=""):
 94 |         self.index = index
 95 |         self.start = start
 96 |         self.end = end
 97 |         self.content = content
 98 |         self.proprietary = proprietary
 99 | 
100 |     def __hash__(self):
101 |         return hash(frozenset(vars(self).items()))
102 | 
103 |     def __eq__(self, other):
104 |         return vars(self) == vars(other)
105 | 
106 |     def __lt__(self, other):
107 |         return self.start < other.start or (
108 |             self.start == other.start and self.end < other.end
109 |         )
110 | 
111 |     def __repr__(self):
112 |         item_list = ", ".join("%s=%r" % (k, v) for k, v in vars(self).items())
113 |         return "%s(%s)" % (type(self).__name__, item_list)
114 | 
115 |     def to_srt(self, strict=True, eol="\n"):
116 |         r"""
117 |         Convert the current :py:class:`Subtitle` to an SRT block.
118 | 
119 |         :param bool strict: If disabled, will allow blank lines in the content
120 |                             of the SRT block, which is a violation of the SRT
121 |                             standard and may case your media player to explode
122 |         :param str eol: The end of line string to use (default "\\n")
123 |         :returns: The metadata of the current :py:class:`Subtitle` object as an
124 |                   SRT formatted subtitle block
125 |         :rtype: str
126 |         """
127 |         output_content = self.content
128 |         output_proprietary = self.proprietary
129 | 
130 |         if output_proprietary:
131 |             # output_proprietary is output directly next to the timestamp, so
132 |             # we need to add the space as a field delimiter.
133 |             output_proprietary = " " + output_proprietary
134 | 
135 |         if strict:
136 |             output_content = make_legal_content(output_content)
137 | 
138 |         if eol is None:
139 |             eol = "\n"
140 |         elif eol != "\n":
141 |             output_content = output_content.replace("\n", eol)
142 | 
143 |         template = "{idx}{eol}{start} --> {end}{prop}{eol}{content}{eol}{eol}"
144 |         return template.format(
145 |             idx=self.index,
146 |             start=timedelta_to_srt_timestamp(self.start),
147 |             end=timedelta_to_srt_timestamp(self.end),
148 |             prop=output_proprietary,
149 |             content=output_content,
150 |             eol=eol,
151 |         )
152 | 
153 | 
def make_legal_content(content):
    r"""
    Remove illegal content from a content block. Illegal content includes:

    * Blank lines
    * Starting or ending with a blank line

    .. doctest::

        >>> make_legal_content('\nfoo\n\nbar\n')
        'foo\nbar'
        >>> make_legal_content('foo\n')
        'foo'

    :param str content: The content to make legal
    :returns: The legalised content
    :rtype: str
    """
    # Optimisation: Usually the content we get is legally valid. Do a quick
    # check to see if we really need to do anything here. This saves time from
    # generating legal_content by about 50%.
    #
    # The trailing-newline check matters: without it, content such as "foo\n"
    # took the fast path and kept its trailing newline, unlike "\nfoo\n".
    if (
        content
        and content[0] != "\n"
        and content[-1] != "\n"
        and "\n\n" not in content
    ):
        return content

    legal_content = MULTI_WS_REGEX.sub("\n", content.strip("\n"))
    LOG.info("Legalised content %r to %r", content, legal_content)
    return legal_content
179 | 
180 | 
def timedelta_to_srt_timestamp(timedelta_timestamp):
    r"""
    Render a :py:class:`~datetime.timedelta` as an SRT timestamp string.

    .. doctest::

        >>> import datetime
        >>> delta = datetime.timedelta(hours=1, minutes=23, seconds=4)
        >>> timedelta_to_srt_timestamp(delta)
        '01:23:04,000'

    :param datetime.timedelta timedelta_timestamp: The duration to render as
                                                   an SRT timestamp
    :returns: The timestamp in SRT format
    :rtype: str
    """

    # timedelta stores days, seconds and microseconds separately; split the
    # seconds field and fold whole days into the hour count.
    hours_in_seconds_field, leftover = divmod(
        timedelta_timestamp.seconds, SECONDS_IN_HOUR
    )
    minutes, seconds = divmod(leftover, SECONDS_IN_MINUTE)
    total_hours = timedelta_timestamp.days * HOURS_IN_DAY + hours_in_seconds_field
    milliseconds = timedelta_timestamp.microseconds // MICROSECONDS_IN_MILLISECOND
    return "%02d:%02d:%02d,%03d" % (total_hours, minutes, seconds, milliseconds)
203 | 
204 | 
def srt_timestamp_to_timedelta(timestamp):
    r"""
    Build a :py:class:`~datetime.timedelta` from an SRT timestamp string.

    .. doctest::

        >>> srt_timestamp_to_timedelta('01:23:04,000')
        datetime.timedelta(seconds=4984)

    :param str timestamp: A timestamp in SRT format
    :returns: The timestamp as a :py:class:`~datetime.timedelta`
    :rtype: datetime.timedelta
    :raises TimestampParseError: If the timestamp is not parseable
    """

    parsed = TS_REGEX.match(timestamp)
    if parsed is None:
        raise TimestampParseError("Unparseable timestamp: {}".format(timestamp))

    # The regex groups are, in order: hours, minutes, seconds, milliseconds.
    hours, minutes, seconds, millis = (int(field) for field in parsed.groups())
    return timedelta(
        hours=hours, minutes=minutes, seconds=seconds, milliseconds=millis
    )
225 | 
226 | 
def sort_and_reindex(subtitles, start_index=1, in_place=False, skip=True):
    """
    Sort subtitles by start time and renumber their indexes to follow that
    order. This keeps an SRT file playing as expected after, for example, the
    times of some subtitles were changed and the sequence needs resorting.

    When skip=True, subtitles considered "not useful" are dropped and do not
    consume an index. A subtitle is currently considered not useful when:

    - Its content is empty, or only whitespace
    - Its start time is negative
    - Its start time is equal to or later than its end time

    .. doctest::

        >>> from datetime import timedelta
        >>> one = timedelta(seconds=1)
        >>> two = timedelta(seconds=2)
        >>> three = timedelta(seconds=3)
        >>> subs = [
        ...     Subtitle(index=999, start=one, end=two, content='1'),
        ...     Subtitle(index=0, start=two, end=three, content='2'),
        ... ]
        >>> list(sort_and_reindex(subs))  # doctest: +ELLIPSIS
        [Subtitle(...index=1...), Subtitle(...index=2...)]

    :param subtitles: :py:class:`Subtitle` objects in any order
    :param int start_index: The index to start from
    :param bool in_place: Whether to modify subs in-place for performance
                          (version <=1.0.0 behaviour)
                          `https://en.wikipedia.org/wiki/in-place_algorithm
                          <https://en.wikipedia.org/wiki/in-place_algorithm>`_
    :param bool skip: Whether to skip subtitles considered not useful (see
                      above for rules)
    :returns: The sorted subtitles
    :rtype: :term:`generator` of :py:class:`Subtitle` objects
    """
    dropped = 0
    ordered = sorted(subtitles)

    for position, original in enumerate(ordered, start=start_index):
        # Work on a copy unless the caller asked for in-place mutation.
        current = original if in_place else Subtitle(**vars(original))

        if skip:
            try:
                _should_skip_sub(current)
            except _ShouldSkipException as reason:
                LOG.info("Skipped subtitle at index %d: %s", current.index, reason)
                dropped += 1
                continue

        # Skipped subtitles must not leave gaps in the numbering.
        current.index = position - dropped

        yield current
282 | 
283 | 
def _should_skip_sub(subtitle):
    """
    Raise if any rule in SUBTITLE_SKIP_CONDITIONS says the subtitle should be
    skipped. Rules are evaluated in order and the first hit wins.

    :param subtitle: A :py:class:`Subtitle` to check whether to skip
    :raises _ShouldSkipException: If the subtitle should be skipped
    """
    first_reason = next(
        (
            message
            for message, predicate in SUBTITLE_SKIP_CONDITIONS
            if predicate(subtitle)
        ),
        None,
    )
    if first_reason is not None:
        raise _ShouldSkipException(first_reason)
295 | 
296 | 
def parse(srt, ignore_errors=False):
    r'''
    Convert an SRT formatted string to a :term:`generator` of Subtitle objects.

    This function works around bugs present in many SRT files, most notably
    that it is designed not to break when presented with a blank line as part
    of a subtitle's content.

    If a file-like object is passed, it is read in full before parsing.

    .. doctest::

        >>> subs = parse("""\
        ... 422
        ... 00:31:39,931 --> 00:31:41,931
        ... Using mainly spoons,
        ...
        ... 423
        ... 00:31:41,933 --> 00:31:43,435
        ... we dig a tunnel under the city and release it into the wild.
        ...
        ... """)
        >>> list(subs)  # doctest: +ELLIPSIS
        [Subtitle(...index=422...), Subtitle(...index=423...)]

    :param srt: Subtitles in SRT format
    :type srt: str or a file-like object
    :param ignore_errors: If True, garbled SRT data will be ignored, and we'll
                          continue trying to parse the rest of the file,
                          instead of raising :py:class:`SRTParseError` and
                          stopping execution.
    :returns: The subtitles contained in the SRT file as :py:class:`Subtitle`
              objects
    :rtype: :term:`generator` of :py:class:`Subtitle` objects
    :raises SRTParseError: If the matches are not contiguous and
                           ``ignore_errors`` is False.
    '''

    # Offset at which the next match has to begin for the input to be
    # contiguous; updated to the end of each match after it is yielded.
    expected_start = 0

    # Transparently read files -- the whole thing is needed for regex's
    # finditer
    if isinstance(srt, FILE_TYPES):
        srt = srt.read()

    for match in SRT_REGEX.finditer(srt):
        actual_start = match.start()
        # Any gap between the previous match and this one is unparsed data.
        _check_contiguity(srt, expected_start, actual_start, ignore_errors)
        raw_index, raw_start, raw_end, proprietary, content = match.groups()

        # pytype sees that this is Optional[str] and thus complains that they
        # can be None, but they can't realistically be None, since we're using
        # finditer and all match groups are mandatory in the regex.
        content = content.replace("\r\n", "\n")  # pytype: disable=attribute-error

        try:
            raw_index = int(raw_index)
        except ValueError:
            # Index 123.4. Handled separately, since it's a rare case and we
            # don't want to affect general performance.
            #
            # The pytype disable is for the same reason as content, above.
            raw_index = int(raw_index.split(".")[0])  # pytype: disable=attribute-error

        yield Subtitle(
            index=raw_index,
            start=srt_timestamp_to_timedelta(raw_start),
            end=srt_timestamp_to_timedelta(raw_end),
            content=content,
            proprietary=proprietary,
        )

        expected_start = match.end()

    # Whatever follows the last match (if anything) is unparsed trailing data.
    _check_contiguity(srt, expected_start, len(srt), ignore_errors)
370 | 
371 | 
372 | def _check_contiguity(srt, expected_start, actual_start, warn_only):
373 |     """
374 |     If ``warn_only`` is False, raise :py:class:`SRTParseError` with diagnostic
375 |     info if expected_start does not equal actual_start. Otherwise, log a
376 |     warning.
377 | 
378 |     :param str srt: The data being matched
379 |     :param int expected_start: The expected next start, as from the last
380 |                                iteration's match.end()
381 |     :param int actual_start: The actual start, as from this iteration's
382 |                              match.start()
383 |     :raises SRTParseError: If the matches are not contiguous and ``warn_only``
384 |                            is False
385 |     """
386 |     if expected_start != actual_start:
387 |         unmatched_content = srt[expected_start:actual_start]
388 | 
389 |         if expected_start == 0 and (
390 |             unmatched_content.isspace() or unmatched_content == "\ufeff"
391 |         ):
392 |             # #50: Leading whitespace has nowhere to be captured like in an
393 |             # intermediate subtitle
394 |             return
395 | 
396 |         if warn_only:
397 |             LOG.warning("Skipped unparseable SRT data: %r", unmatched_content)
398 |         else:
399 |             raise SRTParseError(expected_start, actual_start, unmatched_content)
400 | 
401 | 
def compose(
    subtitles, reindex=True, start_index=1, strict=True, eol=None, in_place=False
):
    r"""
    Join an iterator of :py:class:`Subtitle` objects into a single string of
    SRT blocks.

    .. doctest::

        >>> from datetime import timedelta
        >>> start = timedelta(seconds=1)
        >>> end = timedelta(seconds=2)
        >>> subs = [
        ...     Subtitle(index=1, start=start, end=end, content='x'),
        ...     Subtitle(index=2, start=start, end=end, content='y'),
        ... ]
        >>> compose(subs)  # doctest: +ELLIPSIS
        '1\n00:00:01,000 --> 00:00:02,000\nx\n\n2\n00:00:01,000 --> ...'

    :param subtitles: The subtitles to convert to SRT blocks
    :type subtitles: :term:`iterator` of :py:class:`Subtitle` objects
    :param bool reindex: Whether to reindex subtitles based on start time
    :param int start_index: If reindexing, the index to start reindexing from
    :param bool strict: Whether to enable strict mode, see
                        :py:func:`Subtitle.to_srt` for more information
    :param str eol: The end of line string to use (default "\\n")
    :param bool in_place: Whether to reindex subs in-place for performance
                          (version <=1.0.0 behaviour)
    :returns: A single SRT formatted string, with each input
              :py:class:`Subtitle` represented as an SRT block
    :rtype: str
    """
    source = subtitles
    if reindex:
        source = sort_and_reindex(source, start_index=start_index, in_place=in_place)

    blocks = [entry.to_srt(strict=strict, eol=eol) for entry in source]
    return "".join(blocks)
440 | 
441 | 
class SRTParseError(Exception):
    """
    Signals that part of an SRT block could not be parsed.

    :param int expected_start: The expected contiguous start index
    :param int actual_start: The actual non-contiguous start index
    :param str unmatched_content: The content between the expected start index
                                  and the actual start index
    """

    def __init__(self, expected_start, actual_start, unmatched_content):
        template = (
            "Expected contiguous start of match or end of input at char %d, "
            "but started at char %d (unmatched content: %r)"
        )
        details = (expected_start, actual_start, unmatched_content)
        super().__init__(template % details)

        # Keep the raw values available for programmatic inspection.
        self.expected_start, self.actual_start, self.unmatched_content = details
463 | 
464 | 
class TimestampParseError(ValueError):
    """
    Raised when an SRT timestamp could not be parsed.

    Subclasses :py:class:`ValueError`, so callers catching ``ValueError`` also
    catch this.
    """
469 | 
470 | 
class _ShouldSkipException(Exception):
    """
    Raised when a subtitle should be skipped.

    The exception message is the human-readable reason for skipping.
    """
475 | 


--------------------------------------------------------------------------------
/srt/tools/README.rst:
--------------------------------------------------------------------------------
  1 | srt3 tools contains tools written to process SRT files. All tools use
  2 | the Python srt3_ library internally.
  3 | 
  4 | .. _srt3: https://github.com/switchupcb/srt3
  5 | 
  6 | Library Usage
  7 | -------------
  8 | You can use the srt3 tools module in your Python projects. Check the `Detailed
  9 | API documentation`_ for more information.
 10 | 
 11 | Command Line Interface Usage
 12 | ----------------------------
 13 | 
 14 | You can call ``srt`` directly to see a list of all available utilities.
 15 | 
 16 | .. code::
 17 | 
 18 |     srt [tool_name] [args ...]
 19 | 
 20 | Arbitrary actions can be done with *srt match*, for example:
 21 | 
 22 | .. code::
 23 | 
 24 |     # Strip HTML
 25 |     srt match -m re -fp 'lambda sub: re.sub("<[^<]+?>", "", sub)'
 26 | 
 27 |     # Only keep Chinese subtitles
 28 |     srt match -m hanzidentifier -fm hanzidentifier.has_chinese
 29 | 
 30 |     # Do Both
 31 |     srt match -m re -m hanzidentifier -fm hanzidentifier.has_chinese -fp 'lambda sub: re.sub("<[^<]+?>", "", sub)'
 32 | 
 33 | Tools
 34 | -----
 35 | 
 36 | .. list-table::
 37 |    :widths: 25 50 20
 38 |    :header-rows: 1
 39 |    :align: center
 40 | 
 41 |    * - Tool
 42 |      - Description
 43 |      - Arguments (--)
 44 |    * - ADD
 45 |      - Add a subtitle with the option to move subsequent captions.
 46 |      - start -s, end -e, content -c, adjust -a
 47 |    * - DEDUPLICATE
 48 |      - Remove subtitles with duplicate content.
 49 |      - ms -t
 50 |    * - FIND
 51 |      - Find subtitles by timestamp in sequential or non-sequential order. Placing timestamps non-sequentially finds subtitles up to start and after end.
 52 |      - start -s, end -e, adjust -a
 53 |    * - FIXED TIMESHIFT
 54 |      - Shift subtitles by a fixed amount of time.
 55 |      - seconds -s
 56 |    * - LINEAR TIMESHIFT
 57 |      - Shift the linear rate of each subtitle. Useful for videos that have been sped up or slowed.
 58 |      - from-start --f1, from-end --f2, to-start --t1, to-end --t2
 59 |    * - MATCH
 60 |      - Match subtitle-content using a provided conditional function. Process lines that are matched. Lines that aren't matched are removed.
 61 |      - module -m, match -fm, process -fp, lines -l
 62 |    * - MUX
 63 |      - Multiplex_ multiple subtitles together into one. Useful for creating bilingual subtitles. Supports merging subtitles with similar start/end times to the same time.
 64 |      - ms, width -w, top-and-bottom -t, no-time-matching --nt
 65 |    * - NORMALIZE
 66 |      - Clean SRT Files and standardize them. Removes invalid newlines, normalizes timestamps, and fixes subtitle indexing with compliant data.
 67 |      -
 68 |    * - PASTE
 69 |      - Paste subtitles into/before other subtitles at a given timestamp. Add space that precedes the copied subtitles.
 70 |      - t1, t2, paste -p, space -s, block -b, zero -z
 71 |    * - SPLIT
 72 |      - Split subtitles at a given timestamp.
 73 |      - timestamp -t
 74 | 
 75 | Default Arguments
 76 | -----------------
 77 | .. list-table::
 78 |   :widths: 25 50 20
 79 |   :header-rows: 1
 80 |   :align: center
 81 | 
 82 |   * - Argument (--)
 83 |     - Description
 84 |     - Option
 85 |   * - input
 86 |     - The file to process (default: stdin).
 87 |     - -i
 88 |   * - output
 89 |     - The file to write to (default: stdout).
 90 |     - -o
 91 |   * - inplace
 92 |     - Modify the file in place.
 93 |     - -q
 94 |   * - encoding
 95 |     - The encoding to read/write files in (default: utf8).
 96 |     -
 97 |   * - ignore-parsing-errors
 98 |     - Attempt to continue when there are parsing errors.
 99 |     - -x
100 |   * - no-strict
101 |     - Allow blank lines in output. Your media player may explode!
102 |     -
103 |   * - debug
104 |     - Enable debug logging.
105 |     -
106 |   * - help
107 |     - The default option for help (--help does NOT apply).
108 |     - -h
109 | 
110 | .. _`Multiplex`: https://en.wikipedia.org/wiki/Multiplexing
111 | .. _`Detailed API documentation`: https://srt3.readthedocs.io/en/latest/api.html
112 | 


--------------------------------------------------------------------------------
/srt/tools/__init__.py:
--------------------------------------------------------------------------------
 1 | """srt3 tools perform tasks using the srt module."""
 2 | import os
 3 | import importlib
 4 | 
 5 | folder_path = os.path.normpath(os.path.join(__file__, os.pardir))
 6 | for file in os.listdir(folder_path):
 7 |     if not file.startswith("_") and file.endswith(".py"):
 8 |         importlib.import_module(f"srt.tools.{file[:-3]}")
 9 | del folder_path
10 | del file
11 | 


--------------------------------------------------------------------------------
/srt/tools/_cli.py:
--------------------------------------------------------------------------------
  1 | #!/usr/bin/python3
  2 | 
  3 | import argparse
  4 | import codecs
  5 | import sys
  6 | import itertools
  7 | import collections
  8 | import os
  9 | import logging
 10 | import srt
 11 | 
 12 | 
# Program name passed to argparse: the invoked script's basename with its
# first "-" replaced by a space.
PROG_NAME = os.path.basename(sys.argv[0]).replace("-", " ", 1)

# Use the ``buffer`` attribute of stdin/stdout when it exists, falling back to
# the stream objects themselves.
STDIN_BYTESTREAM = getattr(sys.stdin, "buffer", sys.stdin)
STDOUT_BYTESTREAM = getattr(sys.stdout, "buffer", sys.stdout)

# Maps an argument kind to the standard stream that "-" stands for.
DASH_STREAM_MAP = {"input": STDIN_BYTESTREAM, "output": STDOUT_BYTESTREAM}

log = logging.getLogger(__name__)
 21 | 
 22 | 
def noop(stream):
    """
    Identity function: return ``stream`` unchanged.

    Used when we didn't explicitly specify a stream to avoid using
    codecs.get{reader,writer}

    :param stream: Any object
    :returns: The same object that was passed in
    """
    return stream
 29 | 
 30 | 
 31 | def dash_to_stream(arg, arg_type):
 32 |     if arg == "-":
 33 |         return DASH_STREAM_MAP[arg_type]
 34 |     return arg
 35 | 
 36 | 
def basic_parser(
    description=None,
    multi_input=False,
    no_output=False,
    examples=None,
    hide_no_strict=False,
):
    """
    Build an :py:class:`argparse.ArgumentParser` with the options shared by
    the tools.

    :param str description: Description passed to the parser
    :param bool multi_input: If True, ``--input``/``-i`` is required and may be
                             given several times (values are collected in a
                             list); otherwise a single input defaulting to
                             stdin is accepted
    :param bool no_output: If True, the ``--output`` and ``--inplace`` options
                           are not added
    :param examples: Optional mapping of example description to example
                     command, rendered in the help epilog
    :param bool hide_no_strict: If True, ``--no-strict`` is still accepted but
                                hidden from the help output
    :returns: The configured parser
    :rtype: argparse.ArgumentParser
    """
    example_lines = []

    if examples is not None:
        example_lines.append("examples:")

        for desc, code in examples.items():
            example_lines.append("  {}".format(desc))
            example_lines.append("    $ {}\n".format(code))

    # RawDescriptionHelpFormatter keeps the line breaks of the epilog intact.
    parser = argparse.ArgumentParser(
        prog=PROG_NAME,
        description=description,
        epilog="\n".join(example_lines),
        formatter_class=argparse.RawDescriptionHelpFormatter,
    )

    # Can't use argparse.FileType as we need to know the encoding from the args

    if multi_input:
        parser.add_argument(
            "--input",
            "-i",
            metavar="FILE",
            action="append",
            type=lambda arg: dash_to_stream(arg, "input"),
            help="The files to process.",
            required=True,
        )
    else:
        parser.add_argument(
            "--input",
            "-i",
            metavar="FILE",
            default=STDIN_BYTESTREAM,
            type=lambda arg: dash_to_stream(arg, "input"),
            help="The file to process (default: stdin).",
        )

    if not no_output:
        parser.add_argument(
            "--output",
            "-o",
            metavar="FILE",
            default=STDOUT_BYTESTREAM,
            type=lambda arg: dash_to_stream(arg, "output"),
            help="The file to write to (default: stdout).",
        )
        # --inplace is only offered for single-input parsers.
        if not multi_input:
            parser.add_argument(
                "--inplace",
                "-q",
                action="store_true",
                help="Modify the file in place.",
            )

    parser.add_argument(
        "--encoding", help="The encoding to read/write files in (default: utf8)."
    )

    parser.add_argument(
        "--ignore-parsing-errors",
        "-x",
        action="store_true",
        help="Attempt to continue when there are parsing errors.",
    )

    shelp = "Allow blank lines in output. Your media player may explode!"
    if hide_no_strict:
        shelp = argparse.SUPPRESS

    # --no-strict stores False into ``args.strict`` (which defaults to True).
    parser.add_argument("--no-strict", action="store_false", dest="strict", help=shelp)
    parser.add_argument(
        "--debug",
        action="store_const",
        dest="log_level",
        const=logging.DEBUG,
        default=logging.INFO,
        help="Enable debug logging.",
    )

    # if len(sys.argv) == 1:
    #     parser.error("unrecognized arguments: None")
    return parser
127 | 
128 | 
def set_basic_args(args):
    """
    Resolve the ``input``/``output`` values of parsed arguments in place.

    After this call ``args.input`` holds the generator returned by
    :py:func:`srt.parse` (or a list of such generators for multi-input
    parsers) and ``args.output``, if present, holds an encoding stream writer.

    :param args: The namespace produced by a parser from ``basic_parser``
    :raises ValueError: If ``--inplace`` is combined with stdin input or with
                        an explicit ``--output``
    """
    # collections.MutableSequence was removed in Python 3.10; the ABC lives in
    # collections.abc.
    from collections.abc import MutableSequence

    # TODO: dedupe some of this
    if getattr(args, "inplace", None):
        if args.input == DASH_STREAM_MAP["input"]:
            raise ValueError("Cannot use --inplace on stdin")

        if args.output != DASH_STREAM_MAP["output"]:
            raise ValueError("Cannot use -o and -q together")

        # The input is read completely before the output file is opened for
        # writing, so pointing both at the same path is safe.
        args.output = args.input

    # We don't use system default encoding, because usually one runs this
    # on files they got from elsewhere. As such, be opinionated that these
    # files are probably UTF-8. Looking for the BOM on reading allows us to
    # be more liberal with what we accept, without adding BOMs on write.
    read_encoding = args.encoding or "utf-8-sig"
    write_encoding = args.encoding or "utf-8"

    r_enc = codecs.getreader(read_encoding)
    w_enc = codecs.getwriter(write_encoding)

    for stream_name in ("input", "output"):
        log.debug('Processing stream "%s"', stream_name)

        try:
            stream = getattr(args, stream_name)
        except AttributeError:
            # For example, in the case of no_output
            continue

        log.debug("Got %r as stream", stream)
        # We don't use encoding= option to open because we want to have the
        # same universal newlines behaviour as STD{IN,OUT}_BYTESTREAM
        if stream in DASH_STREAM_MAP.values():
            log.debug("%s in DASH_STREAM_MAP", stream_name)
            if stream is args.input:
                args.input = srt.parse(
                    r_enc(args.input).read(), ignore_errors=args.ignore_parsing_errors
                )
            elif stream is args.output:
                # Since args.output is not in text mode (since we didn't
                # earlier know the encoding), we have no universal newline
                # support and need to do it ourselves
                args.output = w_enc(args.output)
        else:
            log.debug("%s not in DASH_STREAM_MAP", stream_name)
            if stream is args.input:
                if isinstance(args.input, MutableSequence):
                    # Multi-input: each entry is either a standard stream or a
                    # path; replace it with its parsed subtitles.
                    for i, input_fn in enumerate(args.input):
                        if input_fn in DASH_STREAM_MAP.values():
                            args.input[i] = srt.parse(
                                r_enc(input_fn).read(),
                                ignore_errors=args.ignore_parsing_errors,
                            )
                        else:
                            with r_enc(open(input_fn, "rb")) as f:
                                args.input[i] = srt.parse(
                                    f.read(), ignore_errors=args.ignore_parsing_errors
                                )
                else:
                    with r_enc(open(stream, "rb")) as f:
                        args.input = srt.parse(
                            f.read(), ignore_errors=args.ignore_parsing_errors
                        )
            else:
                # Left open on purpose: the caller writes to it later.
                args.output = w_enc(open(args.output, "wb"))
198 | 
199 | 
def compose_suggest_on_fail(subs, strict=True):
    """
    Compose subtitles to SRT text using the platform line separator, logging
    an encoding hint if parsing fails along the way.

    :param subs: :py:class:`Subtitle` objects, typically the lazy generator
                 returned by :py:func:`srt.parse`
    :param bool strict: Passed through to :py:func:`srt.compose`
    :returns: The composed SRT text
    :rtype: str
    :raises srt.SRTParseError: Re-raised after logging the hint
    """
    try:
        return srt.compose(subs, strict=strict, eol=os.linesep, in_place=True)
    except srt.SRTParseError:
        # Since `subs` is actually a generator, parse errors only surface
        # here, while it is being consumed.
        log.critical(
            "Parsing failed, maybe you need to pass a different encoding "
            "with --encoding?"
        )
        raise
210 | 
211 | 
def sliding_window(seq, width=2, inclusive=True):
    """
    Yield tuples of up to ``width`` consecutive elements of ``seq``, advancing
    one element at a time.

    If inclusive is True, we also include final elements where len(sliced) <
    width.
    """
    source = iter(seq)

    # Fill the first window with up to `width` elements.
    window = tuple(itertools.islice(source, width))

    if len(window) != width and not inclusive:
        return

    yield window

    for item in source:
        window = window[1:] + (item,)
        yield window

    if not inclusive:
        return

    # Emit the progressively shorter tails of the last window.
    for offset in range(1, len(window)):
        yield window[offset:]
235 | 


--------------------------------------------------------------------------------
/srt/tools/_srt.py:
--------------------------------------------------------------------------------
 1 | #!/usr/bin/python3
 2 | 
 3 | import os
 4 | import sys
 5 | import importlib
 6 | 
 7 | 
 8 | def commands():
 9 |     commands = set()
10 |     folder_path = os.path.normpath(os.path.join(__file__, os.pardir))
11 |     for script in os.listdir(folder_path):
12 |         if not script.startswith("_") and script.endswith(".py"):
13 |             commands.add(script[:-3])
14 |     return sorted(commands)
15 | 
16 | 
def show_help():
    """Print the list of available commands to stdout."""
    header = (
        "Available commands "
        "(pass --help to a specific command for usage information):\n"
    )
    print(header)
    for name in commands():
        print("- {}".format(name))
24 | 
25 | 
def main():
    """
    Dispatch ``srt <command> [args ...]`` to the matching tool module.

    Shows the command list and exits with status 0 when no command is given
    (or the first argument is an option), and exits with status 1 for an
    unknown command.
    """
    argv = sys.argv
    no_command_given = len(argv) < 2 or argv[1].startswith("-")
    if no_command_given:
        show_help()
        sys.exit(0)

    command = argv[1]
    if command not in commands():
        print('Unknown command: "{}"'.format(command))
        show_help()
        sys.exit(1)

    # Drop the launcher name so that the tool sees itself as argv[0].
    sys.argv = sys.argv[1:]
    tool = importlib.import_module("srt.tools." + command, "")
    tool.main()


if __name__ == "__main__":  # pragma: no cover
    main()
44 | 


--------------------------------------------------------------------------------
/srt/tools/_utils.py:
--------------------------------------------------------------------------------
 1 | #!/usr/bin/python3
 2 | 
 3 | 
 4 | def tryNext(subs):
 5 |     """Finds the next subtitle in an iterator otherwise returns None."""
 6 |     try:
 7 |         return next(subs)
 8 |     except StopIteration:
 9 |         return None
10 | 


--------------------------------------------------------------------------------
/srt/tools/add.py:
--------------------------------------------------------------------------------
  1 | #!/usr/bin/python3
  2 | 
  3 | """Add a subtitle to subtitles."""
  4 | 
  5 | import datetime
  6 | import logging
  7 | import srt
  8 | from types import GeneratorType
  9 | from . import _cli
 10 | from . import _utils
 11 | 
 12 | log = logging.getLogger(__name__)
 13 | 
 14 | 
 15 | def add(subs, start, end, content="", adjust=False):
 16 |     """
 17 |     Adds a subtitle to subtitles in the correct position.
 18 | 
 19 |     :param subs: :py:class:`Subtitle` objects
 20 |     :param datetime.timedelta start: The timestamp the subtitle starts at.
 21 |     :param datetime.timedelta end: The timestamp the subtitle ends at.
 22 |     :param boolean adjust: Whether to adjust the timestamps of subsequent subtitles.
 23 |     :rtype: :term:`generator` of :py:class:`Subtitle` objects
 24 |     """
 25 |     if end <= start:
 26 |         raise ValueError(
 27 |             "The end timestamp can't occur before or at the start timestamp."
 28 |         )
 29 | 
 30 |     # ensures list compatibility
 31 |     subs = (x for x in subs) if not isinstance(subs, GeneratorType) else subs
 32 | 
 33 |     # Add the subtitle in the correct position.
 34 |     added = False
 35 |     idx = 1
 36 |     adjust_time = datetime.timedelta(0)
 37 |     subtitle = _utils.tryNext(subs)
 38 |     while subtitle is not None:
 39 |         subtitle_start = subtitle.start
 40 | 
 41 |         if not added and (
 42 |             (start == subtitle_start and end < subtitle.end) or start < subtitle_start
 43 |         ):
 44 |             yield srt.Subtitle(
 45 |                 idx,
 46 |                 start,
 47 |                 end,
 48 |                 content,
 49 |             )
 50 |             idx += 1
 51 |             adjust_time = end - start if adjust else adjust_time
 52 |             added = True
 53 | 
 54 |         yield srt.Subtitle(
 55 |             idx,
 56 |             subtitle_start + adjust_time,
 57 |             subtitle.end + adjust_time,
 58 |             subtitle.content,
 59 |         )
 60 |         idx += 1
 61 |         subtitle = _utils.tryNext(subs)
 62 | 
 63 |     if not added:
 64 |         yield srt.Subtitle(
 65 |             idx,
 66 |             start,
 67 |             end,
 68 |             content,
 69 |         )
 70 | 
 71 | 
 72 | # Command Line Interface
 73 | def set_args():
 74 |     examples = {
 75 |         "Add a subtitle": 'srt add -i example.srt -s 00:00:5,00 -e 00:00:5,00 -c "srt3 is awesome."',
 76 |         "Add a subtitle and adjust subsequent ones": 'srt add -i example.srt -s 00:00:5,00 -e 00:00:5,00 --c "srt3 is awesome." -a',
 77 |     }
 78 |     parser = _cli.basic_parser(description=__doc__, examples=examples)
 79 |     parser.add_argument(
 80 |         "--start",
 81 |         "-s",
 82 |         metavar=("TIMESTAMP"),
 83 |         type=lambda arg: srt.srt_timestamp_to_timedelta(arg),
 84 |         default=datetime.timedelta(0),
 85 |         nargs="?",
 86 |         help="The timestamp to start the subtitle at.",
 87 |     )
 88 |     parser.add_argument(
 89 |         "--end",
 90 |         "-e",
 91 |         metavar=("TIMESTAMP"),
 92 |         type=lambda arg: srt.srt_timestamp_to_timedelta(arg),
 93 |         default=datetime.timedelta(0),
 94 |         nargs="?",
 95 |         help="The timestamp to stop the subtitle at.",
 96 |     )
 97 |     parser.add_argument(
 98 |         "-c", "--content", required=True, help="The content of the subtitle."
 99 |     )
100 |     parser.add_argument(
101 |         "--adjust",
102 |         "-a",
103 |         action="store_true",
104 |         help="Adjust the timestamps of subsequent subtitles.",
105 |     )
106 |     return parser.parse_args()
107 | 
108 | 
def main():
    """Command line entry point: add one subtitle and write the composed result."""
    args = set_args()
    logging.basicConfig(level=args.log_level)
    _cli.set_basic_args(args)
    with_new_subtitle = add(
        args.input,
        start=args.start,
        end=args.end,
        content=args.content,
        adjust=args.adjust,
    )
    composed = _cli.compose_suggest_on_fail(with_new_subtitle, strict=args.strict)
    args.output.write(composed)
116 | 
117 | 
118 | if __name__ == "__main__":  # pragma: no cover
119 |     main()
120 | 


--------------------------------------------------------------------------------
/srt/tools/deduplicate.py:
--------------------------------------------------------------------------------
 1 | #!/usr/bin/python3
 2 | 
 3 | """Remove subtitles with duplicated content."""
 4 | 
 5 | import datetime
 6 | import logging
 7 | from . import _cli
 8 | 
 9 | 
10 | log = logging.getLogger(__name__)
11 | 
12 | 
def deduplicate(orig_subs, acceptable_diff):
    r"""
    Removes subtitles with duplicated content (in-place).

    Subtitles are sorted by ``(content, start)`` and each one is compared with
    the subtitle sorted directly before it. It is removed when both have the
    same content and it starts no later than ``acceptable_diff`` after that
    predecessor. A falsy ``acceptable_diff`` (such as a zero timedelta)
    removes repeated content regardless of how far apart the subtitles are.

    :param list orig_subs: :py:class:`Subtitle` objects; duplicates are
                           deleted from this list.
    :param datetime.timedelta acceptable_diff: The maximum distance between
                                    the start times of two subtitles for the
                                    later one to count as a duplicate.
    :returns: None; ``orig_subs`` is modified in place.
    """
    indices_to_remove = set()

    # If we only store the subtitle itself and compare that, it's possible that
    # we'll not only remove the duplicate, but also the _original_ subtitle if
    # they have the same sub index/times/etc.
    #
    # As such, we need to also store the index in the original subs list that
    # this entry belongs to for each subtitle prior to sorting.
    sorted_subs = sorted(
        enumerate(orig_subs), key=lambda sub: (sub[1].content, sub[1].start)
    )

    for subs in _cli.sliding_window(sorted_subs, width=2, inclusive=False):
        cur_idx, cur_sub = subs[0]
        next_idx, next_sub = subs[1]

        if cur_sub.content == next_sub.content and (
            not acceptable_diff or cur_sub.start + acceptable_diff >= next_sub.start
        ):
            log.debug(
                "Marking l%d/s%d for removal, duplicate of l%d/s%d",
                next_idx,
                next_sub.index,
                cur_idx,
                cur_sub.index,
            )
            indices_to_remove.add(next_idx)

    # A set has no defined iteration order, so a running offset cannot be
    # relied upon. Deleting from the highest index down keeps every remaining
    # index valid without any offset bookkeeping.
    for idx in sorted(indices_to_remove, reverse=True):
        del orig_subs[idx]
54 | 
55 | 
def set_args():
    """Build the argument parser of the ``deduplicate`` tool and parse the command line.

    :returns: The namespace produced by ``parser.parse_args()``.
    """
    examples = {
        "Remove duplicated subtitles within 5 seconds of each other": "srt deduplicate -i duplicated.srt",
        "Remove duplicated subtitles within 500 milliseconds of each other": "srt deduplicate -t 500 -i duplicated.srt",
        "Remove duplicated subtitles regardless of temporal proximity": "srt deduplicate -t 0 -i duplicated.srt",
    }
    parser = _cli.basic_parser(
        description=__doc__,
        examples=examples,
    )
    parser.add_argument(
        "--ms",
        "-t",
        metavar="MILLISECONDS",
        default=datetime.timedelta(milliseconds=5000),
        type=lambda ms: datetime.timedelta(milliseconds=int(ms)),
        # Adjacent literals are concatenated verbatim, so each fragment needs
        # its own trailing space.
        help="how many milliseconds distance a subtitle start time must be "
        "within of another to be considered a duplicate "
        "(default: 5000ms)",
    )

    return parser.parse_args()
78 | 
79 | 
def main():
    """Command line entry point: deduplicate the input and write the composed result."""
    args = set_args()
    logging.basicConfig(level=args.log_level)
    _cli.set_basic_args(args)

    # deduplicate() works in place, so the input has to be materialised first.
    remaining = list(args.input)
    deduplicate(remaining, args.ms)
    composed = _cli.compose_suggest_on_fail(remaining, strict=args.strict)
    args.output.write(composed)
89 | 
90 | 
91 | if __name__ == "__main__":  # pragma: no cover
92 |     main()
93 | 


--------------------------------------------------------------------------------
/srt/tools/find.py:
--------------------------------------------------------------------------------
  1 | #!/usr/bin/python3
  2 | 
  3 | """Find subtitles by timestamp."""
  4 | 
  5 | import datetime
  6 | import logging
  7 | import srt
  8 | from types import GeneratorType
  9 | from . import _cli
 10 | from . import _utils
 11 | 
 12 | 
 13 | log = logging.getLogger(__name__)
 14 | 
 15 | 
 16 | def find_by_timestamp(
 17 |     subs,
 18 |     timestamp_one=datetime.timedelta(0),
 19 |     timestamp_two=datetime.timedelta(0),
 20 |     adjust=False,
 21 | ):
 22 |     """
 23 |     Finds subtitles from subtitles by timestamp.
 24 |     When timestamp one > timestamp two, subtitles up to timestamp two and
 25 |     subtitles after timestamp one will be found.
 26 | 
 27 |     :param subs: :py:class:`Subtitle` objects
 28 |     :param datetime.timedelta timestamp_one: The timestamp to find from.
 29 |     :param datetime.timedelta timestamp_two: The timestamp to find to.
 30 |     :param boolean adjust: Whether to adjust the timestamps of found subtitles.
 31 |     :rtype: :term:`generator` of :py:class:`Subtitle` objects
 32 |     """
 33 |     # ensure subs is iterable
 34 |     subs = (x for x in subs) if not isinstance(subs, GeneratorType) else subs
 35 | 
 36 |     # Split the subtitle at the start and end of the block(s).
 37 |     subs = srt.tools.split.split(subs, timestamp_one)
 38 |     subs = srt.tools.split.split(subs, timestamp_two)
 39 | 
 40 |     # edge cases
 41 |     subtitle = _utils.tryNext(subs)
 42 |     sequential = timestamp_one < timestamp_two
 43 |     if subtitle is None or (sequential and timestamp_two <= subtitle.start):
 44 |         return
 45 | 
 46 |     # Find the subtitles using a generator.
 47 |     idx = 1
 48 |     adjust_time = timestamp_one if adjust else datetime.timedelta(0)
 49 |     while subtitle is not None:
 50 |         start = subtitle.start
 51 | 
 52 |         if (
 53 |             timestamp_one == timestamp_two
 54 |             or (sequential and timestamp_one <= start and start < timestamp_two)
 55 |             or (not sequential and (start < timestamp_two or timestamp_one <= start))
 56 |         ):
 57 |             yield srt.Subtitle(
 58 |                 idx,
 59 |                 subtitle.start - adjust_time,
 60 |                 subtitle.end - adjust_time,
 61 |                 subtitle.content,
 62 |             )
 63 |             idx += 1
 64 | 
 65 |         subtitle = _utils.tryNext(subs)
 66 | 
 67 | 
 68 | # Command Line Interface
 69 | def set_args():
 70 |     examples = {
 71 |         "Find subtitles from :05 - :08": "srt find -i example.srt -s 00:00:5,00 -e 00:00:8,00",
 72 |         "Find subtitles from :00 - :05 and :08 onwards": "srt find -i example.srt -s 00:00:8,00 -e 00:00:5,00",
 73 |         "Find subtitles from :00 - :16 and adjust the timestamps of found subtitles": "srt find -i example.srt -e 00:00:16,00",
 74 |         "Find subtitles from :16 onwards and zero the block.": "srt find -i example.srt -s 00:00:16,00 -a",
 75 |         "Find every subtitle": "srt find -i example.srt",
 76 |     }
 77 |     parser = _cli.basic_parser(description=__doc__, examples=examples)
 78 |     parser.add_argument(
 79 |         "--start",
 80 |         "-s",
 81 |         metavar=("TIMESTAMP"),
 82 |         type=lambda arg: srt.srt_timestamp_to_timedelta(arg),
 83 |         default=datetime.timedelta(0),
 84 |         nargs="?",
 85 |         help="The timestamp to start removing from.",
 86 |     )
 87 |     parser.add_argument(
 88 |         "--end",
 89 |         "-e",
 90 |         metavar=("TIMESTAMP"),
 91 |         type=lambda arg: srt.srt_timestamp_to_timedelta(arg),
 92 |         default=datetime.timedelta(0),
 93 |         nargs="?",
 94 |         help="The timestamp to stop removing at.",
 95 |     )
 96 |     parser.add_argument(
 97 |         "--adjust",
 98 |         "-a",
 99 |         action="store_true",
100 |         help="Adjust the timestamps of subtitles by placing the first found subtitle at 00:00.",
101 |     )
102 |     return parser.parse_args()
103 | 
104 | 
def main():
    """Command line entry point: find subtitles by timestamp and write the composed result."""
    args = set_args()
    logging.basicConfig(level=args.log_level)
    _cli.set_basic_args(args)
    matches = find_by_timestamp(
        args.input,
        timestamp_one=args.start,
        timestamp_two=args.end,
        adjust=args.adjust,
    )
    composed = _cli.compose_suggest_on_fail(matches, strict=args.strict)
    args.output.write(composed)
112 | 
113 | 
114 | if __name__ == "__main__":  # pragma: no cover
115 |     main()
116 | 


--------------------------------------------------------------------------------
/srt/tools/fixed_timeshift.py:
--------------------------------------------------------------------------------
 1 | #!/usr/bin/python3
 2 | 
 3 | """Shifts subtitles by a fixed number of seconds."""
 4 | 
 5 | import datetime
 6 | import logging
 7 | from . import _cli
 8 | 
 9 | log = logging.getLogger(__name__)
10 | 
11 | 
def timeshift(subtitles, seconds_to_shift):
    """
    Moves every given subtitle by the same amount of time.

    Each subtitle is modified in place (both its start and its end) and then
    yielded, so nothing changes until the generator is consumed.

    :param subtitles: :py:class:`Subtitle` objects
    :param float seconds_to_shift: The amount of seconds to shift; a negative
                                   value makes the subtitles earlier.
    :rtype: :term:`generator` of :py:class:`Subtitle` objects
    """
    offset = datetime.timedelta(seconds=seconds_to_shift)
    for sub in subtitles:
        sub.start = sub.start + offset
        sub.end = sub.end + offset
        yield sub
25 | 
26 | 
def set_args():
    """Build the argument parser of the ``fixed_timeshift`` tool and parse the command line.

    :returns: The namespace produced by ``parser.parse_args()``.
    """
    examples = {
        "Make all subtitles 5 seconds later": "srt fixed_timeshift -s 5",
        "Make all subtitles 5 seconds earlier": "srt fixed_timeshift --seconds -5",
    }

    parser = _cli.basic_parser(description=__doc__, examples=examples)
    parser.add_argument(
        "--seconds",
        "-s",
        type=float,
        required=True,
        help="The amount of seconds to shift subtitles by.",
    )
    return parser.parse_args()
42 | 
43 | 
def main():
    """Command line entry point: shift every subtitle and write the composed result."""
    args = set_args()
    logging.basicConfig(level=args.log_level)
    _cli.set_basic_args(args)
    shifted = timeshift(args.input, seconds_to_shift=args.seconds)
    composed = _cli.compose_suggest_on_fail(shifted, strict=args.strict)
    args.output.write(composed)
51 | 
52 | 
53 | if __name__ == "__main__":  # pragma: no cover
54 |     main()
55 | 


--------------------------------------------------------------------------------
/srt/tools/linear_timeshift.py:
--------------------------------------------------------------------------------
  1 | #!/usr/bin/python3
  2 | 
  3 | """Perform linear time correction on a subtitle."""
  4 | 
  5 | import datetime
  6 | import logging
  7 | import srt
  8 | from . import _cli
  9 | 
 10 | log = logging.getLogger(__name__)
 11 | 
 12 | 
 13 | def _timedelta_to_milliseconds(delta):
 14 |     return delta.days * 86400000 + delta.seconds * 1000 + delta.microseconds / 1000
 15 | 
 16 | 
 17 | def _calc_correction(to_start, to_end, from_start, from_end):
 18 |     angular = (to_end - to_start) / (from_end - from_start)
 19 |     linear = to_end - angular * from_end
 20 |     return angular, linear
 21 | 
 22 | 
 23 | def _correct_timedelta(bad_delta, angular, linear):
 24 |     bad_msecs = _timedelta_to_milliseconds(bad_delta)
 25 |     good_msecs = round(bad_msecs * angular + linear)
 26 |     good_delta = datetime.timedelta(milliseconds=good_msecs)
 27 |     return good_delta
 28 | 
 29 | 
 30 | def timeshift(subtitles, angular, linear):
 31 |     """
 32 |     Performs a linear timeshift on given subtitles.
 33 | 
 34 |     :param subtitles: :py:class:`Subtitle` objects
 35 |     :param float angular:
 36 |     :param float linear:
 37 |     :rtype: :term:`generator` of :py:class:`Subtitle` objects
 38 |     """
 39 |     for subtitle in subtitles:
 40 |         subtitle.start = _correct_timedelta(subtitle.start, angular, linear)
 41 |         subtitle.end = _correct_timedelta(subtitle.end, angular, linear)
 42 |         yield subtitle
 43 | 
 44 | 
 45 | def set_args():
 46 |     def _srt_timestamp_to_milliseconds(parser, arg):
 47 |         try:
 48 |             delta = srt.srt_timestamp_to_timedelta(arg)
 49 |         except ValueError:
 50 |             parser.error("not a valid SRT timestamp: %s" % arg)
 51 |         else:
 52 |             return _timedelta_to_milliseconds(delta)
 53 | 
 54 |     examples = {
 55 |         "Stretch out a subtitle so that second 1 is 2, 2 is 4, etc": "srt linear_timeshift --f1 00:00:01,000 --t1 00:00:01,000 --f2 00:00:02,000 --t2 00:00:03,000"
 56 |     }
 57 | 
 58 |     parser = _cli.basic_parser(description=__doc__, examples=examples)
 59 |     parser.add_argument(
 60 |         "--from-start",
 61 |         "--f1",
 62 |         type=lambda arg: _srt_timestamp_to_milliseconds(parser, arg),
 63 |         required=True,
 64 |         help="The first desynchronised timestamp.",
 65 |     )
 66 |     parser.add_argument(
 67 |         "--from-end",
 68 |         "--f2",
 69 |         type=lambda arg: _srt_timestamp_to_milliseconds(parser, arg),
 70 |         required=True,
 71 |         help="The second desynchronised timestamp.",
 72 |     )
 73 |     parser.add_argument(
 74 |         "--to-start",
 75 |         "--t1",
 76 |         type=lambda arg: _srt_timestamp_to_milliseconds(parser, arg),
 77 |         required=True,
 78 |         help="The first synchronised timestamp.",
 79 |     )
 80 |     parser.add_argument(
 81 |         "--to-end",
 82 |         "--t2",
 83 |         type=lambda arg: _srt_timestamp_to_milliseconds(parser, arg),
 84 |         required=True,
 85 |         help="The second synchronised timestamp.",
 86 |     )
 87 |     return parser.parse_args()
 88 | 
 89 | 
 90 | def main():
 91 |     args = set_args()
 92 |     logging.basicConfig(level=args.log_level)
 93 |     angular, linear = _calc_correction(
 94 |         args.to_start, args.to_end, args.from_start, args.from_end
 95 |     )
 96 |     _cli.set_basic_args(args)
 97 |     corrected_subs = timeshift(args.input, angular, linear)
 98 |     output = _cli.compose_suggest_on_fail(corrected_subs, strict=args.strict)
 99 |     args.output.write(output)
100 | 
101 | 
102 | if __name__ == "__main__":  # pragma: no cover
103 |     main()
104 | 


--------------------------------------------------------------------------------
/srt/tools/match.py:
--------------------------------------------------------------------------------
 1 | #!/usr/bin/python3
 2 | 
 3 | """Filter and/or process subtitles' content that match a particular pattern."""
 4 | 
 5 | import importlib
 6 | import logging
 7 | from . import _cli
 8 | 
 9 | log = logging.getLogger(__name__)
10 | 
11 | 
12 | def _true(param):
13 |     """Always returns true for matching functionality."""
14 |     return True
15 | 
16 | 
17 | def _pass(param):
18 |     """Always returns the given parameter for process functionality."""
19 |     return param
20 | 
21 | 
22 | def match(subtitles, imports, func_match, func_process, lines):
23 |     """
24 |     Passes each matching subtitle-content to a function.
25 | 
26 |     :param subtitles: :py:class:`Subtitle` objects
27 |     :param imports: Modules to import in the context of the function.
28 |     :param str func_match: The function used to match lines.
29 |     :param str func_process: The function used to process subtitle content.
30 |     :param bool invert: Whether to only match lines that return False.
31 |     :param per_line: Whether to apply functions to each line of content
32 |                      (as opposed to the whole content string).
33 |     :rtype: :term:`generator` of :py:class:`Subtitle` objects
34 |     """
35 |     for import_name in imports:
36 |         real_import = importlib.import_module(import_name)
37 |         globals()[import_name] = real_import
38 | 
39 |     # fmt: off
40 |     # Evaluate the each function
41 |     match_func = eval(func_match) if func_match else _true # nosec pylint: disable-msg=eval-used
42 |     process_func = eval(func_process) if func_process else _pass # nosec pylint: disable-msg=eval-used
43 |     # fmt: on
44 | 
45 |     # Match and process each subtitle (or subtitle-line).
46 |     for subtitle in subtitles:
47 |         if lines:
48 |             matched_lines = [
49 |                 line for line in subtitle.content.splitlines() if match_func(line)
50 |             ]
51 |             processed_lines = [process_func(line) for line in matched_lines]
52 |             subtitle.content = "\n".join(processed_lines)
53 |         else:
54 |             if match_func(subtitle.content):
55 |                 subtitle.content = process_func(subtitle.content)
56 |             else:
57 |                 subtitle.content = ""
58 | 
59 |         yield subtitle
60 | 
61 | 
def set_args():
    """Build the argument parser of the ``match`` tool and parse the command line.

    :returns: The namespace produced by ``parser.parse_args()``.
    """
    # The short spellings are declared below as --fm / --fp (two dashes), so
    # the examples must use exactly those.
    examples = {
        "Only include Chinese lines": "srt match -m hanzidentifier --fm hanzidentifier.has_chinese",
        "Exclude all lines which only contain numbers": "srt match --fm 'lambda x: not x.isdigit()'",
        "Strip HTML-like symbols from a subtitle": """srt match -m re --fp 'lambda sub: re.sub("<[^<]+?>", "", sub)'""",
    }
    parser = _cli.basic_parser(description=__doc__, examples=examples)
    parser.add_argument("--match", "--fm", help="The function used to match lines.")
    parser.add_argument("--process", "--fp", help="The function used to process lines.")
    parser.add_argument(
        "--module",
        "-m",
        help="modules to import in the function context",
        action="append",
        default=[],
    )
    parser.add_argument(
        "--lines",
        "-l",
        help="Match the content of each subtitle-line, not each subtitle-content.",
        action="store_true",
    )
    return parser.parse_args()
85 | 
86 | 
def main():
    """Command line entry point: match/process the input and write the composed result."""
    args = set_args()
    logging.basicConfig(level=args.log_level)
    _cli.set_basic_args(args)
    processed = match(
        args.input,
        imports=args.module,
        func_match=args.match,
        func_process=args.process,
        lines=args.lines,
    )
    composed = _cli.compose_suggest_on_fail(processed, strict=args.strict)
    args.output.write(composed)
94 | 
95 | 
96 | if __name__ == "__main__":  # pragma: no cover
97 |     main()
98 | 


--------------------------------------------------------------------------------
/srt/tools/mux.py:
--------------------------------------------------------------------------------
  1 | #!/usr/bin/python3
  2 | 
  3 | """Merge multiple subtitles with similar start/end times into one."""
  4 | 
  5 | import datetime
  6 | import operator
  7 | import logging
  8 | from . import _cli
  9 | 
 10 | log = logging.getLogger(__name__)
 11 | 
 12 | TOP = r"{\an8}"
 13 | BOTTOM = r"{\an2}"
 14 | 
 15 | 
 16 | def mux(subs, acceptable_diff, attr, width):
 17 |     """
 18 |     Merges subs with similar start/end times together (in-place).
 19 |     This prevents subtitles from jumping around the screen.
 20 | 
 21 |     :param subs: :py:class:`Subtitle` objects
 22 |     :param datetime.timedelta acceptable_diff: The amount of milliseconds
 23 |                                     a subtitle start time must be to shift.
 24 |     :param str attr:
 25 |     :param int width: The amount of subtitles to consider for time matching at once.
 26 |     :rtype: :term:`generator` of :py:class:`Subtitle` objects
 27 |     """
 28 |     sorted_subs = sorted(subs, key=operator.attrgetter(attr))
 29 | 
 30 |     for subs in _cli.sliding_window(sorted_subs, width=width):
 31 |         current_sub = subs[0]
 32 |         future_subs = subs[1:]
 33 |         current_comp = getattr(current_sub, attr)
 34 | 
 35 |         for future_sub in future_subs:
 36 |             future_comp = getattr(future_sub, attr)
 37 |             if current_comp + acceptable_diff > future_comp:
 38 |                 log.debug(
 39 |                     "Merging %d's %s time into %d",
 40 |                     future_sub.index,
 41 |                     attr,
 42 |                     current_sub.index,
 43 |                 )
 44 |                 setattr(future_sub, attr, current_comp)
 45 |             else:
 46 |                 # Since these are sorted, and this one didn't match, we can be
 47 |                 # sure future ones won't match either.
 48 |                 break
 49 | 
 50 | 
 51 | def set_args():
 52 |     examples = {
 53 |         "Merge English and Chinese subtitles": "srt mux -i eng.srt -i chs.srt -o both.srt",
 54 |         "Merge subtitles with one on top and one at the bottom": "srt mux -t -i eng.srt -i chs.srt -o both.srt",
 55 |     }
 56 |     parser = _cli.basic_parser(description=__doc__, examples=examples, multi_input=True)
 57 |     parser.add_argument(
 58 |         "--ms",
 59 |         metavar="MILLISECONDS",
 60 |         default=datetime.timedelta(milliseconds=600),
 61 |         type=lambda ms: datetime.timedelta(milliseconds=int(ms)),
 62 |         help="Match to-be-muxed subs within this number of milliseconds (default: 600).",
 63 |     )
 64 |     parser.add_argument(
 65 |         "--width",
 66 |         "-w",
 67 |         default=5,
 68 |         type=int,
 69 |         help="The amount of subs to consider time matching at once (default: %(default)s)",
 70 |     )
 71 |     parser.add_argument(
 72 |         "--top-and-bottom",
 73 |         "-t",
 74 |         action="store_true",
 75 |         help="Use SSA-style tags to place files at the top and bottom, respectively. Turns off time matching.",
 76 |     )
 77 |     parser.add_argument(
 78 |         "--no-time-matching",
 79 |         "--nt",
 80 |         action="store_true",
 81 |         help="Prevents time matching for close subtitles (see --ms)",
 82 |     )
 83 |     return parser.parse_args()
 84 | 
 85 | 
 86 | def main():
 87 |     args = set_args()
 88 |     logging.basicConfig(level=args.log_level)
 89 | 
 90 |     _cli.set_basic_args(args)
 91 | 
 92 |     muxed_subs = []
 93 |     for idx, subs in enumerate(args.input):
 94 |         for sub in subs:
 95 |             if args.top_and_bottom:
 96 |                 if idx % 2 == 0:
 97 |                     sub.content = TOP + sub.content
 98 |                 else:
 99 |                     sub.content = BOTTOM + sub.content
100 |             muxed_subs.append(sub)
101 | 
102 |     if args.no_time_matching or not args.top_and_bottom:
103 |         mux(muxed_subs, args.ms, "start", args.width)
104 |         mux(muxed_subs, args.ms, "end", args.width)
105 | 
106 |     output = _cli.compose_suggest_on_fail(muxed_subs, strict=args.strict)
107 |     args.output.write(output)
108 | 
109 | 
110 | if __name__ == "__main__":  # pragma: no cover
111 |     main()
112 | 


--------------------------------------------------------------------------------
/srt/tools/normalize.py:
--------------------------------------------------------------------------------
 1 | #!/usr/bin/python3
 2 | 
 3 | """Take a badly formatted SRT file and output a strictly valid one."""
 4 | 
 5 | import logging
 6 | from . import _cli
 7 | 
 8 | log = logging.getLogger(__name__)
 9 | 
10 | 
def normalize(subs, strict):
    """
    Normalises subtitles by composing them into SRT text.

    This is a thin wrapper that delegates to ``_cli.compose_suggest_on_fail``.

    :param subs: :py:class:`Subtitle` objects
    :param bool strict: Whether to enable strict mode, see
                        :py:func:`Subtitle.to_srt` for more information
    :returns: Whatever ``_cli.compose_suggest_on_fail`` returns for the given
              subtitles (the command line tool writes it to the output file).
    :rtype: str
    """
    composed = _cli.compose_suggest_on_fail(subs, strict)
    return composed
24 | 
25 | 
def main():
    """Command line entry point: normalise the input and write the result."""
    usage_examples = {"Normalise a subtitle": "srt normalize -i bad.srt -o good.srt"}

    parser = _cli.basic_parser(
        description=__doc__, examples=usage_examples, hide_no_strict=True
    )
    args = parser.parse_args()
    logging.basicConfig(level=args.log_level)
    _cli.set_basic_args(args)
    normalized = normalize(args.input, strict=args.strict)
    args.output.write(normalized)
36 | 
37 | 
38 | if __name__ == "__main__":  # pragma: no cover
39 |     main()
40 | 


--------------------------------------------------------------------------------
/srt/tools/paste.py:
--------------------------------------------------------------------------------
  1 | #!/usr/bin/python3
  2 | 
  3 | """Paste subtitles into other subtitles at a given timestamp."""
  4 | 
  5 | import datetime
  6 | import logging
  7 | import srt
  8 | from types import GeneratorType
  9 | from . import _cli
 10 | from . import _utils
 11 | 
 12 | 
 13 | log = logging.getLogger(__name__)
 14 | 
 15 | 
def paste(subs, copy, timestamp, space=datetime.timedelta(0), block=False):
    """Pastes subtitles into other subtitles at a given timestamp.

    The two streams are merged by (shifted) start time: the copied subtitles
    are moved forward by ``timestamp + space`` and, for a block paste, the
    original subtitles that start after ``timestamp`` are moved forward by
    the block time (the latest end among the copied subtitles plus
    ``space``). Every yielded subtitle is a new :py:class:`Subtitle` whose
    index is renumbered from 1.

    :param subs: :py:class:`Subtitle` objects
    :param copy: The :py:class:`Subtitle` objects to be pasted.
    :param datetime.timedelta timestamp: The timestamp to paste at.
    :param datetime.timedelta space: The amount of space to precede the paste.
    :param boolean block: Whether to paste the copied subtitles as a block
                          and adjust the timestamps of subsequent subtitles.
    :rtype: :term:`generator` of :py:class:`Subtitle` objects
    """
    # In the case of a block paste, determine the block time(span).
    # It stays zero for a regular paste, which makes the shifts below no-ops.
    block_time = datetime.timedelta(0)
    if block:
        # copy may be a one-shot iterator, so materialise it before scanning.
        block_copy = list(copy)
        for subtitle in block_copy:
            if subtitle.end > block_time:
                block_time = subtitle.end
        block_time += space
        copy = (x for x in block_copy)  # regenerate copy

    # Ensure each block is iterable
    # (lists and other iterables are wrapped so they can be advanced with next()).
    subs = (x for x in subs) if not isinstance(subs, GeneratorType) else subs
    copy = (x for x in copy) if not isinstance(copy, GeneratorType) else copy

    # Perform the paste operation.
    idx = 1
    subtitle = _utils.tryNext(subs)
    copied_subtitle = _utils.tryNext(copy)
    # Offset applied to every copied subtitle.
    copied_time = timestamp + space
    while subtitle is not None or copied_subtitle is not None:
        # Only copied subtitles remain.
        if subtitle is None:
            yield srt.Subtitle(
                idx,
                copied_subtitle.start + copied_time,
                copied_subtitle.end + copied_time,
                copied_subtitle.content,
            )
            idx += 1
            copied_subtitle = _utils.tryNext(copy)

        # Only original subtitles remain.
        # NOTE(review): block_time is applied unconditionally here, whereas the
        # branch below only applies it when subtitle.start > timestamp. Confirm
        # that shifting subtitles at or before the timestamp is intended (e.g.
        # for an empty copy, or a tie at the paste timestamp).
        elif copied_subtitle is None:
            yield srt.Subtitle(
                idx,
                subtitle.start + block_time,
                subtitle.end + block_time,
                subtitle.content,
            )
            idx += 1
            subtitle = _utils.tryNext(subs)

        # fmt: off
        # ^ prevents extravagant statement expansion from black
        # Both streams have a candidate: emit whichever comes first.
        else:
            start = subtitle.start
            subtitle_end = subtitle.end
            # Original subtitles after the paste point make room for the block.
            if subtitle.start > timestamp:
                start += block_time
                subtitle_end += block_time

            copied_start = copied_subtitle.start + copied_time
            copied_end = copied_subtitle.end + copied_time

            # compare the altered timestamps of subtitle and copied_subtitle
            if start > copied_start:
                yield srt.Subtitle(idx, copied_start, copied_end, copied_subtitle.content)
                idx += 1
                copied_subtitle = _utils.tryNext(copy)
            elif start < copied_start:
                yield srt.Subtitle(idx, start, subtitle_end, subtitle.content)
                idx += 1
                subtitle = _utils.tryNext(subs)
            elif start == copied_start:
                # Same start: the subtitle that ends first is emitted first;
                # on a full tie the original subtitle goes first.
                if (subtitle_end > copied_end):
                    yield srt.Subtitle(idx, copied_start, copied_end, copied_subtitle.content)
                    idx += 1
                    copied_subtitle = _utils.tryNext(copy)
                else:
                    yield srt.Subtitle(idx, start, subtitle_end, subtitle.content)
                    idx += 1
                    subtitle = _utils.tryNext(subs)
        # fmt: on
 98 | 
 99 | 
100 | # Command Line Interface
def set_args():
    """
    Build the argument parser for the paste tool and parse the command line.

    The parser comes from ``_cli.basic_parser`` and is extended with the
    copy range (``--t1``/``--t2``), the paste position (``--paste``), the
    spacing (``--space``) and the ``--block``/``--zero`` switches.

    :returns: The result of ``parser.parse_args()``.
    """
    usage_examples = {
        "Paste subtitles from :05 - :08 at :10": "srt paste -i example.srt --t1 00:00:5,00 --t2 00:00:8,00 -p 00:00:10,00",
        "Paste subtitles from :05 - :08 at :10 with :01 space beforehand": "srt paste -i example.srt --t1 00:00:5,00 --t2 00:00:8,00 -p 00:00:10,00 -s 00:00:01,00",
        "Paste subtitles from :05 - :08 at :10 and adjust all subsequent subtitles": "srt paste -i example.srt --t1 00:00:5,00 --t2 00:00:8,00 -p 00:00:10,00 -b",
    }
    parser = _cli.basic_parser(description=__doc__, examples=usage_examples)

    # Options whose value is an optional SRT timestamp, in registration order.
    optional_timestamp_options = (
        (("--t1",), "The timestamp to start copying from."),
        (("--t2",), "The timestamp to stop copying at."),
        (("--paste", "-p"), "The timestamp to paste at."),
    )
    for flags, help_text in optional_timestamp_options:
        parser.add_argument(
            *flags,
            metavar="TIMESTAMP",
            type=lambda arg: srt.srt_timestamp_to_timedelta(arg),
            default=datetime.timedelta(0),
            nargs="?",
            help=help_text,
        )

    # --space always requires a value, hence no nargs here.
    parser.add_argument(
        "--space",
        "-s",
        metavar="TIMESTAMP",
        type=lambda arg: srt.srt_timestamp_to_timedelta(arg),
        default=datetime.timedelta(0),
        help="The amount of space to place before copied subtitles.",
    )

    # Boolean switches, in registration order.
    switches = (
        (
            ("--block", "-b"),
            "Paste copied subtitles as a block and adjust subsequent subtitles' timestamps.",
        ),
        (("--zero", "-z"), "Start the copied subtitle block from 00:00."),
    )
    for flags, help_text in switches:
        parser.add_argument(*flags, action="store_true", help=help_text)

    return parser.parse_args()
154 | 
155 | 
def main():
    """
    Command line entry point of the paste tool.

    Parses the arguments, selects the subtitles to copy with
    ``find_by_timestamp`` (using ``--t1``, ``--t2`` and ``--zero``), pastes
    them into the input subtitles and writes the composed result to
    ``args.output``.
    """
    args = set_args()
    logging.basicConfig(level=args.log_level)
    _cli.set_basic_args(args)

    # The input is materialised because it is handed to both find and paste.
    source_subs = list(args.input)
    copied = srt.tools.find.find_by_timestamp(
        source_subs, args.t1, args.t2, args.zero
    )
    pasted = paste(source_subs, copied, args.paste, args.space, args.block)
    args.output.write(_cli.compose_suggest_on_fail(pasted, strict=args.strict))
167 | 
168 | 
# Run the command line interface only when this file is executed as a script.
if __name__ == "__main__":  # pragma: no cover
    main()
171 | 


--------------------------------------------------------------------------------
/srt/tools/split.py:
--------------------------------------------------------------------------------
 1 | #!/usr/bin/python3
 2 | 
 3 | """Split subtitles at a given timestamp."""
 4 | 
 5 | import datetime
 6 | import logging
 7 | import srt
 8 | from types import GeneratorType
 9 | from . import _utils
10 | from . import _cli
11 | 
# Module-level logger, named after this module.
log = logging.getLogger(__name__)
13 | 
14 | 
def split(subs, timestamp):
    """
    Splits subtitles at a given timestamp.

    Every subtitle that strictly spans the timestamp
    (``start < timestamp < end``) is cut in two: the first part keeps the
    original start and ends at the timestamp, the second part starts at the
    timestamp and keeps the original end. All other subtitles pass through
    with their original times.

    The first parts are yielded in place. The second parts are buffered and
    yielded (sorted) right before the first subtitle that starts after the
    timestamp, or at the very end if there is no such subtitle. A subtitle
    that spans the timestamp but arrives after the buffer has been flushed
    (only possible with unsorted input) has its second part yielded
    immediately, so no content is dropped.

    Yielded subtitles are new objects numbered from 1; only the index,
    start, end and content are carried over.

    :param subs: :py:class:`Subtitle` objects
    :param datetime.timedelta timestamp: The timestamp to split subtitles at.
    :rtype: :term:`generator` of :py:class:`Subtitle` objects
    """
    # ensures list compatibility
    subs = (x for x in subs) if not isinstance(subs, GeneratorType) else subs

    # Split subtitles at timestamp.
    added_split_subs = False  # True once the buffered second parts were yielded
    idx = 1
    split_subs = []  # second parts waiting to be yielded
    subtitle = _utils.tryNext(subs)
    while subtitle is not None:
        start = subtitle.start
        end = subtitle.end

        if start < timestamp < end:
            yield srt.Subtitle(idx, start, timestamp, subtitle.content)
            idx += 1
            if added_split_subs:
                # The buffer has already been flushed; buffering this part
                # now would lose it, so emit it right away.
                yield srt.Subtitle(idx, timestamp, end, subtitle.content)
                idx += 1
            else:
                # The index stored here is a placeholder; the part is
                # re-created with its final index when the buffer is flushed.
                split_subs.append(srt.Subtitle(idx, timestamp, end, subtitle.content))
        else:
            if not added_split_subs and timestamp < start:
                # First subtitle after the timestamp: flush the buffer first.
                added_split_subs = True
                split_subs.sort()
                for sub in split_subs:
                    yield srt.Subtitle(idx, timestamp, sub.end, sub.content)
                    idx += 1
            yield srt.Subtitle(idx, start, end, subtitle.content)
            idx += 1

        subtitle = _utils.tryNext(subs)

    # No subtitle started after the timestamp: flush the buffer at the end.
    if not added_split_subs:
        split_subs.sort()
        for sub in split_subs:
            yield srt.Subtitle(idx, timestamp, sub.end, sub.content)
            idx += 1
58 | 
59 | 
60 | # Command Line Interface
def set_args():
    """
    Build the argument parser for the split tool and parse the command line.

    The parser comes from ``_cli.basic_parser`` and gains a single
    ``-t``/``--timestamp`` option that defaults to a zero timedelta.

    :returns: The result of ``parser.parse_args()``.
    """
    parser = _cli.basic_parser(
        description=__doc__,
        examples={
            "Split subtitles at :05": "srt split -i example.srt -t 00:00:5,00",
        },
    )
    timestamp_option = dict(
        metavar="TIMESTAMP",
        type=lambda arg: srt.srt_timestamp_to_timedelta(arg),
        default=datetime.timedelta(0),
        nargs="?",
        help="The timestamp to split subtitles at.",
    )
    parser.add_argument("-t", "--timestamp", **timestamp_option)
    return parser.parse_args()
76 | 
77 | 
def main():
    """
    Command line entry point of the split tool.

    Parses the arguments, splits the input subtitles at ``args.timestamp``
    and writes the composed result to ``args.output``.
    """
    args = set_args()
    logging.basicConfig(level=args.log_level)
    _cli.set_basic_args(args)
    composed = _cli.compose_suggest_on_fail(
        split(args.input, args.timestamp), strict=args.strict
    )
    args.output.write(composed)
85 | 
86 | 
# Run the command line interface only when this file is executed as a script.
if __name__ == "__main__":  # pragma: no cover
    main()
89 | 


--------------------------------------------------------------------------------
/tests/files/ascii.srt:
--------------------------------------------------------------------------------
 1 | 2
 2 | 00:00:27,000 --> 00:00:30,730
 3 | ascii
 4 | 
 5 | 4
 6 | 00:00:31,500 --> 00:00:34,100
 7 | oh look
 8 | 
 9 | 6
10 | 00:00:34,100 --> 00:00:36,570
11 | ascii everywhere
12 | 
13 | 


--------------------------------------------------------------------------------
/tests/files/gb2312.srt:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/switchupcb/srt3/97ab1e5cc57da853c14ebc75b74193ec29bd048a/tests/files/gb2312.srt


--------------------------------------------------------------------------------
/tests/requirements.txt:
--------------------------------------------------------------------------------
1 | pytest==6.*
2 | pytest-cov==2.*
3 | hypothesis==6.*
4 | 


--------------------------------------------------------------------------------
/tests/test_srt.py:
--------------------------------------------------------------------------------
  1 | #!/usr/bin/python3
  2 | 
  3 | from datetime import timedelta
  4 | from io import StringIO
  5 | import collections
  6 | import functools
  7 | import os
  8 | import string
  9 | 
 10 | import pytest
 11 | from hypothesis import given, settings, HealthCheck, assume
 12 | import hypothesis.strategies as st
 13 | 
 14 | import srt
 15 | 
 16 | REGISTER_SETTINGS = lambda name, **kwargs: settings.register_profile(
 17 |     name, suppress_health_check=[HealthCheck.too_slow], deadline=None, **kwargs
 18 | )
 19 | 
 20 | REGISTER_SETTINGS("base")
 21 | REGISTER_SETTINGS("release", max_examples=1000)
 22 | 
 23 | settings.load_profile(os.getenv("HYPOTHESIS_PROFILE", "base"))
 24 | 
# Used to convert a number of days into the expected hours field.
HOURS_IN_DAY = 24
# Default upper bound for generated time fields; this is also the largest
# number of days a datetime.timedelta can hold (timedelta.max.days).
TIMEDELTA_MAX_DAYS = 999999999
# Subtitle factory with index 1 spanning 00:00:01 - 00:00:02; the caller
# supplies the content.
CONTENTLESS_SUB = functools.partial(
    srt.Subtitle, index=1, start=timedelta(seconds=1), end=timedelta(seconds=2)
)
 30 | 
 31 | 
 32 | def is_strictly_legal_content(content):
 33 |     """
 34 |     Filter out things that would violate strict mode. Illegal content
 35 |     includes:
 36 | 
 37 |     - A content section that starts or ends with a newline
 38 |     - A content section that contains blank lines
 39 |     """
 40 | 
 41 |     if content.strip("\r\n") != content:
 42 |         return False
 43 |     elif not content.strip():
 44 |         return False
 45 |     elif "\n\n" in content:
 46 |         return False
 47 |     else:
 48 |         return True
 49 | 
 50 | 
 51 | def subs_eq(got, expected, any_order=False):
 52 |     """
 53 |     Compare Subtitle objects using vars() so that differences are easy to
 54 |     identify.
 55 |     """
 56 |     got_vars = [frozenset(vars(sub).items()) for sub in got]
 57 |     expected_vars = [frozenset(vars(sub).items()) for sub in expected]
 58 |     if any_order:
 59 |         assert collections.Counter(got_vars) == collections.Counter(expected_vars)
 60 |     else:
 61 |         assert got_vars == expected_vars
 62 | 
 63 | 
 64 | def timedeltas(min_value=0, max_value=TIMEDELTA_MAX_DAYS):
 65 |     """
 66 |     A Hypothesis strategy to generate timedeltas.
 67 | 
 68 |     Right now {min,max}_value are shoved into multiple fields in timedelta(),
 69 |     which is not very customisable, but it's good enough for our current test
 70 |     purposes. If you need more precise control, you may need to add more
 71 |     parameters to this function to be able to customise more freely.
 72 |     """
 73 |     time_unit_strategy = st.integers(min_value=min_value, max_value=max_value)
 74 |     timestamp_strategy = st.builds(
 75 |         timedelta,
 76 |         hours=time_unit_strategy,
 77 |         minutes=time_unit_strategy,
 78 |         seconds=time_unit_strategy,
 79 |     )
 80 |     return timestamp_strategy
 81 | 
 82 | 
 83 | def equivalent_timestamps(min_value=0, max_value=TIMEDELTA_MAX_DAYS):
 84 |     def string_timestamp(hours, minutes, seconds, msecs, paddings):
 85 |         hours, minutes, seconds, msecs = map(
 86 |             lambda v_and_p: "0" * v_and_p[1] + str(v_and_p[0]),
 87 |             zip((hours, minutes, seconds, msecs), paddings),
 88 |         )
 89 |         return "{}:{}:{},{}".format(hours, minutes, seconds, msecs)
 90 | 
 91 |     def ts_field_value():
 92 |         return st.integers(min_value=min_value, max_value=max_value)
 93 | 
 94 |     def zero_padding():
 95 |         return st.integers(min_value=0, max_value=2)
 96 | 
 97 |     @st.composite
 98 |     def maybe_off_by_one_fields(draw):
 99 |         field = draw(ts_field_value())
100 |         field_maybe_plus_one = draw(st.integers(min_value=field, max_value=field + 1))
101 |         return field_maybe_plus_one, field
102 | 
103 |     def get_equiv_timestamps(h, m, s, ms2, ts1paddings, ts2paddings):
104 |         h2, h1 = h
105 |         m2, m1 = m
106 |         s2, s1 = s
107 |         ms1 = (
108 |             (h2 - h1) * 60 * 60 * 1000 + (m2 - m1) * 60 * 1000 + (s2 - s1) * 1000 + ms2
109 |         )
110 |         return (
111 |             string_timestamp(h2, m2, s2, ms2, ts2paddings),
112 |             string_timestamp(h1, m1, s1, ms1, ts1paddings),
113 |         )
114 | 
115 |     return st.builds(
116 |         get_equiv_timestamps,
117 |         maybe_off_by_one_fields(),
118 |         maybe_off_by_one_fields(),
119 |         maybe_off_by_one_fields(),
120 |         ts_field_value(),
121 |         st.tuples(*[zero_padding() for _ in range(4)]),
122 |         st.tuples(*[zero_padding() for _ in range(4)]),
123 |     )
124 | 
125 | 
def subtitles(strict=True):
    """
    A Hypothesis strategy to generate Subtitle objects.

    With ``strict`` (the default) the generated content is additionally
    restricted to what ``is_strictly_legal_content`` accepts.
    """

    def has_no_eol(text):
        return all(eol not in text for eol in "\r\n")

    # \r never appears inside Subtitle.content: it is expected to have been
    # normalised to \n already.
    content_strategy = st.text(min_size=1).filter(lambda x: "\r" not in x)
    if strict:
        content_strategy = content_strategy.filter(is_strictly_legal_content)

    # Start and end are drawn from two disjoint ranges so that a generated
    # subtitle never has start >= end. The upper bounds are arbitrary values
    # small enough not to overflow TIMEDELTA_MAX_DAYS.
    return st.builds(
        srt.Subtitle,
        index=st.integers(min_value=0),
        start=timedeltas(min_value=0, max_value=500000),
        end=timedeltas(min_value=500001, max_value=999999),
        proprietary=st.text().filter(has_no_eol),
        content=content_strategy,
    )
156 | 
157 | 
@given(st.lists(subtitles()))
def test_compose_and_parse_from_file(input_subs):
    """Subtitles composed into a file-like object parse back unchanged."""
    composed = srt.compose(input_subs, reindex=False)
    subs_eq(srt.parse(StringIO(composed)), input_subs)
163 | 
164 | 
@given(st.lists(subtitles()))
def test_compose_and_parse_from_file_bom(input_subs):
    """A leading byte order mark in a file-like object does not break parsing."""
    composed_with_bom = "\ufeff" + srt.compose(input_subs, reindex=False)
    subs_eq(srt.parse(StringIO(composed_with_bom)), input_subs)
170 | 
171 | 
@given(st.lists(subtitles()))
def test_compose_and_parse_strict(input_subs):
    """Composing and parsing a string round-trips the subtitles."""
    round_tripped = srt.parse(srt.compose(input_subs, reindex=False))
    subs_eq(round_tripped, input_subs)
177 | 
178 | 
@given(st.lists(subtitles()))
def test_can_compose_without_ending_blank_line(input_subs):
    """
    Plenty of subtitle editors leave out the final blank line and plenty of
    editors accept such files, so the parser has to accept them as well.
    """
    # Drop the very last character of the composed output.
    truncated = srt.compose(input_subs, reindex=False)[:-1]
    subs_eq(srt.parse(truncated), input_subs)
189 | 
190 | 
@given(st.lists(subtitles()))
def test_can_compose_without_eol_at_all(input_subs):
    """Input without any trailing end-of-line characters still parses."""
    without_trailing_eol = srt.compose(input_subs, reindex=False).rstrip("\r\n")
    subs_eq(srt.parse(without_trailing_eol), input_subs)
197 | 
198 | 
@given(st.text().filter(is_strictly_legal_content))
def test_compose_and_parse_strict_mode(content):
    """Strict mode cleans up newlines in content; non-strict mode leaves it alone."""
    # sub.content only ever holds \n, never OS-specific line separators
    assume("\r" not in content)

    # Leading newline, blank line in the middle, trailing newline.
    messy_content = "\n{0}\n\n{0}\n".format(content)
    sub = CONTENTLESS_SUB(content=messy_content)

    strict_subs = list(srt.parse(sub.to_srt()))
    unstrict_subs = list(srt.parse(sub.to_srt(strict=False)))
    parsed_strict = strict_subs[0]
    parsed_unstrict = unstrict_subs[0]

    # Strict mode drops leading/trailing newlines and blank lines.
    assert not parsed_strict.content.startswith("\n")
    assert not parsed_strict.content.endswith("\n")
    assert "\n\n" not in parsed_strict.content

    # Without strict mode the content is untouched, apart from \r\n
    # becoming \n.
    assert parsed_unstrict.content == sub.content.replace("\r\n", "\n")
219 | 
220 | 
@given(st.integers(min_value=1, max_value=TIMEDELTA_MAX_DAYS))
def test_timedelta_to_srt_timestamp_can_go_over_24_hours(days):
    """Whole days are folded into the hours field instead of being lost."""
    hours_field, _, _ = srt.timedelta_to_srt_timestamp(timedelta(days=days)).partition(
        ":"
    )
    assert int(hours_field) == days * HOURS_IN_DAY
226 | 
227 | 
@given(subtitles())
def test_subtitle_equality(sub_1):
    """A subtitle rebuilt from the same attributes compares equal."""
    clone = srt.Subtitle(**vars(sub_1))
    assert sub_1 == clone
232 | 
233 | 
@given(subtitles())
def test_subtitle_inequality(sub_1):
    """Subtitles that differ only in their index compare unequal."""
    clone = srt.Subtitle(**vars(sub_1))
    clone.index = clone.index + 1
    assert sub_1 != clone
239 | 
240 | 
@given(subtitles())
def test_subtitle_from_scratch_equality(subtitle):
    """Two independent parses of the same block are equal and hash alike."""
    srt_block = subtitle.to_srt()

    # Parse twice to obtain completely separate objects, so the hash
    # comparison is not influenced by shared state.
    sub_1, sub_2 = (list(srt.parse(srt_block))[0] for _ in range(2))

    subs_eq([sub_1], [sub_2])
    # Guards against subs_eq and __eq__ disagreeing.
    assert sub_1 == sub_2
    assert hash(sub_1) == hash(sub_2)
254 | 
255 | 
@given(st.lists(subtitles()))
def test_parsing_spaced_arrow(subs):
    """An arrow written as "- >" is still accepted."""
    composed = srt.compose(subs, reindex=False, strict=False)
    subs_eq(srt.parse(composed.replace("-->", "- >")), subs)
261 | 
262 | 
@given(st.lists(subtitles()))
def test_parsing_spaced_ender_arrow(subs):
    """An arrow written as "-- >" (seen in BSG subtitles) is still accepted."""
    composed = srt.compose(subs, reindex=False, strict=False)
    subs_eq(srt.parse(composed.replace("-->", "-- >")), subs)
269 | 
270 | 
@given(st.lists(subtitles()))
def test_parsing_no_ws_arrow(subs):
    """An arrow without surrounding whitespace is still accepted."""
    composed = srt.compose(subs, reindex=False, strict=False)
    subs_eq(srt.parse(composed.replace(" --> ", "-->")), subs)
278 | 
279 | 
@given(st.text(string.whitespace), st.lists(subtitles()))
def test_parsing_leading_whitespace(ws, subs):
    """Whitespace in front of the first subtitle is ignored."""
    composed = srt.compose(subs, reindex=False, strict=False)
    subs_eq(srt.parse(ws + composed), subs)
285 | 
286 | 
@given(st.lists(subtitles()))
def test_parsing_negative_index(subs):
    """Negated indexes survive a compose/parse round trip."""
    for sub in subs:
        sub.index = -sub.index
    composed = srt.compose(subs, reindex=False, strict=False)
    subs_eq(srt.parse(composed), subs)
294 | 
295 | 
@given(st.lists(subtitles()))
def test_parsing_content_with_blank_lines(subs):
    """Content containing a blank line is parsed as one subtitle."""
    for subtitle in subs:
        # A blank line in the middle triggers the "special" content parsing
        # for erroneous SRT files that contain blank lines.
        subtitle.content = "\n\n".join([subtitle.content] * 2)

    composed = srt.compose(subs, reindex=False, strict=False)
    subs_eq(srt.parse(composed), subs)
305 | 
306 | 
@given(st.lists(subtitles()))
def test_parsing_no_content(subs):
    """Subtitles with empty content survive a non-strict round trip."""
    for subtitle in subs:
        subtitle.content = ""

    composed = srt.compose(subs, reindex=False, strict=False)
    subs_eq(srt.parse(composed), subs)
314 | 
315 | 
@given(st.lists(subtitles()), st.lists(subtitles()), st.text(alphabet="\n\r\t "))
def test_subs_missing_content_removed(content_subs, contentless_subs, contentless_text):
    """sort_and_reindex drops whitespace-only subtitles and numbers the rest."""
    for blank_sub in contentless_subs:
        blank_sub.content = contentless_text

    survivors = list(
        srt.sort_and_reindex(contentless_subs + content_subs, in_place=True)
    )

    # Every contentless sub must be gone, leaving exactly content_subs.
    subs_eq(survivors, content_subs, any_order=True)

    # The remaining subs are numbered consecutively from the default start
    # index; the dropped ones do not consume an index.
    default_start_index = 1
    expected_indexes = list(
        range(default_start_index, default_start_index + len(survivors))
    )
    assert [sub.index for sub in survivors] == expected_indexes
334 | 
335 | 
@given(
    st.lists(subtitles()),
    st.lists(subtitles()),
    timedeltas(min_value=-999, max_value=-1),
)
def test_subs_starts_before_zero_removed(positive_subs, negative_subs, negative_td):
    """sort_and_reindex drops subtitles that start before zero."""
    for sub in negative_subs:
        # The end is moved as well, purely to avoid start >= end errors.
        sub.start = sub.end = negative_td

    survivors = list(srt.sort_and_reindex(positive_subs + negative_subs, in_place=True))

    # None of the negative subs may be left.
    subs_eq(survivors, positive_subs, any_order=True)
351 | 
352 | 
@given(st.lists(subtitles(), min_size=1), st.integers(min_value=0))
def test_sort_and_reindex(input_subs, start_index):
    """sort_and_reindex numbers from start_index and orders by start time."""
    # Give every sub the same end time so only the start time decides the
    # order. The value is larger than any start the subtitles() strategy
    # generates, so start < end still holds.
    shared_end = timedelta(days=500001)
    for sub in input_subs:
        sub.end = shared_end

    reindexed_subs = list(
        srt.sort_and_reindex(input_subs, start_index=start_index, in_place=True)
    )

    # Indexes run consecutively from start_index.
    expected_indexes = list(range(start_index, start_index + len(input_subs)))
    assert [sub.index for sub in reindexed_subs] == expected_indexes

    # The order follows the start time.
    assert reindexed_subs == sorted(input_subs, key=lambda sub: sub.start)
373 | 
374 | 
@given(st.lists(subtitles()))
def test_sort_and_reindex_no_skip(input_subs):
    """With skip=False, subtitles whose start lies after their end are kept."""
    # Swap start and end so that every sub has start > end.
    for sub in input_subs:
        sub.start, sub.end = sub.end, sub.start

    reindexed_subs = list(srt.sort_and_reindex(input_subs, skip=False))

    # Not a single sub may have been dropped.
    assert len(reindexed_subs) == len(input_subs)
387 | 
388 | 
@given(st.lists(subtitles(), min_size=1))
def test_sort_and_reindex_same_start_time_uses_end(input_subs):
    """Subtitles sharing a start time are ordered by their end time."""
    # Give every sub the same start time so only the end time decides.
    shared_start = timedelta(days=1)
    for sub in input_subs:
        sub.start = shared_start

    reindexed_subs = list(srt.sort_and_reindex(input_subs, in_place=True))

    assert reindexed_subs == sorted(input_subs, key=lambda sub: sub.end)
400 | 
401 | 
@given(st.lists(subtitles(), min_size=1), st.integers(min_value=0))
def test_sort_and_reindex_not_in_place_matches(input_subs, start_index):
    """In-place and copying sort_and_reindex agree; only the latter copies."""

    def clone_all():
        return [srt.Subtitle(**vars(sub)) for sub in input_subs]

    # Separate copies per call, so the two calls cannot affect each other.
    not_in_place_subs = clone_all()
    in_place_subs = clone_all()

    nip_ids = {id(sub) for sub in not_in_place_subs}
    ip_ids = {id(sub) for sub in in_place_subs}

    not_in_place_output = list(
        srt.sort_and_reindex(not_in_place_subs, start_index=start_index)
    )
    in_place_output = list(
        srt.sort_and_reindex(in_place_subs, start_index=start_index, in_place=True)
    )

    # Both variants produce the same subtitles.
    subs_eq(not_in_place_output, in_place_output)

    # The copying variant must hand back new objects...
    assert not any(id(sub) in nip_ids for sub in not_in_place_output)

    # ...while the in-place variant must reuse the ones it was given.
    assert all(id(sub) in ip_ids for sub in in_place_output)
427 | 
428 | 
@given(
    st.lists(subtitles(), min_size=1),
    st.integers(min_value=0),
    st.text(min_size=1),
    timedeltas(),
)
def test_parser_noncontiguous(subs, fake_idx, garbage, fake_timedelta):
    """Garbage between subtitle blocks makes the parser raise."""
    # The injected garbage has to resemble an SRT block at least a little,
    # because the parser tries to detect blank lines that do not really
    # delimit subtitles.
    srt_timestamp = srt.timedelta_to_srt_timestamp(fake_timedelta)
    fake_block_start = "\n\n%d\n%s %s" % (fake_idx, srt_timestamp, garbage)
    composed = srt.compose(subs).replace("\n\n", fake_block_start)

    with pytest.raises(srt.SRTParseError):
        list(srt.parse(composed))
449 | 
450 | 
@given(
    st.lists(subtitles(), min_size=1),
    st.integers(min_value=0),
    st.text(min_size=1),
    timedeltas(),
)
def test_parser_noncontiguous_ignore_errors(subs, fake_idx, garbage, fake_timedelta):
    """With ignore_errors, garbage between subtitle blocks does not raise."""
    srt_timestamp = srt.timedelta_to_srt_timestamp(fake_timedelta)
    fake_block_start = "\n\n%d\n%s %s" % (fake_idx, srt_timestamp, garbage)
    composed = srt.compose(subs).replace("\n\n", fake_block_start)

    # Must not raise: ignore_errors is set.
    list(srt.parse(composed, ignore_errors=True))
465 | 
466 | 
def _parseable_as_int(text):
    """Return True when ``int(text)`` succeeds, False when it raises ValueError."""
    try:
        int(text)
    except ValueError:
        return False
    else:
        return True
473 | 
474 | 
def _parseable_as_float(text):
    """Return True when ``float(text)`` succeeds, False when it raises ValueError."""
    try:
        float(text)
    except ValueError:
        return False
    else:
        return True
481 | 
482 | 
@given(st.lists(subtitles()), st.text(min_size=1))
def test_parser_noncontiguous_leading(subs, garbage):
    """Garbage in front of the first subtitle makes the parser raise."""
    # Leading whitespace is allowed (issue #50), see
    # test_parsing_leading_whitespace.
    assume(not garbage.isspace())

    # Negative indexes are allowed (issue #56), see
    # test_parsing_negative_index. The garbage must not be a plain number
    # either, as it would be mistaken for an index.
    stripped = garbage.strip()
    assume(stripped[0] not in (".", "-"))
    assume(not _parseable_as_int(stripped))
    assume(not _parseable_as_float(stripped))

    # Garbage at the very beginning has to trip the noncontiguity checks.
    composed = garbage + srt.compose(subs)

    with pytest.raises(srt.SRTParseError):
        list(srt.parse(composed))
502 | 
503 | 
@given(
    st.lists(subtitles(), min_size=1),
    st.integers(min_value=0),
    st.text(min_size=1),
    timedeltas(),
)
def test_parser_didnt_match_to_end_raises(subs, fake_idx, garbage, fake_timedelta):
    """Unparseable trailing content is reported with its position."""
    srt_timestamp = srt.timedelta_to_srt_timestamp(fake_timedelta)
    trailing_garbage = "\n\n%d\n%s %s" % (fake_idx, srt_timestamp, garbage)
    composed = "".join([sub.to_srt() for sub in subs] + [trailing_garbage])

    with pytest.raises(srt.SRTParseError) as thrown_exc:
        list(srt.parse(composed))
    error = thrown_exc.value

    # The parser consumes as many \n as needed to satisfy its lookahead
    # assertion, so the leading newlines of the garbage are not reported.
    garbage_stripped = trailing_garbage.lstrip("\n")

    assert garbage_stripped == error.unmatched_content
    assert len(composed) - len(garbage_stripped) == error.expected_start
    assert len(composed) == error.actual_start
527 | 
528 | 
@given(st.lists(subtitles()))
def test_parser_can_parse_with_dot_msec_delimiter(subs):
    """Timestamps using "." as the millisecond delimiter are accepted."""
    dot_srt_blocks = []
    for sub in subs:
        lines = sub.to_srt().split("\n")
        # Only the first two commas are replaced: further ones may belong to
        # the proprietary metadata, and touching them would fail the test.
        lines[1] = lines[1].replace(",", ".", 2)
        dot_srt_blocks.append("\n".join(lines))

    subs_eq(srt.parse("".join(dot_srt_blocks)), subs)
545 | 
546 | 
@given(st.lists(subtitles()))
def test_parser_can_parse_with_fullwidth_delimiter(subs):
    """Timestamps using a fullwidth comma and colon are accepted."""
    fullwidth_srt_blocks = []
    for sub in subs:
        lines = sub.to_srt().split("\n")
        # Replace the first comma and the first colon of the timestamp line.
        lines[1] = lines[1].replace(",", "，", 1).replace(":", "：", 1)
        fullwidth_srt_blocks.append("\n".join(lines))

    subs_eq(srt.parse("".join(fullwidth_srt_blocks)), subs)
561 | 
562 | 
@given(subtitles())
def test_repr_doesnt_crash(sub):
    """repr() works and at least roughly looks like a Subtitle."""
    # There is not much to verify beyond repr not crashing and mentioning
    # the class name and the index.
    rendered = repr(sub)
    assert "Subtitle" in rendered
    assert str(sub.index) in rendered
569 | 
570 | 
@given(subtitles(), subtitles())
def test_parser_accepts_final_no_newline_no_content(sub1, sub2):
    """A final contentless subtitle without trailing newlines is accepted."""
    # Empty content, so that we know how much there is to cut off.
    sub2.content = ""
    subs = [sub1, sub2]

    # Cut the last two characters so that no newline is left at the end.
    # rstrip is not an option, as it could also eat characters matched as
    # proprietary data.
    without_final_newlines = srt.compose(subs, reindex=False)[:-2]

    subs_eq(srt.parse(without_final_newlines), subs)
583 | 
584 | 
@given(st.lists(subtitles()))
def test_parser_accepts_newline_no_content(subs):
    """Contentless subtitles separated by a single newline are accepted."""
    blocks = []
    for sub in subs:
        # Empty content, so that we know how many lines there are to cut.
        sub.content = ""
        # Drop the final \n of each block so that only one remains.
        blocks.append(sub.to_srt()[:-1])

    subs_eq(srt.parse("".join(blocks)), subs)
596 | 
597 | 
@given(st.lists(subtitles()))
def test_compose_and_parse_strict_crlf(input_subs):
    """Input using \\r\\n line endings parses to the same subtitles."""
    crlf_composed = srt.compose(input_subs, reindex=False).replace("\n", "\r\n")
    reparsed_subs = list(srt.parse(crlf_composed))

    # Normalise the line endings inside the content before comparing.
    for sub in reparsed_subs:
        sub.content = sub.content.replace("\r\n", "\n")

    subs_eq(reparsed_subs, input_subs)
608 | 
609 | 
@given(st.lists(subtitles()), st.one_of(st.just("\n"), st.just("\r\n")))
def test_compose_and_parse_strict_custom_eol(input_subs, eol):
    """Output composed with a custom eol parses back to the same subtitles."""
    round_tripped = srt.parse(srt.compose(input_subs, reindex=False, eol=eol))
    subs_eq(round_tripped, input_subs)
615 | 
616 | 
617 | @given(equivalent_timestamps())
618 | def test_equal_timestamps_despite_different_fields_parsed_as_equal(timestamps):
619 |     ts1, ts2 = timestamps
620 |     assert srt.srt_timestamp_to_timedelta(ts1) == srt.srt_timestamp_to_timedelta(ts2)
621 | 
622 | 
623 | @given(timedeltas())
624 | def test_bad_timestamp_format_raises(ts):
625 |     ts = srt.timedelta_to_srt_timestamp(ts)
626 |     ts = ts.replace(":", "t", 1)
627 |     with pytest.raises(srt.TimestampParseError):
628 |         srt.srt_timestamp_to_timedelta(ts)
629 | 
630 | 
631 | @given(st.lists(subtitles()), st.lists(st.sampled_from(string.whitespace)))
632 | def test_can_parse_index_trailing_ws(input_subs, whitespace):
633 |     out = ""
634 | 
635 |     for sub in input_subs:
636 |         lines = sub.to_srt().split("\n")
637 |         lines[0] = lines[0] + "".join(whitespace)
638 |         out += "\n".join(lines)
639 | 
640 |     reparsed_subs = srt.parse(out)
641 |     subs_eq(reparsed_subs, input_subs)
642 | 
643 | 
644 | @given(st.lists(subtitles()))
645 | def test_can_parse_index_with_dot(input_subs):
646 |     # Seen in Battlestar Galactica subs
647 |     out = ""
648 | 
649 |     for sub in input_subs:
650 |         lines = sub.to_srt().split("\n")
651 |         lines[0] = lines[0] + "." + lines[0]
652 |         out += "\n".join(lines)
653 | 
654 |     reparsed_subs = srt.parse(out)
655 |     subs_eq(reparsed_subs, input_subs)
656 | 
657 | 
658 | @given(st.lists(subtitles()), st.lists(st.just("0")))
659 | def test_can_parse_index_leading_zeroes(input_subs, zeroes):
660 |     out = ""
661 | 
662 |     for sub in input_subs:
663 |         lines = sub.to_srt().split("\n")
664 |         lines[0] = "".join(zeroes) + lines[0]
665 |         out += "\n".join(lines)
666 | 
667 |     reparsed_subs = srt.parse(out)
668 |     subs_eq(reparsed_subs, input_subs)
669 | 


--------------------------------------------------------------------------------
/tests/test_tools.py:
--------------------------------------------------------------------------------
  1 | #!/usr/bin/python3
  2 | 
  3 | import os
  4 | import subprocess
  5 | import sys
  6 | import tempfile
  7 | from shlex import quote
  8 | 
  9 | 
 10 | sample_dir = os.path.join(os.path.dirname(os.path.realpath(__file__)), "files")
 11 | 
 12 | 
 13 | if os.name == "nt":
 14 |     # shlex.quote quotes incorrectly on Windows. The lambda resolves the helper at call time, as it is defined below.
 15 |     quote = lambda x: windows_ebic_quote(x)
 16 | 
 17 | 
 18 | def windows_ebic_quote(data):
 19 |     """100% secure"""
 20 |     data = data.replace('"', '""')
 21 |     return '"' + data + '"'
 22 | 
 23 | 
 24 | def run_srt_util(cmd, shell=False, encoding="utf-8-sig"):
 25 |     extra_env = {}
 26 | 
 27 |     env = {"PYTHONPATH": ".", "SystemRoot": r"C:\Windows"}
 28 |     env.update(extra_env)
 29 | 
 30 |     raw_out = subprocess.check_output(cmd, shell=shell, env=env)
 31 |     return raw_out.decode(encoding)
 32 | 
 33 | 
 34 | def assert_supports_all_io_methods(cmd, exclude_output=False, exclude_stdin=False):
 35 |     cmd.insert(0, sys.executable)
 36 |     cmd.insert(1, "srt/tools/_srt.py")
 37 |     in_file = os.path.join(sample_dir, "ascii.srt")
 38 |     in_file_gb = os.path.join(sample_dir, "gb2312.srt")
 39 |     fd, out_file = tempfile.mkstemp()
 40 | 
 41 |     # This is accessed by filename, not fd
 42 |     os.close(fd)
 43 | 
 44 |     outputs = []
 45 |     cmd_string = " ".join(quote(x) for x in cmd)
 46 | 
 47 |     try:
 48 |         outputs.append(run_srt_util(cmd + ["-i", in_file]))
 49 |         if not exclude_stdin:
 50 |             outputs.append(
 51 |                 run_srt_util("%s < %s" % (cmd_string, quote(in_file)), shell=True)
 52 |             )
 53 |         if not exclude_output:
 54 |             run_srt_util(cmd + ["-i", in_file, "-o", out_file])
 55 |             run_srt_util(
 56 |                 cmd + ["-i", in_file_gb, "-o", out_file, "--encoding", "gb2312"],
 57 |                 encoding="gb2312",
 58 |             )
 59 |             if not exclude_stdin:
 60 |                 run_srt_util(
 61 |                     "%s < %s > %s" % (cmd_string, quote(in_file), quote(out_file)),
 62 |                     shell=True,
 63 |                 )
 64 |                 run_srt_util(
 65 |                     "%s < %s > %s"
 66 |                     % (
 67 |                         cmd_string + " --encoding gb2312",
 68 |                         quote(in_file),
 69 |                         quote(out_file),
 70 |                     ),
 71 |                     shell=True,
 72 |                     encoding="gb2312",
 73 |                 )
 74 |         assert len(set(outputs)) == 1, repr(outputs)
 75 | 
 76 |         if os.name == "nt":
 77 |             assert "\r\n" in outputs[0]
 78 |         else:
 79 |             assert "\r\n" not in outputs[0]
 80 |     finally:
 81 |         os.remove(out_file)
 82 | 
 83 | 
 84 | def test_tools_support():
 85 |     matrix = [
 86 |         (["add", "-s", "00:00:01,000", "-e", "00:00:02,000", "-c", "test"], False),
 87 |         (["deduplicate"], False),
 88 |         (["find"], False),
 89 |         (["fixed_timeshift", "--seconds", "5"], False),
 90 |         (
 91 |             [
 92 |                 "linear_timeshift",
 93 |                 "--f1",
 94 |                 "00:00:01,000",
 95 |                 "--f2",
 96 |                 "00:00:02,000",
 97 |                 "--t1",
 98 |                 "00:00:03,000",
 99 |                 "--t2",
100 |                 "00:00:04,000",
101 |             ],
102 |             False,
103 |         ),
104 |         (["match", "--fm", "lambda x: True"], False),
105 |         (["mux"], False, True),  # third item is exclude_stdin
106 |         (["mux", "-t"], False, True),  # third item is exclude_stdin
107 |         (["normalize"], False),
108 |         (["paste"], False),
109 |     ]
110 |     # Each row is (cmd, exclude_output[, exclude_stdin]), unpacked positionally
111 |     for args in matrix:
112 |         assert_supports_all_io_methods(*args)
113 | 


--------------------------------------------------------------------------------
/tests/tools/__init__.py:
--------------------------------------------------------------------------------
 1 | from srt import srt
 2 | from srt import srt_timestamp_to_timedelta as t
 3 | 
 4 | 
 5 | def create_blocks(setting=0):
 6 |     """Creates a generator of subtitles for testing purposes"""
 7 |     subs = []
 8 |     if setting == 0:
 9 |         subs.append(srt.Subtitle(1, t("00:00:11,000"), t("00:00:12,701"), "A"))
10 |         subs.append(srt.Subtitle(2, t("00:00:12,701"), t("00:00:14,203"), "B"))
11 |         subs.append(srt.Subtitle(3, t("00:00:14,500"), t("00:00:19,738"), "C"))
12 |         subs.append(srt.Subtitle(4, t("00:00:16,538"), t("00:00:17,272"), "D"))
13 |         subs.append(srt.Subtitle(5, t("00:00:17,272"), t("00:00:18,440"), "E"))
14 |     elif setting == 1:
15 |         subs.append(srt.Subtitle(1, t("00:00:1,000"), t("00:00:10,000"), "A"))
16 |         subs.append(srt.Subtitle(2, t("00:00:2,000"), t("00:00:08,000"), "B"))
17 |         subs.append(srt.Subtitle(3, t("00:00:3,000"), t("00:00:05,000"), "C"))
18 |         subs.append(srt.Subtitle(4, t("00:00:3,500"), t("00:00:04,500"), "D"))
19 |         subs.append(srt.Subtitle(5, t("00:00:6,000"), t("00:00:08,000"), "E"))
20 |         subs.append(srt.Subtitle(6, t("00:00:9,000"), t("00:00:10,000"), "F"))
21 | 
22 |     for subtitle in subs:
23 |         yield subtitle
24 | 
25 | 
26 | def sort(subs):
27 |     return list(srt.sort_and_reindex(subs))
28 | 


--------------------------------------------------------------------------------
/tests/tools/test_add.py:
--------------------------------------------------------------------------------
 1 | import unittest
 2 | from . import *
 3 | from srt.tools.add import *
 4 | 
 5 | 
 6 | class TestToolAdd(unittest.TestCase):
 7 |     def setUp(self):
 8 |         self.subs = create_blocks  # factory: each call returns a fresh generator
 9 |         self.x = list(create_blocks())  # not read by the test in this class
10 |         self.y = list(create_blocks(1))  # not read by the test in this class
11 | 
12 |     def tearDown(self):
13 |         pass
14 | 
15 |     def test_add_caption(self):
16 |         result = add(self.subs(), t("00:00:10,000"), t("00:00:11,000"), "ADD")
17 |         a = list(self.subs())
18 |         a.append(srt.Subtitle(0, t("00:00:10,000"), t("00:00:11,000"), "ADD"))
19 |         self.assertEqual(list(result), sort(a))  # before: ADD ends where A starts
20 | 
21 |         result = add(self.subs(), t("00:00:00,000"), t("00:00:01,000"), "ADD", True)
22 |         a = [
23 |             srt.Subtitle(1, t("00:00:00,000"), t("00:00:01,000"), "ADD"),
24 |             srt.Subtitle(2, t("00:00:12,000"), t("00:00:13,701"), "A"),
25 |             srt.Subtitle(3, t("00:00:13,701"), t("00:00:15,203"), "B"),
26 |             srt.Subtitle(4, t("00:00:15,500"), t("00:00:20,738"), "C"),
27 |             srt.Subtitle(5, t("00:00:17,538"), t("00:00:18,272"), "D"),
28 |             srt.Subtitle(6, t("00:00:18,272"), t("00:00:19,440"), "E"),
29 |         ]
30 |         self.assertEqual(list(result), a)  # before (adjust): A-E each come back 1s later
31 | 
32 |         result = add(self.subs(), t("00:00:15,000"), t("00:00:18,000"), "ADD")
33 |         a = list(self.subs())
34 |         a.append(srt.Subtitle(0, t("00:00:15,000"), t("00:00:18,000"), "ADD"))
35 |         self.assertEqual(list(result), sort(a))  # middle: ADD overlaps C, D and E
36 | 
37 |         result = add(self.subs(), t("00:00:15,000"), t("00:00:16,000"), "ADD", True)
38 |         a = [
39 |             srt.Subtitle(1, t("00:00:11,000"), t("00:00:12,701"), "A"),
40 |             srt.Subtitle(2, t("00:00:12,701"), t("00:00:14,203"), "B"),
41 |             srt.Subtitle(3, t("00:00:14,500"), t("00:00:19,738"), "C"),
42 |             srt.Subtitle(4, t("00:00:15,000"), t("00:00:16,000"), "ADD"),
43 |             srt.Subtitle(5, t("00:00:17,538"), t("00:00:18,272"), "D"),
44 |             srt.Subtitle(6, t("00:00:18,272"), t("00:00:19,440"), "E"),
45 |         ]
46 |         self.assertEqual(list(result), a)  # middle (adjust): only D and E come back 1s later
47 | 
48 |         result = add(self.subs(), t("00:00:25,000"), t("00:00:30,000"), "ADD")
49 |         a = list(self.subs())
50 |         a.append(srt.Subtitle(0, t("00:00:25,000"), t("00:00:30,000"), "ADD"))
51 |         self.assertEqual(list(result), sort(a))  # after: ADD starts after every subtitle has ended
52 | 
53 | 
54 | if __name__ == "__main__":
55 |     unittest.main()
56 | 


--------------------------------------------------------------------------------
/tests/tools/test_find.py:
--------------------------------------------------------------------------------
  1 | import unittest
  2 | from . import *
  3 | from srt.tools.find import *
  4 | 
  5 | 
  6 | class TestToolFind(unittest.TestCase):
  7 |     def setUp(self):
  8 |         self.subs = create_blocks  # factory: each call returns a fresh generator
  9 |         self.x = list(create_blocks())
 10 |         self.y = list(create_blocks(1))  # not read by the tests in this class
 11 | 
 12 |     def tearDown(self):
 13 |         pass
 14 | 
 15 |     def test_find_sequential(self):
 16 |         result = find_by_timestamp([], t("00:00:00,000"), t("00:00:30,000"))
 17 |         self.assertEqual(list(result), [])  # no subtitles in, none out
 18 | 
 19 |         result = find_by_timestamp(self.subs(), t("00:00:11,000"), t("00:00:19,738"))
 20 |         self.assertEqual(list(result), self.x)  # range spans every subtitle
 21 | 
 22 |         result = find_by_timestamp(self.subs(), self.x[0].start, self.x[0].end)
 23 |         self.assertEqual(list(result), [self.x[0]])  # exactly A's own start and end
 24 | 
 25 |         result = find_by_timestamp(self.subs(), self.x[0].start, t("00:00:14,500"))
 26 |         self.assertEqual(list(result), sort([self.x[0], self.x[1]]))  # C starts at 14,500 and is left out
 27 | 
 28 |         result = find_by_timestamp(self.subs(), t("00:00:00,000"), t("00:00:17,500"))
 29 |         a = [
 30 |             srt.Subtitle(1, t("00:00:11,000"), t("00:00:12,701"), "A"),
 31 |             srt.Subtitle(2, t("00:00:12,701"), t("00:00:14,203"), "B"),
 32 |             srt.Subtitle(3, t("00:00:14,500"), t("00:00:17,500"), "C"),
 33 |             srt.Subtitle(4, t("00:00:16,538"), t("00:00:17,272"), "D"),
 34 |             srt.Subtitle(5, t("00:00:17,272"), t("00:00:17,500"), "E"),
 35 |         ]
 36 |         self.assertEqual(list(result), a)  # split: C and E are cut to end at 17,500
 37 | 
 38 |     def test_find_nonsequential(self):
 39 |         result = find_by_timestamp([], t("00:00:30,000"), t("00:00:00,000"))
 40 |         self.assertEqual(list(result), [])  # no subtitles in, none out
 41 | 
 42 |         result = find_by_timestamp(self.subs(), t("00:00:19,738"), t("00:00:11,000"))
 43 |         self.assertEqual(list(result), [])  # start 19,738 / end 11,000: nothing returned
 44 | 
 45 |         result = find_by_timestamp(self.subs(), t("00:00:11,000"), t("00:00:12,701"))
 46 |         self.assertEqual(list(result), [self.x[0]])  # first: exactly A's start and end
 47 | 
 48 |         result = find_by_timestamp(self.subs(), t("00:00:16,538"), t("00:00:14,203"))
 49 |         a = [
 50 |             srt.Subtitle(1, t("00:00:11,000"), t("00:00:12,701"), "A"),
 51 |             srt.Subtitle(2, t("00:00:12,701"), t("00:00:14,203"), "B"),
 52 |             srt.Subtitle(3, t("00:00:16,538"), t("00:00:17,272"), "D"),
 53 |             srt.Subtitle(4, t("00:00:16,538"), t("00:00:19,738"), "C"),
 54 |             srt.Subtitle(5, t("00:00:17,272"), t("00:00:18,440"), "E"),
 55 |         ]
 56 |         self.assertEqual(list(result), a)  # middle: C comes back cut to start at 16,538
 57 | 
 58 |         result = find_by_timestamp(self.subs(), self.x[0].end, self.x[0].start)
 59 |         self.assertEqual(
 60 |             list(result), sort([self.x[1], self.x[2], self.x[3], self.x[4]])  # last
 61 |         )
 62 | 
 63 |         result = find_by_timestamp(self.subs(), t("00:00:14,500"), self.x[0].start)
 64 |         self.assertEqual(list(result), sort([self.x[2], self.x[3], self.x[4]]))  # edges
 65 | 
 66 |         result = find_by_timestamp(self.subs(), t("00:00:17,500"), t("00:00:00,000"))
 67 |         a = [
 68 |             srt.Subtitle(1, t("00:00:17,500"), t("00:00:18,440"), "E"),
 69 |             srt.Subtitle(2, t("00:00:17,500"), t("00:00:19,738"), "C"),
 70 |         ]
 71 |         self.assertEqual(list(result), a)  # split: E and C are cut to start at 17,500
 72 | 
 73 |     def test_find_sequential_adjust(self):
 74 |         result = find_by_timestamp(
 75 |             self.subs(), self.x[0].start, t("00:00:14,500"), True
 76 |         )
 77 |         a = [
 78 |             srt.Subtitle(1, t("00:00:00,000"), t("00:00:01,701"), "A"),
 79 |             srt.Subtitle(2, t("00:00:01,701"), t("00:00:03,203"), "B"),
 80 |         ]
 81 |         self.assertEqual(list(result), a)  # first - second: A and B moved 11s earlier, so A starts at 0
 82 | 
 83 |     def test_find_nonsequential_adjust(self):
 84 |         result = find_by_timestamp(
 85 |             self.subs(), t("00:00:14,500"), self.x[0].start, True
 86 |         )
 87 |         a = [
 88 |             srt.Subtitle(1, t("00:00:00,000"), t("00:00:05,238"), "C"),
 89 |             srt.Subtitle(2, t("00:00:02,038"), t("00:00:02,772"), "D"),
 90 |             srt.Subtitle(3, t("00:00:02,772"), t("00:00:03,940"), "E"),
 91 |         ]
 92 |         self.assertEqual(list(result), a)  # to first subtitle: C, D and E moved 14,500 earlier
 93 | 
 94 |         result = find_by_timestamp(
 95 |             self.subs(), t("00:00:17,500"), t("00:00:00,000"), True
 96 |         )
 97 |         a = [
 98 |             srt.Subtitle(1, t("00:00:00,000"), t("00:00:00,940"), "E"),
 99 |             srt.Subtitle(2, t("00:00:00,000"), t("00:00:02,238"), "C"),
100 |         ]
101 |         self.assertEqual(list(result), a)  # split: the cut E and C both start at 0
102 | 
103 | 
104 | if __name__ == "__main__":
105 |     unittest.main()
106 | 


--------------------------------------------------------------------------------
/tests/tools/test_import.py:
--------------------------------------------------------------------------------
 1 | import unittest
 2 | 
 3 | 
 4 | class TestImportSRT(unittest.TestCase):
 5 |     def test_import(self):
 6 |         try:
 7 |             # import the srt package as a whole.
 8 |             print("srt package")
 9 |             import srt
10 | 
11 |             print([module for module in dir(srt) if not module.startswith("__")])
12 | 
13 |             print("\nsrt module")
14 |             print([member for member in dir(srt.srt) if not member.startswith("__")])
15 | 
16 |             print("\ntools module")
17 |             print([member for member in dir(srt.tools) if not member.startswith("__")])
18 | 
19 |             print("\nremove module")
20 |             print(
21 |                 [
22 |                     member
23 |                     for member in dir(srt.tools.find)
24 |                     if not member.startswith("__")
25 |                 ]
26 |             )
27 |         except AttributeError:
28 |             self.fail("AttributeError raised during package import.")
29 | 
30 |         try:
31 |             # only import the srt.py (module) from the srt package.
32 |             print("\nonly srt module")
33 |             from srt import srt as module
34 | 
35 |             print([member for member in dir(module) if not member.startswith("__")])
36 | 
37 |             # only import the tools package from the srt package.
38 |             print("\nonly tools package")
39 |             from srt import tools
40 | 
41 |             print([member for member in dir(tools) if not member.startswith("__")])
42 | 
43 |             # only import the remove module from the tools package in the srt package.
44 |             print("\nonly remove module")
45 |             from srt.tools import find
46 | 
47 |             print([member for member in dir(find) if not member.startswith("__")])
48 | 
49 |         except AttributeError:
50 |             self.fail("AttributeError raised during module import.")
51 | 


--------------------------------------------------------------------------------
/tests/tools/test_paste.py:
--------------------------------------------------------------------------------
  1 | import unittest
  2 | from . import *
  3 | from srt.tools.paste import *
  4 | 
  5 | 
  6 | class TestToolPaste(unittest.TestCase):
  7 |     def setUp(self):
  8 |         self.subs = create_blocks  # factory: each call returns a fresh generator
  9 |         self.x = list(create_blocks())
 10 |         self.y = list(create_blocks(1))  # not read by the tests in this class
 11 |         self.copied = [
 12 |             srt.Subtitle(1, t("00:00:10,000"), t("00:00:11,000"), "ADD"),
 13 |             srt.Subtitle(2, t("00:00:14,500"), t("00:00:20,000"), "ADD2"),
 14 |             srt.Subtitle(3, t("00:00:25,000"), t("00:00:30,000"), "ADD3"),
 15 |         ]
 16 | 
 17 |     def tearDown(self):
 18 |         pass
 19 | 
 20 |     def test_paste(self):
 21 |         result = paste([], self.copied, t("00:00:00,000"))
 22 |         self.assertEqual(list(result), self.copied)  # nothing to merge with: copied come back unchanged
 23 | 
 24 |         result = paste(self.subs(), [], t("00:00:00,000"))
 25 |         self.assertEqual(list(result), self.x)  # nothing copied: originals come back unchanged
 26 | 
 27 |         result = paste(self.subs(), self.copied, t("00:00:00,000"))
 28 |         a = [
 29 |             srt.Subtitle(1, t("00:00:10,000"), t("00:00:11,000"), "ADD"),
 30 |             srt.Subtitle(2, t("00:00:11,000"), t("00:00:12,701"), "A"),
 31 |             srt.Subtitle(3, t("00:00:12,701"), t("00:00:14,203"), "B"),
 32 |             srt.Subtitle(4, t("00:00:14,500"), t("00:00:19,738"), "C"),
 33 |             srt.Subtitle(5, t("00:00:14,500"), t("00:00:20,000"), "ADD2"),
 34 |             srt.Subtitle(6, t("00:00:16,538"), t("00:00:17,272"), "D"),
 35 |             srt.Subtitle(7, t("00:00:17,272"), t("00:00:18,440"), "E"),
 36 |             srt.Subtitle(8, t("00:00:25,000"), t("00:00:30,000"), "ADD3"),
 37 |         ]
 38 |         self.assertEqual(list(result), a)  # before: copied keep their own times and are merged in
 39 | 
 40 |         result = paste(self.subs(), self.copied, t("00:00:05,000"))
 41 |         a = [
 42 |             srt.Subtitle(1, t("00:00:11,000"), t("00:00:12,701"), "A"),
 43 |             srt.Subtitle(2, t("00:00:12,701"), t("00:00:14,203"), "B"),
 44 |             srt.Subtitle(3, t("00:00:14,500"), t("00:00:19,738"), "C"),
 45 |             srt.Subtitle(4, t("00:00:15,000"), t("00:00:16,000"), "ADD"),
 46 |             srt.Subtitle(5, t("00:00:16,538"), t("00:00:17,272"), "D"),
 47 |             srt.Subtitle(6, t("00:00:17,272"), t("00:00:18,440"), "E"),
 48 |             srt.Subtitle(7, t("00:00:19,500"), t("00:00:25,000"), "ADD2"),
 49 |             srt.Subtitle(8, t("00:00:30,000"), t("00:00:35,000"), "ADD3"),
 50 |         ]
 51 |         self.assertEqual(list(result), a)  # middle: copied come back 5s later
 52 | 
 53 |         result = paste(self.subs(), self.copied, t("00:00:10,000"))
 54 |         a = [
 55 |             srt.Subtitle(1, t("00:00:11,000"), t("00:00:12,701"), "A"),
 56 |             srt.Subtitle(2, t("00:00:12,701"), t("00:00:14,203"), "B"),
 57 |             srt.Subtitle(3, t("00:00:14,500"), t("00:00:19,738"), "C"),
 58 |             srt.Subtitle(4, t("00:00:16,538"), t("00:00:17,272"), "D"),
 59 |             srt.Subtitle(5, t("00:00:17,272"), t("00:00:18,440"), "E"),
 60 |             srt.Subtitle(6, t("00:00:20,000"), t("00:00:21,000"), "ADD"),
 61 |             srt.Subtitle(7, t("00:00:24,500"), t("00:00:30,000"), "ADD2"),
 62 |             srt.Subtitle(8, t("00:00:35,000"), t("00:00:40,000"), "ADD3"),
 63 |         ]
 64 |         self.assertEqual(list(result), a)  # after: copied come back 10s later
 65 | 
 66 |     def test_paste_space(self):
 67 |         result = paste([], self.copied, t("00:00:00,000"), t("00:00:10,000"))
 68 |         a = [
 69 |             srt.Subtitle(1, t("00:00:20,000"), t("00:00:21,000"), "ADD"),
 70 |             srt.Subtitle(2, t("00:00:24,500"), t("00:00:30,000"), "ADD2"),
 71 |             srt.Subtitle(3, t("00:00:35,000"), t("00:00:40,000"), "ADD3"),
 72 |         ]
 73 |         self.assertEqual(list(result), a)  # 10s of space: copied come back 10s later
 74 | 
 75 |         result = paste(self.subs(), [], t("00:00:00,000"), t("00:00:10,000"))
 76 |         self.assertEqual(list(result), self.x)  # nothing copied: originals come back unchanged
 77 | 
 78 |         result = paste(self.subs(), self.copied, t("00:00:00,000"), t("00:00:10,000"))
 79 |         a = [
 80 |             srt.Subtitle(1, t("00:00:11,000"), t("00:00:12,701"), "A"),
 81 |             srt.Subtitle(2, t("00:00:12,701"), t("00:00:14,203"), "B"),
 82 |             srt.Subtitle(3, t("00:00:14,500"), t("00:00:19,738"), "C"),
 83 |             srt.Subtitle(4, t("00:00:16,538"), t("00:00:17,272"), "D"),
 84 |             srt.Subtitle(5, t("00:00:17,272"), t("00:00:18,440"), "E"),
 85 |             srt.Subtitle(6, t("00:00:20,000"), t("00:00:21,000"), "ADD"),
 86 |             srt.Subtitle(7, t("00:00:24,500"), t("00:00:30,000"), "ADD2"),
 87 |             srt.Subtitle(8, t("00:00:35,000"), t("00:00:40,000"), "ADD3"),
 88 |         ]
 89 |         self.assertEqual(list(result), a)  # originals unchanged, copied 10s later
 90 | 
 91 |     def test_block_paste(self):
 92 |         result = paste([], self.copied, t("00:00:00,000"), block=True)
 93 |         self.assertEqual(list(result), self.copied)  # nothing to merge with: copied come back unchanged
 94 | 
 95 |         result = paste(self.subs(), [], t("00:00:00,000"), t("00:00:10,000"), True)
 96 |         a = [
 97 |             srt.Subtitle(1, t("00:00:21,000"), t("00:00:22,701"), "A"),
 98 |             srt.Subtitle(2, t("00:00:22,701"), t("00:00:24,203"), "B"),
 99 |             srt.Subtitle(3, t("00:00:24,500"), t("00:00:29,738"), "C"),
100 |             srt.Subtitle(4, t("00:00:26,538"), t("00:00:27,272"), "D"),
101 |             srt.Subtitle(5, t("00:00:27,272"), t("00:00:28,440"), "E"),
102 |         ]
103 |         self.assertEqual(list(result), a)  # nothing copied, 10s space: originals come back 10s later
104 | 
105 |         result = paste(self.subs(), self.copied, t("00:00:00,000"), block=True)
106 |         a = [
107 |             srt.Subtitle(1, t("00:00:10,000"), t("00:00:11,000"), "ADD"),
108 |             srt.Subtitle(2, t("00:00:14,500"), t("00:00:20,000"), "ADD2"),
109 |             srt.Subtitle(3, t("00:00:25,000"), t("00:00:30,000"), "ADD3"),
110 |             srt.Subtitle(4, t("00:00:41,000"), t("00:00:42,701"), "A"),
111 |             srt.Subtitle(5, t("00:00:42,701"), t("00:00:44,203"), "B"),
112 |             srt.Subtitle(6, t("00:00:44,500"), t("00:00:49,738"), "C"),
113 |             srt.Subtitle(7, t("00:00:46,538"), t("00:00:47,272"), "D"),
114 |             srt.Subtitle(8, t("00:00:47,272"), t("00:00:48,440"), "E"),
115 |         ]
116 |         self.assertEqual(list(result), a)  # before: originals come back 30s later
117 | 
118 |         result = paste(self.subs(), self.copied, t("00:00:15,000"), block=True)
119 |         a = [
120 |             srt.Subtitle(1, t("00:00:11,000"), t("00:00:12,701"), "A"),
121 |             srt.Subtitle(2, t("00:00:12,701"), t("00:00:14,203"), "B"),
122 |             srt.Subtitle(3, t("00:00:14,500"), t("00:00:19,738"), "C"),
123 |             srt.Subtitle(4, t("00:00:25,000"), t("00:00:26,000"), "ADD"),
124 |             srt.Subtitle(5, t("00:00:29,500"), t("00:00:35,000"), "ADD2"),
125 |             srt.Subtitle(6, t("00:00:40,000"), t("00:00:45,000"), "ADD3"),
126 |             srt.Subtitle(7, t("00:00:46,538"), t("00:00:47,272"), "D"),
127 |             srt.Subtitle(8, t("00:00:47,272"), t("00:00:48,440"), "E"),
128 |         ]
129 |         self.assertEqual(list(result), a)  # middle: copied 15s later, D and E 30s later
130 | 
131 |         result = paste(self.subs(), self.copied, t("00:00:20,000"), block=True)
132 |         a = [
133 |             srt.Subtitle(1, t("00:00:11,000"), t("00:00:12,701"), "A"),
134 |             srt.Subtitle(2, t("00:00:12,701"), t("00:00:14,203"), "B"),
135 |             srt.Subtitle(3, t("00:00:14,500"), t("00:00:19,738"), "C"),
136 |             srt.Subtitle(4, t("00:00:16,538"), t("00:00:17,272"), "D"),
137 |             srt.Subtitle(5, t("00:00:17,272"), t("00:00:18,440"), "E"),
138 |             srt.Subtitle(6, t("00:00:30,000"), t("00:00:31,000"), "ADD"),
139 |             srt.Subtitle(7, t("00:00:34,500"), t("00:00:40,000"), "ADD2"),
140 |             srt.Subtitle(8, t("00:00:45,000"), t("00:00:50,000"), "ADD3"),
141 |         ]
142 |         self.assertEqual(list(result), a)  # after: originals unchanged, copied 20s later
143 | 
144 |     def test_block_paste_space(self):
145 |         result = paste(
146 |             self.subs(), self.copied, t("00:00:00,000"), t("00:00:10,000"), True
147 |         )
148 |         a = [
149 |             srt.Subtitle(1, t("00:00:20,000"), t("00:00:21,000"), "ADD"),
150 |             srt.Subtitle(2, t("00:00:24,500"), t("00:00:30,000"), "ADD2"),
151 |             srt.Subtitle(3, t("00:00:35,000"), t("00:00:40,000"), "ADD3"),
152 |             srt.Subtitle(4, t("00:00:51,000"), t("00:00:52,701"), "A"),
153 |             srt.Subtitle(5, t("00:00:52,701"), t("00:00:54,203"), "B"),
154 |             srt.Subtitle(6, t("00:00:54,500"), t("00:00:59,738"), "C"),
155 |             srt.Subtitle(7, t("00:00:56,538"), t("00:00:57,272"), "D"),
156 |             srt.Subtitle(8, t("00:00:57,272"), t("00:00:58,440"), "E"),
157 |         ]
158 |         self.assertEqual(list(result), a)  # before: copied 10s later, originals 40s later
159 | 
160 |         result = paste(
161 |             self.subs(), self.copied, t("00:00:15,000"), t("00:00:01,000"), True
162 |         )
163 |         a = [
164 |             srt.Subtitle(1, t("00:00:11,000"), t("00:00:12,701"), "A"),
165 |             srt.Subtitle(2, t("00:00:12,701"), t("00:00:14,203"), "B"),
166 |             srt.Subtitle(3, t("00:00:14,500"), t("00:00:19,738"), "C"),
167 |             srt.Subtitle(4, t("00:00:26,000"), t("00:00:27,000"), "ADD"),
168 |             srt.Subtitle(5, t("00:00:30,500"), t("00:00:36,000"), "ADD2"),
169 |             srt.Subtitle(6, t("00:00:41,000"), t("00:00:46,000"), "ADD3"),
170 |             srt.Subtitle(7, t("00:00:47,538"), t("00:00:48,272"), "D"),
171 |             srt.Subtitle(8, t("00:00:48,272"), t("00:00:49,440"), "E"),
172 |         ]
173 |         self.assertEqual(list(result), a)  # middle: copied 16s later, D and E 31s later
174 | 
175 | 
176 | if __name__ == "__main__":
177 |     unittest.main()
178 | 


--------------------------------------------------------------------------------
/tests/tools/test_split.py:
--------------------------------------------------------------------------------
 1 | import unittest
 2 | from . import *
 3 | from srt.tools.split import *
 4 | 
 5 | 
 6 | class TestToolSplit(unittest.TestCase):
 7 |     def setUp(self):
 8 |         self.subs = create_blocks  # factory: each call returns a fresh generator
 9 |         self.x = list(create_blocks())
10 |         self.y = list(create_blocks(1))  # not read by the test in this class
11 | 
12 |     def tearDown(self):
13 |         pass
14 | 
15 |     def test_split(self):
16 |         result = split(self.subs(), self.x[0].start)
17 |         self.assertEqual(list(result), self.x)  # splitting at A's start changes nothing
18 | 
19 |         result = split(self.subs(), t("00:00:12,000"))
20 |         a = [
21 |             srt.Subtitle(1, t("00:00:11,000"), t("00:00:12,000"), "A"),
22 |             srt.Subtitle(2, t("00:00:12,000"), t("00:00:12,701"), "A"),
23 |             srt.Subtitle(3, t("00:00:12,701"), t("00:00:14,203"), "B"),
24 |             srt.Subtitle(4, t("00:00:14,500"), t("00:00:19,738"), "C"),
25 |             srt.Subtitle(5, t("00:00:16,538"), t("00:00:17,272"), "D"),
26 |             srt.Subtitle(6, t("00:00:17,272"), t("00:00:18,440"), "E"),
27 |         ]
28 |         self.assertEqual(list(result), a)  # before: A becomes two subtitles meeting at 12,000
29 | 
30 |         result = split(self.subs(), t("00:00:16,538"))
31 |         a = [
32 |             srt.Subtitle(1, t("00:00:11,000"), t("00:00:12,701"), "A"),
33 |             srt.Subtitle(2, t("00:00:12,701"), t("00:00:14,203"), "B"),
34 |             srt.Subtitle(3, t("00:00:14,500"), t("00:00:16,538"), "C"),
35 |             srt.Subtitle(4, t("00:00:16,538"), t("00:00:17,272"), "D"),
36 |             srt.Subtitle(5, t("00:00:16,538"), t("00:00:19,738"), "C"),
37 |             srt.Subtitle(6, t("00:00:17,272"), t("00:00:18,440"), "E"),
38 |         ]
39 |         self.assertEqual(list(result), a)  # middle: only C is split; D starts at 16,538 and is unchanged
40 | 
41 |         result = split(self.subs(), t("00:00:17,500"))
42 |         a = [
43 |             srt.Subtitle(1, t("00:00:11,000"), t("00:00:12,701"), "A"),
44 |             srt.Subtitle(2, t("00:00:12,701"), t("00:00:14,203"), "B"),
45 |             srt.Subtitle(3, t("00:00:14,500"), t("00:00:17,500"), "C"),
46 |             srt.Subtitle(4, t("00:00:16,538"), t("00:00:17,272"), "D"),
47 |             srt.Subtitle(5, t("00:00:17,272"), t("00:00:17,500"), "E"),
48 |             srt.Subtitle(6, t("00:00:17,500"), t("00:00:18,440"), "E"),
49 |             srt.Subtitle(7, t("00:00:17,500"), t("00:00:19,738"), "C"),
50 |         ]
51 |         self.assertEqual(list(result), a)  # append: C and E are both split at 17,500
52 | 
53 | 
54 | if __name__ == "__main__":
55 |     unittest.main()
56 | 


--------------------------------------------------------------------------------
/tox.ini:
--------------------------------------------------------------------------------
 1 | [tox]
 2 | envlist = py
 3 | 
 4 | [testenv]
 5 | deps =
 6 |     -rtests/requirements.txt
 7 | commands =
 8 |     {basepython} --version
 9 |     pytest
10 | setenv=
11 |     release: HYPOTHESIS_PROFILE=release
12 | 
13 | [testenv:doctest]
14 | deps =
15 |     {[testenv]deps}
16 | commands =
17 |     pytest --doctest-modules
18 | 
19 | [testenv:coverage]
20 | passenv = CODECOV_TOKEN
21 | deps =
22 |     {[testenv]deps}
23 |     codecov
24 | commands =
25 |     pytest --cov=srt.srt --cov-branch --cov-fail-under=100 --cov-report xml
26 |     codecov -e CODECOV_TOKEN
27 | 
28 | [testenv:bandit]
29 | skipsdist = True
30 | deps =
31 |     {[testenv]deps}
32 |     bandit
33 | commands =
34 |     bandit -r srt
35 | 
36 | [testenv:black]
37 | skipsdist = True
38 | whitelist_externals = sh
39 | deps =
40 |     black
41 | commands =
42 |     black --check .
43 | 
44 | [testenv:pylint]
45 | skipsdist = True
46 | deps =
47 |     {[testenv]deps}
48 |     pylint
49 | commands =
50 |     # R0913 (too-many-arguments): the long signatures are intentional design decisions from srt1.
51 |     pylint --disable=R0913 srt/srt.py
52 | 


--------------------------------------------------------------------------------