├── .github
└── workflows
│ └── python-publish.yml
├── .gitignore
├── .travis.yml
├── LICENSE
├── README.md
├── setup.py
├── subtitle_filter
├── __init__.py
├── bin
│ └── filter-subtitles.py
└── libs
│ ├── __init__.py
│ └── subtitle.py
└── tests
├── __init__.py
├── data
├── apostrphone_in_name_after.srt
├── apostrphone_in_name_before.srt
├── hour_in_dialogue_after.srt
├── hour_in_dialogue_before.srt
├── subtitle_angle_brackets_after.srt
├── subtitle_angle_brackets_before.srt
├── subtitle_author_after.srt
├── subtitle_author_before.srt
├── subtitle_bom_after.srt
├── subtitle_bom_before.srt
├── subtitle_commas_after.srt
├── subtitle_commas_before.srt
├── subtitle_example_after.srt
├── subtitle_example_before.srt
├── subtitle_font_after.srt
├── subtitle_font_before.srt
├── subtitle_italics_after.srt
├── subtitle_italics_before.srt
├── subtitle_music_after.srt
├── subtitle_music_before.srt
├── subtitle_names_after.srt
├── subtitle_names_before.srt
├── subtitle_sound_effects_after.srt
├── subtitle_sound_effects_before.srt
├── subtitle_space_parsing_after.srt
├── subtitle_space_parsing_before.srt
├── subtitle_symbols_after.srt
└── subtitle_symbols_before.srt
├── io_test.py
└── subtitles_test.py
/.github/workflows/python-publish.yml:
--------------------------------------------------------------------------------
1 | # This workflow will upload a Python Package using Twine when a release is created
2 | # For more information see: https://help.github.com/en/actions/language-and-framework-guides/using-python-with-github-actions#publishing-to-package-registries
3 |
4 | name: Upload Python Package
5 |
6 | on:
7 | release:
8 | types: [created]
9 |
10 | jobs:
11 | build_and_deploy:
12 | runs-on: ubuntu-latest
13 |
14 | steps:
15 | - name: Checkout repository
16 | uses: actions/checkout@v2
17 |
18 | - name: Set up Python
19 | uses: actions/setup-python@v2
20 | with:
21 | python-version: 3.x
22 |
23 | - name: Install dependencies
24 | run: |
25 | python -m pip install --upgrade pip
26 | pip install setuptools wheel twine
27 |
28 | - name: Build package
29 | run: python setup.py sdist bdist_wheel
30 |
31 | - name: Publish package to PyPI
32 | uses: pypa/gh-action-pypi-publish@v1.4.2
33 | with:
34 | user: __token__
35 | password: ${{ secrets.PYPI_API_TOKEN }}
36 |
--------------------------------------------------------------------------------
/.gitignore:
--------------------------------------------------------------------------------
1 | # Byte-compiled / optimized / DLL files
2 | __pycache__/
3 | *.py[cod]
4 | *$py.class
5 |
6 | # C extensions
7 | *.so
8 |
9 | # Distribution / packaging
10 | .Python
11 | build/
12 | develop-eggs/
13 | dist/
14 | downloads/
15 | eggs/
16 | .eggs/
17 | lib/
18 | lib64/
19 | parts/
20 | sdist/
21 | var/
22 | wheels/
23 | pip-wheel-metadata/
24 | share/python-wheels/
25 | *.egg-info/
26 | .installed.cfg
27 | *.egg
28 | MANIFEST
29 |
30 | # PyInstaller
31 | # Usually these files are written by a python script from a template
32 | # before PyInstaller builds the exe, so as to inject date/other infos into it.
33 | *.manifest
34 | *.spec
35 |
36 | # Installer logs
37 | pip-log.txt
38 | pip-delete-this-directory.txt
39 |
40 | # Unit test / coverage reports
41 | htmlcov/
42 | .tox/
43 | .nox/
44 | .coverage
45 | .coverage.*
46 | .cache
47 | nosetests.xml
48 | coverage.xml
49 | *.cover
50 | *.py,cover
51 | .hypothesis/
52 | .pytest_cache/
53 |
54 | # Translations
55 | *.mo
56 | *.pot
57 |
58 | # Django stuff:
59 | *.log
60 | local_settings.py
61 | db.sqlite3
62 | db.sqlite3-journal
63 |
64 | # Flask stuff:
65 | instance/
66 | .webassets-cache
67 |
68 | # Scrapy stuff:
69 | .scrapy
70 |
71 | # Sphinx documentation
72 | docs/_build/
73 |
74 | # PyBuilder
75 | target/
76 |
77 | # Jupyter Notebook
78 | .ipynb_checkpoints
79 |
80 | # IPython
81 | profile_default/
82 | ipython_config.py
83 |
84 | # pyenv
85 | .python-version
86 |
87 | # pipenv
88 | # According to pypa/pipenv#598, it is recommended to include Pipfile.lock in version control.
89 | # However, in case of collaboration, if having platform-specific dependencies or dependencies
90 | # having no cross-platform support, pipenv may install dependencies that don't work, or not
91 | # install all needed dependencies.
92 | #Pipfile.lock
93 |
94 | # PEP 582; used by e.g. github.com/David-OConnor/pyflow
95 | __pypackages__/
96 |
97 | # Celery stuff
98 | celerybeat-schedule
99 | celerybeat.pid
100 |
101 | # SageMath parsed files
102 | *.sage.py
103 |
104 | # Environments
105 | .env
106 | .venv
107 | env/
108 | venv/
109 | ENV/
110 | env.bak/
111 | venv.bak/
112 |
113 | # Spyder project settings
114 | .spyderproject
115 | .spyproject
116 |
117 | # Rope project settings
118 | .ropeproject
119 |
120 | # mkdocs documentation
121 | /site
122 |
123 | # mypy
124 | .mypy_cache/
125 | .dmypy.json
126 | dmypy.json
127 |
128 | # Pyre type checker
129 | .pyre/
130 |
131 | # Visual Studio Code
132 | .vscode
133 |
134 | # pylint
135 | .pylintrc
136 |
137 | # OS X
138 | *.DS_Store
--------------------------------------------------------------------------------
/.travis.yml:
--------------------------------------------------------------------------------
1 | language: python
2 | python:
3 | - "3.5"
4 | - "3.6"
5 | - "3.7"
6 | - "3.8"
7 |
8 | # command to install dependencies
9 | install:
10 | - pip install coverage coveralls .
11 | # command to run tests
12 | script: "coverage run -m unittest discover tests/ '*_test.py'"
13 | after_success: "coveralls"
14 |
--------------------------------------------------------------------------------
/LICENSE:
--------------------------------------------------------------------------------
1 | MIT License
2 |
3 | Copyright (c) 2020 Matt
4 |
5 | Permission is hereby granted, free of charge, to any person obtaining a copy
6 | of this software and associated documentation files (the "Software"), to deal
7 | in the Software without restriction, including without limitation the rights
8 | to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
9 | copies of the Software, and to permit persons to whom the Software is
10 | furnished to do so, subject to the following conditions:
11 |
12 | The above copyright notice and this permission notice shall be included in all
13 | copies or substantial portions of the Software.
14 |
15 | THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
16 | IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
17 | FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
18 | AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
19 | LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
20 | OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
21 | SOFTWARE.
22 |
--------------------------------------------------------------------------------
/README.md:
--------------------------------------------------------------------------------
1 | # subtitle-filter
2 |
3 | [![PyPI version](https://badge.fury.io/py/subtitle-filter.svg)](https://badge.fury.io/py/subtitle-filter)
4 |
5 | Filter `.srt` subtitle files to remove SDH (Deaf or Hard-of-Hearing) entries and other tags.
6 |
7 | ## Installation
8 |
9 | ```bash
10 | pip install subtitle-filter
11 | ```
12 |
13 | ## Usage
14 |
15 | `subtitle-filter` can be used either as a script or a module.
16 |
17 | By default, this module filters the following (in order):
18 |
19 | 1. Removes font tags and text contained within, e.g. `<font color="#ffffff">Hey</font>` is removed.
20 | 2. Removes subtitle entries containing only asterisks: `*`.
21 | 3. Removes subtitle lines containing `♪` or `#`.
22 | 4. Removes sound effects: text contained within and including parentheses `(BANG)`, asterisks `*whisper*`, brackets `[boom]`, and text contained within forward slashes `/POW/`.
23 | 5. Replaces names with dashes, e.g. `GARY: Hey` or `Gary: Hey` to `- Hey`.
24 | 6. Removes author tags e.g. `XoXo Subtitles by PwnedDude967 XoXo`.
25 | 7. Fixes erroneous comma spacing, e.g. `Hey , what's up? Nothing,my man` to `Hey, what's up? Nothing, my man`.
26 | 8. Removes lone symbols such as `?`, `-`, `#`, `_`.
27 | 9. Remove leading dashes `-` if there is only one line present.
28 |
29 | ### Script Usage
30 |
31 | Bring up the help display:
32 |
33 | ```bash
34 | filter-subtitles.py -h
35 | ```
36 |
37 | Filter a subtitle in place (overwrites original subtitle) with default options.
38 |
39 | ```bash
40 | filter-subtitles.py -s /path/to/sub.srt
41 | ```
42 |
43 | Instead of saving to disk, print the output.
44 |
45 | ```bash
46 | filter-subtitles.py -s /path/to/sub.srt -p
47 | ```
48 |
49 | Save the output to a different filepath.
50 |
51 | ```bash
52 | filter-subtitles.py -s /path/to/sub.srt -o /path/to/outsub.srt
53 | ```
54 |
55 | Custom filter flags.
56 |
57 | ```
58 | --keep-fonts Do not remove font tags and text contained within.
59 | --keep-ast Do not remove subtitles containing asterisks: (*).
60 | --keep-music Do not remove lines containing 1 or more "♪" symbols.
61 | --keep-effects Do not remove text between and including parenthesis
62 | () or brackets []
63 | --keep-names Do not replace names in CAPITALS with "-" tags
64 | --keep-author Do not remove author tags, eg. Subtitles by some guy.
65 | --keep-lone-dashes Do not remove the dash if only one dashed line is present.
66 | --keep-commas Do not fix comma spacings.
67 | ```
68 |
69 | ### Module Usage
70 |
71 | Filter a subtitle in place (overwrites original subtitle) with default options
72 |
73 | ```python
74 | from subtitle_filter import Subtitles
75 |
76 | subs = Subtitles('/path/to/sub.srt')
77 | subs.filter()
78 | subs.save()
79 | ```
80 |
81 | Instead of saving to disk, print the output.
82 |
83 | ```python
84 | subs.print()
85 | ```
86 |
87 | Save the output to a different filepath.
88 |
89 | ```python
90 | subs.save('/path/to/newsub.srt')
91 | ```
92 |
93 | Use custom filter flags.
94 |
95 | ```python
96 | subs.filter(
97 | rm_fonts=True,
98 | rm_ast=False,
99 | rm_music=True,
100 | rm_effects=True,
101 | rm_names=False,
102 | rm_author=False,
103 | )
104 | ```
105 |
106 | ### Issues & Requests
107 |
108 | If you spot any issues with the filtered subtitles, or would like to request new features, please create an issue on GitHub and provide examples.
109 |
--------------------------------------------------------------------------------
/setup.py:
--------------------------------------------------------------------------------
#!/usr/bin/env python3
'''setup.py, use this to install the module and its command line script'''
from os import path
from setuptools import setup

version = '1.5.0'
this_dir = path.abspath(path.dirname(__file__))
# The README is reused verbatim as the package's long description.
with open(path.join(this_dir, 'README.md'), encoding='utf-8') as f:
    long_description = f.read()

setup(
    name='subtitle-filter',
    version=version,
    description='Filter SDH entries and more from .srt files',
    author='Matt Lyon',
    author_email='matthewlyon18@gmail.com',
    url='https://github.com/mattlyon93/filter-subs',
    download_url='https://github.com/mattlyon93/filter-subs/archive/v{}.tar.gz'.format(version),
    long_description=long_description,
    long_description_content_type='text/markdown',
    python_requires='>=3.5',
    license='MIT License',
    # Entries are dotted package names, not filesystem paths.
    packages=['subtitle_filter', 'subtitle_filter.libs'],
    classifiers=[
        'Programming Language :: Python',
        'Operating System :: Unix',
        'Operating System :: MacOS',
        'Topic :: Text Processing :: Filters',
        'Topic :: Multimedia :: Sound/Audio :: Speech',
    ],
    keywords=['subtitle', 'SDH', 'hard-of-hearing', 'filter', 'movie', 'tv'],
    # Installed onto the user's PATH as an executable script.
    scripts=['subtitle_filter/bin/filter-subtitles.py'],
)
34 |
--------------------------------------------------------------------------------
/subtitle_filter/__init__.py:
--------------------------------------------------------------------------------
1 | from subtitle_filter.libs.subtitle import Subtitles
2 |
--------------------------------------------------------------------------------
/subtitle_filter/bin/filter-subtitles.py:
--------------------------------------------------------------------------------
1 | #!/usr/bin/env python3
2 | '''Script to Filter SDH tags from subtitles'''
3 |
4 | import argparse
5 |
6 | from subtitle_filter.libs.subtitle import Subtitles
7 |
8 |
def run(args):
    '''Filter the subtitle file described by the parsed arguments.

    ``args`` supplies the input path (``sub_fpath``), one boolean per filter
    option, ``print`` to write the result to stdout instead of disk, and
    ``out_fpath`` as the save destination.
    '''
    option_names = (
        'rm_fonts',
        'rm_ast',
        'rm_music',
        'rm_effects',
        'rm_names',
        'rm_author',
        'rm_lone_dashes',
        'fix_commas',
    )

    subtitles = Subtitles(args.sub_fpath)
    filter_options = {name: getattr(args, name) for name in option_names}
    subtitles.filter(**filter_options)

    if not args.print:
        subtitles.save(new_filepath=args.out_fpath)
        return

    subtitles.print()
28 |
29 |
if __name__ == '__main__':
    ap = argparse.ArgumentParser(
        description='Filter subtitles to remove various SDH (Deaf or Hard-of-Hearing) tags.'
    )

    # Input and output selection.
    ap.add_argument(
        '-s', '--subtitle', dest='sub_fpath', type=str, required=True,
        help='Subtitle file to filter',
    )
    ap.add_argument(
        '-o', '--output', dest='out_fpath', type=str, default=None,
        help='Path to save filtered subtitle, omit to save inplace',
    )
    ap.add_argument(
        '-p', '--print-only', dest='print', action='store_true', default=False,
        help='Print output subtitles instead of saving to disk.',
    )

    # Every filter is enabled by default; each --keep-* switch disables one.
    keep_switches = (
        ('--keep-fonts', 'rm_fonts', 'Do not remove font tags from subtitles.'),
        ('--keep-ast', 'rm_ast', 'Do not remove subtitles containing asterisks: (*).'),
        (
            '--keep-music',
            'rm_music',
            'Do not remove "♪" symbols and text contained within two "♪" symbols.',
        ),
        (
            '--keep-effects',
            'rm_effects',
            'Do not remove text between and including parenthesis () or brackets []',
        ),
        ('--keep-names', 'rm_names', 'Do not replace names in CAPITALS with "-" tags'),
        ('--keep-author', 'rm_author', 'Do not remove author tags, eg. Subtitles by some guy.'),
        ('--keep-lone-dashes', 'rm_lone_dashes', 'Do not remove lone dashes from subtitles.'),
        ('--keep-commas', 'fix_commas', 'Do not fix comma spacings.'),
    )
    for switch, destination, description in keep_switches:
        ap.add_argument(
            switch, dest=destination, default=True, action='store_false', help=description
        )

    arguments = ap.parse_args()

    run(arguments)
119 |
--------------------------------------------------------------------------------
/subtitle_filter/libs/__init__.py:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/m-lyon/filter-subs/8f89122d8ea1b967a8ed2bb47e635d3df1e61acf/subtitle_filter/libs/__init__.py
--------------------------------------------------------------------------------
/subtitle_filter/libs/subtitle.py:
--------------------------------------------------------------------------------
1 | '''Module containing Subtitle and Subtitles classes'''
2 |
3 | import os
4 | import re
5 |
# Lower-case phrases that identify an author/credit entry. Subtitle.remove_author
# marks an entry for deletion when its lower-cased contents contain any of them.
AUTHOR_STRINGS = (
    'synced and corrected by',
    'sync and corrections by',
    'subtitles by',
    'encoded and released by',
    'opensubtitles.org',
    'please rate this subtitle',
    'captioning sponsored by',
    'captioned by',
)
16 |
17 |
def has_bom(filename):
    '''Tests whether the file starts with a byte order mark (UTF-8, UTF-16 or UTF-32)'''
    bom_markers = (
        b'\xef\xbb\xbf',
        b'\xff\xfe',
        b'\xfe\xff',
        b'\xff\xfe\x00\x00',
        b'\x00\x00\xfe\xff',
    )
    # The longest marker is four bytes, so that is all that needs reading.
    with open(filename, 'rb') as handle:
        head = handle.read(4)
    return any(head.startswith(marker) for marker in bom_markers)
25 |
26 |
class Subtitle:
    '''Subtitle contents object (individual subtitle entry).

    Holds an index, a start/end timestamp pair and the text contents. The
    filter methods edit the contents in place; an entry left with nothing
    worth keeping gets its index set to 0 so that it can be deleted later.
    '''

    # Zero or more characters that carry no dialogue when they stand alone.
    _SYMBOLS = r'[\_\-\‐\?#\s¶]*'
    # (opening, closing) delimiter pairs that enclose a sound effect.
    _EFFECT_DELIMITERS = (('(', ')'), ('[', ']'), ('/', '/'), ('*', '*'))

    def __init__(self):
        self._index = None
        self._contents = ''
        self.start = None
        self.end = None

    def __str__(self):
        return '{}\n{} --> {}\n{}\n'.format(self._index, self.start, self.end, self._contents)

    def __eq__(self, other):
        # Two entries are equal when they render to the same text.
        return str(self) == str(other)

    def _contents_to_list(self):
        if isinstance(self._contents, str):
            self._contents = self._contents.split('\n')

    def _contents_to_str(self):
        if isinstance(self._contents, list):
            self._contents = '\n'.join(self._contents)

    @property
    def index(self):
        '''Returns the index number for subtitle, or False if index is not assigned'''
        if self._index is None:
            return False
        return self._index

    @index.setter
    def index(self, index):
        self._index = int(index)

    @property
    def contents(self):
        '''Returns the contents lines for the subtitle'''
        return self._contents

    @contents.setter
    def contents(self, item):
        # Assignment appends: each assigned item becomes a further line.
        if self._contents:
            self._contents += '\n{}'.format(item)
        else:
            self._contents = '{}'.format(item)

    def _filter_empty(self):
        '''Sets the index to 0 when the contents are empty, marking the entry for deletion'''
        if not self.contents:
            self.index = 0

    @property
    def lines(self):
        '''Subtitle entry as a newline separated list'''
        return [
            str(self._index),
            '{} --> {}'.format(self.start, self.end),
            *self._contents.split('\n'),
        ]

    def fix_comma_spaces(self):
        '''Fixes comma spacing: "word ," becomes "word," and "a,b" becomes "a, b"'''
        # Drop the spaces sitting directly before a comma that follows a word.
        self._contents = re.sub(r'([A-Za-z]\s*?) +,', r'\1,', self._contents)
        # Add the missing space after a comma squeezed between two letters. The
        # lookarounds consume no letters, so chains such as "a,b,c" are fixed
        # completely in a single pass.
        self._contents = re.sub(r'(?<=[A-Za-z]),(?=[A-Za-z])', ', ', self._contents)

    def remove_font_colours(self):
        '''Removes <font> tags, and the text they enclose, from contents

        NOTE(review): the tag literals in this pattern were reconstructed from
        the documented behaviour; confirm against the upstream project.
        '''
        self._contents = re.sub(
            r'<font(.*)>(.*)</font>', '', self._contents, flags=re.DOTALL
        )
        self._filter_empty()

    def remove_music(self):
        '''Removes music symbols from contents'''
        # Remove music symbols behaving as parenthesis
        self._contents = re.sub(r'♪(.*)♪', '', self._contents, flags=re.DOTALL)
        # Blank out any remaining line that still carries a music marker
        self._contents_to_list()
        self._contents = [
            '' if ('#' in line or '♪' in line) else line for line in self._contents
        ]
        self._contents_to_str()
        self._filter_empty()

    def remove_sound_effects(self):
        '''Removes text in between parenthesis, brackets, asterisks and forward slashes'''
        # Single line effects. Each delimiter pair gets its own pattern because
        # one combined expression misfires on the forward slash of an italics
        # closing tag when mixed with brackets or parenthesis.
        self._contents_to_list()
        for prefix, suffix in self._EFFECT_DELIMITERS:
            pattern = re.compile(
                '[{}][\\S ]*[{}][\\s:]*'.format(re.escape(prefix), re.escape(suffix))
            )
            self._contents = [pattern.sub('', line) for line in self._contents]
        self._remove_lone_symbols()
        self._contents_to_str()
        # Effects spanning several lines
        self._contents = re.sub(r'[\*\(\[][\S\s]*[\*\)\]][\s:]*', '', self._contents)
        self._filter_empty()

    def replace_names(self):
        '''Removes speaker names; uses "- " instead when several speakers are present'''
        # Care is taken here to preserve genuine sentences with a colon.
        name_regex = r"((?=.*[A-Z])[A-Z0-9 ][A-Z0-9' ]*: *|[A-Z]{1}[a-zA-Z ]+ *: *|^[A-Za-z]+: *)"
        names = re.findall(name_regex, self._contents)
        # Dialogue from different people is preceded with a dash.
        replacement = '- ' if len(names) > 1 else ''

        def replace_if_not_hour(match):
            start, end = match.span(0)
            # Look two characters past the match so "10:" can be seen as "10:00".
            candidate = match.string[start:end + 2].strip()
            # partition() tolerates a second colon in the look-ahead window
            # (e.g. "TOM: :)"), where unpacking split(':') would raise.
            lhs, _, rhs = candidate.partition(':')
            is_hour = bool(lhs) and bool(rhs) and len(lhs) <= 2 and (lhs + rhs).isnumeric()
            return match.group(0) if is_hour else replacement

        self._contents = re.sub(name_regex, replace_if_not_hour, self._contents).lstrip()
        self._filter_empty()

    def remove_author(self):
        '''Marks "Subtitles by" style entries for deletion'''
        lowered = self._contents.lower()
        if any(author_str in lowered for author_str in AUTHOR_STRINGS):
            self.index = 0

    def remove_asterisks(self):
        '''Empties the contents if they consist only of asterisks and/or whitespace'''
        self._contents = re.sub(r'^[\*\s]*$', '', self._contents)
        self._filter_empty()

    def fix_italics(self):
        '''Fixes lone <i> or </i> tags, removes empty <i></i> pairs and lone symbols

        NOTE(review): the italics tag literals here and in _remove_lone_symbols
        were reconstructed from the documented behaviour; confirm against the
        upstream project.
        '''
        if '<i>' in self._contents and '</i>' not in self._contents:
            self._contents += '</i>'
        if '</i>' in self._contents and '<i>' not in self._contents:
            self._contents = '<i>' + self._contents
        self._contents = re.sub(
            '<i>' + self._SYMBOLS + '</i>', '', self._contents, flags=re.DOTALL
        )
        self._remove_lone_symbols()

    def _remove_lone_symbols(self):
        '''Drops lines holding only symbols/whitespace, optionally around one italics tag'''
        symbols = self._SYMBOLS
        line_patterns = (
            '^' + symbols + '$',
            '^' + symbols + '<i>' + symbols + '$',
            '^' + symbols + '</i>' + symbols + '$',
        )
        self._contents_to_list()
        kept = []
        for line in self._contents:
            for pattern in line_patterns:
                line = re.sub(pattern, '', line)
            if line:
                kept.append(line)
        self._contents = kept
        # Set index as 0 for later deletion
        if not kept:
            self.index = 0
        self._contents_to_str()

    def remove_single_dash(self):
        '''Removes the leading dash when the contents are a single line'''
        if re.match(r'^[^\n]*$', self._contents):
            self._contents = re.sub(r'(?m)^\s*-\s*(.*)$', r'\1', self._contents)
        self._filter_empty()
217 |
218 |
class Subtitles:
    '''Content filtering object for a subtitles file.

    Reads the file into a list of Subtitle entries (``self.subtitles``), which
    can then be filtered, printed and saved.
    '''

    EXTENSIONS = ['.srt']

    def __init__(self, fpath):
        '''Loads and parses the subtitle file at ``fpath``.

        Raises IOError if the path does not exist, is not a file, or does not
        have a supported extension.
        '''
        if not os.path.exists(fpath):
            raise IOError('{} does not exist'.format(fpath))
        if not os.path.isfile(fpath):
            raise IOError('{} is not a file'.format(fpath))
        self._fullpath = fpath
        if self.ext not in self.EXTENSIONS:
            raise IOError('{} is not valid subtitle file: {}'.format(self._fullpath, self.ext))
        self._line_list = self._get_line_list()
        self.subtitles = self._parse_subs()

    def __repr__(self):
        return "".join(map(str, self.subtitles))

    def __eq__(self, other):
        # Defer to the other operand for foreign types rather than failing on
        # a missing ``subtitles`` attribute.
        if not isinstance(other, Subtitles):
            return NotImplemented
        return self.subtitles == other.subtitles

    @property
    def filepath(self):
        '''Filepath of mediafile'''
        return self._fullpath

    @property
    def ext(self):
        '''Extension of mediafile'''
        _, ext = os.path.splitext(self._fullpath)
        return ext

    def _get_line_list(self):
        '''Reads the file and returns its lines with trailing whitespace removed'''
        # 'utf-8-sig' strips a leading byte order mark while decoding.
        encoding = 'utf-8-sig' if has_bom(self.filepath) else 'utf-8'
        with open(self.filepath, 'r', encoding=encoding) as fdata:
            return [line.rstrip() for line in fdata]

    def _parse_subs(self):
        '''Builds the list of Subtitle entries from the loaded lines'''
        sub_list = [Subtitle()]
        for line in self._line_list:
            current = sub_list[-1]
            if not current.index:
                # Still waiting for the index line; skip anything non-numeric.
                try:
                    current.index = int(line)
                except ValueError:
                    continue
            elif current.start is None:
                # Time line. Splitting once keeps a malformed line with a
                # second arrow from raising on unpacking.
                if ' --> ' in line:
                    current.start, current.end = line.split(' --> ', 1)
            elif not line:
                # A blank line ends the entry and opens the next one.
                sub_list.append(Subtitle())
            else:
                current.contents = line
        return sub_list

    def filter(self, **kw):
        '''Filters subtitles to remove SDH items.

        Each keyword defaults to True and enables one step: rm_fonts, rm_ast,
        rm_music, rm_effects, rm_names, rm_author, fix_commas, rm_lone_dashes.
        Italics tags are always repaired. Entries marked for deletion are then
        dropped and the remaining ones renumbered from 1.
        '''
        steps = (
            ('rm_fonts', 'remove_font_colours'),
            ('rm_ast', 'remove_asterisks'),
            ('rm_music', 'remove_music'),
            ('rm_effects', 'remove_sound_effects'),
            ('rm_names', 'replace_names'),
            ('rm_author', 'remove_author'),
            ('fix_commas', 'fix_comma_spaces'),
            ('rm_lone_dashes', 'remove_single_dash'),
        )
        # Plain loops guarantee every entry is visited regardless of what the
        # filter methods return.
        for option, method_name in steps:
            if kw.get(option, True):
                for sub in self.subtitles:
                    getattr(sub, method_name)()
        for sub in self.subtitles:
            sub.fix_italics()
        # Remove filtered items from list
        self.subtitles[:] = [sub for sub in self.subtitles if sub.index]
        # Reassign indices
        for idx, sub in enumerate(self.subtitles, start=1):
            sub.index = idx

    def print(self):
        '''Prints all subtitle entries'''
        for sub in self.subtitles:
            print(sub)

    def save(self, new_filepath=None):
        '''Saves subtitle object to disk, omit new_filepath to save inplace.

        When new_filepath is given it also becomes the object's filepath.
        '''
        if new_filepath is not None:
            self._fullpath = new_filepath
        with open(self._fullpath, 'w', encoding='utf-8') as fp:
            for sub in self.subtitles:
                fp.write(str(sub) + '\n')
330 |
--------------------------------------------------------------------------------
/tests/__init__.py:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/m-lyon/filter-subs/8f89122d8ea1b967a8ed2bb47e635d3df1e61acf/tests/__init__.py
--------------------------------------------------------------------------------
/tests/data/apostrphone_in_name_after.srt:
--------------------------------------------------------------------------------
1 | 1
2 | 00:10:50,567 --> 00:10:52,569
3 | Yo!
4 |
--------------------------------------------------------------------------------
/tests/data/apostrphone_in_name_before.srt:
--------------------------------------------------------------------------------
1 | 1
2 | 00:10:50,567 --> 00:10:52,569
3 | MOTHER'S MILK:
4 | Yo!
5 |
--------------------------------------------------------------------------------
/tests/data/hour_in_dialogue_after.srt:
--------------------------------------------------------------------------------
1 | 1
2 | 00:45:52,000 --> 00:45:55,295
3 | Just be in Stillwell's office
4 | at 10:00.
5 |
--------------------------------------------------------------------------------
/tests/data/hour_in_dialogue_before.srt:
--------------------------------------------------------------------------------
1 | 1
2 | 00:45:52,000 --> 00:45:55,295
3 | Just be in Stillwell's office
4 | at 10:00.
5 |
--------------------------------------------------------------------------------
/tests/data/subtitle_angle_brackets_after.srt:
--------------------------------------------------------------------------------
1 | 1
2 | 00:01:40,723 --> 00:01:42,707
3 | This stays <\i>
4 |
5 | 2
6 | 00:00:57,088 --> 00:00:58,788
7 | This goes.
8 |
9 | 3
10 | 00:45:39,418 --> 00:45:41,084
11 | something something <\i>
12 |
13 | 4
14 | 00:20:36,309 --> 00:20:39,277
15 | - something some
16 | Listen here.<\i>
17 |
18 | 5
19 | 00:11:31,995 --> 00:11:35,346
20 | something
21 | Listen here.<\i>
--------------------------------------------------------------------------------
/tests/data/subtitle_angle_brackets_before.srt:
--------------------------------------------------------------------------------
1 | 50
2 | 00:01:40,723 --> 00:01:42,707
3 | This stays <\i>
4 |
5 | 28
6 | 00:00:57,088 --> 00:00:58,788
7 | This goes.
8 |
9 | 740
10 | 00:45:39,418 --> 00:45:41,084
11 | something something <\i>
12 |
13 | 398
14 | 00:20:36,309 --> 00:20:39,277
15 | - something some
16 | Listen here.<\i>
17 |
18 | 262
19 | 00:11:31,995 --> 00:11:35,346
20 | something
21 | Listen here.<\i>
--------------------------------------------------------------------------------
/tests/data/subtitle_author_after.srt:
--------------------------------------------------------------------------------
1 | 1
2 | 00:02:18,389 --> 00:02:19,929
3 | Hey, darling.
4 |
--------------------------------------------------------------------------------
/tests/data/subtitle_author_before.srt:
--------------------------------------------------------------------------------
1 | 2
2 | 00:00:15,142 --> 00:00:25,142
3 | � ENCODED AND RELEASED BY Sharpysword ?
4 |
5 | 3
6 | 00:02:18,389 --> 00:02:19,929
7 | Hey, darling.
8 |
9 | 3
10 | 00:00:00,062 --> 00:00:02,507
11 | Subtitles by explosiveskull
12 | www.OpenSubtitles.org
13 |
14 | 3
15 | 00:00:00,062 --> 00:00:02,507
16 | Synced and Corrected by Your Mum.
17 |
18 | 976
19 | 00:42:12,094 --> 00:42:15,054
20 | Captioning sponsored by
21 | CBS
22 |
23 | 978
24 | 00:42:18,884 --> 00:42:21,060
25 | Captioned by
26 | Media Access Group at WGBH
27 | access.wgbh.org
--------------------------------------------------------------------------------
/tests/data/subtitle_bom_after.srt:
--------------------------------------------------------------------------------
1 | 1
2 | 00:01:21,999 --> 00:01:23,292
3 | Gosh.
4 |
5 | 2
6 | 00:02:03,666 --> 00:02:05,459
7 | How long until we get to his estate?
8 |
--------------------------------------------------------------------------------
/tests/data/subtitle_bom_before.srt:
--------------------------------------------------------------------------------
1 | 1
2 | 00:01:21,999 --> 00:01:23,292
3 | Gosh.
4 |
5 | 2
6 | 00:02:03,666 --> 00:02:05,459
7 | How long until we get to his estate?
8 |
--------------------------------------------------------------------------------
/tests/data/subtitle_commas_after.srt:
--------------------------------------------------------------------------------
1 | 1
2 | 00:01:42,460 --> 00:01:47,420
3 | By decree, all persons
4 | found guilty of piracy,
5 |
6 | 2
7 | 00:01:11,600 --> 00:01:14,730
8 | duly appointed representative
9 | of His Majesty, the king.
--------------------------------------------------------------------------------
/tests/data/subtitle_commas_before.srt:
--------------------------------------------------------------------------------
1 | 12
2 | 00:01:42,460 --> 00:01:47,420
3 | By decree,all persons
4 | found guilty of piracy,
5 |
6 | 5
7 | 00:01:11,600 --> 00:01:14,730
8 | duly appointed representative
9 | of His Majesty , the king.
--------------------------------------------------------------------------------
/tests/data/subtitle_example_after.srt:
--------------------------------------------------------------------------------
1 | 1
2 | 00:01:40,723 --> 00:01:42,707
3 | - It's almost as if...
4 | - Yes?
5 |
6 | 2
7 | 00:01:40,723 --> 00:01:42,707
8 | Gooby pls...
9 |
10 | 3
11 | 00:01:42,709 --> 00:01:46,210
12 | Something's come alive.
13 |
14 | 4
15 | 00:01:42,709 --> 00:01:46,210
16 | Again.
17 |
18 | 5
19 | 00:02:08,501 --> 00:02:11,869
20 | Guh
21 |
22 | 6
23 | 00:02:22,048 --> 00:02:25,083
24 | Now wait a minute, Elliot.
25 |
26 | 7
27 | 00:02:45,238 --> 00:02:46,904
28 | Wait, wait, wait!
29 |
30 | 8
31 | 00:00:57,088 --> 00:00:58,788
32 | And not me?
33 |
--------------------------------------------------------------------------------
/tests/data/subtitle_example_before.srt:
--------------------------------------------------------------------------------
1 | 1
2 | 00:01:19,519 --> 00:01:26,557
3 | ♪ ♪
4 |
5 | 1
6 | 00:01:19,519 --> 00:01:26,557
7 | ♪
8 | ♪
9 |
10 | 2
11 | 00:01:40,723 --> 00:01:42,707
12 | ELLIOT: It's almost as if...
13 | JOE: Yes?
14 |
15 | 2
16 | 00:01:40,723 --> 00:01:42,707
17 | ELLIOT: Gooby pls...
18 |
19 | 3
20 | 00:01:42,709 --> 00:01:46,210
21 | (CHUCCCKLEEES) Something's come alive.
22 |
23 | 3
24 | 00:01:42,709 --> 00:01:46,210
25 | (CHUCKLES)
26 | Again.
27 |
28 | 4
29 | 00:02:08,501 --> 00:02:11,869
30 | (POPCORN MACHINE DOOR CLOSES) Guh
31 |
32 | 5
33 | 00:02:22,048 --> 00:02:25,083
34 | Now wait a minute, Elliot.
35 |
36 | 6
37 | 00:11:08,086 --> 00:11:15,984
38 | - Synced and corrected by VitoSilans -
39 | -- www.Addic7ed.com --
40 |
41 | 3
42 | 00:00:00,062 --> 00:00:02,507
43 | Subtitles by explosiveskull
44 | www.OpenSubtitles.org
45 |
46 | 6
47 | 00:02:45,238 --> 00:02:46,904
48 | - (GUN COCKS)
49 | - Wait, wait, wait!
50 |
51 | 46
52 | 00:01:43,719 --> 00:01:46,506
53 | - ♪ Now that the day is over ♪
54 | - [beeps]
55 |
56 | 28
57 | 00:00:57,088 --> 00:00:58,788
58 | - [gunshot]
59 | - [Shot] And not me?
60 |
61 | 740
62 | 00:45:39,418 --> 00:45:41,084
63 | [Barenaked Ladies' "One Week" plays]
64 |
65 | 398
66 | 00:20:36,309 --> 00:20:39,277
67 | [Roxette's "Listen to
68 | Your Heart" plays softly]
69 |
70 | 584
71 | 00:37:43,795 --> 00:37:47,899
72 | *
73 |
74 | 487
75 | 00:29:34,006 --> 00:29:37,944
76 | - * Send me an angel
77 | who flies from Montgomery *
78 |
79 |
--------------------------------------------------------------------------------
/tests/data/subtitle_font_after.srt:
--------------------------------------------------------------------------------
1 | 1
2 | 00:01:42,709 --> 00:01:46,210
3 | Something's come alive.
4 |
5 | 2
6 | 00:01:42,709 --> 00:01:46,210
7 | Again.
8 |
9 | 3
10 | 00:02:45,238 --> 00:02:46,904
11 | Wait, wait, wait!
12 |
--------------------------------------------------------------------------------
/tests/data/subtitle_font_before.srt:
--------------------------------------------------------------------------------
1 | 3
2 | 00:01:42,709 --> 00:01:46,210
3 | What Something's come alive.
4 |
5 | 5
6 | 00:01:42,709 --> 00:01:46,210
7 | (CHUCKLES)
8 | Again.
9 |
10 | 6
11 | 00:02:45,238 --> 00:02:46,904
12 | - (GUN COCKS)
13 | - Wait, wait, wait!
14 |
--------------------------------------------------------------------------------
/tests/data/subtitle_italics_after.srt:
--------------------------------------------------------------------------------
1 | 1
2 | 00:02:22,048 --> 00:02:25,083
3 | Now wait a minute, Elliot.
4 |
--------------------------------------------------------------------------------
/tests/data/subtitle_italics_before.srt:
--------------------------------------------------------------------------------
1 | 46
2 | 00:01:43,719 --> 00:01:46,506
3 | - ♪ Now that the day is over ♪
4 | - [beeps]
5 |
6 | 5
7 | 00:02:22,048 --> 00:02:25,083
8 | Now wait a minute, Elliot.
9 |
10 | 1
11 | 00:01:19,519 --> 00:01:26,557
12 | ♪
13 | ♪
14 |
--------------------------------------------------------------------------------
/tests/data/subtitle_music_after.srt:
--------------------------------------------------------------------------------
1 | 1
2 | 00:01:40,723 --> 00:01:42,707
3 | What's up dawg.
4 |
5 | 2
6 | 00:01:19,519 --> 00:01:26,557
7 | Is real
8 |
9 | 3
10 | 00:00:57,939 --> 00:01:01,636
11 | - They want to see the Ram Jam!
12 |
--------------------------------------------------------------------------------
/tests/data/subtitle_music_before.srt:
--------------------------------------------------------------------------------
1 | 1
2 | 00:01:19,519 --> 00:01:26,557
3 | ♪ ♪
4 |
5 | 2
6 | 00:01:19,519 --> 00:01:26,557
7 | ♪
8 | ♪
9 |
10 | 3
11 | 00:01:40,723 --> 00:01:42,707
12 | What's up dawg.
13 |
14 | 4
15 | 00:01:19,519 --> 00:01:26,557
16 | ♪ This sound
17 | Is real
18 |
19 | 5
20 | 00:01:19,519 --> 00:01:26,557
21 | ♪ This sound
22 | Is not ♪
23 |
24 | 6
25 | 00:00:57,939 --> 00:01:01,636
26 | - They want to see the Ram Jam!
27 | - # Well, I'm frustrated #
28 |
29 | 7
30 | 00:01:01,709 --> 00:01:04,007
31 | # And outdated #
32 |
33 | 8
34 | 00:01:33,125 --> 00:01:36,291
35 | ♪ 'Cause it sure looks to me
36 | Like them people
37 | Ain't playin'♪
38 |
39 |
--------------------------------------------------------------------------------
/tests/data/subtitle_names_after.srt:
--------------------------------------------------------------------------------
1 | 1
2 | 00:01:40,723 --> 00:01:42,707
3 | - It's almost as if...
4 | - Yes?
5 |
6 | 2
7 | 00:01:40,723 --> 00:01:42,707
8 | Gooby pls...
9 |
10 | 3
11 | 00:03:06,139 --> 00:03:07,639
12 | You're gonna need
13 | most of that
14 |
15 | 4
16 | 00:00:05,377 --> 00:00:06,378
17 | No.
18 |
19 | 5
20 | 00:03:32,296 --> 00:03:34,840
21 | Regular text.
22 |
23 | 6
24 | 00:00:55,296 --> 00:00:58,931
25 | Cop cuties,
26 | cute and on duty
27 |
28 | 7
29 | 00:07:40,362 --> 00:07:44,153
30 | Born in humble circumstances
31 | in Dundee, Scotland,
32 |
33 | 8
34 | 00:16:06,299 --> 00:16:08,802
35 | See? Hebrews 13:4.
36 |
--------------------------------------------------------------------------------
/tests/data/subtitle_names_before.srt:
--------------------------------------------------------------------------------
1 | 1
2 | 00:01:40,723 --> 00:01:42,707
3 | ELLIOT: It's almost as if...
4 | JOE: Yes?
5 |
6 | 2
7 | 00:01:40,723 --> 00:01:42,707
8 | ELLIOT: Gooby pls...
9 |
10 | 17
11 | 00:03:06,139 --> 00:03:07,639
12 | Bartender:
13 | You're gonna need
14 | most of that
15 |
16 | 5
17 | 00:00:05,377 --> 00:00:06,378
18 | THE FOOBAR: No.
19 |
20 | 39
21 | 00:03:32,296 --> 00:03:34,840
22 | FOO BAR 4: (IN ENGLISH)
23 | Regular text.
24 |
25 | 20
26 | 00:00:55,296 --> 00:00:58,931
27 | all: ♪ Cop cuties,
28 | cute and on duty ♪
29 |
30 | 20
31 | 00:00:55,296 --> 00:00:58,931
32 | all: Cop cuties,
33 | cute and on duty
34 |
35 | 152
36 | 00:07:40,362 --> 00:07:44,153
37 | Man on Video:
38 | Born in humble circumstances
39 | in Dundee, Scotland,
40 |
41 | 1
42 | 00:16:06,299 --> 00:16:08,802
43 | See? Hebrews 13:4.
--------------------------------------------------------------------------------
/tests/data/subtitle_sound_effects_after.srt:
--------------------------------------------------------------------------------
1 | 1
2 | 00:01:40,723 --> 00:01:42,707
3 | Examples are tough.
4 |
5 | 2
6 | 00:00:57,088 --> 00:00:58,788
7 | And not me?
8 |
9 | 3
10 | 00:11:31,995 --> 00:11:35,346
11 | I'm raising a teenage boy.
12 |
13 | 4
14 | 00:12:31,995 --> 00:12:35,346
15 | twice now.
16 |
17 | 5
18 | 00:15:31,995 --> 00:51:35,346
19 | Third time.
20 |
21 | 6
22 | 00:32:06,925 --> 00:32:09,057
23 | What do you got for me, Cabrera?
24 |
--------------------------------------------------------------------------------
/tests/data/subtitle_sound_effects_before.srt:
--------------------------------------------------------------------------------
1 | 50
2 | 00:01:40,723 --> 00:01:42,707
3 | Examples are tough.
4 |
5 | 28
6 | 00:00:57,088 --> 00:00:58,788
7 | - [gunshot]
8 | - [Shot] And not me?
9 |
10 | 740
11 | 00:45:39,418 --> 00:45:41,084
12 | [Barenaked Ladies' "One Week" plays]
13 |
14 | 398
15 | 00:20:36,309 --> 00:20:39,277
16 | [Roxette's "Listen to
17 | Your Heart" plays softly]
18 |
19 | 262
20 | 00:11:31,995 --> 00:11:35,346
21 | [chuckles softly]:
22 | I'm raising a teenage boy.
23 |
24 | 263
25 | 00:12:31,995 --> 00:12:35,346
26 | (chuckles softly):
27 | twice now.
28 |
29 | 398
30 | 00:20:36,309 --> 00:20:39,277
31 | /BOOM/
32 |
33 | 400
34 | 00:15:31,995 --> 00:51:35,346
35 | /here we go/:
36 | Third time.
37 |
38 | 768
39 | 00:32:06,925 --> 00:32:09,057
40 | *Whistles*
41 | What do you got for me, Cabrera?
42 |
--------------------------------------------------------------------------------
/tests/data/subtitle_space_parsing_after.srt:
--------------------------------------------------------------------------------
1 | 1
2 | 00:00:06,507 --> 00:00:08,467
3 | The world
4 | is broken.
5 |
6 | 2
7 | 00:00:10,386 --> 00:00:12,555
8 | men who were born
9 | with great power believed
10 |
--------------------------------------------------------------------------------
/tests/data/subtitle_space_parsing_before.srt:
--------------------------------------------------------------------------------
1 | 1
2 |
3 | 00:00:06,507 --> 00:00:08,467
4 | The world
5 | is broken.
6 |
7 |
8 | 2
9 | 00:00:10,386 --> 00:00:12,555
10 | men who were born
11 | with great power believed
12 |
--------------------------------------------------------------------------------
/tests/data/subtitle_symbols_after.srt:
--------------------------------------------------------------------------------
1 | 1
2 | 00:02:31,229 --> 00:02:33,939
3 | Morning, Maeve.
4 |
--------------------------------------------------------------------------------
/tests/data/subtitle_symbols_before.srt:
--------------------------------------------------------------------------------
1 | 1
2 | 00:00:05,118 --> 00:00:15,118
3 | ?? ## _
4 |
5 | 1
6 | 00:00:05,118 --> 00:00:15,118
7 | - ---
8 | - ¶
9 |
10 | 4
11 | 00:02:31,229 --> 00:02:33,939
12 | Morning, Maeve.
--------------------------------------------------------------------------------
/tests/io_test.py:
--------------------------------------------------------------------------------
1 | '''I/O test cases'''
2 | import copy
3 | import unittest
4 | import tempfile
5 |
6 | from os.path import join, dirname
7 |
8 | from subtitle_filter import Subtitles
9 |
10 | DATA_DIR = join(dirname(__file__), 'data')
11 |
class SubtitleFilterFontTestCase(unittest.TestCase):
    '''Round-trip check: filter a fixture, save it, reload it, compare.'''

    def setUp(self):
        fixture_path = join(DATA_DIR, 'subtitle_music_before.srt')
        self.subs_before = Subtitles(fixture_path)

    def test_subtitle_save(self):
        '''Subtitles written by save() reload equal to the loaded fixture.'''
        # Filter a deep copy so self.subs_before stays exactly as loaded.
        filtered = copy.deepcopy(self.subs_before)
        filtered.filter(rm_music=False)
        with tempfile.TemporaryDirectory() as tmp_dir:
            out_path = join(tmp_dir, 'test.srt')
            filtered.save(out_path)
            reloaded = Subtitles(out_path)
            # NOTE(review): the comparison is against the unfiltered fixture,
            # so this presumably relies on filter(rm_music=False) leaving this
            # fixture unchanged -- confirm. The class name also says "Font"
            # although the test exercises save().
            self.assertEqual(self.subs_before, reloaded)
--------------------------------------------------------------------------------
/tests/subtitles_test.py:
--------------------------------------------------------------------------------
1 | '''Subtitle test cases'''
2 |
3 | import unittest
4 |
5 | from os.path import join, dirname
6 |
7 | from subtitle_filter import Subtitles
8 |
9 | DATA_DIR = join(dirname(__file__), 'data')
10 |
11 |
class SubtitleFilterFontTestCase(unittest.TestCase):
    '''Filtering the "font" fixture must yield its expected counterpart.'''

    def setUp(self):
        # Raw input first, then the expected result of filtering it.
        before_path = join(DATA_DIR, 'subtitle_font_before.srt')
        after_path = join(DATA_DIR, 'subtitle_font_after.srt')
        self.subs_before = Subtitles(before_path)
        self.subs_after = Subtitles(after_path)

    def test_subtitle_font(self):
        '''filter() with default arguments turns "before" into "after".'''
        actual, expected = self.subs_before, self.subs_after
        actual.filter()
        self.assertEqual(actual, expected)
20 |
21 |
class SubtitleFilterMusicTestCase(unittest.TestCase):
    '''Filtering the "music" fixture must yield its expected counterpart.'''

    def setUp(self):
        # Raw input first, then the expected result of filtering it.
        before_path = join(DATA_DIR, 'subtitle_music_before.srt')
        after_path = join(DATA_DIR, 'subtitle_music_after.srt')
        self.subs_before = Subtitles(before_path)
        self.subs_after = Subtitles(after_path)

    def test_subtitle_music(self):
        '''filter() with default arguments turns "before" into "after".'''
        actual, expected = self.subs_before, self.subs_after
        actual.filter()
        self.assertEqual(actual, expected)
30 |
31 |
class SubtitleFilterSoundEffectsTestCase(unittest.TestCase):
    '''Filtering the "sound effects" fixture must yield its expected counterpart.'''

    def setUp(self):
        # Raw input first, then the expected result of filtering it.
        before_path = join(DATA_DIR, 'subtitle_sound_effects_before.srt')
        after_path = join(DATA_DIR, 'subtitle_sound_effects_after.srt')
        self.subs_before = Subtitles(before_path)
        self.subs_after = Subtitles(after_path)

    def test_subtitle_sound_effects(self):
        '''filter() with default arguments turns "before" into "after".'''
        actual, expected = self.subs_before, self.subs_after
        actual.filter()
        self.assertEqual(actual, expected)
40 |
41 |
class SubtitleFilterItalicsTestCase(unittest.TestCase):
    '''Filtering the "italics" fixture must yield its expected counterpart.'''

    def setUp(self):
        # Raw input first, then the expected result of filtering it.
        before_path = join(DATA_DIR, 'subtitle_italics_before.srt')
        after_path = join(DATA_DIR, 'subtitle_italics_after.srt')
        self.subs_before = Subtitles(before_path)
        self.subs_after = Subtitles(after_path)

    def test_subtitle_italics(self):
        '''filter() with default arguments turns "before" into "after".'''
        actual, expected = self.subs_before, self.subs_after
        actual.filter()
        self.assertEqual(actual, expected)
50 |
51 |
class SubtitleAllTestCase(unittest.TestCase):
    '''Filtering the combined "example" fixture must yield its expected counterpart.'''

    def setUp(self):
        # Raw input first, then the expected result of filtering it.
        before_path = join(DATA_DIR, 'subtitle_example_before.srt')
        after_path = join(DATA_DIR, 'subtitle_example_after.srt')
        self.subs_before = Subtitles(before_path)
        self.subs_after = Subtitles(after_path)

    def test_subtitle_all(self):
        '''filter() with default arguments turns "before" into "after".'''
        actual, expected = self.subs_before, self.subs_after
        actual.filter()
        self.assertEqual(actual, expected)
60 |
61 |
class SubtitleFilterNamesTestCase(unittest.TestCase):
    '''Filtering the "names" fixture must yield its expected counterpart.'''

    def setUp(self):
        # Raw input first, then the expected result of filtering it.
        before_path = join(DATA_DIR, 'subtitle_names_before.srt')
        after_path = join(DATA_DIR, 'subtitle_names_after.srt')
        self.subs_before = Subtitles(before_path)
        self.subs_after = Subtitles(after_path)

    def test_subtitle_names(self):
        '''filter() with default arguments turns "before" into "after".'''
        actual, expected = self.subs_before, self.subs_after
        actual.filter()
        self.assertEqual(actual, expected)
70 |
71 |
class SubtitleFilterSymbolsTestCase(unittest.TestCase):
    '''Filtering the "symbols" fixture must yield its expected counterpart.'''

    def setUp(self):
        # Raw input first, then the expected result of filtering it.
        before_path = join(DATA_DIR, 'subtitle_symbols_before.srt')
        after_path = join(DATA_DIR, 'subtitle_symbols_after.srt')
        self.subs_before = Subtitles(before_path)
        self.subs_after = Subtitles(after_path)

    def test_subtitle_symbols(self):
        '''filter() with default arguments turns "before" into "after".'''
        actual, expected = self.subs_before, self.subs_after
        actual.filter()
        self.assertEqual(actual, expected)
80 |
81 |
class SubtitleFilterAuthorTestCase(unittest.TestCase):
    '''Filtering the "author" fixture must yield its expected counterpart.'''

    def setUp(self):
        # Raw input first, then the expected result of filtering it.
        before_path = join(DATA_DIR, 'subtitle_author_before.srt')
        after_path = join(DATA_DIR, 'subtitle_author_after.srt')
        self.subs_before = Subtitles(before_path)
        self.subs_after = Subtitles(after_path)

    def test_subtitle_author(self):
        '''filter() with default arguments turns "before" into "after".'''
        actual, expected = self.subs_before, self.subs_after
        actual.filter()
        self.assertEqual(actual, expected)
90 |
91 |
class SubtitleFilterCommaTestCase(unittest.TestCase):
    '''Filtering the "commas" fixture must yield its expected counterpart.'''

    def setUp(self):
        # Raw input first, then the expected result of filtering it.
        before_path = join(DATA_DIR, 'subtitle_commas_before.srt')
        after_path = join(DATA_DIR, 'subtitle_commas_after.srt')
        self.subs_before = Subtitles(before_path)
        self.subs_after = Subtitles(after_path)

    def test_subtitle_commas(self):
        '''filter() with default arguments turns "before" into "after".'''
        actual, expected = self.subs_before, self.subs_after
        actual.filter()
        self.assertEqual(actual, expected)
100 |
101 |
class SubtitleParseSpacingTestCase(unittest.TestCase):
    '''Filtering the "space parsing" fixture must yield its expected counterpart.'''

    def setUp(self):
        # Raw input (contains extra blank lines) first, then the expected result.
        before_path = join(DATA_DIR, 'subtitle_space_parsing_before.srt')
        after_path = join(DATA_DIR, 'subtitle_space_parsing_after.srt')
        self.subs_before = Subtitles(before_path)
        self.subs_after = Subtitles(after_path)

    # NOTE(review): the method name mentions commas, but the fixture pair is
    # about blank-line spacing; the name is kept so existing test IDs still work.
    def test_space_parsing_commas(self):
        '''filter() with default arguments turns "before" into "after".'''
        actual, expected = self.subs_before, self.subs_after
        actual.filter()
        self.assertEqual(actual, expected)
110 |
111 |
class SubtitleBOMTestCase(unittest.TestCase):
    '''Filtering the "bom" fixture must yield its expected counterpart.'''

    def setUp(self):
        # Raw input first, then the expected result of filtering it.
        # Presumably the "before" file starts with a byte-order mark -- verify.
        before_path = join(DATA_DIR, 'subtitle_bom_before.srt')
        after_path = join(DATA_DIR, 'subtitle_bom_after.srt')
        self.subs_before = Subtitles(before_path)
        self.subs_after = Subtitles(after_path)

    def test_bom(self):
        '''filter() with default arguments turns "before" into "after".'''
        actual, expected = self.subs_before, self.subs_after
        actual.filter()
        self.assertEqual(actual, expected)
120 |
121 |
class SubtitleHoursTestCase(unittest.TestCase):
    '''Filtering the "hour in dialogue" fixture must yield its expected counterpart.'''

    def setUp(self):
        # Raw input first, then the expected result of filtering it.
        before_path = join(DATA_DIR, 'hour_in_dialogue_before.srt')
        after_path = join(DATA_DIR, 'hour_in_dialogue_after.srt')
        self.subs_before = Subtitles(before_path)
        self.subs_after = Subtitles(after_path)

    def test_hours(self):
        '''filter() with default arguments turns "before" into "after".'''
        actual, expected = self.subs_before, self.subs_after
        actual.filter()
        self.assertEqual(actual, expected)
130 |
131 |
class SubtitleApostrophe(unittest.TestCase):
    '''Filtering the "apostrophe in name" fixture must yield its expected counterpart.'''

    def setUp(self):
        # Raw input first, then the expected result of filtering it.
        # The fixture filenames are spelled "apostrphone" on disk; keep as is.
        before_path = join(DATA_DIR, 'apostrphone_in_name_before.srt')
        after_path = join(DATA_DIR, 'apostrphone_in_name_after.srt')
        self.subs_before = Subtitles(before_path)
        self.subs_after = Subtitles(after_path)

    def test_thing(self):
        '''filter() with default arguments turns "before" into "after".'''
        actual, expected = self.subs_before, self.subs_after
        actual.filter()
        self.assertEqual(actual, expected)
140 |
--------------------------------------------------------------------------------