├── .github └── workflows │ └── python-publish.yml ├── .gitignore ├── .travis.yml ├── LICENSE ├── README.md ├── setup.py ├── subtitle_filter ├── __init__.py ├── bin │ └── filter-subtitles.py └── libs │ ├── __init__.py │ └── subtitle.py └── tests ├── __init__.py ├── data ├── apostrphone_in_name_after.srt ├── apostrphone_in_name_before.srt ├── hour_in_dialogue_after.srt ├── hour_in_dialogue_before.srt ├── subtitle_angle_brackets_after.srt ├── subtitle_angle_brackets_before.srt ├── subtitle_author_after.srt ├── subtitle_author_before.srt ├── subtitle_bom_after.srt ├── subtitle_bom_before.srt ├── subtitle_commas_after.srt ├── subtitle_commas_before.srt ├── subtitle_example_after.srt ├── subtitle_example_before.srt ├── subtitle_font_after.srt ├── subtitle_font_before.srt ├── subtitle_italics_after.srt ├── subtitle_italics_before.srt ├── subtitle_music_after.srt ├── subtitle_music_before.srt ├── subtitle_names_after.srt ├── subtitle_names_before.srt ├── subtitle_sound_effects_after.srt ├── subtitle_sound_effects_before.srt ├── subtitle_space_parsing_after.srt ├── subtitle_space_parsing_before.srt ├── subtitle_symbols_after.srt └── subtitle_symbols_before.srt ├── io_test.py └── subtitles_test.py /.github/workflows/python-publish.yml: -------------------------------------------------------------------------------- 1 | # This workflows will upload a Python Package using Twine when a release is created 2 | # For more information see: https://help.github.com/en/actions/language-and-framework-guides/using-python-with-github-actions#publishing-to-package-registries 3 | 4 | name: Upload Python Package 5 | 6 | on: 7 | release: 8 | types: [created] 9 | 10 | jobs: 11 | build_and_deploy: 12 | runs-on: ubuntu-latest 13 | 14 | steps: 15 | - name: Checkout repository 16 | uses: actions/checkout@v2 17 | 18 | - name: Set up Python 19 | uses: actions/setup-python@v2 20 | with: 21 | python-version: 3.x 22 | 23 | - name: Install dependencies 24 | run: | 25 | python -m pip install --upgrade 
pip 26 | pip install setuptools wheel twine 27 | 28 | - name: Build package 29 | run: python setup.py sdist bdist_wheel 30 | 31 | - name: Publish package to PyPI 32 | uses: pypa/gh-action-pypi-publish@v1.4.2 33 | with: 34 | user: __token__ 35 | password: ${{ secrets.PYPI_API_TOKEN }} 36 | -------------------------------------------------------------------------------- /.gitignore: -------------------------------------------------------------------------------- 1 | # Byte-compiled / optimized / DLL files 2 | __pycache__/ 3 | *.py[cod] 4 | *$py.class 5 | 6 | # C extensions 7 | *.so 8 | 9 | # Distribution / packaging 10 | .Python 11 | build/ 12 | develop-eggs/ 13 | dist/ 14 | downloads/ 15 | eggs/ 16 | .eggs/ 17 | lib/ 18 | lib64/ 19 | parts/ 20 | sdist/ 21 | var/ 22 | wheels/ 23 | pip-wheel-metadata/ 24 | share/python-wheels/ 25 | *.egg-info/ 26 | .installed.cfg 27 | *.egg 28 | MANIFEST 29 | 30 | # PyInstaller 31 | # Usually these files are written by a python script from a template 32 | # before PyInstaller builds the exe, so as to inject date/other infos into it. 
33 | *.manifest 34 | *.spec 35 | 36 | # Installer logs 37 | pip-log.txt 38 | pip-delete-this-directory.txt 39 | 40 | # Unit test / coverage reports 41 | htmlcov/ 42 | .tox/ 43 | .nox/ 44 | .coverage 45 | .coverage.* 46 | .cache 47 | nosetests.xml 48 | coverage.xml 49 | *.cover 50 | *.py,cover 51 | .hypothesis/ 52 | .pytest_cache/ 53 | 54 | # Translations 55 | *.mo 56 | *.pot 57 | 58 | # Django stuff: 59 | *.log 60 | local_settings.py 61 | db.sqlite3 62 | db.sqlite3-journal 63 | 64 | # Flask stuff: 65 | instance/ 66 | .webassets-cache 67 | 68 | # Scrapy stuff: 69 | .scrapy 70 | 71 | # Sphinx documentation 72 | docs/_build/ 73 | 74 | # PyBuilder 75 | target/ 76 | 77 | # Jupyter Notebook 78 | .ipynb_checkpoints 79 | 80 | # IPython 81 | profile_default/ 82 | ipython_config.py 83 | 84 | # pyenv 85 | .python-version 86 | 87 | # pipenv 88 | # According to pypa/pipenv#598, it is recommended to include Pipfile.lock in version control. 89 | # However, in case of collaboration, if having platform-specific dependencies or dependencies 90 | # having no cross-platform support, pipenv may install dependencies that don't work, or not 91 | # install all needed dependencies. 92 | #Pipfile.lock 93 | 94 | # PEP 582; used by e.g. 
github.com/David-OConnor/pyflow 95 | __pypackages__/ 96 | 97 | # Celery stuff 98 | celerybeat-schedule 99 | celerybeat.pid 100 | 101 | # SageMath parsed files 102 | *.sage.py 103 | 104 | # Environments 105 | .env 106 | .venv 107 | env/ 108 | venv/ 109 | ENV/ 110 | env.bak/ 111 | venv.bak/ 112 | 113 | # Spyder project settings 114 | .spyderproject 115 | .spyproject 116 | 117 | # Rope project settings 118 | .ropeproject 119 | 120 | # mkdocs documentation 121 | /site 122 | 123 | # mypy 124 | .mypy_cache/ 125 | .dmypy.json 126 | dmypy.json 127 | 128 | # Pyre type checker 129 | .pyre/ 130 | 131 | # Visual Studio Code 132 | .vscode 133 | 134 | # pylint 135 | .pylintrc 136 | 137 | # OS X 138 | *.DS_Store -------------------------------------------------------------------------------- /.travis.yml: -------------------------------------------------------------------------------- 1 | language: python 2 | python: 3 | - "3.5" 4 | - "3.6" 5 | - "3.7" 6 | - "3.8" 7 | 8 | # command to install dependencies 9 | install: 10 | - pip install coverage coveralls . 
11 | # command to run tests 12 | script: "coverage run -m unittest discover tests/ '*_test.py'" 13 | after_success: "coveralls" 14 | -------------------------------------------------------------------------------- /LICENSE: -------------------------------------------------------------------------------- 1 | MIT License 2 | 3 | Copyright (c) 2020 Matt 4 | 5 | Permission is hereby granted, free of charge, to any person obtaining a copy 6 | of this software and associated documentation files (the "Software"), to deal 7 | in the Software without restriction, including without limitation the rights 8 | to use, copy, modify, merge, publish, distribute, sublicense, and/or sell 9 | copies of the Software, and to permit persons to whom the Software is 10 | furnished to do so, subject to the following conditions: 11 | 12 | The above copyright notice and this permission notice shall be included in all 13 | copies or substantial portions of the Software. 14 | 15 | THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR 16 | IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, 17 | FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE 18 | AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER 19 | LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, 20 | OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE 21 | SOFTWARE. 22 | -------------------------------------------------------------------------------- /README.md: -------------------------------------------------------------------------------- 1 | # subtitle-filter 2 | 3 | [![PyPI version](https://badge.fury.io/py/subtitle-filter.svg)](https://badge.fury.io/py/subtitle-filter) 4 | 5 | Filter `.srt` subtitle files to remove SDH (Deaf or Hard-of-Hearing) entries and other tags. 
6 | 7 | ## Installation 8 | 9 | ```bash 10 | pip install subtitle-filter 11 | ``` 12 | 13 | ## Usage 14 | 15 | `subtitle-filter` can be used either as a script or a module. 16 | 17 | By default, this module filters the following (in order): 18 | 19 | 1. Removes font tags and text contained within, e.g. `<font color="#ffffff">Hey</font>` is removed. 20 | 2. Removes subtitle entries containing only asterisks: `*`. 21 | 3. Removes subtitle lines containing `♪` or `#`. 22 | 4. Removes sound effects: text contained within and including parentheses `(BANG)`, asterisks `*whisper*`, brackets `[boom]`, and text contained within forward slashes `/POW/`. 23 | 5. Replaces names with dashes, e.g. `GARY: Hey` or `Gary: Hey` to `- Hey`. 24 | 6. Removes author tags e.g. `XoXo Subtitles by PwnedDude967 XoXo`. 25 | 7. Fixes erroneous comma spacing, e.g. `Hey , what's up? Nothing,my man` to `Hey, what's up? Nothing, my man`. 26 | 8. Removes lone symbols such as `?`, `-`, `#`, `_`. 27 | 9. Removes leading dashes `-` if there is only one line present. 28 | 29 | ### Script Usage 30 | 31 | Bring up the help display: 32 | 33 | ```bash 34 | filter-subtitles.py -h 35 | ``` 36 | 37 | Filter a subtitle in place (overwrites original subtitle) with default options. 38 | 39 | ```bash 40 | filter-subtitles.py -s /path/to/sub.srt 41 | ``` 42 | 43 | Instead of saving to disk, print the output. 44 | 45 | ```bash 46 | filter-subtitles.py -s /path/to/sub.srt -p 47 | ``` 48 | 49 | Save the output to a different filepath. 50 | 51 | ```bash 52 | filter-subtitles.py -s /path/to/sub.srt -o /path/to/outsub.srt 53 | ``` 54 | 55 | Custom filter flags. 56 | 57 | ``` 58 | --keep-fonts Do not remove font tags and text contained within. 59 | --keep-ast Do not remove subtitles containing asterisks: (*). 60 | --keep-music Do not remove lines containing 1 or more "♪" symbols. 
61 | --keep-effects Do not remove text between and including parenthesis 62 | () or brackets [] 63 | --keep-names Do not replace names in CAPITALS with "-" tags 64 | --keep-author Do not remove author tags, eg. Subtitles by some guy. 65 | --keep-lone-dashes Do not remove the dash if only one dashed line is present. 66 | --keep-commas Do not fix comma spacings. 67 | ``` 68 | 69 | ### Module Usage 70 | 71 | Filter a subtitle in place (overwrites original subtitle) with default options 72 | 73 | ```python 74 | from subtitle_filter import Subtitles 75 | 76 | subs = Subtitles('/path/to/sub.srt') 77 | subs.filter() 78 | subs.save() 79 | ``` 80 | 81 | Instead of saving to disk, print the output. 82 | 83 | ```python 84 | subs.print() 85 | ``` 86 | 87 | Save the output to a different filepath. 88 | 89 | ```python 90 | subs.save('/path/to/newsub.srt') 91 | ``` 92 | 93 | Use custom filter flags. 94 | 95 | ```python 96 | subs.filter( 97 | rm_fonts=True, 98 | rm_ast=False, 99 | rm_music=True, 100 | rm_effects=True, 101 | rm_names=False, 102 | rm_author=False, 103 | ) 104 | ``` 105 | 106 | ### Issues & Requests 107 | 108 | If you spot any issues with the filtered subtitles, or would like to request new features, please create an issue on GitHub and provide examples. 
109 | -------------------------------------------------------------------------------- /setup.py: -------------------------------------------------------------------------------- 1 | #!/usr/bin/env python3 2 | '''setup.py, use this to install module''' 3 | from os import path 4 | from setuptools import setup 5 | 6 | version = '1.5.0' 7 | this_dir = path.abspath(path.dirname(__file__)) 8 | with open(path.join(this_dir, 'README.md'), encoding='utf-8') as f: 9 | long_description = f.read() 10 | 11 | setup( 12 | name='subtitle-filter', 13 | version=version, 14 | description='Filter SDH entries and more from .srt files', 15 | author='Matt Lyon', 16 | author_email='matthewlyon18@gmail.com', 17 | url='https://github.com/mattlyon93/filter-subs', 18 | download_url='https://github.com/mattlyon93/filter-subs/archive/v{}.tar.gz'.format(version), 19 | long_description=long_description, 20 | long_description_content_type='text/markdown', 21 | python_requires='>=3.5', 22 | license='MIT License', 23 | packages=['subtitle_filter', 'subtitle_filter/libs'], 24 | classifiers=[ 25 | 'Programming Language :: Python', 26 | 'Operating System :: Unix', 27 | 'Operating System :: MacOS', 28 | 'Topic :: Text Processing :: Filters', 29 | 'Topic :: Multimedia :: Sound/Audio :: Speech', 30 | ], 31 | keywords=['subtitle', 'SDH', 'hard-of-hearing', 'filter', 'movie', 'tv'], 32 | scripts=['subtitle_filter/bin/filter-subtitles.py'], 33 | ) 34 | -------------------------------------------------------------------------------- /subtitle_filter/__init__.py: -------------------------------------------------------------------------------- 1 | from subtitle_filter.libs.subtitle import Subtitles 2 | -------------------------------------------------------------------------------- /subtitle_filter/bin/filter-subtitles.py: -------------------------------------------------------------------------------- 1 | #!/usr/bin/env python3 2 | '''Script to Filter SDH tags from subtitles''' 3 | 4 | import argparse 5 | 6 | 
from subtitle_filter.libs.subtitle import Subtitles 7 | 8 | 9 | def run(args): 10 | '''Main entry point of script''' 11 | subs = Subtitles(args.sub_fpath) 12 | subs.filter( 13 | rm_fonts=args.rm_fonts, 14 | rm_ast=args.rm_ast, 15 | rm_music=args.rm_music, 16 | rm_effects=args.rm_effects, 17 | rm_names=args.rm_names, 18 | rm_author=args.rm_author, 19 | rm_lone_dashes=args.rm_lone_dashes, 20 | fix_commas=args.fix_commas, 21 | ) 22 | 23 | if args.print: 24 | subs.print() 25 | return 26 | 27 | subs.save(new_filepath=args.out_fpath) 28 | 29 | 30 | if __name__ == '__main__': 31 | ap = argparse.ArgumentParser( 32 | description='Filter subtitles to remove various SDH (Deaf or Hard-of-Hearing) tags.' 33 | ) 34 | 35 | ap.add_argument( 36 | '-s', 37 | '--subtitle', 38 | dest='sub_fpath', 39 | type=str, 40 | help='Subtitle file to filter', 41 | required=True, 42 | ) 43 | ap.add_argument( 44 | '-o', 45 | '--output', 46 | dest='out_fpath', 47 | type=str, 48 | help='Path to save filtered subtitle, omit to save inplace', 49 | default=None, 50 | ) 51 | ap.add_argument( 52 | '-p', 53 | '--print-only', 54 | dest='print', 55 | action='store_true', 56 | default=False, 57 | help='Print output subtitles instead of saving to disk.', 58 | ) 59 | ap.add_argument( 60 | '--keep-fonts', 61 | dest='rm_fonts', 62 | default=True, 63 | action='store_false', 64 | help='Do not remove font tags from subtitles.', 65 | ) 66 | ap.add_argument( 67 | '--keep-ast', 68 | dest='rm_ast', 69 | default=True, 70 | action='store_false', 71 | help='Do not remove subtitles containing asterisks: (*).', 72 | ) 73 | ap.add_argument( 74 | '--keep-music', 75 | dest='rm_music', 76 | default=True, 77 | action='store_false', 78 | help='Do not remove "♪" symbols and text contained within two "♪" symbols.', 79 | ) 80 | ap.add_argument( 81 | '--keep-effects', 82 | dest='rm_effects', 83 | default=True, 84 | action='store_false', 85 | help='Do not remove text between and including parenthesis () or brackets []', 86 | ) 87 | 
ap.add_argument( 88 | '--keep-names', 89 | dest='rm_names', 90 | default=True, 91 | action='store_false', 92 | help='Do not replace names in CAPITALS with "-" tags', 93 | ) 94 | ap.add_argument( 95 | '--keep-author', 96 | dest='rm_author', 97 | default=True, 98 | action='store_false', 99 | help='Do not remove author tags, eg. Subtitles by some guy.', 100 | ) 101 | ap.add_argument( 102 | '--keep-lone-dashes', 103 | dest='rm_lone_dashes', 104 | default=True, 105 | action='store_false', 106 | help='Do not remove lone dashes from subtitles.', 107 | ) 108 | ap.add_argument( 109 | '--keep-commas', 110 | dest='fix_commas', 111 | default=True, 112 | action='store_false', 113 | help='Do not fix comma spacings.', 114 | ) 115 | 116 | arguments = ap.parse_args() 117 | 118 | run(arguments) 119 | -------------------------------------------------------------------------------- /subtitle_filter/libs/__init__.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/m-lyon/filter-subs/8f89122d8ea1b967a8ed2bb47e635d3df1e61acf/subtitle_filter/libs/__init__.py -------------------------------------------------------------------------------- /subtitle_filter/libs/subtitle.py: -------------------------------------------------------------------------------- 1 | '''Module containing Subtitle and Subtitles classes''' 2 | 3 | import os 4 | import re 5 | 6 | AUTHOR_STRINGS = ( 7 | 'synced and corrected by', 8 | 'sync and corrections by', 9 | 'subtitles by', 10 | 'encoded and released by', 11 | 'opensubtitles.org', 12 | 'please rate this subtitle', 13 | 'captioning sponsored by', 14 | 'captioned by', 15 | ) 16 | 17 | 18 | def has_bom(filename): 19 | '''Tests whether file byte order marking''' 20 | with open(filename, 'rb') as file: 21 | bom_bytes = file.read(4) 22 | return bom_bytes.startswith( 23 | (b'\xef\xbb\xbf', b'\xff\xfe', b'\xfe\xff', b'\xff\xfe\x00\x00', b'\x00\x00\xfe\xff') 24 | ) 25 | 26 | 27 | class Subtitle: 28 | 
class Subtitle:
    '''A single subtitle entry: index line, start/end timestamps and text.

    The filter methods edit the text in place. When an entry's text ends up
    empty its index is set to 0, which ``Subtitles.filter`` treats as a
    "delete this entry" marker.
    '''

    # Zero or more characters that carry no dialogue on their own.
    _SYMBOLS = r'[_\-‐?#\s¶]*'

    # NOTE(review): the literal <i>, </i> and <font ...> tags had been stripped
    # from the listing this class was restored from; the tag literals below are
    # reconstructed from the docstrings and README - confirm against upstream.
    _EMPTY_ITALICS = re.compile('<i>' + _SYMBOLS + '</i>')
    _LONE_LINE_PATTERNS = (
        re.compile(_SYMBOLS),
        re.compile(_SYMBOLS + '<i>' + _SYMBOLS),
        re.compile(_SYMBOLS + '</i>' + _SYMBOLS),
    )
    # Non-greedy so that dialogue between two separate font tags is kept.
    _FONT_TAG = re.compile(r'<font[^>]*>.*?</font>', flags=re.DOTALL)

    # One pattern per delimiter pair, applied in this order. They are kept
    # separate because a single combined expression misfires on the forward
    # slash inside an italics closing tag.
    _INLINE_EFFECT_PATTERNS = (
        re.compile(r'\([\S ]*\)[\s:]*'),
        re.compile(r'\[[\S ]*\][\s:]*'),
        re.compile(r'/[\S ]*/[\s:]*'),
        re.compile(r'\*[\S ]*\*[\s:]*'),
    )
    _MULTILINE_EFFECT = re.compile(r'[\*\(\[][\S\s]*[\*\)\]][\s:]*')

    # Care is taken here to preserve genuine sentences with a colon.
    _NAME_REGEX = re.compile(
        r"((?=.*[A-Z])[A-Z0-9 ][A-Z0-9' ]*: *|[A-Z]{1}[a-zA-Z ]+ *: *|^[A-Za-z]+: *)"
    )

    def __init__(self):
        self._index = None
        self._contents = ''
        self.start = None
        self.end = None

    def __str__(self):
        return '{}\n{} --> {}\n{}\n'.format(self._index, self.start, self.end, self._contents)

    def __eq__(self, other):
        # Entries are equal when they render to the same text.
        return str(self) == str(other)

    def _contents_to_list(self):
        '''Switch the internal text to a list of lines (no-op if already a list)'''
        if isinstance(self._contents, str):
            self._contents = self._contents.split('\n')

    def _contents_to_str(self):
        '''Switch the internal text back to a newline-joined string'''
        if isinstance(self._contents, list):
            self._contents = '\n'.join(self._contents)

    @property
    def index(self):
        '''Returns the index number for subtitle, or False if index is not assigned'''
        if self._index is None:
            return False
        return self._index

    @index.setter
    def index(self, index):
        self._index = int(index)

    @property
    def contents(self):
        '''Returns the contents lines for the subtitle'''
        return self._contents

    @contents.setter
    def contents(self, item):
        # Assignment APPENDS a line to existing text; the parser relies on this
        # to build multi-line entries one line at a time.
        if self._contents:
            self._contents += '\n{}'.format(item)
        else:
            self._contents = '{}'.format(item)

    def _filter_empty(self):
        '''Marks the entry for deletion (index 0) when it has no text left'''
        if not self.contents:
            self.index = 0

    @property
    def lines(self):
        '''Subtitle entry as a newline separated list'''
        return [
            str(self._index),
            '{} --> {}'.format(self.start, self.end),
            *self._contents.split('\n'),
        ]

    def fix_comma_spaces(self):
        '''Fixes comma spacing: "Hey , you" -> "Hey, you" and "a,b" -> "a, b"

        Only commas adjacent to ASCII letters are touched, so numbers such as
        "1,000" and timestamps are left alone.
        '''
        # Drop every space/tab between a word and the comma that follows it.
        self._contents = re.sub(r'(?<=[A-Za-z])[ \t]+,', ',', self._contents)
        # Lookarounds consume only the comma, so chains like "a,b,c" are fully
        # fixed in a single pass.
        self._contents = re.sub(r'(?<=[A-Za-z]),(?=[A-Za-z])', ', ', self._contents)

    def remove_font_colours(self):
        '''Removes font tags, and the text enclosed by them, from contents'''
        self._contents = self._FONT_TAG.sub('', self._contents)
        self._filter_empty()

    def remove_music(self):
        '''Removes music symbols from contents'''
        # Remove music symbol behaving as parenthesis
        self._contents = re.sub(r'♪(.*)♪', '', self._contents, flags=re.DOTALL)
        # Blank any remaining line that still carries a music marker
        self._contents_to_list()
        self._contents = [
            '' if ('#' in line or '♪' in line) else line for line in self._contents
        ]
        self._contents_to_str()
        self._filter_empty()

    def remove_sound_effects(self):
        '''Removes text in between parenthesis, brackets, asterisks and forward slashes'''
        # Effects that open and close on the same line
        self._contents_to_list()
        cleaned = []
        for line in self._contents:
            for pattern in self._INLINE_EFFECT_PATTERNS:
                line = pattern.sub('', line)
            cleaned.append(line)
        self._contents = cleaned
        self._remove_lone_symbols()
        self._contents_to_str()
        # Effects that span several lines
        self._contents = self._MULTILINE_EFFECT.sub('', self._contents)
        self._filter_empty()

    @staticmethod
    def _is_hour(candidate):
        '''True when ``candidate`` looks like a clock time such as "10:00"'''
        # partition() tolerates extra colons; the former two-way unpacking of
        # split(":") raised ValueError on text such as "NOTE: 5:".
        lhs, _, rhs = candidate.partition(':')
        return bool(lhs and rhs) and len(lhs) <= 2 and (lhs + rhs).isnumeric()

    def replace_names(self):
        '''Replaces speaker names such as "GARY: " or "Gary: "

        When more than one name is found each is replaced with "- " (dialogue
        from different people); a single name is simply removed.
        '''
        # TODO: does this cover the case where the names are the same?
        names = self._NAME_REGEX.findall(self._contents)
        replacement = '- ' if len(names) > 1 else ''

        def replace_if_not_hour(match):
            # Peek two characters past the match so that "10:" can be
            # recognised as the start of "10:00" and left untouched.
            candidate = match.string[match.start() : match.end() + 2].strip()
            return match.group(0) if self._is_hour(candidate) else replacement

        self._contents = self._NAME_REGEX.sub(replace_if_not_hour, self._contents).lstrip()
        self._filter_empty()

    def remove_author(self):
        '''Marks "Subtitles by" style entries for deletion'''
        lowered = self._contents.lower()
        if any(author_str in lowered for author_str in AUTHOR_STRINGS):
            self.index = 0

    def remove_asterisks(self):
        '''Empties the entry if it contains only asterisks and/or whitespace'''
        self._contents = re.sub(r'^[\*\s]*$', '', self._contents)
        self._filter_empty()

    def fix_italics(self):
        '''Balances a lone italics tag, then removes empty italics and lone symbols'''
        if '<i>' in self._contents and '</i>' not in self._contents:
            self._contents += '</i>'
        if '</i>' in self._contents and '<i>' not in self._contents:
            self._contents = '<i>' + self._contents
        self._contents = self._EMPTY_ITALICS.sub('', self._contents)
        self._remove_lone_symbols()

    def _remove_lone_symbols(self):
        '''Drops lines holding nothing but symbols and/or a single italics tag'''
        self._contents_to_list()
        self._contents = [
            line
            for line in self._contents
            if not any(pattern.fullmatch(line) for pattern in self._LONE_LINE_PATTERNS)
        ]
        # Set index as 0 for later deletion
        if not self._contents:
            self.index = 0
        self._contents_to_str()

    def remove_single_dash(self):
        '''Removes the leading dash when the entry consists of a single line'''
        if re.match(r'^[^\n]*$', self._contents):
            self._contents = re.sub(r'(?m)^\s*-\s*(.*)$', r'\1', self._contents)
        self._filter_empty()


class Subtitles:
    '''Content filtering object for a subtitles file'''

    EXTENSIONS = ['.srt']

    # Byte order marks and the codec that consumes them. The UTF-32 marks come
    # before the UTF-16 ones because the UTF-32 LE mark starts with the UTF-16
    # LE mark.
    _BOM_ENCODINGS = (
        (b'\xef\xbb\xbf', 'utf-8-sig'),
        (b'\xff\xfe\x00\x00', 'utf-32'),
        (b'\x00\x00\xfe\xff', 'utf-32'),
        (b'\xff\xfe', 'utf-16'),
        (b'\xfe\xff', 'utf-16'),
    )

    # (keyword flag, Subtitle method) pairs in the order filter() applies them.
    _FILTER_STEPS = (
        ('rm_fonts', 'remove_font_colours'),
        ('rm_ast', 'remove_asterisks'),
        ('rm_music', 'remove_music'),
        ('rm_effects', 'remove_sound_effects'),
        ('rm_names', 'replace_names'),
        ('rm_author', 'remove_author'),
        ('fix_commas', 'fix_comma_spaces'),
        ('rm_lone_dashes', 'remove_single_dash'),
    )

    def __init__(self, fpath):
        '''Reads and parses ``fpath``.

        Raises IOError when the path does not exist, is not a file, or does
        not carry a supported extension (compared case-insensitively).
        '''
        if not os.path.exists(fpath):
            raise IOError('{} does not exist'.format(fpath))
        if not os.path.isfile(fpath):
            raise IOError('{} is not a file'.format(fpath))
        self._fullpath = fpath
        if self.ext.lower() not in self.EXTENSIONS:
            raise IOError('{} is not valid subtitle file: {}'.format(self._fullpath, self.ext))
        self._line_list = self._get_line_list()
        self.subtitles = self._parse_subs()

    def __repr__(self):
        return ''.join(map(str, self.subtitles))

    def __eq__(self, other):
        if not isinstance(other, Subtitles):
            return NotImplemented
        # List equality checks the lengths, then compares entry by entry.
        return self.subtitles == other.subtitles

    @property
    def filepath(self):
        '''Filepath of mediafile'''
        return self._fullpath

    @property
    def ext(self):
        '''Extension of mediafile'''
        return os.path.splitext(self._fullpath)[1]

    def _get_line_list(self):
        '''Returns the file's lines, right-stripped, decoded according to its BOM'''
        with open(self.filepath, 'rb') as raw:
            head = raw.read(4)
        # Fall back to plain UTF-8 when the file carries no byte order mark.
        encoding = next(
            (codec for bom, codec in self._BOM_ENCODINGS if head.startswith(bom)), 'utf-8'
        )
        with open(self.filepath, 'r', encoding=encoding) as fdata:
            return [line.rstrip() for line in fdata]

    def _parse_subs(self):
        '''Builds the list of Subtitle entries from the raw lines'''
        sub_list = [Subtitle()]
        for line in self._line_list:
            current = sub_list[-1]
            if not current.index:
                # Still waiting for this entry's index line
                try:
                    current.index = int(line)
                except ValueError:
                    continue
            elif current.start is None:
                # Time line; anything else before it is skipped
                if ' --> ' in line:
                    current.start, _, current.end = line.partition(' --> ')
            elif not line:
                # A blank line closes the entry
                sub_list.append(Subtitle())
            else:
                current.contents = line
        # Drop placeholders that never received an index and a time line (for
        # example the one opened by a trailing blank line); otherwise they
        # would be printed or saved as "None --> None".
        return [sub for sub in sub_list if sub.index and sub.start is not None]

    def filter(self, **kw):
        '''Filters subtitles to remove SDH items

        Each step runs unless its keyword is passed as False: rm_fonts, rm_ast,
        rm_music, rm_effects, rm_names, rm_author, fix_commas, rm_lone_dashes.
        Italics are always tidied afterwards, entries marked for deletion are
        dropped, and the remaining entries are renumbered from 1.
        '''
        for flag, method_name in self._FILTER_STEPS:
            if kw.get(flag, True):
                for sub in self.subtitles:
                    getattr(sub, method_name)()
        for sub in self.subtitles:
            sub.fix_italics()
        # Remove filtered items from list
        self.subtitles[:] = [sub for sub in self.subtitles if sub.index]
        # Reassign indices
        for idx, sub in enumerate(self.subtitles, start=1):
            sub.index = idx

    def print(self):
        '''Prints all subtitle entries'''
        for sub in self.subtitles:
            print(sub)

    def save(self, new_filepath=None):
        '''Saves subtitle object to disk as UTF-8,
        omit new_filepath to save inplace
        '''
        if new_filepath is not None:
            # The new path also becomes the target of later in-place saves.
            self._fullpath = new_filepath
        with open(self._fullpath, 'w', encoding='utf-8') as fp:
            for sub in self.subtitles:
                fp.write(str(sub) + '\n')
/tests/data/apostrphone_in_name_after.srt: -------------------------------------------------------------------------------- 1 | 1 2 | 00:10:50,567 --> 00:10:52,569 3 | Yo! 4 | -------------------------------------------------------------------------------- /tests/data/apostrphone_in_name_before.srt: -------------------------------------------------------------------------------- 1 | 1 2 | 00:10:50,567 --> 00:10:52,569 3 | MOTHER'S MILK: 4 | Yo! 5 | -------------------------------------------------------------------------------- /tests/data/hour_in_dialogue_after.srt: -------------------------------------------------------------------------------- 1 | 1 2 | 00:45:52,000 --> 00:45:55,295 3 | Just be in Stillwell's office 4 | at 10:00. 5 | -------------------------------------------------------------------------------- /tests/data/hour_in_dialogue_before.srt: -------------------------------------------------------------------------------- 1 | 1 2 | 00:45:52,000 --> 00:45:55,295 3 | Just be in Stillwell's office 4 | at 10:00. 5 | -------------------------------------------------------------------------------- /tests/data/subtitle_angle_brackets_after.srt: -------------------------------------------------------------------------------- 1 | 1 2 | 00:01:40,723 --> 00:01:42,707 3 | This stays <\i> 4 | 5 | 2 6 | 00:00:57,088 --> 00:00:58,788 7 | This goes. 8 | 9 | 3 10 | 00:45:39,418 --> 00:45:41,084 11 | something something <\i> 12 | 13 | 4 14 | 00:20:36,309 --> 00:20:39,277 15 | - something some 16 | Listen here.<\i> 17 | 18 | 5 19 | 00:11:31,995 --> 00:11:35,346 20 | something 21 | Listen here.<\i> -------------------------------------------------------------------------------- /tests/data/subtitle_angle_brackets_before.srt: -------------------------------------------------------------------------------- 1 | 50 2 | 00:01:40,723 --> 00:01:42,707 3 | This stays <\i> 4 | 5 | 28 6 | 00:00:57,088 --> 00:00:58,788 7 | This goes. 
8 | 9 | 740 10 | 00:45:39,418 --> 00:45:41,084 11 | something something <\i> 12 | 13 | 398 14 | 00:20:36,309 --> 00:20:39,277 15 | - something some 16 | Listen here.<\i> 17 | 18 | 262 19 | 00:11:31,995 --> 00:11:35,346 20 | something 21 | Listen here.<\i> -------------------------------------------------------------------------------- /tests/data/subtitle_author_after.srt: -------------------------------------------------------------------------------- 1 | 1 2 | 00:02:18,389 --> 00:02:19,929 3 | Hey, darling. 4 | -------------------------------------------------------------------------------- /tests/data/subtitle_author_before.srt: -------------------------------------------------------------------------------- 1 | 2 2 | 00:00:15,142 --> 00:00:25,142 3 | � ENCODED AND RELEASED BY Sharpysword ? 4 | 5 | 3 6 | 00:02:18,389 --> 00:02:19,929 7 | Hey, darling. 8 | 9 | 3 10 | 00:00:00,062 --> 00:00:02,507 11 | Subtitles by explosiveskull 12 | www.OpenSubtitles.org 13 | 14 | 3 15 | 00:00:00,062 --> 00:00:02,507 16 | Synced and Corrected by Your Mum. 17 | 18 | 976 19 | 00:42:12,094 --> 00:42:15,054 20 | Captioning sponsored by 21 | CBS 22 | 23 | 978 24 | 00:42:18,884 --> 00:42:21,060 25 | Captioned by 26 | Media Access Group at WGBH 27 | access.wgbh.org -------------------------------------------------------------------------------- /tests/data/subtitle_bom_after.srt: -------------------------------------------------------------------------------- 1 | 1 2 | 00:01:21,999 --> 00:01:23,292 3 | Gosh. 4 | 5 | 2 6 | 00:02:03,666 --> 00:02:05,459 7 | How long until we get to his estate? 8 | -------------------------------------------------------------------------------- /tests/data/subtitle_bom_before.srt: -------------------------------------------------------------------------------- 1 | 1 2 | 00:01:21,999 --> 00:01:23,292 3 | Gosh. 4 | 5 | 2 6 | 00:02:03,666 --> 00:02:05,459 7 | How long until we get to his estate? 
8 | -------------------------------------------------------------------------------- /tests/data/subtitle_commas_after.srt: -------------------------------------------------------------------------------- 1 | 1 2 | 00:01:42,460 --> 00:01:47,420 3 | By decree, all persons 4 | found guilty of piracy, 5 | 6 | 2 7 | 00:01:11,600 --> 00:01:14,730 8 | duly appointed representative 9 | of His Majesty, the king. -------------------------------------------------------------------------------- /tests/data/subtitle_commas_before.srt: -------------------------------------------------------------------------------- 1 | 12 2 | 00:01:42,460 --> 00:01:47,420 3 | By decree,all persons 4 | found guilty of piracy, 5 | 6 | 5 7 | 00:01:11,600 --> 00:01:14,730 8 | duly appointed representative 9 | of His Majesty , the king. -------------------------------------------------------------------------------- /tests/data/subtitle_example_after.srt: -------------------------------------------------------------------------------- 1 | 1 2 | 00:01:40,723 --> 00:01:42,707 3 | - It's almost as if... 4 | - Yes? 5 | 6 | 2 7 | 00:01:40,723 --> 00:01:42,707 8 | Gooby pls... 9 | 10 | 3 11 | 00:01:42,709 --> 00:01:46,210 12 | Something's come alive. 13 | 14 | 4 15 | 00:01:42,709 --> 00:01:46,210 16 | Again. 17 | 18 | 5 19 | 00:02:08,501 --> 00:02:11,869 20 | Guh 21 | 22 | 6 23 | 00:02:22,048 --> 00:02:25,083 24 | Now wait a minute, Elliot. 25 | 26 | 7 27 | 00:02:45,238 --> 00:02:46,904 28 | Wait, wait, wait! 29 | 30 | 8 31 | 00:00:57,088 --> 00:00:58,788 32 | And not me? 33 | -------------------------------------------------------------------------------- /tests/data/subtitle_example_before.srt: -------------------------------------------------------------------------------- 1 | 1 2 | 00:01:19,519 --> 00:01:26,557 3 | ♪ ♪ 4 | 5 | 1 6 | 00:01:19,519 --> 00:01:26,557 7 | ♪ 8 | ♪ 9 | 10 | 2 11 | 00:01:40,723 --> 00:01:42,707 12 | ELLIOT: It's almost as if... 13 | JOE: Yes? 
14 | 15 | 2 16 | 00:01:40,723 --> 00:01:42,707 17 | ELLIOT: Gooby pls... 18 | 19 | 3 20 | 00:01:42,709 --> 00:01:46,210 21 | (CHUCCCKLEEES) Something's come alive. 22 | 23 | 3 24 | 00:01:42,709 --> 00:01:46,210 25 | (CHUCKLES) 26 | Again. 27 | 28 | 4 29 | 00:02:08,501 --> 00:02:11,869 30 | (POPCORN MACHINE DOOR CLOSES) Guh 31 | 32 | 5 33 | 00:02:22,048 --> 00:02:25,083 34 | Now wait a minute, Elliot. 35 | 36 | 6 37 | 00:11:08,086 --> 00:11:15,984 38 | - Synced and corrected by VitoSilans - 39 | -- www.Addic7ed.com -- 40 | 41 | 3 42 | 00:00:00,062 --> 00:00:02,507 43 | Subtitles by explosiveskull 44 | www.OpenSubtitles.org 45 | 46 | 6 47 | 00:02:45,238 --> 00:02:46,904 48 | - (GUN COCKS) 49 | - Wait, wait, wait! 50 | 51 | 46 52 | 00:01:43,719 --> 00:01:46,506 53 | - ♪ Now that the day is over ♪ 54 | - [beeps] 55 | 56 | 28 57 | 00:00:57,088 --> 00:00:58,788 58 | - [gunshot] 59 | - [Shot] And not me? 60 | 61 | 740 62 | 00:45:39,418 --> 00:45:41,084 63 | [Barenaked Ladies' "One Week" plays] 64 | 65 | 398 66 | 00:20:36,309 --> 00:20:39,277 67 | [Roxette's "Listen to 68 | Your Heart" plays softly] 69 | 70 | 584 71 | 00:37:43,795 --> 00:37:47,899 72 | * 73 | 74 | 487 75 | 00:29:34,006 --> 00:29:37,944 76 | - * Send me an angel 77 | who flies from Montgomery * 78 | 79 | -------------------------------------------------------------------------------- /tests/data/subtitle_font_after.srt: -------------------------------------------------------------------------------- 1 | 1 2 | 00:01:42,709 --> 00:01:46,210 3 | Something's come alive. 4 | 5 | 2 6 | 00:01:42,709 --> 00:01:46,210 7 | Again. 8 | 9 | 3 10 | 00:02:45,238 --> 00:02:46,904 11 | Wait, wait, wait! 12 | -------------------------------------------------------------------------------- /tests/data/subtitle_font_before.srt: -------------------------------------------------------------------------------- 1 | 3 2 | 00:01:42,709 --> 00:01:46,210 3 | What Something's come alive. 
4 | 5 | 5 6 | 00:01:42,709 --> 00:01:46,210 7 | (CHUCKLES) 8 | Again. 9 | 10 | 6 11 | 00:02:45,238 --> 00:02:46,904 12 | - (GUN COCKS) 13 | - Wait, wait, wait! 14 | -------------------------------------------------------------------------------- /tests/data/subtitle_italics_after.srt: -------------------------------------------------------------------------------- 1 | 1 2 | 00:02:22,048 --> 00:02:25,083 3 | Now wait a minute, Elliot. 4 | -------------------------------------------------------------------------------- /tests/data/subtitle_italics_before.srt: -------------------------------------------------------------------------------- 1 | 46 2 | 00:01:43,719 --> 00:01:46,506 3 | - ♪ Now that the day is over ♪ 4 | - [beeps] 5 | 6 | 5 7 | 00:02:22,048 --> 00:02:25,083 8 | Now wait a minute, Elliot. 9 | 10 | 1 11 | 00:01:19,519 --> 00:01:26,557 12 | ♪ 13 | ♪ 14 | -------------------------------------------------------------------------------- /tests/data/subtitle_music_after.srt: -------------------------------------------------------------------------------- 1 | 1 2 | 00:01:40,723 --> 00:01:42,707 3 | What's up dawg. 4 | 5 | 2 6 | 00:01:19,519 --> 00:01:26,557 7 | Is real 8 | 9 | 3 10 | 00:00:57,939 --> 00:01:01,636 11 | - They want to see the Ram Jam! 12 | -------------------------------------------------------------------------------- /tests/data/subtitle_music_before.srt: -------------------------------------------------------------------------------- 1 | 1 2 | 00:01:19,519 --> 00:01:26,557 3 | ♪ ♪ 4 | 5 | 2 6 | 00:01:19,519 --> 00:01:26,557 7 | ♪ 8 | ♪ 9 | 10 | 3 11 | 00:01:40,723 --> 00:01:42,707 12 | What's up dawg. 13 | 14 | 4 15 | 00:01:19,519 --> 00:01:26,557 16 | ♪ This sound 17 | Is real 18 | 19 | 5 20 | 00:01:19,519 --> 00:01:26,557 21 | ♪ This sound 22 | Is not ♪ 23 | 24 | 6 25 | 00:00:57,939 --> 00:01:01,636 26 | - They want to see the Ram Jam! 
27 | - # Well, I'm frustrated # 28 | 29 | 7 30 | 00:01:01,709 --> 00:01:04,007 31 | # And outdated # 32 | 33 | 8 34 | 00:01:33,125 --> 00:01:36,291 35 | ♪ 'Cause it sure looks to me 36 | Like them people 37 | Ain't playin'♪ 38 | 39 | -------------------------------------------------------------------------------- /tests/data/subtitle_names_after.srt: -------------------------------------------------------------------------------- 1 | 1 2 | 00:01:40,723 --> 00:01:42,707 3 | - It's almost as if... 4 | - Yes? 5 | 6 | 2 7 | 00:01:40,723 --> 00:01:42,707 8 | Gooby pls... 9 | 10 | 3 11 | 00:03:06,139 --> 00:03:07,639 12 | You're gonna need 13 | most of that 14 | 15 | 4 16 | 00:00:05,377 --> 00:00:06,378 17 | No. 18 | 19 | 5 20 | 00:03:32,296 --> 00:03:34,840 21 | Regular text. 22 | 23 | 6 24 | 00:00:55,296 --> 00:00:58,931 25 | Cop cuties, 26 | cute and on duty 27 | 28 | 7 29 | 00:07:40,362 --> 00:07:44,153 30 | Born in humble circumstances 31 | in Dundee, Scotland, 32 | 33 | 8 34 | 00:16:06,299 --> 00:16:08,802 35 | See? Hebrews 13:4. 36 | -------------------------------------------------------------------------------- /tests/data/subtitle_names_before.srt: -------------------------------------------------------------------------------- 1 | 1 2 | 00:01:40,723 --> 00:01:42,707 3 | ELLIOT: It's almost as if... 4 | JOE: Yes? 5 | 6 | 2 7 | 00:01:40,723 --> 00:01:42,707 8 | ELLIOT: Gooby pls... 9 | 10 | 17 11 | 00:03:06,139 --> 00:03:07,639 12 | Bartender: 13 | You're gonna need 14 | most of that 15 | 16 | 5 17 | 00:00:05,377 --> 00:00:06,378 18 | THE FOOBAR: No. 19 | 20 | 39 21 | 00:03:32,296 --> 00:03:34,840 22 | FOO BAR 4: (IN ENGLISH) 23 | Regular text. 
24 | 25 | 20 26 | 00:00:55,296 --> 00:00:58,931 27 | all: ♪ Cop cuties, 28 | cute and on duty ♪ 29 | 30 | 20 31 | 00:00:55,296 --> 00:00:58,931 32 | all: Cop cuties, 33 | cute and on duty 34 | 35 | 152 36 | 00:07:40,362 --> 00:07:44,153 37 | Man on Video: 38 | Born in humble circumstances 39 | in Dundee, Scotland, 40 | 41 | 1 42 | 00:16:06,299 --> 00:16:08,802 43 | See? Hebrews 13:4. -------------------------------------------------------------------------------- /tests/data/subtitle_sound_effects_after.srt: -------------------------------------------------------------------------------- 1 | 1 2 | 00:01:40,723 --> 00:01:42,707 3 | Examples are tough. 4 | 5 | 2 6 | 00:00:57,088 --> 00:00:58,788 7 | And not me? 8 | 9 | 3 10 | 00:11:31,995 --> 00:11:35,346 11 | I'm raising a teenage boy. 12 | 13 | 4 14 | 00:12:31,995 --> 00:12:35,346 15 | twice now. 16 | 17 | 5 18 | 00:15:31,995 --> 00:51:35,346 19 | Third time. 20 | 21 | 6 22 | 00:32:06,925 --> 00:32:09,057 23 | What do you got for me, Cabrera? 24 | -------------------------------------------------------------------------------- /tests/data/subtitle_sound_effects_before.srt: -------------------------------------------------------------------------------- 1 | 50 2 | 00:01:40,723 --> 00:01:42,707 3 | Examples are tough. 4 | 5 | 28 6 | 00:00:57,088 --> 00:00:58,788 7 | - [gunshot] 8 | - [Shot] And not me? 9 | 10 | 740 11 | 00:45:39,418 --> 00:45:41,084 12 | [Barenaked Ladies' "One Week" plays] 13 | 14 | 398 15 | 00:20:36,309 --> 00:20:39,277 16 | [Roxette's "Listen to 17 | Your Heart" plays softly] 18 | 19 | 262 20 | 00:11:31,995 --> 00:11:35,346 21 | [chuckles softly]: 22 | I'm raising a teenage boy. 23 | 24 | 263 25 | 00:12:31,995 --> 00:12:35,346 26 | (chuckles softly): 27 | twice now. 28 | 29 | 398 30 | 00:20:36,309 --> 00:20:39,277 31 | /BOOM/ 32 | 33 | 400 34 | 00:15:31,995 --> 00:51:35,346 35 | /here we go/: 36 | Third time. 
37 | 38 | 768 39 | 00:32:06,925 --> 00:32:09,057 40 | *Whistles* 41 | What do you got for me, Cabrera? 42 | -------------------------------------------------------------------------------- /tests/data/subtitle_space_parsing_after.srt: -------------------------------------------------------------------------------- 1 | 1 2 | 00:00:06,507 --> 00:00:08,467 3 | The world 4 | is broken. 5 | 6 | 2 7 | 00:00:10,386 --> 00:00:12,555 8 | men who were born 9 | with great power believed 10 | -------------------------------------------------------------------------------- /tests/data/subtitle_space_parsing_before.srt: -------------------------------------------------------------------------------- 1 | 1 2 | 3 | 00:00:06,507 --> 00:00:08,467 4 | The world 5 | is broken. 6 | 7 | 8 | 2 9 | 00:00:10,386 --> 00:00:12,555 10 | men who were born 11 | with great power believed 12 | -------------------------------------------------------------------------------- /tests/data/subtitle_symbols_after.srt: -------------------------------------------------------------------------------- 1 | 1 2 | 00:02:31,229 --> 00:02:33,939 3 | Morning, Maeve. 4 | -------------------------------------------------------------------------------- /tests/data/subtitle_symbols_before.srt: -------------------------------------------------------------------------------- 1 | 1 2 | 00:00:05,118 --> 00:00:15,118 3 | ?? ## _ 4 | 5 | 1 6 | 00:00:05,118 --> 00:00:15,118 7 | - --- 8 | - ¶ 9 | 10 | 4 11 | 00:02:31,229 --> 00:02:33,939 12 | Morning, Maeve. 
# --------------------------------------------------------------------------------
# /tests/io_test.py:
# --------------------------------------------------------------------------------
'''I/O test cases'''
import copy
import unittest
import tempfile

from os.path import join, dirname

from subtitle_filter import Subtitles

DATA_DIR = join(dirname(__file__), 'data')


class SubtitleFilterFontTestCase(unittest.TestCase):
    """Save/reload round trip using the music fixture."""

    def setUp(self):
        source_path = join(DATA_DIR, 'subtitle_music_before.srt')
        self.subs_before = Subtitles(source_path)

    def test_subtitle_save(self):
        # Filter a deep copy (music removal disabled) so the loaded original
        # stays untouched for the final comparison.
        working_copy = copy.deepcopy(self.subs_before)
        working_copy.filter(rm_music=False)
        with tempfile.TemporaryDirectory() as scratch_dir:
            saved_path = join(scratch_dir, 'test.srt')
            working_copy.save(saved_path)
            reloaded = Subtitles(saved_path)
            # What was written to disk must parse back equal to the original.
            self.assertEqual(self.subs_before, reloaded)


# --------------------------------------------------------------------------------
# /tests/subtitles_test.py:
# --------------------------------------------------------------------------------
'''Subtitle test cases'''

import unittest

from os.path import join, dirname

from subtitle_filter import Subtitles

DATA_DIR = join(dirname(__file__), 'data')


def _load_fixture(filename):
    # Parse one fixture file from the tests' data directory.
    return Subtitles(join(DATA_DIR, filename))


class SubtitleFilterFontTestCase(unittest.TestCase):
    """Filtering subtitle_font_before.srt must yield subtitle_font_after.srt."""

    def setUp(self):
        self.subs_before = _load_fixture('subtitle_font_before.srt')
        self.subs_after = _load_fixture('subtitle_font_after.srt')

    def test_subtitle_font(self):
        filtered, expected = self.subs_before, self.subs_after
        filtered.filter()
        self.assertEqual(filtered, expected)


class SubtitleFilterMusicTestCase(unittest.TestCase):
    """Filtering subtitle_music_before.srt must yield subtitle_music_after.srt."""

    def setUp(self):
        self.subs_before = _load_fixture('subtitle_music_before.srt')
        self.subs_after = _load_fixture('subtitle_music_after.srt')

    def test_subtitle_music(self):
        filtered, expected = self.subs_before, self.subs_after
        filtered.filter()
        self.assertEqual(filtered, expected)


class SubtitleFilterSoundEffectsTestCase(unittest.TestCase):
    """Filtering subtitle_sound_effects_before.srt must yield the *_after.srt file."""

    def setUp(self):
        self.subs_before = _load_fixture('subtitle_sound_effects_before.srt')
        self.subs_after = _load_fixture('subtitle_sound_effects_after.srt')

    def test_subtitle_sound_effects(self):
        filtered, expected = self.subs_before, self.subs_after
        filtered.filter()
        self.assertEqual(filtered, expected)


class SubtitleFilterItalicsTestCase(unittest.TestCase):
    """Filtering subtitle_italics_before.srt must yield subtitle_italics_after.srt."""

    def setUp(self):
        self.subs_before = _load_fixture('subtitle_italics_before.srt')
        self.subs_after = _load_fixture('subtitle_italics_after.srt')

    def test_subtitle_italics(self):
        filtered, expected = self.subs_before, self.subs_after
        filtered.filter()
        self.assertEqual(filtered, expected)


class SubtitleAllTestCase(unittest.TestCase):
    """Filtering subtitle_example_before.srt must yield subtitle_example_after.srt."""

    def setUp(self):
        self.subs_before = _load_fixture('subtitle_example_before.srt')
        self.subs_after = _load_fixture('subtitle_example_after.srt')

    def test_subtitle_all(self):
        filtered, expected = self.subs_before, self.subs_after
        filtered.filter()
        self.assertEqual(filtered, expected)


class SubtitleFilterNamesTestCase(unittest.TestCase):
    """Filtering subtitle_names_before.srt must yield subtitle_names_after.srt."""

    def setUp(self):
        self.subs_before = _load_fixture('subtitle_names_before.srt')
        self.subs_after = _load_fixture('subtitle_names_after.srt')

    def test_subtitle_names(self):
        filtered, expected = self.subs_before, self.subs_after
        filtered.filter()
        self.assertEqual(filtered, expected)


class SubtitleFilterSymbolsTestCase(unittest.TestCase):
    """Filtering subtitle_symbols_before.srt must yield subtitle_symbols_after.srt."""

    def setUp(self):
        self.subs_before = _load_fixture('subtitle_symbols_before.srt')
        self.subs_after = _load_fixture('subtitle_symbols_after.srt')

    def test_subtitle_symbols(self):
        filtered, expected = self.subs_before, self.subs_after
        filtered.filter()
        self.assertEqual(filtered, expected)


class SubtitleFilterAuthorTestCase(unittest.TestCase):
    """Filtering subtitle_author_before.srt must yield subtitle_author_after.srt."""

    def setUp(self):
        self.subs_before = _load_fixture('subtitle_author_before.srt')
        self.subs_after = _load_fixture('subtitle_author_after.srt')

    def test_subtitle_author(self):
        filtered, expected = self.subs_before, self.subs_after
        filtered.filter()
        self.assertEqual(filtered, expected)


class SubtitleFilterCommaTestCase(unittest.TestCase):
    """Filtering subtitle_commas_before.srt must yield subtitle_commas_after.srt."""

    def setUp(self):
        self.subs_before = _load_fixture('subtitle_commas_before.srt')
        self.subs_after = _load_fixture('subtitle_commas_after.srt')

    def test_subtitle_commas(self):
        filtered, expected = self.subs_before, self.subs_after
        filtered.filter()
        self.assertEqual(filtered, expected)


class SubtitleParseSpacingTestCase(unittest.TestCase):
    """Filtering subtitle_space_parsing_before.srt must yield the *_after.srt file."""

    def setUp(self):
        self.subs_before = _load_fixture('subtitle_space_parsing_before.srt')
        self.subs_after = _load_fixture('subtitle_space_parsing_after.srt')

    def test_space_parsing_commas(self):
        filtered, expected = self.subs_before, self.subs_after
        filtered.filter()
        self.assertEqual(filtered, expected)


class SubtitleBOMTestCase(unittest.TestCase):
    """Filtering subtitle_bom_before.srt must yield subtitle_bom_after.srt."""

    def setUp(self):
        self.subs_before = _load_fixture('subtitle_bom_before.srt')
        self.subs_after = _load_fixture('subtitle_bom_after.srt')

    def test_bom(self):
        filtered, expected = self.subs_before, self.subs_after
        filtered.filter()
        self.assertEqual(filtered, expected)


class SubtitleHoursTestCase(unittest.TestCase):
    """Filtering hour_in_dialogue_before.srt must yield hour_in_dialogue_after.srt."""

    def setUp(self):
        self.subs_before = _load_fixture('hour_in_dialogue_before.srt')
        self.subs_after = _load_fixture('hour_in_dialogue_after.srt')

    def test_hours(self):
        filtered, expected = self.subs_before, self.subs_after
        filtered.filter()
        self.assertEqual(filtered, expected)


class SubtitleApostrophe(unittest.TestCase):
    """Filtering apostrphone_in_name_before.srt must yield the *_after.srt file."""

    def setUp(self):
        # The fixture file names really are spelled "apostrphone" on disk.
        self.subs_before = _load_fixture('apostrphone_in_name_before.srt')
        self.subs_after = _load_fixture('apostrphone_in_name_after.srt')

    def test_thing(self):
        filtered, expected = self.subs_before, self.subs_after
        filtered.filter()
        self.assertEqual(filtered, expected)

# --------------------------------------------------------------------------------