├── lib
    ├── dateutil
    │   ├── requires.txt
    │   ├── __init__.py
    │   ├── MANIFEST.in
    │   ├── NEWS
    │   ├── parser.py
    │   ├── zoneinfo
    │   │   ├── dateutil-zoneinfo.tar.gz
    │   │   └── __init__.py
    │   ├── PKG-INFO
    │   ├── LICENSE
    │   ├── easter.py
    │   ├── README.rst
    │   └── tzwin.py
    ├── guessit
    │   ├── config
    │   │   └── options.json
    │   ├── rules
    │   │   ├── markers
    │   │   │   ├── __init__.py
    │   │   │   ├── path.py
    │   │   │   └── groups.py
    │   │   ├── properties
    │   │   │   ├── __init__.py
    │   │   │   ├── size.py
    │   │   │   ├── mimetype.py
    │   │   │   ├── cds.py
    │   │   │   ├── part.py
    │   │   │   ├── film.py
    │   │   │   ├── bonus.py
    │   │   │   ├── edition.py
    │   │   │   ├── type.py
    │   │   │   ├── container.py
    │   │   │   ├── crc.py
    │   │   │   ├── date.py
    │   │   │   ├── format.py
    │   │   │   ├── video_codec.py
    │   │   │   ├── country.py
    │   │   │   ├── website.py
    │   │   │   ├── screen_size.py
    │   │   │   ├── streaming_service.py
    │   │   │   ├── audio_codec.py
    │   │   │   └── release_group.py
    │   │   ├── common
    │   │   │   ├── __init__.py
    │   │   │   ├── validators.py
    │   │   │   ├── expected.py
    │   │   │   ├── comparators.py
    │   │   │   ├── words.py
    │   │   │   ├── formatters.py
    │   │   │   ├── date.py
    │   │   │   └── numeral.py
    │   │   ├── __init__.py
    │   │   └── processors.py
    │   ├── __version__.py
    │   ├── __init__.py
    │   ├── backports.py
    │   ├── reutils.py
    │   ├── jsonutils.py
    │   ├── yamlutils.py
    │   ├── tlds-alpha-by-domain.txt
    │   ├── api.py
    │   └── __main__.py
    ├── rebulk
    │   ├── __version__.py
    │   ├── remodule.py
    │   ├── __init__.py
    │   ├── formatters.py
    │   ├── debug.py
    │   ├── validators.py
    │   ├── toposort.py
    │   ├── processors.py
    │   ├── introspector.py
    │   ├── utils.py
    │   └── loose.py
    └── babelfish
        ├── converters
        │   ├── name.py
        │   ├── alpha2.py
        │   ├── alpha3b.py
        │   ├── alpha3t.py
        │   ├── scope.py
        │   ├── type.py
        │   ├── countryname.py
        │   └── opensubtitles.py
        ├── __init__.py
        ├── script.py
        ├── exceptions.py
        ├── country.py
        ├── data
        │   └── iso-3166-1.txt
        └── language.py
├── .gitignore
├── README.txt
├── README.md
├── ChangeLog.txt
├── testsort.py
└── testdata.json


/lib/dateutil/requires.txt:
--------------------------------------------------------------------------------
1 | six
2 | 


--------------------------------------------------------------------------------
/.gitignore:
--------------------------------------------------------------------------------
1 | *.pyc
2 | *.pyo
3 | .DS_Store
4 | .vscode
5 | __
6 | 


--------------------------------------------------------------------------------
/lib/dateutil/__init__.py:
--------------------------------------------------------------------------------
1 | # -*- coding: utf-8 -*-
2 | __version__ = "2.3"
3 | 


--------------------------------------------------------------------------------
/lib/dateutil/MANIFEST.in:
--------------------------------------------------------------------------------
1 | include LICENSE NEWS zonefile_metadata.json updatezinfo.py
2 | 


--------------------------------------------------------------------------------
/lib/dateutil/NEWS:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/nzbget/VideoSort/HEAD/lib/dateutil/NEWS


--------------------------------------------------------------------------------
/lib/guessit/config/options.json:
--------------------------------------------------------------------------------
1 | {
2 |   "expected_title": [
3 |     "OSS 117"
4 |   ]
5 | }


--------------------------------------------------------------------------------
/lib/dateutil/parser.py:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/nzbget/VideoSort/HEAD/lib/dateutil/parser.py


--------------------------------------------------------------------------------
/lib/guessit/rules/markers/__init__.py:
--------------------------------------------------------------------------------
1 | #!/usr/bin/env python
2 | # -*- coding: utf-8 -*-
3 | """
4 | Markers
5 | """
6 | 


--------------------------------------------------------------------------------
/lib/guessit/rules/properties/__init__.py:
--------------------------------------------------------------------------------
1 | #!/usr/bin/env python
2 | # -*- coding: utf-8 -*-
3 | """
4 | Properties
5 | """
6 | 


--------------------------------------------------------------------------------
/lib/dateutil/zoneinfo/dateutil-zoneinfo.tar.gz:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/nzbget/VideoSort/HEAD/lib/dateutil/zoneinfo/dateutil-zoneinfo.tar.gz


--------------------------------------------------------------------------------
/lib/rebulk/__version__.py:
--------------------------------------------------------------------------------
1 | #!/usr/bin/env python
2 | # -*- coding: utf-8 -*-
3 | """
4 | Version module
5 | """
6 | # pragma: no cover
7 | __version__ = '0.9.0'
8 | 


--------------------------------------------------------------------------------
/lib/guessit/__version__.py:
--------------------------------------------------------------------------------
1 | #!/usr/bin/env python
2 | # -*- coding: utf-8 -*-
3 | """
4 | Version module
5 | """
6 | # pragma: no cover
7 | __version__ = '2.1.4'
8 | 


--------------------------------------------------------------------------------
/lib/guessit/__init__.py:
--------------------------------------------------------------------------------
 1 | #!/usr/bin/env python
 2 | # -*- coding: utf-8 -*-
 3 | """
 4 | Extracts as much information as possible from a video file.
 5 | """
 6 | from .api import guessit, GuessItApi
 7 | from .options import ConfigurationException
 8 | 
 9 | from .__version__ import __version__
10 | 


--------------------------------------------------------------------------------
/lib/rebulk/remodule.py:
--------------------------------------------------------------------------------
 1 | #!/usr/bin/env python
 2 | # -*- coding: utf-8 -*-
 3 | """
 4 | Uniform re module
 5 | """
 6 | # pylint: disable-all
 7 | import os
 8 | 
 9 | REGEX_AVAILABLE = False
10 | if os.environ.get('REGEX_DISABLED') in ["1", "true", "True", "Y"]:
11 |     import re
12 | else:
13 |     try:
14 |         import regex as re
15 |         REGEX_AVAILABLE = True
16 |     except ImportError:
17 |         import re
18 | 


--------------------------------------------------------------------------------
/lib/rebulk/__init__.py:
--------------------------------------------------------------------------------
 1 | #!/usr/bin/env python
 2 | # -*- coding: utf-8 -*-
 3 | """
 4 | Define simple search patterns in bulk to perform advanced matching on any string.
 5 | """
 6 | #  pylint:disable=import-self
 7 | from .rebulk import Rebulk
 8 | from .rules import Rule, CustomRule, AppendMatch, RemoveMatch, RenameMatch, AppendTags, RemoveTags
 9 | from .processors import ConflictSolver, PrivateRemover, POST_PROCESS, PRE_PROCESS
10 | from .pattern import REGEX_AVAILABLE
11 | 


--------------------------------------------------------------------------------
/lib/guessit/rules/common/__init__.py:
--------------------------------------------------------------------------------
 1 | #!/usr/bin/env python
 2 | # -*- coding: utf-8 -*-
 3 | """
 4 | Common module
 5 | """
 6 | import re
 7 | 
 8 | seps = r' [](){}+*|=-_~#/\\.,;:'  # list of tags/words separators
 9 | seps_no_groups = seps.replace('[](){}', '')
10 | seps_no_fs = seps.replace('/', '').replace('\\', '')
11 | 
12 | title_seps = r'-+/\|'  # separators for title
13 | 
14 | dash = (r'-', r'['+re.escape(seps_no_fs)+']')  # abbreviation used by many rebulk objects.
15 | alt_dash = (r'@', r'['+re.escape(seps_no_fs)+']')  # abbreviation used by many rebulk objects.
16 | 


--------------------------------------------------------------------------------
/lib/babelfish/converters/name.py:
--------------------------------------------------------------------------------
 1 | # -*- coding: utf-8 -*-
 2 | #
 3 | # Copyright (c) 2013 the BabelFish authors. All rights reserved.
 4 | # Use of this source code is governed by the 3-clause BSD license
 5 | # that can be found in the LICENSE file.
 6 | #
 7 | from __future__ import unicode_literals
 8 | from . import LanguageEquivalenceConverter
 9 | from ..language import LANGUAGE_MATRIX
10 | 
11 | 
class NameConverter(LanguageEquivalenceConverter):
    """Equivalence converter whose symbols are language names.

    ``SYMBOLS`` maps the ``alpha3`` code of each ``LANGUAGE_MATRIX`` entry to
    that entry's ``name``. The table is built once, when the class body runs.
    """
    # NOTE(review): presumably tells the base class to match names regardless
    # of case -- confirm against LanguageEquivalenceConverter.
    CASE_SENSITIVE = False
    SYMBOLS = {}
    # Entries with a falsy name are left out of the table. The loop runs in the
    # class body, so its variable remains bound as a class attribute afterwards.
    for iso_language in LANGUAGE_MATRIX:
        if iso_language.name:
            SYMBOLS[iso_language.alpha3] = iso_language.name
18 | 


--------------------------------------------------------------------------------
/lib/babelfish/converters/alpha2.py:
--------------------------------------------------------------------------------
 1 | # -*- coding: utf-8 -*-
 2 | #
 3 | # Copyright (c) 2013 the BabelFish authors. All rights reserved.
 4 | # Use of this source code is governed by the 3-clause BSD license
 5 | # that can be found in the LICENSE file.
 6 | #
 7 | from __future__ import unicode_literals
 8 | from . import LanguageEquivalenceConverter
 9 | from ..language import LANGUAGE_MATRIX
10 | 
11 | 
class Alpha2Converter(LanguageEquivalenceConverter):
    """Equivalence converter whose symbols are ``alpha2`` codes.

    ``SYMBOLS`` maps the ``alpha3`` code of each ``LANGUAGE_MATRIX`` entry to
    that entry's ``alpha2`` value. The table is built once, when the class
    body runs.
    """
    # NOTE(review): presumably tells the base class to match codes with exact
    # case -- confirm against LanguageEquivalenceConverter.
    CASE_SENSITIVE = True
    SYMBOLS = {}
    # Entries with a falsy alpha2 are left out of the table. The loop runs in
    # the class body, so its variable remains bound as a class attribute afterwards.
    for iso_language in LANGUAGE_MATRIX:
        if iso_language.alpha2:
            SYMBOLS[iso_language.alpha3] = iso_language.alpha2
18 | 


--------------------------------------------------------------------------------
/lib/babelfish/converters/alpha3b.py:
--------------------------------------------------------------------------------
 1 | # -*- coding: utf-8 -*-
 2 | #
 3 | # Copyright (c) 2013 the BabelFish authors. All rights reserved.
 4 | # Use of this source code is governed by the 3-clause BSD license
 5 | # that can be found in the LICENSE file.
 6 | #
 7 | from __future__ import unicode_literals
 8 | from . import LanguageEquivalenceConverter
 9 | from ..language import LANGUAGE_MATRIX
10 | 
11 | 
class Alpha3BConverter(LanguageEquivalenceConverter):
    """Equivalence converter whose symbols are ``alpha3b`` codes.

    ``SYMBOLS`` maps the ``alpha3`` code of each ``LANGUAGE_MATRIX`` entry to
    that entry's ``alpha3b`` value. The table is built once, when the class
    body runs.
    """
    # NOTE(review): presumably tells the base class to match codes with exact
    # case -- confirm against LanguageEquivalenceConverter.
    CASE_SENSITIVE = True
    SYMBOLS = {}
    # Entries with a falsy alpha3b are left out of the table. The loop runs in
    # the class body, so its variable remains bound as a class attribute afterwards.
    for iso_language in LANGUAGE_MATRIX:
        if iso_language.alpha3b:
            SYMBOLS[iso_language.alpha3] = iso_language.alpha3b
18 | 


--------------------------------------------------------------------------------
/lib/babelfish/converters/alpha3t.py:
--------------------------------------------------------------------------------
 1 | # -*- coding: utf-8 -*-
 2 | #
 3 | # Copyright (c) 2013 the BabelFish authors. All rights reserved.
 4 | # Use of this source code is governed by the 3-clause BSD license
 5 | # that can be found in the LICENSE file.
 6 | #
 7 | from __future__ import unicode_literals
 8 | from . import LanguageEquivalenceConverter
 9 | from ..language import LANGUAGE_MATRIX
10 | 
11 | 
class Alpha3TConverter(LanguageEquivalenceConverter):
    """Equivalence converter whose symbols are ``alpha3t`` codes.

    ``SYMBOLS`` maps the ``alpha3`` code of each ``LANGUAGE_MATRIX`` entry to
    that entry's ``alpha3t`` value. The table is built once, when the class
    body runs.
    """
    # NOTE(review): presumably tells the base class to match codes with exact
    # case -- confirm against LanguageEquivalenceConverter.
    CASE_SENSITIVE = True
    SYMBOLS = {}
    # Entries with a falsy alpha3t are left out of the table. The loop runs in
    # the class body, so its variable remains bound as a class attribute afterwards.
    for iso_language in LANGUAGE_MATRIX:
        if iso_language.alpha3t:
            SYMBOLS[iso_language.alpha3] = iso_language.alpha3t
18 | 


--------------------------------------------------------------------------------
/lib/rebulk/formatters.py:
--------------------------------------------------------------------------------
 1 | #!/usr/bin/env python
 2 | # -*- coding: utf-8 -*-
 3 | """
 4 | Formatter functions to use in patterns.
 5 | 
 6 | All those function have last argument as match.value (str).
 7 | """
 8 | 
 9 | 
def formatters(*chained_formatters):
    """
    Combine several formatter functions into a single one.

    The returned callable feeds its argument to the first formatter, passes
    that result to the second, and so on; with no formatter at all it returns
    its argument untouched.

    :param chained_formatters: formatter callables, applied left to right
    :type chained_formatters: callable
    :return: function applying every formatter in sequence
    :rtype: callable
    """
    def formatters_chain(input_string):  # pylint:disable=missing-docstring
        current = input_string
        for apply_format in chained_formatters:
            current = apply_format(current)
        return current

    return formatters_chain
24 | 


--------------------------------------------------------------------------------
/lib/guessit/rules/properties/size.py:
--------------------------------------------------------------------------------
 1 | #!/usr/bin/env python
 2 | # -*- coding: utf-8 -*-
 3 | """
 4 | size property
 5 | """
 6 | import re
 7 | 
 8 | from rebulk import Rebulk
 9 | 
10 | from ..common.validators import seps_surround
11 | from ..common import dash
12 | 
13 | 
def size():
    """
    Builder for rebulk object.

    Registers the ``size`` patterns (digits followed by ``mb``, ``gb`` or
    ``tb``, matched case-insensitively) validated with ``seps_surround``.

    :return: Created Rebulk object
    :rtype: Rebulk
    """

    def format_size(value):
        """Upper-case the value and drop a dot that sits between a digit and a non-digit."""
        upper_value = value.upper()
        return re.sub(r'(?<=\d)[.](?=[^\d])', '', upper_value)

    size_rebulk = Rebulk().regex_defaults(flags=re.IGNORECASE, abbreviations=[dash])
    size_rebulk.defaults(name='size', validator=seps_surround)
    size_rebulk.regex(
        r'\d+\.?[mgt]b',
        r'\d+\.\d+[mgt]b',
        formatter=format_size,
        tags=['release-group-prefix'],
    )

    return size_rebulk
30 | 


--------------------------------------------------------------------------------
/lib/babelfish/converters/scope.py:
--------------------------------------------------------------------------------
 1 | # -*- coding: utf-8 -*-
 2 | #
 3 | # Copyright (c) 2013 the BabelFish authors. All rights reserved.
 4 | # Use of this source code is governed by the 3-clause BSD license
 5 | # that can be found in the LICENSE file.
 6 | #
 7 | from __future__ import unicode_literals
 8 | from . import LanguageConverter
 9 | from ..exceptions import LanguageConvertError
10 | from ..language import LANGUAGE_MATRIX
11 | 
12 | 
class ScopeConverter(LanguageConverter):
    """Converter from an ``alpha3`` code to the full name of the language scope.

    ``SYMBOLS`` maps the ``alpha3`` code of each ``LANGUAGE_MATRIX`` entry to
    its one-letter ``scope``; ``FULLNAME`` spells those letters out.
    """
    FULLNAME = {'I': 'individual', 'M': 'macrolanguage', 'S': 'special'}
    SYMBOLS = {}
    # Built in the class body; the loop variable remains bound as a class attribute.
    for iso_language in LANGUAGE_MATRIX:
        SYMBOLS[iso_language.alpha3] = iso_language.scope
    codes = set(SYMBOLS.values())

    def convert(self, alpha3, country=None, script=None):
        """
        Return the full scope name for a language.

        :param alpha3: alpha3 code looked up in ``SYMBOLS`` (``KeyError`` if absent)
        :param country: only forwarded to the error raised on failure
        :param script: only forwarded to the error raised on failure
        :return: the matching ``FULLNAME`` value
        :raise LanguageConvertError: when the scope letter has no full name
        """
        scope_code = self.SYMBOLS[alpha3]
        if scope_code not in self.FULLNAME:
            raise LanguageConvertError(alpha3, country, script)
        return self.FULLNAME[scope_code]
24 | 


--------------------------------------------------------------------------------
/lib/guessit/backports.py:
--------------------------------------------------------------------------------
 1 | #!/usr/bin/env python
 2 | # -*- coding: utf-8 -*-
 3 | """
 4 | Backports
 5 | """
 6 | # pragma: no-cover
 7 | # pylint: disabled
 8 | 
 9 | def cmp_to_key(mycmp):
10 |     """functools.cmp_to_key backport"""
11 |     class KeyClass(object):
12 |         """Key class"""
13 |         def __init__(self, obj, *args):  # pylint: disable=unused-argument
14 |             self.obj = obj
15 |         def __lt__(self, other):
16 |             return mycmp(self.obj, other.obj) < 0
17 |         def __gt__(self, other):
18 |             return mycmp(self.obj, other.obj) > 0
19 |         def __eq__(self, other):
20 |             return mycmp(self.obj, other.obj) == 0
21 |         def __le__(self, other):
22 |             return mycmp(self.obj, other.obj) <= 0
23 |         def __ge__(self, other):
24 |             return mycmp(self.obj, other.obj) >= 0
25 |         def __ne__(self, other):
26 |             return mycmp(self.obj, other.obj) != 0
27 |     return KeyClass
28 | 


--------------------------------------------------------------------------------
/lib/guessit/reutils.py:
--------------------------------------------------------------------------------
 1 | #!/usr/bin/env python
 2 | # -*- coding: utf-8 -*-
 3 | """
 4 | Utils for re module
 5 | """
 6 | 
 7 | from rebulk.remodule import re
 8 | 
 9 | 
def build_or_pattern(patterns, name=None, escape=False):
    """
    Build a single alternation ("or") pattern string from several patterns.

    The alternatives are joined with ``|`` inside one group: a named group
    when ``name`` is given, a non-capturing group otherwise. An empty
    ``patterns`` yields only the closing parenthesis.

    :param patterns: pattern strings to combine
    :type patterns: iterable of str
    :param name: optional name of the enclosing group
    :type name: str
    :param escape: when true, each pattern is regex-escaped and wrapped in its
        own non-capturing group
    :type escape: bool
    :return: the combined pattern
    :rtype: str
    """
    alternatives = [('(?:%s)' % re.escape(item)) if escape else item for item in patterns]
    if not alternatives:
        return ')'
    group_start = '(?P<' + name + '>' if name else '(?:'
    return group_start + '|'.join(alternatives) + ')'
36 | 


--------------------------------------------------------------------------------
/lib/guessit/jsonutils.py:
--------------------------------------------------------------------------------
 1 | #!/usr/bin/env python
 2 | # -*- coding: utf-8 -*-
 3 | """
 4 | JSON Utils
 5 | """
 6 | import json
 7 | try:
 8 |     from collections import OrderedDict
 9 | except ImportError:  # pragma: no-cover
10 |     from ordereddict import OrderedDict  # pylint:disable=import-error
11 | 
12 | from rebulk.match import Match
13 | 
14 | 
class GuessitEncoder(json.JSONEncoder):
    """
    JSON Encoder for guessit response
    """

    def default(self, o):  # pylint:disable=method-hidden
        """
        Serialize objects the stock JSON encoder does not handle.

        A ``Match`` becomes an ordered mapping of ``value``, ``raw`` (only when
        truthy), ``start`` and ``end``; any other object is rendered as the
        string of its ``name`` attribute when it has one, else as ``str(o)``.

        :param o: object to serialize
        :return: JSON-serializable replacement for ``o``
        """
        if isinstance(o, Match):
            fields = [('value', o.value)]
            if o.raw:
                fields.append(('raw', o.raw))
            fields.extend([('start', o.start), ('end', o.end)])
            return OrderedDict(fields)
        if hasattr(o, 'name'):  # Babelfish languages/countries long name
            return str(o.name)
        return str(o)  # pragma: no cover
33 | 


--------------------------------------------------------------------------------
/lib/babelfish/converters/type.py:
--------------------------------------------------------------------------------
 1 | # -*- coding: utf-8 -*-
 2 | #
 3 | # Copyright (c) 2013 the BabelFish authors. All rights reserved.
 4 | # Use of this source code is governed by the 3-clause BSD license
 5 | # that can be found in the LICENSE file.
 6 | #
 7 | from __future__ import unicode_literals
 8 | from . import LanguageConverter
 9 | from ..exceptions import LanguageConvertError
10 | from ..language import LANGUAGE_MATRIX
11 | 
12 | 
class LanguageTypeConverter(LanguageConverter):
    """Converter from an ``alpha3`` code to the full name of the language type.

    ``SYMBOLS`` maps the ``alpha3`` code of each ``LANGUAGE_MATRIX`` entry to
    its one-letter ``type``; ``FULLNAME`` spells those letters out.
    """
    FULLNAME = {'A': 'ancient', 'C': 'constructed', 'E': 'extinct', 'H': 'historical', 'L': 'living', 'S': 'special'}
    SYMBOLS = {}
    # Built in the class body; the loop variable remains bound as a class attribute.
    for iso_language in LANGUAGE_MATRIX:
        SYMBOLS[iso_language.alpha3] = iso_language.type
    codes = set(SYMBOLS.values())

    def convert(self, alpha3, country=None, script=None):
        """
        Return the full type name for a language.

        :param alpha3: alpha3 code looked up in ``SYMBOLS`` (``KeyError`` if absent)
        :param country: only forwarded to the error raised on failure
        :param script: only forwarded to the error raised on failure
        :return: the matching ``FULLNAME`` value
        :raise LanguageConvertError: when the type letter has no full name
        """
        type_code = self.SYMBOLS[alpha3]
        if type_code not in self.FULLNAME:
            raise LanguageConvertError(alpha3, country, script)
        return self.FULLNAME[type_code]
24 | 


--------------------------------------------------------------------------------
/lib/babelfish/__init__.py:
--------------------------------------------------------------------------------
 1 | # -*- coding: utf-8 -*-
 2 | #
 3 | # Copyright (c) 2013 the BabelFish authors. All rights reserved.
 4 | # Use of this source code is governed by the 3-clause BSD license
 5 | # that can be found in the LICENSE file.
 6 | #
 7 | __title__ = 'babelfish'
 8 | __version__ = '0.5.5-dev'
 9 | __author__ = 'Antoine Bertin'
10 | __license__ = 'BSD'
11 | __copyright__ = 'Copyright 2015 the BabelFish authors'
12 | 
13 | import sys
14 | 
# Base string type: ``str`` on Python 3 and later, ``basestring`` before that.
# The conditional expression only evaluates the branch it selects, so the
# ``basestring`` name is never looked up on Python 3.
basestr = str if sys.version_info[0] >= 3 else basestring
19 | 
20 | from .converters import (LanguageConverter, LanguageReverseConverter, LanguageEquivalenceConverter, CountryConverter,
21 |     CountryReverseConverter)
22 | from .country import country_converters, COUNTRIES, COUNTRY_MATRIX, Country
23 | from .exceptions import Error, LanguageConvertError, LanguageReverseError, CountryConvertError, CountryReverseError
24 | from .language import language_converters, LANGUAGES, LANGUAGE_MATRIX, Language
25 | from .script import SCRIPTS, SCRIPT_MATRIX, Script
26 | 


--------------------------------------------------------------------------------
/lib/dateutil/PKG-INFO:
--------------------------------------------------------------------------------
 1 | Metadata-Version: 1.1
 2 | Name: python-dateutil
 3 | Version: 2.3
 4 | Summary: Extensions to the standard Python datetime module
 5 | Home-page: https://dateutil.readthedocs.org
 6 | Author: Yaron de Leeuw
 7 | Author-email: me@jarondl.net
 8 | License: Simplified BSD
 9 | Description: 
10 |         The dateutil module provides powerful extensions to the
11 |         datetime module available in the Python standard library.
12 |         
13 | Platform: UNKNOWN
14 | Classifier: Development Status :: 5 - Production/Stable
15 | Classifier: Intended Audience :: Developers
16 | Classifier: License :: OSI Approved :: BSD License
17 | Classifier: Programming Language :: Python
18 | Classifier: Programming Language :: Python :: 2
19 | Classifier: Programming Language :: Python :: 2.6
20 | Classifier: Programming Language :: Python :: 2.7
21 | Classifier: Programming Language :: Python :: 3
22 | Classifier: Programming Language :: Python :: 3.2
23 | Classifier: Programming Language :: Python :: 3.3
24 | Classifier: Programming Language :: Python :: 3.4
25 | Classifier: Topic :: Software Development :: Libraries
26 | Requires: six
27 | 


--------------------------------------------------------------------------------
/lib/guessit/rules/markers/path.py:
--------------------------------------------------------------------------------
 1 | #!/usr/bin/env python
 2 | # -*- coding: utf-8 -*-
 3 | """
 4 | Path markers
 5 | """
 6 | from rebulk import Rebulk
 7 | 
 8 | from rebulk.utils import find_all
 9 | 
10 | 
def path():
    """
    Builder for rebulk object.

    Registers a functional pattern producing ``path`` markers.

    :return: Created Rebulk object
    :rtype: Rebulk
    """

    def mark_path(input_string, context):
        """
        Functional pattern to mark path elements.

        With a truthy ``name_only`` entry in the context the whole string is a
        single element; otherwise the string is cut at every ``/`` and ``\\``.

        :param input_string: string to split into path elements
        :param context: mapping read for the ``name_only`` entry
        :return: list of ``(start, end)`` spans, one per path element
        """
        if context.get('name_only', False):
            return [(0, len(input_string))]

        # -1 and len(input_string) act as virtual separators around the string.
        boundaries = sorted(
            list(find_all(input_string, '/'))
            + list(find_all(input_string, '\\'))
            + [-1, len(input_string)]
        )
        return [(before + 1, after) for before, after in zip(boundaries, boundaries[1:])]

    path_rebulk = Rebulk()
    path_rebulk.defaults(name="path", marker=True)
    path_rebulk.functional(mark_path)
    return path_rebulk
44 | 


--------------------------------------------------------------------------------
/lib/babelfish/converters/countryname.py:
--------------------------------------------------------------------------------
 1 | # -*- coding: utf-8 -*-
 2 | #
 3 | # Copyright (c) 2013 the BabelFish authors. All rights reserved.
 4 | # Use of this source code is governed by the 3-clause BSD license
 5 | # that can be found in the LICENSE file.
 6 | #
 7 | from __future__ import unicode_literals
 8 | from . import CountryReverseConverter, CaseInsensitiveDict
 9 | from ..country import COUNTRY_MATRIX
10 | from ..exceptions import CountryConvertError, CountryReverseError
11 | 
12 | 
class CountryNameConverter(CountryReverseConverter):
    """Two-way converter between country ``alpha2`` codes and country names."""

    def __init__(self):
        """Build both lookup tables and the set of names from ``COUNTRY_MATRIX``."""
        self.codes = set()
        self.to_name = {}
        self.from_name = CaseInsensitiveDict()
        for entry in COUNTRY_MATRIX:
            country_name = entry.name
            country_code = entry.alpha2
            self.codes.add(country_name)
            self.to_name[country_code] = country_name
            self.from_name[country_name] = country_code

    def convert(self, alpha2):
        """
        Return the country name registered for ``alpha2``.

        :raise CountryConvertError: when ``alpha2`` is not a known code
        """
        if alpha2 in self.to_name:
            return self.to_name[alpha2]
        raise CountryConvertError(alpha2)

    def reverse(self, name):
        """
        Return the ``alpha2`` code registered for ``name``.

        :raise CountryReverseError: when ``name`` is not a known country name
        """
        if name in self.from_name:
            return self.from_name[name]
        raise CountryReverseError(name)
32 | 


--------------------------------------------------------------------------------
/lib/guessit/rules/common/validators.py:
--------------------------------------------------------------------------------
 1 | #!/usr/bin/env python
 2 | # -*- coding: utf-8 -*-
 3 | """
 4 | Validators
 5 | """
 6 | from functools import partial
 7 | 
 8 | from rebulk.validators import chars_before, chars_after, chars_surround
 9 | from . import seps
10 | 
# rebulk's chars_before / chars_after / chars_surround validators with their
# first argument bound to ``seps``.
seps_before, seps_after, seps_surround = (
    partial(check, seps) for check in (chars_before, chars_after, chars_surround)
)
14 | 
15 | 
def int_coercable(string):
    """
    Check if string can be coerced to int

    :param string: value to test, normally a str
    :type string: str
    :return: True when ``int(string)`` succeeds, False otherwise
    :rtype: bool
    """
    try:
        int(string)
    except (TypeError, ValueError):
        # ValueError: text that is not an integer literal.
        # TypeError: a value int() cannot take at all, such as None.
        return False
    return True
29 | 
30 | 
def compose(*validators):
    """
    Combine validator functions into one that requires all of them to pass.

    :param validators: validator callables, evaluated left to right
    :type validators: callable
    :return: function returning True only when every validator accepts its argument
    :rtype: callable
    """
    def composed(string):
        """
        Run the validators in order, stopping at the first one that rejects.

        :param string: value handed to each validator
        :type string:
        :return: False as soon as a validator returns a falsy value, else True
        :rtype: bool
        """
        return all(validator(string) for validator in validators)
    return composed
52 | 


--------------------------------------------------------------------------------
/lib/guessit/rules/properties/mimetype.py:
--------------------------------------------------------------------------------
 1 | #!/usr/bin/env python
 2 | # -*- coding: utf-8 -*-
 3 | """
 4 | mimetype property
 5 | """
 6 | import mimetypes
 7 | 
 8 | from rebulk import Rebulk, CustomRule, POST_PROCESS
 9 | from rebulk.match import Match
10 | 
11 | from ...rules.processors import Processors
12 | 
13 | 
def mimetype():
    """
    Builder for rebulk object.

    :return: result of registering the ``Mimetype`` rule on a new Rebulk
    :rtype: Rebulk
    """
    builder = Rebulk()
    return builder.rules(Mimetype)
21 | 
22 | 
class Mimetype(CustomRule):
    """
    Mimetype post processor.

    Guesses the MIME type of the input string and, when one is found, appends
    it as a ``mimetype`` match.
    """
    priority = POST_PROCESS

    dependency = Processors

    def when(self, matches, context):
        """
        Guess the MIME type from the input string (non-strict lookup).

        :return: the guessed type, or None when nothing could be guessed
        """
        guessed = mimetypes.guess_type(matches.input_string, strict=False)
        return guessed[0]

    def then(self, matches, when_response, context):
        """
        Append a zero-length ``mimetype`` match located at the end of the input string.

        :param when_response: MIME type returned by :meth:`when`
        """
        end_of_input = len(matches.input_string)
        matches.append(Match(end_of_input, end_of_input, name='mimetype', value=when_response))

    @property
    def properties(self):
        """
        Properties for this rule.
        """
        return {'mimetype': [None]}
49 | 


--------------------------------------------------------------------------------
/lib/guessit/rules/properties/cds.py:
--------------------------------------------------------------------------------
 1 | #!/usr/bin/env python
 2 | # -*- coding: utf-8 -*-
 3 | """
 4 | cd and cd_count properties
 5 | """
 6 | from rebulk.remodule import re
 7 | 
 8 | from rebulk import Rebulk
 9 | from ..common import dash
10 | 
11 | 
def cds():
    """
    Builder for rebulk object.

    Registers two case-insensitive patterns: ``cd<N>`` with an optional
    ``of<M>`` suffix, and ``<M>cd``/``<M>cds``. Captured numbers are converted
    to int and accepted only when strictly between 0 and 100.

    :return: Created Rebulk object
    :rtype: Rebulk
    """

    def in_range(match):
        """Accept values strictly between 0 and 100."""
        return 0 < match.value < 100

    cd_rebulk = Rebulk().regex_defaults(flags=re.IGNORECASE, abbreviations=[dash])

    # "cd 1", "cd1of3", ...
    cd_rebulk.regex(
        r'cd-?(?P<cd>\d+)(?:-?of-?(?P<cd_count>\d+))?',
        validator={'cd': in_range, 'cd_count': in_range},
        formatter={'cd': int, 'cd_count': int},
        children=True,
        private_parent=True,
        properties={'cd': [None], 'cd_count': [None]},
    )
    # "2cd", "3 cds", ...
    cd_rebulk.regex(
        r'(?P<cd_count>\d+)-?cds?',
        validator={'cd': in_range, 'cd_count': in_range},
        formatter={'cd_count': int},
        children=True,
        private_parent=True,
        properties={'cd': [None], 'cd_count': [None]},
    )

    return cd_rebulk
36 | 


--------------------------------------------------------------------------------
/lib/guessit/rules/markers/groups.py:
--------------------------------------------------------------------------------
 1 | #!/usr/bin/env python
 2 | # -*- coding: utf-8 -*-
 3 | """
 4 | Groups markers (...), [...] and {...}
 5 | """
 6 | from rebulk import Rebulk
 7 | 
 8 | 
 9 | def groups():
10 |     """
11 |     Builder for rebulk object.
12 |     :return: Created Rebulk object
13 |     :rtype: Rebulk
14 |     """
15 |     rebulk = Rebulk()
16 |     rebulk.defaults(name="group", marker=True)
17 | 
18 |     starting = '([{'
19 |     ending = ')]}'
20 | 
21 |     def mark_groups(input_string):
22 |         """
23 |         Functional pattern to mark groups (...), [...] and {...}.
24 | 
25 |         :param input_string:
26 |         :return:
27 |         """
28 |         openings = ([], [], [])
29 |         i = 0
30 | 
31 |         ret = []
32 |         for char in input_string:
33 |             start_type = starting.find(char)
34 |             if start_type > -1:
35 |                 openings[start_type].append(i)
36 | 
37 |             i += 1
38 | 
39 |             end_type = ending.find(char)
40 |             if end_type > -1:
41 |                 try:
42 |                     start_index = openings[end_type].pop()
43 |                     ret.append((start_index, i))
44 |                 except IndexError:
45 |                     pass
46 |         return ret
47 | 
48 |     rebulk.functional(mark_groups)
49 |     return rebulk
50 | 


--------------------------------------------------------------------------------
/lib/guessit/rules/properties/part.py:
--------------------------------------------------------------------------------
 1 | #!/usr/bin/env python
 2 | # -*- coding: utf-8 -*-
 3 | """
 4 | part property
 5 | """
 6 | from rebulk.remodule import re
 7 | 
 8 | from rebulk import Rebulk
 9 | from ..common import dash
10 | from ..common.validators import seps_surround, int_coercable, compose
11 | from ..common.numeral import numeral, parse_numeral
12 | from ...reutils import build_or_pattern
13 | 
14 | 
def part():
    """
    Builder for rebulk object.

    Registers a case-insensitive pattern matching a ``pt``/``part`` prefix
    followed by a numeral; the parsed ``part`` value must be strictly between
    0 and 100.

    :return: Created Rebulk object
    :rtype: Rebulk
    """
    prefixes = ['pt', 'part']

    def validate_roman(match):
        """
        Accept a match whose raw text coerces to int; otherwise require
        separators around it.

        :param match: match to validate
        :type match:
        :return: True, or the result of ``seps_surround`` for non-integer text
        :rtype:
        """
        return int_coercable(match.raw) or seps_surround(match)

    part_pattern = build_or_pattern(prefixes) + r'-?(?P<part>' + numeral + r')'
    part_validator = compose(validate_roman, lambda m: 0 < m.value < 100)

    part_rebulk = Rebulk().regex_defaults(flags=re.IGNORECASE, abbreviations=[dash],
                                          validator={'__parent__': seps_surround})
    part_rebulk.regex(part_pattern,
                      prefixes=prefixes, validate_all=True, private_parent=True, children=True,
                      formatter=parse_numeral,
                      validator={'part': part_validator})

    return part_rebulk
42 | 


--------------------------------------------------------------------------------
/lib/guessit/rules/properties/film.py:
--------------------------------------------------------------------------------
 1 | #!/usr/bin/env python
 2 | # -*- coding: utf-8 -*-
 3 | """
 4 | film property
 5 | """
 6 | from rebulk import Rebulk, AppendMatch, Rule
 7 | from rebulk.remodule import re
 8 | 
 9 | from ..common.formatters import cleanup
10 | from ..common.validators import seps_surround
11 | 
12 | 
def film():
    """
    Builder for rebulk object.

    Registers the ``f<digits>`` film number pattern and the rule deriving
    ``film_title`` from it.

    :return: Created Rebulk object
    :rtype: Rebulk
    """
    regex_options = dict(flags=re.IGNORECASE, validate_all=True, validator={'__parent__': seps_surround})
    rebulk = Rebulk().regex_defaults(**regex_options)

    # One or two digits following an "f", exposed as an int through the child match.
    rebulk.regex(r'f(\d{1,2})', name='film', private_parent=True, children=True, formatter=int)
    rebulk.rules(FilmTitleRule)

    return rebulk
26 | 
27 | 
class FilmTitleRule(Rule):
    """
    Rule to find out film_title: the first hole found between the start of the
    path marker and the film match.
    """
    consequence = AppendMatch

    properties = {'film_title': [None]}

    def when(self, matches, context):
        film_match = matches.named('film', lambda match: not match.private, index=0)
        if not film_match:
            return None

        path_marker = matches.markers.at_match(film_match, lambda marker: marker.name == 'path', 0)
        title_hole = matches.holes(path_marker.start, film_match.start + 1, formatter=cleanup, index=0)
        if not title_hole or not title_hole.value:
            return None

        title_hole.name = 'film_title'
        return title_hole
44 | 


--------------------------------------------------------------------------------
/README.txt:
--------------------------------------------------------------------------------
 1 | # VideoSort post-processing script for NZBGet.
 2 | #
 3 | # Copyright (C) 2013-2020 Andrey Prygunkov <hugbug@users.sourceforge.net>
 4 | #
 5 | # This program is free software; you can redistribute it and/or modify it
 6 | # under the terms of the GNU Lesser General Public License as published by
 7 | # the Free Software Foundation; either version 3 of the License, or
 8 | # (at your option) any later version.
 9 | #
10 | # This program is distributed in the hope that it will be useful,
11 | # but WITHOUT ANY WARRANTY; without even the implied warranty of
12 | # MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.	See the
13 | # GNU Lesser General Public License for more details.
14 | #
15 | # You should have received a copy of the GNU Lesser General Public License
16 | # along with this program.  If not, see <http://www.gnu.org/licenses/>.
17 | #
18 | 
19 | # Sort movies and tv shows.
20 | #
21 | # This is a script for downloaded TV shows and movies. It uses scene-standard
22 | # naming conventions to match TV shows and movies and rename/move/sort/organize
23 | # them as you like.
24 | #
25 | # The script relies on python library "guessit" (http://guessit.readthedocs.org)
26 | # to extract information from file names and includes portions of code from
27 | # "SABnzbd+" (http://sabnzbd.org).
28 | #
29 | # Info about pp-script:
30 | # Author: Andrey Prygunkov (nzbget@gmail.com).
31 | # Web-site: https://github.com/nzbget/VideoSort.
32 | # PP-Script Version: see <VideoSort.py>.
33 | 


--------------------------------------------------------------------------------
/lib/rebulk/debug.py:
--------------------------------------------------------------------------------
 1 | #!/usr/bin/env python
 2 | # -*- coding: utf-8 -*-
 3 | """
 4 | Debug tools.
 5 | 
 6 | Can be configured by changing the values of these variables.
 7 | 
 8 | DEBUG = False
 9 | Enable this variable to activate debug features (like defined_at parameters). It can slow down Rebulk
10 | 
11 | LOG_LEVEL = logging.DEBUG
12 | Default log level of generated rebulk logs.
13 | """
14 | 
15 | import inspect
16 | import logging
17 | import os
18 | from collections import namedtuple
19 | 
20 | 
21 | DEBUG = False
22 | LOG_LEVEL = logging.DEBUG
23 | 
24 | 
class Frame(namedtuple('Frame', ['lineno', 'package', 'name', 'filename'])):
    """
    Stack frame representation, displayed as ``<file basename>#L<line number>``.
    """
    __slots__ = ()

    def __repr__(self):
        basename = os.path.basename(self.filename)
        return "{0}#L{1}".format(basename, self.lineno)
33 | 
34 | 
def defined_at():
    """
    Get definition location of a pattern or a match (outside of rebulk package).

    Walks the call stack from the current frame until a frame whose module
    ``__package__`` differs from this module's one is found.

    :return: the located frame when DEBUG is enabled, None otherwise
    :rtype: Frame
    """
    if not DEBUG:
        return None

    current = inspect.currentframe()
    while current:
        try:
            outside = current.f_globals['__package__'] != __package__
        except KeyError:  # pragma:no cover
            # A module without __package__ stops the walk. Workaround for python 3.3.
            break
        if outside:
            break
        current = current.f_back

    location = Frame(current.f_lineno,
                     current.f_globals.get('__package__'),
                     current.f_globals.get('__name__'),
                     current.f_code.co_filename)
    # Drop the frame reference explicitly once the location has been extracted.
    del current
    return location
57 | 


--------------------------------------------------------------------------------
/lib/guessit/rules/common/expected.py:
--------------------------------------------------------------------------------
 1 | #!/usr/bin/env python
 2 | # -*- coding: utf-8 -*-
 3 | """
 4 | Expected property factory
 5 | """
 6 | import re
 7 | 
 8 | from rebulk import Rebulk
 9 | from rebulk.utils import find_all
10 | 
11 | from . import dash, seps
12 | 
13 | 
def build_expected_function(context_key):
    """
    Creates an expected property function.

    :param context_key: key of the context entry holding the expected values
    :type context_key:
    :return: functional pattern looking for the expected values in the input string
    :rtype:
    """

    def expected(input_string, context):
        """
        Expected property functional pattern.

        Values prefixed with ``re:`` are searched as regular expressions on the
        raw input string. Other values are searched as plain text, ignoring
        case, after replacing each separator with a space on both sides.

        :param input_string: string to search in
        :type input_string:
        :param context: context holding the expected values under ``context_key``
        :type context:
        :return: list of spans (regex values) and of start/end/value dicts (plain values)
        :rtype: list
        """
        ret = []

        # Plain searches run on a separator-normalised copy. input_string itself is left
        # untouched so that regex searches do not depend on the order of the expected values.
        normalized_input = input_string
        for sep in seps:
            normalized_input = normalized_input.replace(sep, ' ')

        # A missing or empty context entry simply yields no match.
        for search in context.get(context_key) or ():
            if search.startswith('re:'):
                pattern = search[3:].replace(' ', '-')
                matches = Rebulk().regex(pattern, abbreviations=[dash], flags=re.IGNORECASE) \
                    .matches(input_string, context)
                for match in matches:
                    ret.append(match.span)
            else:
                normalized_search = search
                for sep in seps:
                    normalized_search = normalized_search.replace(sep, ' ')
                for start in find_all(normalized_input, normalized_search, ignore_case=True):
                    # The reported value is the expected value as given, not its normalised form.
                    ret.append({'start': start, 'end': start + len(normalized_search), 'value': search})
        return ret

    return expected
54 | 


--------------------------------------------------------------------------------
/lib/guessit/rules/properties/bonus.py:
--------------------------------------------------------------------------------
 1 | #!/usr/bin/env python
 2 | # -*- coding: utf-8 -*-
 3 | """
 4 | bonus property
 5 | """
 6 | from rebulk.remodule import re
 7 | 
 8 | from rebulk import Rebulk, AppendMatch, Rule
 9 | 
10 | from .title import TitleFromPosition
11 | from ..common.formatters import cleanup
12 | from ..common.validators import seps_surround
13 | 
14 | 
def bonus():
    """
    Builder for rebulk object.

    Registers the ``x<digits>`` bonus number pattern and the rule deriving
    ``bonus_title`` from it.

    :return: Created Rebulk object
    :rtype: Rebulk
    """
    rebulk = Rebulk().regex_defaults(flags=re.IGNORECASE)

    # The parent validator must be seps_surround itself: wrapping it in a lambda that
    # returns the function object is always truthy and therefore validates nothing.
    rebulk.regex(r'x(\d+)', name='bonus', private_parent=True, children=True, formatter=int,
                 validator={'__parent__': seps_surround},
                 conflict_solver=lambda match, conflicting: match
                 if conflicting.name in ['video_codec', 'episode'] and 'bonus-conflict' not in conflicting.tags
                 else '__default__')

    rebulk.rules(BonusTitleRule)

    return rebulk
32 | 
33 | 
class BonusTitleRule(Rule):
    """
    Find bonus title: the first hole found between the end of the bonus match
    and the end of the path marker.
    """
    dependency = TitleFromPosition
    consequence = AppendMatch

    properties = {'bonus_title': [None]}

    def when(self, matches, context):
        bonus_match = matches.named('bonus', lambda match: not match.private, index=0)
        if not bonus_match:
            return None

        path_marker = matches.markers.at_match(bonus_match, lambda marker: marker.name == 'path', 0)
        title_hole = matches.holes(bonus_match.end, path_marker.end + 1, formatter=cleanup, index=0)
        if not title_hole or not title_hole.value:
            return None

        title_hole.name = 'bonus_title'
        return title_hole
51 | 


--------------------------------------------------------------------------------
/lib/dateutil/LICENSE:
--------------------------------------------------------------------------------
 1 | dateutil - Extensions to the standard Python datetime module.
 2 | 
 3 | Copyright (c) 2003-2011 - Gustavo Niemeyer <gustavo@niemeyer.net>
 4 | Copyright (c) 2012-2014 - Tomi Pieviläinen <tomi.pievilainen@iki.fi>
 5 | Copyright (c) 2014      - Yaron de Leeuw <me@jarondl.net>
 6 | 
 7 | All rights reserved.
 8 | 
 9 | Redistribution and use in source and binary forms, with or without
10 | modification, are permitted provided that the following conditions are met:
11 | 
12 |     * Redistributions of source code must retain the above copyright notice,
13 |       this list of conditions and the following disclaimer.
14 |     * Redistributions in binary form must reproduce the above copyright notice,
15 |       this list of conditions and the following disclaimer in the documentation
16 |       and/or other materials provided with the distribution.
17 |     * Neither the name of the copyright holder nor the names of its
18 |       contributors may be used to endorse or promote products derived from
19 |       this software without specific prior written permission.
20 | 
21 | THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
22 | "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
23 | LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
24 | A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR
25 | CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL,
26 | EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO,
27 | PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR
28 | PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF
29 | LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING
30 | NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS
31 | SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
32 | 


--------------------------------------------------------------------------------
/lib/babelfish/converters/opensubtitles.py:
--------------------------------------------------------------------------------
 1 | # -*- coding: utf-8 -*-
 2 | #
 3 | # Copyright (c) 2013 the BabelFish authors. All rights reserved.
 4 | # Use of this source code is governed by the 3-clause BSD license
 5 | # that can be found in the LICENSE file.
 6 | #
 7 | from __future__ import unicode_literals
 8 | from . import LanguageReverseConverter, CaseInsensitiveDict
 9 | from ..exceptions import LanguageReverseError
10 | from ..language import language_converters
11 | 
12 | 
class OpenSubtitlesConverter(LanguageReverseConverter):
    """Converter for OpenSubtitles codes: alpha3b codes plus a few specific overrides."""

    def __init__(self):
        self.alpha3b_converter = language_converters['alpha3b']
        self.alpha2_converter = language_converters['alpha2']
        # (alpha3b, country) pairs that have a dedicated OpenSubtitles code.
        self.to_opensubtitles = {
            ('por', 'BR'): 'pob',
            ('gre', None): 'ell',
            ('srp', None): 'scc',
            ('srp', 'ME'): 'mne',
        }
        # Dedicated OpenSubtitles codes mapped back to (language, country) pairs.
        self.from_opensubtitles = CaseInsensitiveDict({
            'pob': ('por', 'BR'),
            'pb': ('por', 'BR'),
            'ell': ('ell', None),
            'scc': ('srp', None),
            'mne': ('srp', 'ME'),
        })
        specific_codes = set(['pob', 'pb', 'scc', 'mne'])
        self.codes = self.alpha2_converter.codes | self.alpha3b_converter.codes | specific_codes

    def convert(self, alpha3, country=None, script=None):
        alpha3b = self.alpha3b_converter.convert(alpha3, country, script)
        # Fall back on the plain alpha3b code when there is no dedicated override.
        return self.to_opensubtitles.get((alpha3b, country), alpha3b)

    def reverse(self, opensubtitles):
        if opensubtitles in self.from_opensubtitles:
            return self.from_opensubtitles[opensubtitles]
        # Try the generic converters in order, the first successful one wins.
        for converter in (self.alpha3b_converter, self.alpha2_converter):
            try:
                return converter.reverse(opensubtitles)
            except LanguageReverseError:
                continue
        raise LanguageReverseError(opensubtitles)
37 | 


--------------------------------------------------------------------------------
/lib/rebulk/validators.py:
--------------------------------------------------------------------------------
 1 | #!/usr/bin/env python
 2 | # -*- coding: utf-8 -*-
 3 | """
 4 | Validator functions to use in patterns.
 5 | 
 6 | All those function have last argument as match, so it's possible to use functools.partial to bind previous arguments.
 7 | """
 8 | 
 9 | 
def chars_before(chars, match):
    """
    Validate the match if the character on its left is in a given sequence.

    A match starting at the beginning of the input string is always valid.

    :param chars: accepted characters
    :type chars:
    :param match: match to validate
    :type match:
    :return: True if valid, False otherwise
    :rtype: bool
    """
    return match.start <= 0 or match.input_string[match.start - 1] in chars
24 | 
25 | 
def chars_after(chars, match):
    """
    Validate the match if the character on its right is in a given sequence.

    A match ending at the end of the input string is always valid.

    :param chars: accepted characters
    :type chars:
    :param match: match to validate
    :type match:
    :return: True if valid, False otherwise
    :rtype: bool
    """
    return match.end >= len(match.input_string) or match.input_string[match.end] in chars
40 | 
41 | 
def chars_surround(chars, match):
    """
    Validate the match if both surrounding characters are in a given sequence.

    :param chars: accepted characters
    :type chars:
    :param match: match to validate
    :type match:
    :return: True if valid, False otherwise
    :rtype: bool
    """
    if not chars_before(chars, match):
        return False
    return chars_after(chars, match)
54 | 
55 | 
def validators(*chained_validators):
    """
    Creates a validator chain from several validator functions.

    The chain validates a match only if every validator does; evaluation stops
    at the first validator rejecting the match.

    :param chained_validators: validator functions, called in order
    :type chained_validators:
    :return: the chain, as a validator function
    :rtype: callable
    """
    def validator_chain(match):  # pylint:disable=missing-docstring
        return all(check(match) for check in chained_validators)
    return validator_chain
71 | 


--------------------------------------------------------------------------------
/lib/guessit/rules/properties/edition.py:
--------------------------------------------------------------------------------
 1 | #!/usr/bin/env python
 2 | # -*- coding: utf-8 -*-
 3 | """
 4 | edition property
 5 | """
 6 | from rebulk.remodule import re
 7 | 
 8 | from rebulk import Rebulk
 9 | from ..common import dash
10 | from ..common.validators import seps_surround
11 | 
12 | 
def edition():
    """
    Builder for rebulk object.

    Every pattern registered here produces an ``edition`` match validated by
    ``seps_surround``.

    :return: Created Rebulk object
    :rtype: Rebulk
    """
    def group_prefix_tags():
        """
        Build a fresh tag list for each pattern, so that no list is shared between patterns.
        """
        return ['has-neighbor', 'release-group-prefix']

    def yield_to_special_episode(match, other):
        """
        Let an episode_details match valued 'Special' win over the edition match.
        """
        if other.name == 'episode_details' and other.value == 'Special':
            return other
        return '__default__'

    rebulk = Rebulk().regex_defaults(flags=re.IGNORECASE, abbreviations=[dash]).string_defaults(ignore_case=True)
    rebulk.defaults(name='edition', validator=seps_surround)

    rebulk.regex('collector', 'collector-edition', 'edition-collector', value='Collector Edition')
    rebulk.regex('special-edition', 'edition-special', value='Special Edition',
                 conflict_solver=yield_to_special_episode)
    rebulk.string('se', value='Special Edition', tags='has-neighbor')
    rebulk.regex('criterion-edition', 'edition-criterion', value='Criterion Edition')
    rebulk.regex('deluxe', 'deluxe-edition', 'edition-deluxe', value='Deluxe Edition')
    rebulk.regex('limited', 'limited-edition', value='Limited Edition', tags=group_prefix_tags())
    rebulk.regex(r'theatrical-cut', r'theatrical-edition', r'theatrical', value='Theatrical Edition')
    rebulk.regex(r"director'?s?-cut", r"director'?s?-cut-edition", r"edition-director'?s?-cut", 'DC',
                 value="Director's Cut")
    rebulk.regex('extended', 'extended-?cut', 'extended-?version',
                 value='Extended', tags=group_prefix_tags())
    rebulk.regex('alternat(e|ive)(?:-?Cut)?', value='Alternative Cut', tags=group_prefix_tags())
    # Keywords whose edition value is the keyword itself.
    for keyword in ('Remastered', 'Uncensored', 'Uncut', 'Unrated'):
        rebulk.string(keyword, value=keyword, tags=group_prefix_tags())
    rebulk.string('Festival', value='Festival', tags=['has-neighbor-before', 'has-neighbor-after'])

    return rebulk
42 | 


--------------------------------------------------------------------------------
/lib/guessit/rules/properties/type.py:
--------------------------------------------------------------------------------
 1 | #!/usr/bin/env python
 2 | # -*- coding: utf-8 -*-
 3 | """
 4 | type property
 5 | """
 6 | from rebulk import CustomRule, Rebulk, POST_PROCESS
 7 | from rebulk.match import Match
 8 | 
 9 | from ...rules.processors import Processors
10 | 
11 | 
def _type(matches, value):
    """
    Append a zero-length ``type`` match, located at the end of the input string.

    :param matches: matches to append the type match to
    :param value: value of the type match
    :return:
    """
    end_of_input = len(matches.input_string)
    type_match = Match(end_of_input, end_of_input, name='type', value=value)
    matches.append(type_match)
20 | 
21 | 
def type_():
    """
    Builder for rebulk object.

    The created object only holds the TypeProcessor rule.

    :return: Created Rebulk object
    :rtype: Rebulk
    """
    rebulk = Rebulk()
    return rebulk.rules(TypeProcessor)
29 | 
30 | 
class TypeProcessor(CustomRule):
    """
    Post processor to find file type based on all others found matches.

    A truthy ``type`` entry of the context takes precedence over any guess.
    """
    priority = POST_PROCESS

    dependency = Processors

    properties = {'type': ['episode', 'movie']}

    def when(self, matches, context):  # pylint:disable=too-many-return-statements
        forced_type = context.get('type', None)
        if forced_type:
            return forced_type

        # Any episode numbering information makes it an episode.
        episode_hints = [matches.named(name) for name in ('episode', 'season', 'episode_details')]
        if any(episode_hints):
            return 'episode'

        if matches.named('film'):
            return 'movie'

        year = matches.named('year')
        date = matches.named('date')
        if not year and date:
            return 'episode'

        bonus = matches.named('bonus')
        if not year and bonus:
            return 'episode'

        crc32 = matches.named('crc32')
        anime_release_group = matches.named('release_group', lambda match: 'anime' in match.tags)
        if crc32 and anime_release_group:
            return 'episode'

        # Nothing hints at an episode.
        return 'movie'

    def then(self, matches, when_response, context):
        _type(matches, when_response)
76 | 


--------------------------------------------------------------------------------
/lib/guessit/rules/common/comparators.py:
--------------------------------------------------------------------------------
 1 | #!/usr/bin/env python
 2 | # -*- coding: utf-8 -*-
 3 | """
 4 | Comparators
 5 | """
 6 | try:
 7 |     from functools import cmp_to_key
 8 | except ImportError:
 9 |     from ...backports import cmp_to_key
10 | 
11 | 
def marker_comparator_predicate(match):
    """
    Match predicate used in comparator.

    Rejects private matches, the proper_count and title-like matches, and the
    container matches tagged as extension.
    """
    if match.private:
        return False
    if match.name in ['proper_count', 'title', 'episode_title', 'alternative_title']:
        return False
    if match.name == 'container' and 'extension' in match.tags:
        return False
    return True
19 | 
20 | 
def marker_weight(matches, marker, predicate):
    """
    Compute the comparator weight of a marker: the number of distinct names
    among the matches found in the marker span and accepted by the predicate.

    :param matches: matches to search in
    :param marker: marker to weigh
    :param predicate: filter applied on the matches of the marker span
    :return: number of distinct match names
    """
    distinct_names = set()
    for match in matches.range(*marker.span, predicate=predicate):
        distinct_names.add(match.name)
    return len(distinct_names)
30 | 
31 | 
def marker_comparator(matches, markers, predicate):
    """
    Builds a comparator that returns markers sorted from the most valuable to the less.

    Markers are compared on their weight first, then on their length, then on
    their index in the given markers.

    :param matches: matches used to weigh the markers
    :type matches:
    :param markers: markers being compared
    :param predicate: filter applied on the matches when weighing a marker
    :return: comparator function taking two markers
    :rtype: callable
    """

    def comparator(marker1, marker2):
        """
        The actual comparator function: the first non-zero criterion decides.
        """
        return (marker_weight(matches, marker2, predicate) - marker_weight(matches, marker1, predicate)
                or len(marker2) - len(marker1)
                or markers.index(marker2) - markers.index(marker1))

    return comparator
59 | 
60 | 
def marker_sorted(markers, matches, predicate=marker_comparator_predicate):
    """
    Sort markers from matches, from the most valuable to the less.

    :param markers: markers to sort
    :type markers:
    :param matches: matches used to weigh the markers
    :type matches:
    :param predicate: filter applied on the matches when weighing a marker
    :return: new sorted list, the given markers are left untouched
    :rtype: list
    """
    compare = marker_comparator(matches, markers, predicate=predicate)
    ordered = list(markers)
    ordered.sort(key=cmp_to_key(compare))
    return ordered
74 | 


--------------------------------------------------------------------------------
/lib/babelfish/script.py:
--------------------------------------------------------------------------------
 1 | # -*- coding: utf-8 -*-
 2 | #
 3 | # Copyright (c) 2013 the BabelFish authors. All rights reserved.
 4 | # Use of this source code is governed by the 3-clause BSD license
 5 | # that can be found in the LICENSE file.
 6 | #
 7 | from __future__ import unicode_literals
 8 | from collections import namedtuple
 9 | from pkg_resources import resource_stream  # @UnresolvedImport
10 | from . import basestr
11 | 
#: Script code to script name mapping
SCRIPTS = {}

#: List of scripts in the ISO-15924 as namedtuple of code, number, name, french_name, pva and date
SCRIPT_MATRIX = []

#: The namedtuple used in the :data:`SCRIPT_MATRIX`
IsoScript = namedtuple('IsoScript', ['code', 'number', 'name', 'french_name', 'pva', 'date'])

# Load the ISO-15924 data file shipped with the package. The stream is closed
# even when a line fails to decode or to parse.
f = resource_stream('babelfish', 'data/iso15924-utf8-20131012.txt')
try:
    f.readline()  # the first line is skipped
    for l in f:
        l = l.decode('utf-8').strip()
        # Blank lines and comment lines carry no script.
        if not l or l.startswith('#'):
            continue
        script = IsoScript._make(l.split(';'))
        SCRIPT_MATRIX.append(script)
        SCRIPTS[script.code] = script.name
finally:
    f.close()
31 | 
32 | 
class Script(object):
    """A human writing system

    A script is represented by a 4-letter code from the ISO-15924 standard

    :param string script: 4-letter ISO-15924 script code
    :raise ValueError: if the code is not a key of :data:`SCRIPTS`

    """
    def __init__(self, script):
        if script in SCRIPTS:
            #: ISO-15924 4-letter script code
            self.code = script
        else:
            raise ValueError('%r is not a valid script' % script)

    @property
    def name(self):
        """English name of the script"""
        return SCRIPTS[self.code]

    # The pickled state is the script code alone.
    def __getstate__(self):
        return self.code

    def __setstate__(self, state):
        self.code = state

    def __hash__(self):
        return hash(self.code)

    def __eq__(self, other):
        # A script compares equal to its own code given as a string.
        if isinstance(other, basestr):
            return self.code == other
        return isinstance(other, Script) and self.code == other.code

    def __ne__(self, other):
        return not (self == other)

    def __repr__(self):
        return '<Script [%s]>' % (self,)

    def __str__(self):
        return self.code
77 | 


--------------------------------------------------------------------------------
/lib/guessit/rules/properties/container.py:
--------------------------------------------------------------------------------
 1 | #!/usr/bin/env python
 2 | # -*- coding: utf-8 -*-
 3 | """
 4 | container property
 5 | """
 6 | from rebulk.remodule import re
 7 | 
 8 | from rebulk import Rebulk
 9 | 
10 | from ..common import seps
11 | from ..common.validators import seps_surround
12 | from ...reutils import build_or_pattern
13 | 
14 | 
def container():
    """
    Builder for rebulk object.

    Containers are registered twice: first as file extensions (regex anchored
    at the end of the string, tagged ``extension``), then as plain strings
    validated by ``seps_surround``.

    :return: Created Rebulk object
    :rtype: Rebulk
    """
    def strip_seps(value):
        """Remove the separators surrounding an extension value."""
        return value.strip(seps)

    def extension_yields(match, other):
        """Conflict solver of the extension patterns: the other match is kept."""
        if other.name in ['format', 'video_codec'] or \
                other.name == 'container' and 'extension' not in other.tags:
            return other
        return '__default__'

    def keyword_wins(match, other):
        """Conflict solver of the string patterns: the container match is kept."""
        if other.name in ['format', 'video_codec'] or \
                other.name == 'container' and 'extension' in other.tags:
            return match
        return '__default__'

    rebulk = Rebulk().regex_defaults(flags=re.IGNORECASE).string_defaults(ignore_case=True)
    rebulk.defaults(name='container',
                    formatter=strip_seps,
                    tags=['extension'],
                    conflict_solver=extension_yields)

    subtitles = ['srt', 'idx', 'sub', 'ssa', 'ass']
    info = ['nfo']
    videos = ['3g2', '3gp', '3gp2', 'asf', 'avi', 'divx', 'flv', 'm4v', 'mk2',
              'mka', 'mkv', 'mov', 'mp4', 'mp4a', 'mpeg', 'mpg', 'ogg', 'ogm',
              'ogv', 'qt', 'ra', 'ram', 'rm', 'ts', 'wav', 'webm', 'wma', 'wmv',
              'iso', 'vob']
    torrent = ['torrent']
    nzb = ['nzb']

    # Extension patterns: ".<ext>" at the very end of the input.
    extension_groups = ((subtitles, 'subtitle'), (info, 'info'), (videos, 'video'),
                        (torrent, 'torrent'), (nzb, 'nzb'))
    for exts, tag in extension_groups:
        rebulk.regex(r'\.'+build_or_pattern(exts)+'$', exts=exts, tags=['extension', tag])

    rebulk.defaults(name='container',
                    validator=seps_surround,
                    formatter=lambda s: s.lower(),
                    conflict_solver=keyword_wins)

    # Keyword patterns: 'sub' and the info extensions are not registered as keywords.
    keyword_groups = (([sub for sub in subtitles if sub != 'sub'], 'subtitle'), (videos, 'video'),
                      (torrent, 'torrent'), (nzb, 'nzb'))
    for keywords, tag in keyword_groups:
        rebulk.string(*keywords, tags=[tag])

    return rebulk
59 | 


--------------------------------------------------------------------------------
/lib/guessit/rules/properties/crc.py:
--------------------------------------------------------------------------------
 1 | #!/usr/bin/env python
 2 | # -*- coding: utf-8 -*-
 3 | """
 4 | crc and uuid properties
 5 | """
 6 | from rebulk.remodule import re
 7 | 
 8 | from rebulk import Rebulk
 9 | from ..common.validators import seps_surround
10 | 
11 | 
def crc():
    """
    Builder for rebulk object.

    Registers the ``crc32`` regex (8 hexadecimal characters) and the ``uuid``
    functional pattern, both validated by ``seps_surround``.

    :return: Created Rebulk object
    :rtype: Rebulk
    """
    def wins_over_numbering(match, other):
        """Keep the match when it conflicts with an episode or a season."""
        if other.name in ['episode', 'season']:
            return match
        return '__default__'

    rebulk = Rebulk().regex_defaults(flags=re.IGNORECASE)
    rebulk.defaults(validator=seps_surround)

    rebulk.regex('(?:[a-fA-F]|[0-9]){8}', name='crc32', conflict_solver=wins_over_numbering)
    rebulk.functional(guess_idnumber, name='uuid', conflict_solver=wins_over_numbering)

    return rebulk
31 | 
32 | 
# Character kinds used by guess_idnumber.
_DIGIT = 0
_LETTER = 1
_OTHER = 2

# Candidates: runs of at least 20 ASCII letters, digits or dashes.
_idnum = re.compile(r'(?P<uuid>[a-zA-Z0-9-]{20,})')


def guess_idnumber(string):
    """
    Guess id number function.

    A candidate is kept when its characters switch kind (digit, letter, other)
    often enough and when its letters are different enough from one to the next.

    :param string: string to search in
    :type string:
    :return: list of (start, end) spans of the kept candidates
    :rtype: list
    """
    # pylint:disable=invalid-name
    spans = []

    for found in _idnum.finditer(string):
        candidate = found.group('uuid')
        kind_switches = 0
        letter_switches = 0
        letters = 0
        previous_letter = None
        # The scan starts as if a letter had just been read.
        previous_kind = _LETTER

        for c in candidate:
            if c in '0123456789':
                kind = _DIGIT
            elif c in 'abcdefghijklmnopqrstuvwxyzABCDEFGHIJKLMNOPQRSTUVWXYZ':
                kind = _LETTER
                letters += 1
                if c != previous_letter:
                    letter_switches += 1
                previous_letter = c
            else:
                kind = _OTHER

            if kind != previous_kind:
                kind_switches += 1
            previous_kind = kind

        # only keep the candidate as probable if it alternates often between
        # char kinds (more likely for hash values than for common words)
        switch_ratio = float(kind_switches) / len(candidate)
        letters_ratio = float(letter_switches) / letters if letters > 0 else 1

        if switch_ratio > 0.4 and letters_ratio > 0.4:
            spans.append(found.span())

    return spans
86 | 


--------------------------------------------------------------------------------
/lib/guessit/yamlutils.py:
--------------------------------------------------------------------------------
 1 | #!/usr/bin/env python
 2 | # -*- coding: utf-8 -*-
 3 | """
 4 | Options
 5 | """
 6 | try:
 7 |     from collections import OrderedDict
 8 | except ImportError:  # pragma: no-cover
 9 |     from ordereddict import OrderedDict  # pylint:disable=import-error
10 | import babelfish
11 | 
12 | import yaml
13 | 
14 | 
class OrderedDictYAMLLoader(yaml.Loader):
    """
    YAML loader building every mapping as an OrderedDict, so that key order
    of the document is kept.
    From https://gist.github.com/enaeseth/844388

    NOTE(review): this derives from yaml.Loader rather than yaml.SafeLoader;
    confirm that only trusted documents are parsed with it.
    """

    def __init__(self, *args, **kwargs):
        yaml.Loader.__init__(self, *args, **kwargs)

        # Both plain and ordered map tags are built by construct_yaml_map.
        for tag in (u'tag:yaml.org,2002:map', u'tag:yaml.org,2002:omap'):
            self.add_constructor(tag, type(self).construct_yaml_map)

    def construct_yaml_map(self, node):
        # Two-step construction: the empty container is yielded first and
        # filled afterwards.
        ordered = OrderedDict()
        yield ordered
        ordered.update(self.construct_mapping(node))

    def construct_mapping(self, node, deep=False):
        if not isinstance(node, yaml.MappingNode):  # pragma: no cover
            raise yaml.constructor.ConstructorError(None, None,
                                                    'expected a mapping node, but found %s' % node.id, node.start_mark)
        self.flatten_mapping(node)

        result = OrderedDict()
        for key_node, value_node in node.value:
            key = self.construct_object(key_node, deep=deep)
            try:
                # Keys must be hashable to be stored in the mapping.
                hash(key)
            except TypeError as exc:  # pragma: no cover
                raise yaml.constructor.ConstructorError('while constructing a mapping',
                                                        node.start_mark, 'found unacceptable key (%s)'
                                                        % exc, key_node.start_mark)
            result[key] = self.construct_object(value_node, deep=deep)
        return result
52 | 
53 | 
class CustomDumper(yaml.SafeDumper):
    """
    Custom YAML Dumper.

    Adds nothing to yaml.SafeDumper by itself; it exists so that representers
    can be registered on it without touching yaml.SafeDumper.
    """
59 | 
60 | 
def default_representer(dumper, data):
    """Represent data as a YAML string built from str(data)"""
    text = str(data)
    return dumper.represent_str(text)


# babelfish objects are dumped through their string form.
CustomDumper.add_representer(babelfish.Language, default_representer)
CustomDumper.add_representer(babelfish.Country, default_representer)
66 | 
67 | 
def ordered_dict_representer(dumper, data):
    """Represent an OrderedDict through the dumper's dict representation"""
    node = dumper.represent_dict(data)
    return node


CustomDumper.add_representer(OrderedDict, ordered_dict_representer)
72 | 


--------------------------------------------------------------------------------
/lib/babelfish/exceptions.py:
--------------------------------------------------------------------------------
 1 | # -*- coding: utf-8 -*-
 2 | #
 3 | # Copyright (c) 2013 the BabelFish authors. All rights reserved.
 4 | # Use of this source code is governed by the 3-clause BSD license
 5 | # that can be found in the LICENSE file.
 6 | #
 7 | from __future__ import unicode_literals
 8 | 
 9 | 
class Error(Exception):
    """Base class for all exceptions in babelfish"""


class LanguageError(Error, AttributeError):
    """Base class for all language exceptions in babelfish"""


class LanguageConvertError(LanguageError):
    """Exception raised by converters when :meth:`~babelfish.converters.LanguageConverter.convert` fails

    :param string alpha3: alpha3 code that failed conversion
    :param country: country code that failed conversion, if any
    :type country: string or None
    :param script: script code that failed conversion, if any
    :type script: string or None

    """
    def __init__(self, alpha3, country=None, script=None):
        # Forward the arguments to the base class so that ``args`` is populated:
        # copy and pickle rebuild an exception by calling its class with ``args``.
        super(LanguageConvertError, self).__init__(alpha3, country, script)
        self.alpha3 = alpha3
        self.country = country
        self.script = script

    def __str__(self):
        # alpha3[-country][-script]
        s = self.alpha3
        if self.country is not None:
            s += '-' + self.country
        if self.script is not None:
            s += '-' + self.script
        return s


class LanguageReverseError(LanguageError):
    """Exception raised by converters when :meth:`~babelfish.converters.LanguageReverseConverter.reverse` fails

    :param string code: code that failed reverse conversion

    """
    def __init__(self, code):
        # See LanguageConvertError.__init__ for why args are forwarded.
        super(LanguageReverseError, self).__init__(code)
        self.code = code

    def __str__(self):
        return repr(self.code)


class CountryError(Error, AttributeError):
    """Base class for all country exceptions in babelfish"""


class CountryConvertError(CountryError):
    """Exception raised by converters when :meth:`~babelfish.converters.CountryConverter.convert` fails

    :param string alpha2: alpha2 code that failed conversion

    """
    def __init__(self, alpha2):
        # Populate ``args`` so the exception can be copied and pickled.
        super(CountryConvertError, self).__init__(alpha2)
        self.alpha2 = alpha2

    def __str__(self):
        return self.alpha2


class CountryReverseError(CountryError):
    """Exception raised by converters when :meth:`~babelfish.converters.CountryReverseConverter.reverse` fails

    :param string code: code that failed reverse conversion

    """
    def __init__(self, code):
        # Populate ``args`` so the exception can be copied and pickled.
        super(CountryReverseError, self).__init__(code)
        self.code = code

    def __str__(self):
        return repr(self.code)
86 | 


--------------------------------------------------------------------------------
/lib/guessit/rules/properties/date.py:
--------------------------------------------------------------------------------
 1 | #!/usr/bin/env python
 2 | # -*- coding: utf-8 -*-
 3 | """
 4 | date and year properties
 5 | """
 6 | from rebulk import Rebulk, RemoveMatch, Rule
 7 | 
 8 | from ..common.date import search_date, valid_year
 9 | from ..common.validators import seps_surround
10 | 
11 | 
def date():
    """
    Build the rebulk object holding the ``year`` and ``date`` patterns.
    :return: Created Rebulk object
    :rtype: Rebulk
    """
    def year_validator(match):
        """
        Accept a 4 digits match only when seps_surround and valid_year both accept it.
        """
        return seps_surround(match) and valid_year(match.value)

    def date_functional(string, context):
        """
        Search for date in the string and retrieves match

        :param string: input string
        :param context: options, read for 'date_year_first' and 'date_day_first'
        :return: (start, end, {'value': date}) when search_date finds something, None otherwise
        """
        found = search_date(string, context.get('date_year_first'), context.get('date_day_first'))
        if not found:
            return None
        return found[0], found[1], {'value': found[2]}

    def date_conflict_solver(match, other):
        """
        Return the other match when it is an episode or a season, '__default__' otherwise.
        """
        if other.name in ['episode', 'season']:
            return other
        return '__default__'

    rebulk = Rebulk().defaults(validator=seps_surround)

    rebulk.regex(r"\d{4}", name="year", formatter=int, validator=year_validator)
    rebulk.functional(date_functional, name="date", properties={'date': [None]},
                      conflict_solver=date_conflict_solver)
    rebulk.rules(KeepMarkedYearInFilepart)

    return rebulk
43 | 
44 | 
class KeepMarkedYearInFilepart(Rule):
    """
    Keep first years marked with [](){} in filepart, or if no year is marked, ensure it won't override titles.

    For each 'path' marker containing several year matches:
    - when some years are inside a 'group' marker and some are not, every
      year outside a group and every grouped year but the first is removed;
    - when no year is inside a group, the first year and all years after
      the second one are removed.
    """
    priority = 64
    consequence = RemoveMatch

    def when(self, matches, context):
        to_remove = []
        if len(matches.named('year')) <= 1:
            return to_remove

        for filepart in matches.markers.named('path'):
            years = matches.range(filepart.start, filepart.end, lambda match: match.name == 'year')
            if len(years) <= 1:
                continue

            grouped = []
            ungrouped = []
            for year in years:
                in_group = matches.markers.at_match(year, lambda marker: marker.name == 'group')
                if in_group:
                    grouped.append(year)
                else:
                    ungrouped.append(year)

            if grouped and ungrouped:
                to_remove.extend(ungrouped)
                to_remove.extend(grouped[1:])  # Keep the first year in marker.
            elif not grouped:
                to_remove.append(ungrouped[0])  # Keep first year for title.
                # Nothing is added here when there are at most two years.
                to_remove.extend(ungrouped[2:])
        return to_remove
73 | 


--------------------------------------------------------------------------------
/lib/guessit/rules/common/words.py:
--------------------------------------------------------------------------------
 1 | #!/usr/bin/env python
 2 | # -*- coding: utf-8 -*-
 3 | """
 4 | Words utils
 5 | """
 6 | from collections import namedtuple
 7 | 
 8 | from . import seps
 9 | 
10 | _Word = namedtuple('_Word', ['span', 'value'])
11 | 
12 | 
13 | def iter_words(string):
14 |     """
15 |     Iterate on all words in a string
16 |     :param string:
17 |     :type string:
18 |     :return:
19 |     :rtype: iterable[str]
20 |     """
21 |     i = 0
22 |     last_sep_index = -1
23 |     inside_word = False
24 |     for char in string:
25 |         if ord(char) < 128 and char in seps:  # Make sure we don't exclude unicode characters.
26 |             if inside_word:
27 |                 yield _Word(span=(last_sep_index+1, i), value=string[last_sep_index+1:i])
28 |             inside_word = False
29 |             last_sep_index = i
30 |         else:
31 |             inside_word = True
32 |         i += 1
33 |     if inside_word:
34 |         yield _Word(span=(last_sep_index+1, i), value=string[last_sep_index+1:i])
35 | 
36 | 
# list of common words which could be interpreted as properties, but which
# are far too common to be able to say they represent a property in the
# middle of a string (where they most likely carry their common meaning).
# The literal below is split on whitespace; a word belonging to several
# categories is written once, in the first category where it appears.
COMMON_WORDS = frozenset((
    # english words
    'is it am mad men man run sin st to '
    'no non war min new car day bad bat fan '
    'fry cop zen gay fat one cherokee got an as '
    'cat her be hat sun may my mr rum pi bb '
    'bt tv aw by md mp cd lt gt in ad ice '
    'ay at star so he do ax mx '
    # french words
    'bas de le son ne ca ce et que '
    'mal est vol or mon se je tu me '
    'ma va au lu '
    # japanese words
    'wa ga ao '
    # spanish words
    'la el del por mar al '
    # italian words
    'un '
    # other
    'ind arw ts ii bin chan ss san oss iii '
    'vi ben da ch sr ps cx vo '
    # new from babelfish
    'mkv avi dmd the dis cut stv des dia and '
    'cab sub mia rim las une par srt ano toy '
    'job gag reel www for ayu csi ren moi sur '
    'fer fun two big psy air '
    # movie title
    'brazil jordan '
    # release groups (bs: Bosnian)
    'bs kz '
    # countries (gt and lt are already listed with english words)
    'im '
    # part/pt
    'pt '
    # screener
    'scr '
    # quality
    'sd hr'
).split())
80 | 


--------------------------------------------------------------------------------
/lib/rebulk/toposort.py:
--------------------------------------------------------------------------------
 1 | #!/usr/bin/env python
 2 | # -*- coding: utf-8 -*-
 3 | # Copyright 2014 True Blade Systems, Inc.
 4 | #
 5 | # Licensed under the Apache License, Version 2.0 (the "License");
 6 | # you may not use this file except in compliance with the License.
 7 | # You may obtain a copy of the License at
 8 | #
 9 | # http://www.apache.org/licenses/LICENSE-2.0
10 | #
11 | # Original:
12 | #   - https://bitbucket.org/ericvsmith/toposort (1.4)
13 | # Modifications:
14 | #   - merged Pull request #2 for CyclicDependency error
15 | #   - import reduce as original name
16 | #   - support python 2.6 dict comprehension
17 | 
18 | # pylint: skip-file
19 | from functools import reduce
20 | 
21 | 
class CyclicDependency(ValueError):
    """
    Error carrying the items that take part in a dependency cycle.

    The mapping given to the constructor is kept in the ``cyclic`` attribute
    and each of its (item, dependencies) pairs is listed in the message.
    """

    def __init__(self, cyclic):
        listed = ', '.join(repr(entry) for entry in cyclic.items())
        message = 'Cyclic dependencies exist among these items: {0}'.format(listed)
        super(CyclicDependency, self).__init__(message)
        self.cyclic = cyclic
27 | 
28 | 
def toposort(data):
    """
    Dependencies are expressed as a dictionary whose keys are items
    and whose values are a set of dependent items. Output is a generator
    of sets in topological order. The first set consists of items with no
    dependencies, each subsequent set consists of items that depend upon
    items in the preceding sets.

    The input mapping and the sets it holds are left unmodified.
    :param data: mapping of item to the collection of items it depends on
    :type data: dict
    :return: generator of sets of items, in topological order
    :rtype: generator
    :raise CyclicDependency: when the remaining items all depend on each other
    """

    # Special case empty input.
    if not data:
        return

    # Copy the input two levels deep so as to leave it unmodified, ignoring
    # self dependencies on the way. A shallow copy would share the dependency
    # sets with the caller, so dropping self dependencies would alter them.
    data = dict((item, set(dep for dep in deps if dep != item))
                for item, deps in data.items())

    # Find all items that don't depend on anything.
    extra_items_in_deps = reduce(set.union, data.values()) - set(data.keys())
    # Add empty dependencies where needed.
    data.update(dict((item, set()) for item in extra_items_in_deps))
    while True:
        ordered = set(item for item, dep in data.items() if not dep)
        if not ordered:
            break
        yield ordered
        # Drop the emitted items, both as keys and as dependencies.
        data = dict((item, (dep - ordered))
                    for item, dep in data.items()
                    if item not in ordered)
    # Whatever is left could never be emitted: it is part of a cycle.
    if data:
        raise CyclicDependency(data)
66 | 
67 | 
def toposort_flatten(data, sort=True):
    """
    Flatten the sets produced by toposort() into a single list.

    With ``sort`` enabled, the items of each set are sorted before being
    appended (just to make the results deterministic); otherwise they are
    appended in the set iteration order.
    :param data: dependencies, as expected by toposort()
    :type data: dict
    :param sort: whether items of a same set are sorted
    :type sort: bool
    :return: Single list of dependencies.
    :rtype: list
    """

    arrange = sorted if sort else list
    flattened = []
    for group in toposort(data):
        flattened.extend(arrange(group))
    return flattened
85 | 


--------------------------------------------------------------------------------
/lib/guessit/rules/__init__.py:
--------------------------------------------------------------------------------
 1 | #!/usr/bin/env python
 2 | # -*- coding: utf-8 -*-
 3 | """
 4 | Rebulk object default builder
 5 | """
 6 | from rebulk import Rebulk
 7 | 
 8 | from .markers.path import path
 9 | from .markers.groups import groups
10 | 
11 | from .properties.episodes import episodes
12 | from .properties.container import container
13 | from .properties.format import format_
14 | from .properties.video_codec import video_codec
15 | from .properties.audio_codec import audio_codec
16 | from .properties.screen_size import screen_size
17 | from .properties.website import website
18 | from .properties.date import date
19 | from .properties.title import title
20 | from .properties.episode_title import episode_title
21 | from .properties.language import language
22 | from .properties.country import country
23 | from .properties.release_group import release_group
24 | from .properties.streaming_service import streaming_service
25 | from .properties.other import other
26 | from .properties.size import size
27 | from .properties.edition import edition
28 | from .properties.cds import cds
29 | from .properties.bonus import bonus
30 | from .properties.film import film
31 | from .properties.part import part
32 | from .properties.crc import crc
33 | from .properties.mimetype import mimetype
34 | from .properties.type import type_
35 | 
36 | from .processors import processors
37 | 
38 | 
def rebulk_builder():
    """
    Default builder for main Rebulk object used by api.

    Every builder below is called and its result is registered on the main
    object, one after the other, in the listed order.
    :return: Main Rebulk object
    :rtype: Rebulk
    """
    def customize_properties(properties):
        """
        Customize default rebulk properties

        The 'count' entry is replaced by 'season_count' and 'episode_count'
        entries holding the same value.
        """
        count = properties['count']
        del properties['count']

        for key in ('season_count', 'episode_count'):
            properties[key] = count

        return properties

    builders = (
        # markers
        path, groups,
        # properties
        episodes, container, format_, video_codec, audio_codec, screen_size,
        website, date, title, episode_title, language, country, release_group,
        streaming_service, other, size, edition, cds, bonus, film, part, crc,
        # processors
        processors,
        # properties registered after processors
        mimetype, type_,
    )

    rebulk = Rebulk()
    for builder in builders:
        rebulk.rebulk(builder())

    rebulk.customize_properties = customize_properties

    return rebulk
93 | 


--------------------------------------------------------------------------------
/lib/dateutil/easter.py:
--------------------------------------------------------------------------------
 1 | # -*- coding: utf-8 -*-
 2 | """
 3 | This module offers a generic easter computing method for any given year, using
 4 | Western, Orthodox or Julian algorithms.
 5 | """
 6 | 
 7 | import datetime
 8 | 
 9 | __all__ = ["easter", "EASTER_JULIAN", "EASTER_ORTHODOX", "EASTER_WESTERN"]
10 | 
EASTER_JULIAN = 1
EASTER_ORTHODOX = 2
EASTER_WESTERN = 3


def easter(year, method=EASTER_WESTERN):
    """
    Return the date of easter for the given year.

    This method was ported from the work done by GM Arts,
    on top of the algorithm by Claus Tondering, which was
    based in part on the algorithm of Ouding (1940), as
    quoted in "Explanatory Supplement to the Astronomical
    Almanac", P.  Kenneth Seidelmann, editor.

    This algorithm implements three different easter
    calculation methods:

    1 - Original calculation in Julian calendar, valid in
        dates after 326 AD
    2 - Original method, with date converted to Gregorian
        calendar, valid in years 1583 to 4099
    3 - Revised method, in Gregorian calendar, valid in
        years 1583 to 4099 as well

    These methods are represented by the constants:

    EASTER_JULIAN   = 1
    EASTER_ORTHODOX = 2
    EASTER_WESTERN  = 3

    The default method is method 3.

    A ValueError is raised when method is not between 1 and 3.

    More about the algorithm may be found at:

    http://users.chariot.net.au/~gmarts/eastalg.htm

    and

    http://www.tondering.dk/claus/calendar.html

    """

    if not (EASTER_JULIAN <= method <= EASTER_WESTERN):
        raise ValueError("invalid method")

    # golden     - Golden year - 1
    # century    - Century
    # epact      - (23 - Epact) mod 30
    # full_moon  - Number of days from March 21 to Paschal Full Moon
    # weekday    - Weekday for PFM (0=Sunday, etc)
    # offset     - Number of days from March 21 to Sunday on or before PFM
    #              (-6 to 28 methods 1 & 3, to 56 for method 2)
    # extra_days - Extra days to add for method 2 (converting Julian
    #              date to Gregorian date)

    golden = year % 19
    extra_days = 0
    if method < EASTER_WESTERN:
        # Old method
        full_moon = (19*golden + 15) % 30
        weekday = (year + year//4 + full_moon) % 7
        if method == EASTER_ORTHODOX:
            # Extra dates to convert Julian to Gregorian date
            extra_days = 10
            if year > 1600:
                extra_days = extra_days + year//100 - 16 - (year//100 - 16)//4
    else:
        # New method
        century = year//100
        epact = (century - century//4 - (8*century + 13)//25 + 19*golden + 15) % 30
        full_moon = epact - (epact//28)*(1 - (epact//28)*(29//(epact + 1))*((21 - golden)//11))
        weekday = (year + year//4 + full_moon + 2 - century + century//4) % 7

    # offset can be from -6 to 56 corresponding to dates 22 March to 23 May
    # (later dates apply to method 2, although 23 May never actually occurs)
    offset = full_moon - weekday + extra_days
    day = 1 + (offset + 27 + (offset + 6)//40) % 31
    month = 3 + (offset + 26)//30
    return datetime.date(int(year), int(month), int(day))
90 | 


--------------------------------------------------------------------------------
/lib/guessit/rules/properties/format.py:
--------------------------------------------------------------------------------
 1 | #!/usr/bin/env python
 2 | # -*- coding: utf-8 -*-
 3 | """
 4 | format property
 5 | """
 6 | from rebulk.remodule import re
 7 | 
 8 | from rebulk import Rebulk, RemoveMatch, Rule
 9 | from ..common import dash
10 | from ..common.validators import seps_before, seps_after
11 | 
12 | 
def format_():
    """
    Build the rebulk object holding the ``format`` patterns.
    :return: Created Rebulk object
    :rtype: Rebulk
    """
    def prefer_other(match, other):
        """
        Return the conflicting match when it is named 'other', '__default__' otherwise.
        """
        if other.name == 'other':
            return other
        return '__default__'

    rebulk = Rebulk().regex_defaults(flags=re.IGNORECASE, abbreviations=[dash])
    rebulk.defaults(name="format", tags=['video-codec-prefix', 'streaming_service.suffix'])

    # (patterns, keyword arguments) pairs, registered in this order.
    definitions = (
        (("VHS", "VHS-?Rip"), {'value': "VHS"}),
        (("CAM", "CAM-?Rip", "HD-?CAM"), {'value': "Cam"}),
        (("TELESYNC", "TS", "HD-?TS"), {'value': "Telesync"}),
        (("WORKPRINT", "WP"), {'value': "Workprint"}),
        (("TELECINE", "TC"), {'value': "Telecine"}),
        # Pay Per View
        (("PPV", "PPV-?Rip"), {'value': "PPV"}),
        # TV is too common to allow matching
        (("SD-?TV", "SD-?TV-?Rip", "Rip-?SD-?TV", "TV-?Rip", "Rip-?TV", "TV-?(?=Dub)"), {'value': "TV"}),
        (("DVB-?Rip", "DVB", "PD-?TV"), {'value': "DVB"}),
        # "DVD-?R(?:$|^E)" => DVD-Real ...
        (("DVD", "DVD-?Rip", "VIDEO-?TS", "DVD-?R(?:$|(?!E))", "DVD-?9", "DVD-?5"), {'value': "DVD"}),
        (("HD-?TV", "TV-?RIP-?HD", "HD-?TV-?RIP", "HD-?RIP"), {'value': "HDTV", 'conflict_solver': prefer_other}),
        (("VOD", "VOD-?Rip"), {'value': "VOD"}),
        (("WEB-?Rip", "WEB-?DL-?Rip", "WEB-?Cap"), {'value': "WEBRip"}),
        (("WEB-?DL", "WEB-?HD", "WEB", "DL-?WEB", "DL(?=-?Mux)"), {'value': "WEB-DL"}),
        (("HD-?DVD-?Rip", "HD-?DVD"), {'value': "HD-DVD"}),
        (("Blu-?ray(?:-?Rip)?", "B[DR]", "B[DR]-?Rip", "BD[59]", "BD25", "BD50"), {'value': "BluRay"}),
        (("AHDTV",), {'value': "AHDTV"}),
        (('UHD-?TV', 'UHD-?Rip'), {'value': 'UHDTV', 'conflict_solver': prefer_other}),
        (("HDTC",), {'value': "HDTC"}),
        (("DSR", "DSR?-?Rip", "SAT-?Rip", "DTH", "DTH-?Rip"), {'value': "SATRip"}),
    )
    for patterns, options in definitions:
        rebulk.regex(*patterns, **options)

    rebulk.rules(ValidateFormat)

    return rebulk
50 | 
51 | 
class ValidateFormat(Rule):
    """
    Validate format with screener property, with video_codec property or separated

    A format match is removed unless it is accepted on both sides: by
    seps_before or a nearby match tagged 'format-prefix' on the left, and by
    seps_after or a nearby match tagged 'format-suffix' on the right.
    """
    priority = 64
    consequence = RemoveMatch

    def when(self, matches, context):
        rejected = []
        for candidate in matches.named('format'):
            # NOTE(review): the range below is given a start (start - 1) greater
            # than its end (start - 2); confirm this is intended.
            left_ok = seps_before(candidate) or \
                matches.range(candidate.start - 1, candidate.start - 2,
                              lambda match: 'format-prefix' in match.tags)
            if not left_ok:
                rejected.append(candidate)
                continue

            right_ok = seps_after(candidate) or \
                matches.range(candidate.end, candidate.end + 1,
                              lambda match: 'format-suffix' in match.tags)
            if not right_ok:
                rejected.append(candidate)
        return rejected
73 | 


--------------------------------------------------------------------------------
/lib/guessit/rules/properties/video_codec.py:
--------------------------------------------------------------------------------
 1 | #!/usr/bin/env python
 2 | # -*- coding: utf-8 -*-
 3 | """
 4 | video_codec and video_profile property
 5 | """
 6 | from rebulk.remodule import re
 7 | 
 8 | from rebulk import Rebulk, Rule, RemoveMatch
 9 | 
10 | from ..common import dash
11 | from ..common.validators import seps_after, seps_before, seps_surround
12 | 
13 | 
def video_codec():
    """
    Build the rebulk object holding the ``video_codec``, ``video_profile``
    and ``video_api`` patterns.
    :return: Created Rebulk object
    :rtype: Rebulk
    """
    rebulk = Rebulk().regex_defaults(flags=re.IGNORECASE, abbreviations=[dash]).string_defaults(ignore_case=True)

    # Patterns declared from here are named video_codec.
    rebulk.defaults(name="video_codec", tags=['format-suffix', 'streaming_service.suffix'])

    codec_patterns = (
        ((r"Rv\d{2}",), "Real"),
        (("Mpeg2",), "Mpeg2"),
        (("DVDivX", "DivX"), "DivX"),
        (("XviD",), "XviD"),
        (("[hx]-?264(?:-?AVC(HD)?)?", "MPEG-?4(?:-?AVC(HD)?)", "AVC(?:HD)?"), "h264"),
        (("[hx]-?265(?:-?HEVC)?", "HEVC"), "h265"),
    )
    for patterns, codec in codec_patterns:
        rebulk.regex(*patterns, value=codec)

    # Single pattern giving both a codec and a profile through its named groups.
    rebulk.regex('(?P<video_codec>hevc)(?P<video_profile>10)', value={'video_codec': 'h265', 'video_profile': '10bit'},
                 tags=['video-codec-suffix'], children=True)

    # http://blog.mediacoderhq.com/h264-profiles-and-levels/
    # http://fr.wikipedia.org/wiki/H.264
    # Patterns declared from here are named video_profile.
    rebulk.defaults(name="video_profile", validator=seps_surround)

    rebulk.regex('10.?bits?', 'Hi10P?', 'YUV420P10', value='10bit')
    rebulk.regex('8.?bits?', value='8bit')

    # Profiles tagged 'video_profile.rule' are checked by VideoProfileRule.
    tagged_profiles = (
        (rebulk.string, ('BP',), 'BP'),
        (rebulk.string, ('XP', 'EP'), 'XP'),
        (rebulk.string, ('MP',), 'MP'),
        (rebulk.string, ('HP', 'HiP'), 'HP'),
        (rebulk.regex, ('Hi422P',), 'Hi422P'),
        (rebulk.regex, ('Hi444PP',), 'Hi444PP'),
    )
    for register, patterns, profile in tagged_profiles:
        register(*patterns, value=profile, tags='video_profile.rule')

    rebulk.string('DXVA', value='DXVA', name='video_api')

    rebulk.rules(ValidateVideoCodec, VideoProfileRule)

    return rebulk
51 | 
52 | 
class ValidateVideoCodec(Rule):
    """
    Validate video_codec with format property or separated

    A video_codec match is removed unless it is accepted on both sides: by
    seps_before or a match tagged 'video-codec-prefix' at index start - 1,
    and by seps_after or a match tagged 'video-codec-suffix' at index end + 1.
    """
    priority = 64
    consequence = RemoveMatch

    def when(self, matches, context):
        rejected = []
        for candidate in matches.named('video_codec'):
            left_ok = seps_before(candidate) or \
                matches.at_index(candidate.start - 1, lambda match: 'video-codec-prefix' in match.tags)
            if not left_ok:
                rejected.append(candidate)
                continue

            right_ok = seps_after(candidate) or \
                matches.at_index(candidate.end + 1, lambda match: 'video-codec-suffix' in match.tags)
            if not right_ok:
                rejected.append(candidate)
        return rejected
72 | 
73 | 
class VideoProfileRule(Rule):
    """
    Rule to validate video_profile

    A video_profile match tagged 'video_profile.rule' is removed when no
    video_codec match is found before it nor after it.
    """
    consequence = RemoveMatch

    def when(self, matches, context):
        def is_video_codec(match):
            return match.name == 'video_codec'

        orphans = []
        for profile in matches.named('video_profile', lambda match: 'video_profile.rule' in match.tags):
            # Look backward first, forward only when nothing was found.
            codec = matches.previous(profile, is_video_codec) or matches.next(profile, is_video_codec)
            if not codec:
                orphans.append(profile)
        return orphans
90 | 


--------------------------------------------------------------------------------
/lib/guessit/tlds-alpha-by-domain.txt:
--------------------------------------------------------------------------------
  1 | # Version 2013112900, Last Updated Fri Nov 29 07:07:01 2013 UTC
  2 | AC
  3 | AD
  4 | AE
  5 | AERO
  6 | AF
  7 | AG
  8 | AI
  9 | AL
 10 | AM
 11 | AN
 12 | AO
 13 | AQ
 14 | AR
 15 | ARPA
 16 | AS
 17 | ASIA
 18 | AT
 19 | AU
 20 | AW
 21 | AX
 22 | AZ
 23 | BA
 24 | BB
 25 | BD
 26 | BE
 27 | BF
 28 | BG
 29 | BH
 30 | BI
 31 | BIKE
 32 | BIZ
 33 | BJ
 34 | BM
 35 | BN
 36 | BO
 37 | BR
 38 | BS
 39 | BT
 40 | BV
 41 | BW
 42 | BY
 43 | BZ
 44 | CA
 45 | CAMERA
 46 | CAT
 47 | CC
 48 | CD
 49 | CF
 50 | CG
 51 | CH
 52 | CI
 53 | CK
 54 | CL
 55 | CLOTHING
 56 | CM
 57 | CN
 58 | CO
 59 | COM
 60 | CONSTRUCTION
 61 | CONTRACTORS
 62 | COOP
 63 | CR
 64 | CU
 65 | CV
 66 | CW
 67 | CX
 68 | CY
 69 | CZ
 70 | DE
 71 | DIAMONDS
 72 | DIRECTORY
 73 | DJ
 74 | DK
 75 | DM
 76 | DO
 77 | DZ
 78 | EC
 79 | EDU
 80 | EE
 81 | EG
 82 | ENTERPRISES
 83 | EQUIPMENT
 84 | ER
 85 | ES
 86 | ESTATE
 87 | ET
 88 | EU
 89 | FI
 90 | FJ
 91 | FK
 92 | FM
 93 | FO
 94 | FR
 95 | GA
 96 | GALLERY
 97 | GB
 98 | GD
 99 | GE
100 | GF
101 | GG
102 | GH
103 | GI
104 | GL
105 | GM
106 | GN
107 | GOV
108 | GP
109 | GQ
110 | GR
111 | GRAPHICS
112 | GS
113 | GT
114 | GU
115 | GURU
116 | GW
117 | GY
118 | HK
119 | HM
120 | HN
121 | HOLDINGS
122 | HR
123 | HT
124 | HU
125 | ID
126 | IE
127 | IL
128 | IM
129 | IN
130 | INFO
131 | INT
132 | IO
133 | IQ
134 | IR
135 | IS
136 | IT
137 | JE
138 | JM
139 | JO
140 | JOBS
141 | JP
142 | KE
143 | KG
144 | KH
145 | KI
146 | KITCHEN
147 | KM
148 | KN
149 | KP
150 | KR
151 | KW
152 | KY
153 | KZ
154 | LA
155 | LAND
156 | LB
157 | LC
158 | LI
159 | LIGHTING
160 | LK
161 | LR
162 | LS
163 | LT
164 | LU
165 | LV
166 | LY
167 | MA
168 | MC
169 | MD
170 | ME
171 | MG
172 | MH
173 | MIL
174 | MK
175 | ML
176 | MM
177 | MN
178 | MO
179 | MOBI
180 | MP
181 | MQ
182 | MR
183 | MS
184 | MT
185 | MU
186 | MUSEUM
187 | MV
188 | MW
189 | MX
190 | MY
191 | MZ
192 | NA
193 | NAME
194 | NC
195 | NE
196 | NET
197 | NF
198 | NG
199 | NI
200 | NL
201 | NO
202 | NP
203 | NR
204 | NU
205 | NZ
206 | OM
207 | ORG
208 | PA
209 | PE
210 | PF
211 | PG
212 | PH
213 | PHOTOGRAPHY
214 | PK
215 | PL
216 | PLUMBING
217 | PM
218 | PN
219 | POST
220 | PR
221 | PRO
222 | PS
223 | PT
224 | PW
225 | PY
226 | QA
227 | RE
228 | RO
229 | RS
230 | RU
231 | RW
232 | SA
233 | SB
234 | SC
235 | SD
236 | SE
237 | SEXY
238 | SG
239 | SH
240 | SI
241 | SINGLES
242 | SJ
243 | SK
244 | SL
245 | SM
246 | SN
247 | SO
248 | SR
249 | ST
250 | SU
251 | SV
252 | SX
253 | SY
254 | SZ
255 | TATTOO
256 | TC
257 | TD
258 | TECHNOLOGY
259 | TEL
260 | TF
261 | TG
262 | TH
263 | TIPS
264 | TJ
265 | TK
266 | TL
267 | TM
268 | TN
269 | TO
270 | TODAY
271 | TP
272 | TR
273 | TRAVEL
274 | TT
275 | TV
276 | TW
277 | TZ
278 | UA
279 | UG
280 | UK
281 | US
282 | UY
283 | UZ
284 | VA
285 | VC
286 | VE
287 | VENTURES
288 | VG
289 | VI
290 | VN
291 | VOYAGE
292 | VU
293 | WF
294 | WS
295 | XN--3E0B707E
296 | XN--45BRJ9C
297 | XN--80AO21A
298 | XN--80ASEHDB
299 | XN--80ASWG
300 | XN--90A3AC
301 | XN--CLCHC0EA0B2G2A9GCD
302 | XN--FIQS8S
303 | XN--FIQZ9S
304 | XN--FPCRJ9C3D
305 | XN--FZC2C9E2C
306 | XN--GECRJ9C
307 | XN--H2BRJ9C
308 | XN--J1AMH
309 | XN--J6W193G
310 | XN--KPRW13D
311 | XN--KPRY57D
312 | XN--L1ACC
313 | XN--LGBBAT1AD8J
314 | XN--MGB9AWBF
315 | XN--MGBA3A4F16A
316 | XN--MGBAAM7A8H
317 | XN--MGBAYH7GPA
318 | XN--MGBBH1A71E
319 | XN--MGBC0A9AZCG
320 | XN--MGBERP4A5D4AR
321 | XN--MGBX4CD0AB
322 | XN--NGBC5AZD
323 | XN--O3CW4H
324 | XN--OGBPF8FL
325 | XN--P1AI
326 | XN--PGBS0DH
327 | XN--Q9JYB4C
328 | XN--S9BRJ9C
329 | XN--UNUP4Y
330 | XN--WGBH1C
331 | XN--WGBL6A
332 | XN--XKC2AL3HYE2A
333 | XN--XKC2DL3A5EE0H
334 | XN--YFRO4I67O
335 | XN--YGBI2AMMX
336 | XXX
337 | YE
338 | YT
339 | ZA
340 | ZM
341 | ZW
342 | 


--------------------------------------------------------------------------------
/README.md:
--------------------------------------------------------------------------------
 1 | # VideoSort
 2 | Post-processing script for [NZBGet](http://nzbget.net).
 3 | 
 4 | This is a script for downloaded TV shows and movies. It uses scene-standard naming conventions to match TV shows and movies and rename/move/sort/organize them as you like.
 5 | 
 6 | ## Example
 7 | 
 8 | Let's say the download folder has the following files:
 9 | 
10 |     [dir]	/home/user/downloads/Futurama.S07E18.The.Inhuman.Torch.XVID
11 |     [file]	/home/user/downloads/Futurama.S07E18.The.Inhuman.Torch.XVID/F0718TIT.avi
12 | 
13 | VideoSort can rename the video-file and move it into another directory creating sub-directories when necessary:
14 | 
15 |     [dir]	/home/user/videos/Futurama
16 |     [dir]	/home/user/videos/Futurama/Season 7
17 |     [file]	/home/user/videos/Futurama/Season 7/Futurama - S07E18 - The Inhuman Torch.avi
18 | 
19 | The formatting rules for destination file name (and sub-directories) are definable via configuration options.
20 | 
21 | VideoSort can organize:
22 |  - seasoned tv shows;
23 |  - dated tv shows;
24 |  - movies.
25 | 
26 | ## Installation
27 | 
28 |  - Download the newest version from [releases page](https://github.com/nzbget/VideoSort/releases/latest).
29 |  - Unpack into pp-scripts directory. Your pp-scripts directory now should have folder "videosort" with subfolder "lib" and file "VideoSort.py";
30 |  - Open settings tab in NZBGet web-interface and define settings for VideoSort;
31 |  - Save changes and restart NZBGet.
32 | 
33 | ## Formatting string
34 | 
35 | ### Movies
36 | 
37 |  - %t, %.t, %_t - movie title with words separated with spaces, dots or underscores (case-adjusted);
38 |  - %tT, %t.T, %t_T - movie title (original letter case);
39 |  - %y	- year;
40 |  - %decade - two-digits decade (90, 00, 10);
41 |  - %0decade - four-digits decade (1990, 2000, 2010);
42 |  - %imdb - IMDb ID, requires DNZB-header "X-DNZB-MoreInfo", containing link to imdb.com;
43 |  - %cpimdb - IMDb ID (formatted for CouchPotato), requires DNZB-header "X-DNZB-MoreInfo", containing link to imdb.com.
44 |  
45 | ### Seasoned TV shows
46 | 
47 |  - %sn, %s.n, %s_n - show name with words separated with spaces, dots or underscores (case-adjusted);
48 |  - %sN, %s.N, %s_N - show name (original letter case);
49 |  - %s - season number (1, 2);
50 |  - %0s - two-digits season number (01, 02);
51 |  - %e - episode number (1, 2);
52 |  - %0e - two-digits episode number (01, 02);
53 |  - %en, %e.n, %e_n - episode name (case-adjusted);
54 |  - %eN, %e.N, %e_N - episode name (original letter case);
55 | 
56 | ### Dated TV shows
57 | 
58 |  - %sn, %s.n, %s_n - show name with words separated with spaces, dots or underscores (case-adjusted);
59 |  - %sN, %s.N, %s_N - show name (original letter case);
60 |  - %y	- year;
61 |  - %decade - two-digits decade (90, 00, 10);
62 |  - %0decade - four-digits decade (1990, 2000, 2010).
63 |  - %m	- month (1-12);
64 |  - %0m	- two-digits month (01-12);
65 |  - %d	- day (1-31);
66 |  - %0d	- two-digits day (01-31);
67 | 
68 | ### General
69 | 
70 | These specifiers can be used with all three types of supported video files:
71 | 
72 |  - %dn - original directory name (nzb-name);
73 |  - %fn - original filename;
74 |  - %ext - file extension;
75 |  - %Ext - file extension (case-adjusted);
76 |  - %qf - video format (HDTV, BluRay, WEB-DL);
77 |  - %qss - screen size (720p, 1080i);
78 |  - %qvc - video codec (x264);
79 |  - %qac - audio codec (DTS);
80 |  - %qah - audio channels (5.1);
81 |  - %qrg - release group;
82 |  - {{text}} - uppercase the text;
83 |  - {TEXT} - lowercase the text;
84 | 
85 | Credits
86 | -------
87 | The script relies on python library "guessit" (http://guessit.readthedocs.org) to extract information from file names and includes portions of code from "SABnzbd+" (http://sabnzbd.org).
88 | 


--------------------------------------------------------------------------------
/lib/babelfish/country.py:
--------------------------------------------------------------------------------
  1 | # -*- coding: utf-8 -*-
  2 | #
  3 | # Copyright (c) 2013 the BabelFish authors. All rights reserved.
  4 | # Use of this source code is governed by the 3-clause BSD license
  5 | # that can be found in the LICENSE file.
  6 | #
  7 | from __future__ import unicode_literals
  8 | from collections import namedtuple
  9 | from functools import partial
 10 | from pkg_resources import resource_stream  # @UnresolvedImport
 11 | from .converters import ConverterManager
 12 | from . import basestr
 13 | 
 14 | 
 15 | COUNTRIES = {}
 16 | COUNTRY_MATRIX = []
 17 | 
 18 | #: The namedtuple used in the :data:`COUNTRY_MATRIX`
 19 | IsoCountry = namedtuple('IsoCountry', ['name', 'alpha2'])
 20 | 
 21 | f = resource_stream('babelfish', 'data/iso-3166-1.txt')
 22 | f.readline()
 23 | for l in f:
 24 |     iso_country = IsoCountry(*l.decode('utf-8').strip().split(';'))
 25 |     COUNTRIES[iso_country.alpha2] = iso_country.name
 26 |     COUNTRY_MATRIX.append(iso_country)
 27 | f.close()
 28 | 
 29 | 
class CountryConverterManager(ConverterManager):
    """:class:`~babelfish.converters.ConverterManager` for country converters"""
    # NOTE(review): presumably the entry point group that ConverterManager scans for
    # externally registered converters -- confirm in ConverterManager.
    entry_point = 'babelfish.country_converters'
    # Converters declared by babelfish itself, written as 'name = module:ClassName'.
    internal_converters = ['name = babelfish.converters.countryname:CountryNameConverter']

# Module-level manager instance; Country looks converters up in it by name.
country_converters = CountryConverterManager()
 36 | 
 37 | 
 38 | class CountryMeta(type):
 39 |     """The :class:`Country` metaclass
 40 | 
 41 |     Dynamically redirect :meth:`Country.frommycode` to :meth:`Country.fromcode` with the ``mycode`` `converter`
 42 | 
 43 |     """
 44 |     def __getattr__(cls, name):
 45 |         if name.startswith('from'):
 46 |             return partial(cls.fromcode, converter=name[4:])
 47 |         return type.__getattribute__(cls, name)
 48 | 
 49 | 
# The base class is created by calling CountryMeta directly, so Country gets
# CountryMeta as its metaclass without using version-specific metaclass syntax.
class Country(CountryMeta(str('CountryBase'), (object,), {})):
    """A country on Earth

    A country is represented by a 2-letter code from the ISO-3166 standard

    :param string country: 2-letter ISO-3166 country code
    :raise ValueError: if `country` is not a key of :data:`COUNTRIES`

    """
    def __init__(self, country):
        if country not in COUNTRIES:
            raise ValueError('%r is not a valid country' % country)

        #: ISO-3166 2-letter country code
        self.alpha2 = country

    @classmethod
    def fromcode(cls, code, converter):
        """Create a :class:`Country` by its `code` using `converter` to
        :meth:`~babelfish.converters.CountryReverseConverter.reverse` it

        :param string code: the code to reverse
        :param string converter: name of the :class:`~babelfish.converters.CountryReverseConverter` to use
        :return: the corresponding :class:`Country` instance
        :rtype: :class:`Country`

        """
        return cls(country_converters[converter].reverse(code))

    def __getstate__(self):
        """Pickle only the alpha2 code."""
        return self.alpha2

    def __setstate__(self, state):
        """Restore the instance from the alpha2 code produced by :meth:`__getstate__`."""
        self.alpha2 = state

    def __getattr__(self, name):
        """Resolve an unknown attribute by converting this country with the converter called `name`

        :raise AttributeError: if no converter is registered under `name`

        """
        try:
            return country_converters[name].convert(self.alpha2)
        except KeyError:
            raise AttributeError(name)

    def __hash__(self):
        """Hash on the alpha2 code, consistent with :meth:`__eq__` between countries."""
        return hash(self.alpha2)

    def __eq__(self, other):
        """Compare with a string (against ``str(self)``) or with another :class:`Country` (by alpha2).

        Any other type compares unequal.

        """
        if isinstance(other, basestr):
            return str(self) == other
        if not isinstance(other, Country):
            return False
        return self.alpha2 == other.alpha2

    def __ne__(self, other):
        """Negation of ``==``."""
        return not self == other

    def __repr__(self):
        """Debug representation, e.g. ``<Country [US]>``."""
        return '<Country [%s]>' % self

    def __str__(self):
        """The alpha2 code."""
        return self.alpha2
108 | 


--------------------------------------------------------------------------------
/lib/rebulk/processors.py:
--------------------------------------------------------------------------------
  1 | #!/usr/bin/env python
  2 | # -*- coding: utf-8 -*-
  3 | """
  4 | Processor functions
  5 | """
  6 | from logging import getLogger
  7 | 
  8 | from .utils import IdentitySet
  9 | 
 10 | from .rules import Rule, RemoveMatch
 11 | 
# Bound ``Logger.log`` of this module's logger; called as log(level, message, *args).
log = getLogger(__name__).log

# Value a conflict solver returns to defer the decision to the next solver
# in the chain (see ConflictSolver.when).
DEFAULT = '__default__'

# Rule priorities used by the processors of this module: PrivateRemover uses
# POST_PROCESS and ConflictSolver uses PRE_PROCESS.
# NOTE(review): presumably higher priorities run first -- confirm in the rules engine.
POST_PROCESS = -2048
PRE_PROCESS = 2048
 18 | 
 19 | 
 20 | def _default_conflict_solver(match, conflicting_match):
 21 |     """
 22 |     Default conflict solver for matches, shorter matches if they conflicts with longer ones
 23 | 
 24 |     :param conflicting_match:
 25 |     :type conflicting_match:
 26 |     :param match:
 27 |     :type match:
 28 |     :return:
 29 |     :rtype:
 30 |     """
 31 |     if len(conflicting_match.initiator) < len(match.initiator):
 32 |         return conflicting_match
 33 |     elif len(match.initiator) < len(conflicting_match.initiator):
 34 |         return match
 35 |     return None
 36 | 
 37 | 
class ConflictSolver(Rule):
    """
    Remove conflicting matches.

    For every pair of conflicting public matches, a chain of conflict solvers is consulted to decide
    which match of the pair gets removed.
    """
    priority = PRE_PROCESS

    consequence = RemoveMatch

    @property
    def default_conflict_solver(self):  # pylint:disable=no-self-use
        """
        Default conflict solver to use.
        """
        return _default_conflict_solver

    def when(self, matches, context):
        """
        Collect the matches that lose a conflict.

        :param matches: matches to inspect; must support iteration and ``conflicting(match)``
        :param context: unused here
        :return: the matches to remove
        :rtype: IdentitySet
        """
        # pylint:disable=too-many-nested-blocks
        to_remove_matches = IdentitySet()

        # Private matches are ignored; public ones are processed shortest first.
        public_matches = [match for match in matches if not match.private]
        public_matches.sort(key=len)

        for match in public_matches:
            conflicting_matches = matches.conflicting(match)

            if conflicting_matches:
                # Only public conflicting matches are considered, shortest first.
                conflicting_matches = [conflicting_match for conflicting_match in conflicting_matches if
                                       not conflicting_match.private]
                conflicting_matches.sort(key=len)

                for conflicting_match in conflicting_matches:
                    # Each entry is (solver, reverse); reverse=True means the solver belongs to the
                    # conflicting match and is therefore called with the arguments swapped.
                    conflict_solvers = [(self.default_conflict_solver, False)]

                    if match.conflict_solver:
                        conflict_solvers.append((match.conflict_solver, False))
                    if conflicting_match.conflict_solver:
                        conflict_solvers.append((conflicting_match.conflict_solver, True))

                    # Iterating in reverse consults the conflicting match's solver first, then the
                    # match's own solver, and the default solver last.
                    for conflict_solver, reverse in reversed(conflict_solvers):
                        if reverse:
                            to_remove = conflict_solver(conflicting_match, match)
                        else:
                            to_remove = conflict_solver(match, conflicting_match)
                        if to_remove == DEFAULT:
                            # This solver defers: try the next one in the chain.
                            continue
                        if to_remove and to_remove not in to_remove_matches:
                            # The other match of the pair is the one that would be kept.
                            both_matches = [match, conflicting_match]
                            both_matches.remove(to_remove)
                            to_keep = both_matches[0]

                            # Do not remove a match in favor of one that is itself already removed.
                            if to_keep not in to_remove_matches:
                                log(self.log_level, "Conflicting match %s will be removed in favor of match %s",
                                    to_remove, to_keep)

                                to_remove_matches.add(to_remove)
                        # The first solver that does not defer settles this pair.
                        break
        return to_remove_matches
 96 | 
 97 | 
 98 | class PrivateRemover(Rule):
 99 |     """
100 |     Removes private matches rule.
101 |     """
102 |     priority = POST_PROCESS
103 | 
104 |     consequence = RemoveMatch
105 | 
106 |     def when(self, matches, context):
107 |         return [match for match in matches if match.private]
108 | 


--------------------------------------------------------------------------------
/lib/guessit/rules/properties/country.py:
--------------------------------------------------------------------------------
  1 | #!/usr/bin/env python
  2 | # -*- coding: utf-8 -*-
  3 | """
  4 | country property
  5 | """
  6 | # pylint: disable=no-member
  7 | import babelfish
  8 | 
  9 | from rebulk import Rebulk
 10 | from ..common.words import COMMON_WORDS, iter_words
 11 | 
 12 | 
 13 | def country():
 14 |     """
 15 |     Builder for rebulk object.
 16 |     :return: Created Rebulk object
 17 |     :rtype: Rebulk
 18 |     """
 19 |     rebulk = Rebulk().defaults(name='country')
 20 | 
 21 |     rebulk.functional(find_countries,
 22 |                       #  Prefer language and any other property over country if not US or GB.
 23 |                       conflict_solver=lambda match, other: match
 24 |                       if other.name != 'language' or match.value not in [babelfish.Country('US'),
 25 |                                                                          babelfish.Country('GB')]
 26 |                       else other,
 27 |                       properties={'country': [None]})
 28 | 
 29 |     return rebulk
 30 | 
 31 | 
# Extra synonyms recognized for some countries, keyed by alpha2 code.
# GuessitCountryConverter lower-cases them and checks them before any other lookup.
COUNTRIES_SYN = {'ES': ['españa'],
                 'GB': ['UK'],
                 'BR': ['brazilian', 'bra'],
                 'CA': ['québec', 'quebec', 'qc'],
                 # FIXME: this one is a bit of a stretch, not sure how to do it properly, though...
                 'MX': ['Latinoamérica', 'latin america']}
 38 | 
 39 | 
 40 | class GuessitCountryConverter(babelfish.CountryReverseConverter):  # pylint: disable=missing-docstring
 41 |     def __init__(self):
 42 |         self.guessit_exceptions = {}
 43 | 
 44 |         for alpha2, synlist in COUNTRIES_SYN.items():
 45 |             for syn in synlist:
 46 |                 self.guessit_exceptions[syn.lower()] = alpha2
 47 | 
 48 |     @property
 49 |     def codes(self):  # pylint: disable=missing-docstring
 50 |         return (babelfish.country_converters['name'].codes |
 51 |                 frozenset(babelfish.COUNTRIES.values()) |
 52 |                 frozenset(self.guessit_exceptions.keys()))
 53 | 
 54 |     def convert(self, alpha2):
 55 |         if alpha2 == 'GB':
 56 |             return 'UK'
 57 |         return str(babelfish.Country(alpha2))
 58 | 
 59 |     def reverse(self, name):  # pylint:disable=arguments-differ
 60 |         # exceptions come first, as they need to override a potential match
 61 |         # with any of the other guessers
 62 |         try:
 63 |             return self.guessit_exceptions[name.lower()]
 64 |         except KeyError:
 65 |             pass
 66 | 
 67 |         try:
 68 |             return babelfish.Country(name.upper()).alpha2
 69 |         except ValueError:
 70 |             pass
 71 | 
 72 |         for conv in [babelfish.Country.fromname]:
 73 |             try:
 74 |                 return conv(name).alpha2
 75 |             except babelfish.CountryReverseError:
 76 |                 pass
 77 | 
 78 |         raise babelfish.CountryReverseError(name)
 79 | 
 80 | 
 81 | babelfish.country_converters['guessit'] = GuessitCountryConverter()
 82 | 
 83 | 
 84 | def is_allowed_country(country_object, context=None):
 85 |     """
 86 |     Check if country is allowed.
 87 |     """
 88 |     if context and context.get('allowed_countries'):
 89 |         allowed_countries = context.get('allowed_countries')
 90 |         return country_object.name.lower() in allowed_countries or country_object.alpha2.lower() in allowed_countries
 91 |     return True
 92 | 
 93 | 
 94 | def find_countries(string, context=None):
 95 |     """
 96 |     Find countries in given string.
 97 |     """
 98 |     ret = []
 99 |     for word_match in iter_words(string.strip().lower()):
100 |         word = word_match.value
101 |         if word.lower() in COMMON_WORDS:
102 |             continue
103 |         try:
104 |             country_object = babelfish.Country.fromguessit(word)
105 |             if is_allowed_country(country_object, context):
106 |                 ret.append((word_match.span[0], word_match.span[1], {'value': country_object}))
107 |         except babelfish.Error:
108 |             continue
109 |     return ret
110 | 


--------------------------------------------------------------------------------
/ChangeLog.txt:
--------------------------------------------------------------------------------
 1 | videosort-8.0:
 2 |   - updated guessit library from v1 to v2; this improves analysis in many cases;
 3 |   - many code changes in VideoSort to adapt to the new API of the fully rewritten
 4 |     guessit library;
 5 |   - alternative formatting specifiers for %dn and %fn: %^dn, %.dn, %_dn,
 6 |     %^dN, %.dN, %_dN, %^fn, %.fn, %_fn, %^fN, %.fN, %_fN;
 7 |   - moving subtitles which are in sub-folders;
 8 |   - skip rename if media file is already named correctly;
 9 |   - better support of mounts to SAMBA shares or NTFS partitions;
10 |   - changed search path order for python modules to avoid possible conflicts
11 |     with other versions of installed libraries;
12 |   - created test suite;
13 |   - definable formatting of first and last episode on a multi-episode file;
14 |   - fixed: spaces entered in option "VideoExtensions" were not ignored.
15 | 
16 | videosort-6.1:
17 |   - updated library babelfish used by guessit; this fixes a rare error regarding
18 |     unicode characters.
19 | 
20 | videosort-6.0:
21 |   - added support for "part"s; they are now treated as episodes;
22 |   - fixed: certain words were stripped from titles;
23 |   - updated guessit-library to the newest release (0.9.4);
24 |   - new format specifiers "%cat" ("%.cat", etc.) to refer to category;
25 |   - new format specifier "%up" to navigate to parent directory
26 |     (useful with category's DestDir);
27 |   - added workaround for titles starting with digits (guessit-library
28 |     doesn't process them properly);
29 |   - video extensions are now detected regardless of letter case.
30 | 
31 | videosort-5.0:
32 |   - improved matching of subtitles to releases (thanks Chris Hamilton for the
33 |     patch);
34 |   - added nfo deep scan to improve matching of nfo files (thanks Chris Hamilton
35 |     for the patch);
36 |   - new option "PreferNZBName";
37 |   - fixed: script may crash on certain tv show names when option "SeriesYear"
38 |     was active;
39 |   - updated guessit-library to the newest release - this includes support for
40 |     the autodetection of nfo files.
41 | 
42 | videosort-4.0:
43 |   - improved detection of obfuscated file and directory names;
44 |   - added support for DNZB-Headers "X-DNZB-ProperName", "X-DNZB-EpisodeName",
45 |     "X-DNZB-MovieYear";
46 |   - removed support for DNZB-Header "X-DNZB-UseNZBName";
47 |   - new option "DNZBHeaders" to disable using of DNZB-Headers if necessary;
48 |   - new format specifier "%imdb" and "%cpimdb" (thanks Chris Hamilton for the
49 |     patch);
50 |   - removing invalid characters from generated file name;
51 |   - updated guessit-library to the newest release - this fixes several issues.
52 | 
53 | videosort-3.0:
54 |   - added for seasoned TV shows: if year in the file name goes directly after
55 |     show name, it will be added to show name. This may be necessary for
56 |     media players like XBMC, Boxee or Plex (or anyone using TheTVDB) to
57 |     properly index TV shows. New option "SeriesYear";
58 |   - added detection of obfuscated file names; if such file name is detected
59 |     a nzb-name is used instead.
60 |   
61 | videosort-2.0:
62 |   - new options "TvCategories", "OtherTvDir" and "OtherTvFormat" for TV shows,
63 |     whose file names look like movies (neither series nor dated shows);
64 |   - new format specifier "{TEXT}" to make text lowercase;
65 |   - new format specifiers "%y", "%decade", "%0decade" for seasoned TV shows;
66 |   - added support for multi episode file names (example: My.Show.S01E02-03.mkv);
67 |   - new option "EpisodeSeparator" to adjust formatting of multi episode file names;
68 |   - added support for DNZB-Header "X-DNZB-UseNZBName";
69 |   - added printing info-message when skipping small files;
70 |   - added using of command "[NZB] FINALDIR" to inform NZBGet about new files
71 |     location; this path is then shown in the history dialog as "Destination";
72 |   - destination directories (options "MoviesDir", etc.) can be left empty to use
73 |     global "DestDir" or "CategoryX.DestDir"  as destination;
74 |   - updated guessit-library to the newest release - this fixes several issues.
75 | 
76 | videosort-1.0:
77 |   - initial release.
78 | 


--------------------------------------------------------------------------------
/lib/guessit/rules/properties/website.py:
--------------------------------------------------------------------------------
  1 | #!/usr/bin/env python
  2 | # -*- coding: utf-8 -*-
  3 | """
  4 | Website property.
  5 | """
  6 | from pkg_resources import resource_stream  # @UnresolvedImport
  7 | from rebulk.remodule import re
  8 | 
  9 | from rebulk import Rebulk, Rule, RemoveMatch
 10 | from ..common import seps
 11 | from ..common.formatters import cleanup
 12 | from ..common.validators import seps_surround
 13 | from ...reutils import build_or_pattern
 14 | 
 15 | 
 16 | def website():
 17 |     """
 18 |     Builder for rebulk object.
 19 |     :return: Created Rebulk object
 20 |     :rtype: Rebulk
 21 |     """
 22 |     rebulk = Rebulk().regex_defaults(flags=re.IGNORECASE).string_defaults(ignore_case=True)
 23 |     rebulk.defaults(name="website")
 24 | 
 25 |     tlds = [l.strip().decode('utf-8')
 26 |             for l in resource_stream('guessit', 'tlds-alpha-by-domain.txt').readlines()
 27 |             if b'--' not in l][1:]  # All registered domain extension
 28 | 
 29 |     safe_tlds = ['com', 'org', 'net']  # For sure a website extension
 30 |     safe_subdomains = ['www']  # For sure a website subdomain
 31 |     safe_prefix = ['co', 'com', 'org', 'net']  # Those words before a tlds are sure
 32 | 
 33 |     website_prefixes = ['from']
 34 | 
 35 |     rebulk.regex(r'(?:[^a-z0-9]|^)((?:'+build_or_pattern(safe_subdomains) +
 36 |                  r'\.)+(?:[a-z-]+\.)+(?:'+build_or_pattern(tlds) +
 37 |                  r'))(?:[^a-z0-9]|$)',
 38 |                  children=True)
 39 |     rebulk.regex(r'(?:[^a-z0-9]|^)((?:'+build_or_pattern(safe_subdomains) +
 40 |                  r'\.)*[a-z-]+\.(?:'+build_or_pattern(safe_tlds) +
 41 |                  r'))(?:[^a-z0-9]|$)',
 42 |                  safe_subdomains=safe_subdomains, safe_tlds=safe_tlds, children=True)
 43 |     rebulk.regex(r'(?:[^a-z0-9]|^)((?:'+build_or_pattern(safe_subdomains) +
 44 |                  r'\.)*[a-z-]+\.(?:'+build_or_pattern(safe_prefix) +
 45 |                  r'\.)+(?:'+build_or_pattern(tlds) +
 46 |                  r'))(?:[^a-z0-9]|$)',
 47 |                  safe_subdomains=safe_subdomains, safe_prefix=safe_prefix, tlds=tlds, children=True)
 48 | 
 49 |     rebulk.string(*website_prefixes,
 50 |                   validator=seps_surround, private=True, tags=['website.prefix'])
 51 | 
 52 |     class PreferTitleOverWebsite(Rule):
 53 |         """
 54 |         If found match is more likely a title, remove website.
 55 |         """
 56 |         consequence = RemoveMatch
 57 | 
 58 |         @staticmethod
 59 |         def valid_followers(match):
 60 |             """
 61 |             Validator for next website matches
 62 |             """
 63 |             return any(name in ['season', 'episode', 'year'] for name in match.names)
 64 | 
 65 |         def when(self, matches, context):
 66 |             to_remove = []
 67 |             for website_match in matches.named('website'):
 68 |                 safe = False
 69 |                 for safe_start in safe_subdomains + safe_prefix:
 70 |                     if website_match.value.lower().startswith(safe_start):
 71 |                         safe = True
 72 |                         break
 73 |                 if not safe:
 74 |                     suffix = matches.next(website_match, PreferTitleOverWebsite.valid_followers, 0)
 75 |                     if suffix:
 76 |                         to_remove.append(website_match)
 77 |             return to_remove
 78 | 
 79 |     rebulk.rules(PreferTitleOverWebsite, ValidateWebsitePrefix)
 80 | 
 81 |     return rebulk
 82 | 
 83 | 
 84 | class ValidateWebsitePrefix(Rule):
 85 |     """
 86 |     Validate website prefixes
 87 |     """
 88 |     priority = 64
 89 |     consequence = RemoveMatch
 90 | 
 91 |     def when(self, matches, context):
 92 |         to_remove = []
 93 |         for prefix in matches.tagged('website.prefix'):
 94 |             website_match = matches.next(prefix, predicate=lambda match: match.name == 'website', index=0)
 95 |             if (not website_match or
 96 |                     matches.holes(prefix.end, website_match.start,
 97 |                                   formatter=cleanup, seps=seps, predicate=lambda match: match.value)):
 98 |                 to_remove.append(prefix)
 99 |         return to_remove
100 | 


--------------------------------------------------------------------------------
/lib/guessit/rules/common/formatters.py:
--------------------------------------------------------------------------------
  1 | #!/usr/bin/env python
  2 | # -*- coding: utf-8 -*-
  3 | """
  4 | Formatters
  5 | """
  6 | from rebulk.formatters import formatters
  7 | from rebulk.remodule import re
  8 | from . import seps
  9 | 
# Separator characters that cleanup() must not replace with a space.
_excluded_clean_chars = ',:;-/\\'
# Every character of ``seps`` that is not excluded above; cleanup() replaces each of them with a space.
clean_chars = ""
for sep in seps:
    if sep not in _excluded_clean_chars:
        clean_chars += sep
 15 | 
 16 | 
 17 | def _potential_before(i, input_string):
 18 |     """
 19 |     Check if the character at position i can be a potential single char separator considering what's before it.
 20 | 
 21 |     :param i:
 22 |     :type i: int
 23 |     :param input_string:
 24 |     :type input_string: str
 25 |     :return:
 26 |     :rtype: bool
 27 |     """
 28 |     return i - 2 >= 0 and input_string[i] == input_string[i - 2] and input_string[i - 1] not in seps
 29 | 
 30 | 
 31 | def _potential_after(i, input_string):
 32 |     """
 33 |     Check if the character at position i can be a potential single char separator considering what's after it.
 34 | 
 35 |     :param i:
 36 |     :type i: int
 37 |     :param input_string:
 38 |     :type input_string: str
 39 |     :return:
 40 |     :rtype: bool
 41 |     """
 42 |     return i + 2 >= len(input_string) or \
 43 |            input_string[i + 2] == input_string[i] and input_string[i + 1] not in seps
 44 | 
 45 | 
 46 | def cleanup(input_string):
 47 |     """
 48 |     Removes and strip separators from input_string (but keep ',;' characters)
 49 | 
 50 |     It also keep separators for single characters (Mavels Agents of S.H.I.E.L.D.)
 51 | 
 52 |     :param input_string:
 53 |     :type input_string: str
 54 |     :return:
 55 |     :rtype:
 56 |     """
 57 |     clean_string = input_string
 58 |     for char in clean_chars:
 59 |         clean_string = clean_string.replace(char, ' ')
 60 | 
 61 |     # Restore input separator if they separate single characters.
 62 |     # Useful for Mavels Agents of S.H.I.E.L.D.
 63 |     # https://github.com/guessit-io/guessit/issues/278
 64 | 
 65 |     indices = [i for i, letter in enumerate(clean_string) if letter in seps]
 66 | 
 67 |     dots = set()
 68 |     if indices:
 69 |         clean_list = list(clean_string)
 70 | 
 71 |         potential_indices = []
 72 | 
 73 |         for i in indices:
 74 |             if _potential_before(i, input_string) and _potential_after(i, input_string):
 75 |                 potential_indices.append(i)
 76 | 
 77 |         replace_indices = []
 78 | 
 79 |         for potential_index in potential_indices:
 80 |             if potential_index - 2 in potential_indices or potential_index + 2 in potential_indices:
 81 |                 replace_indices.append(potential_index)
 82 | 
 83 |         if replace_indices:
 84 |             for replace_index in replace_indices:
 85 |                 dots.add(input_string[replace_index])
 86 |                 clean_list[replace_index] = input_string[replace_index]
 87 |             clean_string = ''.join(clean_list)
 88 | 
 89 |     clean_string = strip(clean_string, ''.join([c for c in seps if c not in dots]))
 90 | 
 91 |     clean_string = re.sub(' +', ' ', clean_string)
 92 |     return clean_string
 93 | 
 94 | 
def strip(input_string, chars=seps):
    """
    Strip separators from both ends of input_string

    :param input_string: string to strip
    :type input_string: str
    :param chars: characters to remove from both ends, defaults to ``seps``
    :type chars: str
    :return: the stripped string
    :rtype: str
    """
    return input_string.strip(chars)
105 | 
106 | 
def raw_cleanup(raw):
    """
    Cleanup a raw value to perform raw comparison: the value is lower-cased, then passed through the
    formatter built from ``cleanup`` and ``strip``.

    :param raw: raw value to normalize
    :type raw: str
    :return: the normalized value
    :rtype: str
    """
    normalize = formatters(cleanup, strip)
    return normalize(raw.lower())
116 | 
117 | 
def reorder_title(title, articles=('the',), separators=(',', ', ')):
    """
    Move a trailing article back to the front of the title ("Simpsons, The" becomes "The Simpsons").

    The search for the trailing ``separator + article`` is case-insensitive; the original letter case
    of the title is kept in the result.

    :param title: title to reorder
    :type title: str
    :param articles: lower-case articles to look for at the end of the title
    :type articles: iterable of str
    :param separators: separators that may precede the trailing article
    :type separators: iterable of str
    :return: the reordered title, or the title unchanged when no trailing article is found
    :rtype: str
    """
    lowered = title.lower()
    for article in articles:
        for separator in separators:
            suffix = separator + article
            cut = len(suffix)
            if lowered[-cut:] != suffix:
                continue
            leading_part = title[:-cut]
            trailing_article = title[len(separator) - cut:]
            return trailing_article + ' ' + leading_part
    return title
137 | 


--------------------------------------------------------------------------------
/lib/dateutil/README.rst:
--------------------------------------------------------------------------------
  1 | dateutil - powerful extensions to datetime
  2 | ==========================================
  3 | The `dateutil` module provides powerful extensions to
  4 | the standard `datetime` module, available in Python.
  5 | 
  6 | 
  7 | Download
  8 | ========
  9 | dateutil is available on PyPI
 10 | https://pypi.python.org/pypi/python-dateutil/
 11 | 
 12 | The documentation is hosted at:
 13 | https://dateutil.readthedocs.org/
 14 | 
 15 | Code
 16 | ====
 17 | https://github.com/dateutil/dateutil/
 18 | 
 19 | Features
 20 | ========
 21 | 
 22 | * Computing of relative deltas (next month, next year,
 23 |   next monday, last week of month, etc);
 24 | * Computing of relative deltas between two given
 25 |   date and/or datetime objects;
 26 | * Computing of dates based on very flexible recurrence rules,
 27 |   using a superset of the
 28 |   `iCalendar <ftp://ftp.rfc-editor.org/in-notes/rfc2445.txt>`_
 29 |   specification. Parsing of RFC strings is supported as well.
 30 | * Generic parsing of dates in almost any string format;
 31 | * Timezone (tzinfo) implementations for tzfile(5) format
 32 |   files (/etc/localtime, /usr/share/zoneinfo, etc), TZ
 33 |   environment string (in all known formats), iCalendar
 34 |   format files, given ranges (with help from relative deltas),
 35 |   local machine timezone, fixed offset timezone, UTC timezone,
 36 |   and Windows registry-based time zones.
 37 | * Internal up-to-date world timezone information based on
 38 |   Olson's database.
 39 | * Computing of Easter Sunday dates for any given year,
 40 |   using Western, Orthodox or Julian algorithms;
 41 | * More than 400 test cases.
 42 | 
 43 | Quick example
 44 | =============
 45 | Here's a snapshot, just to give an idea about the power of the
 46 | package. For more examples, look at the documentation.
 47 | 
 48 | Suppose you want to know how much time is left, in
 49 | years/months/days/etc, before the next Easter that falls in a
 50 | year with a Friday the 13th in August, and you want to get today's
 51 | date out of the "date" unix system command. Here is the code:
 52 | 
 53 | .. doctest:: readmeexample
 54 | 
 55 |     >>> from dateutil.relativedelta import *
 56 |     >>> from dateutil.easter import *
 57 |     >>> from dateutil.rrule import *
 58 |     >>> from dateutil.parser import *
 59 |     >>> from datetime import *
 60 |     >>> now = parse("Sat Oct 11 17:13:46 UTC 2003")
 61 |     >>> today = now.date()
 62 |     >>> year = rrule(YEARLY,dtstart=now,bymonth=8,bymonthday=13,byweekday=FR)[0].year
 63 |     >>> rdelta = relativedelta(easter(year), today)
 64 |     >>> print("Today is: %s" % today)
 65 |     Today is: 2003-10-11
 66 |     >>> print("Year with next Aug 13th on a Friday is: %s" % year)
 67 |     Year with next Aug 13th on a Friday is: 2004
 68 |     >>> print("How far is the Easter of that year: %s" % rdelta)
 69 |     How far is the Easter of that year: relativedelta(months=+6)
 70 |     >>> print("And the Easter of that year is: %s" % (today+rdelta))
 71 |     And the Easter of that year is: 2004-04-11
 72 | 
 73 | Being exactly 6 months ahead was **really** a coincidence :)
 74 | 
 75 | 
 76 | Author
 77 | ======
 78 | The dateutil module was written by Gustavo Niemeyer <gustavo@niemeyer.net>
 79 | in 2003.
 80 | 
 81 | It is maintained by:
 82 | 
 83 | * Gustavo Niemeyer <gustavo@niemeyer.net> 2003-2011
 84 | * Tomi Pieviläinen <tomi.pievilainen@iki.fi> 2012-2014
 85 | * Yaron de Leeuw <me@jarondl.net> 2014-
 86 | 
 87 | Building and releasing
 88 | ======================
 89 | When you get the source, it does not contain the internal zoneinfo
 90 | database. To get (and update) the database, run the updatezinfo.py script. Make sure
 91 | that the zic command is in your path, and that you have network connectivity
 92 | to get the latest timezone information from IANA. If you have downloaded
 93 | the timezone data earlier, you can give the tarball as a parameter to
 94 | updatezinfo.py.
 95 | 
 96 | 
 97 | Testing
 98 | =======
 99 | dateutil has a comprehensive test suite, which can be run simply by running
100 | `python setup.py test [-q]` in the project root. Note that if you don't have the internal
101 | zoneinfo database, some tests will fail. Apart from that, all tests should pass.
102 | 
103 | To easily test dateutil against all supported Python versions, you can use
104 | `tox <https://tox.readthedocs.org/en/latest/>`_.
105 | 
106 | All github pull requests are automatically tested using travis.
107 | 


--------------------------------------------------------------------------------
/lib/dateutil/zoneinfo/__init__.py:
--------------------------------------------------------------------------------
  1 | # -*- coding: utf-8 -*-
  2 | import logging
  3 | import os
  4 | import warnings
  5 | import tempfile
  6 | import shutil
  7 | from subprocess import check_call
  8 | from tarfile import TarFile
  9 | from pkgutil import get_data
 10 | from io import BytesIO
 11 | from contextlib import closing
 12 | 
 13 | from dateutil.tz import tzfile
 14 | 
# Names exported by ``from dateutil.zoneinfo import *``.
# NOTE(review): "setcachesize" is listed here, but no such name is defined in
# this module as shown -- confirm whether it should be dropped or restored.
__all__ = ["setcachesize", "gettz", "rebuild"]

# File name of the bundled zoneinfo tarball: read through pkgutil.get_data and
# written next to this module by rebuild().
_ZONEFILENAME = "dateutil-zoneinfo.tar.gz"
 18 | 
 19 | # python2.6 compatability. Note that TarFile.__exit__ != TarFile.close, but
 20 | # it's close enough for python2.6
 21 | _tar_open = TarFile.open
 22 | if not hasattr(TarFile, '__exit__'):
 23 |     def _tar_open(*args, **kwargs):
 24 |         return closing(TarFile.open(*args, **kwargs))
 25 | 
 26 | 
 27 | class tzfile(tzfile):
 28 |     def __reduce__(self):
 29 |         return (gettz, (self._filename,))
 30 | 
 31 | 
 32 | def getzoneinfofile_stream():
 33 |     try:
 34 |         return BytesIO(get_data(__name__, _ZONEFILENAME))
 35 |     except IOError as e:  # TODO  switch to FileNotFoundError?
 36 |         warnings.warn("I/O error({0}): {1}".format(e.errno, e.strerror))
 37 |         return None
 38 | 
 39 | 
 40 | class ZoneInfoFile(object):
 41 |     def __init__(self, zonefile_stream=None):
 42 |         if zonefile_stream is not None:
 43 |             with _tar_open(fileobj=zonefile_stream, mode='r') as tf:
 44 |                 # dict comprehension does not work on python2.6
 45 |                 # TODO: get back to the nicer syntax when we ditch python2.6
 46 |                 # self.zones = {zf.name: tzfile(tf.extractfile(zf),
 47 |                 #               filename = zf.name)
 48 |                 #              for zf in tf.getmembers() if zf.isfile()}
 49 |                 self.zones = dict((zf.name, tzfile(tf.extractfile(zf),
 50 |                                                    filename=zf.name))
 51 |                                   for zf in tf.getmembers() if zf.isfile())
 52 |                 # deal with links: They'll point to their parent object. Less
 53 |                 # waste of memory
 54 |                 # links = {zl.name: self.zones[zl.linkname]
 55 |                 #        for zl in tf.getmembers() if zl.islnk() or zl.issym()}
 56 |                 links = dict((zl.name, self.zones[zl.linkname])
 57 |                              for zl in tf.getmembers() if
 58 |                              zl.islnk() or zl.issym())
 59 |                 self.zones.update(links)
 60 |         else:
 61 |             self.zones = dict()
 62 | 
 63 | 
 64 | # The current API has gettz as a module function, although in fact it taps into
 65 | # a stateful class. So as a workaround for now, without changing the API, we
 66 | # will create a new "global" class instance the first time a user requests a
 67 | # timezone. Ugly, but adheres to the api.
 68 | #
 69 | # TODO: deprecate this.
 70 | _CLASS_ZONE_INSTANCE = list()
 71 | 
 72 | 
 73 | def gettz(name):
 74 |     if len(_CLASS_ZONE_INSTANCE) == 0:
 75 |         _CLASS_ZONE_INSTANCE.append(ZoneInfoFile(getzoneinfofile_stream()))
 76 |     return _CLASS_ZONE_INSTANCE[0].zones.get(name)
 77 | 
 78 | 
 79 | def rebuild(filename, tag=None, format="gz", zonegroups=[]):
 80 |     """Rebuild the internal timezone info in dateutil/zoneinfo/zoneinfo*tar*
 81 | 
 82 |     filename is the timezone tarball from ftp.iana.org/tz.
 83 | 
 84 |     """
 85 |     tmpdir = tempfile.mkdtemp()
 86 |     zonedir = os.path.join(tmpdir, "zoneinfo")
 87 |     moduledir = os.path.dirname(__file__)
 88 |     try:
 89 |         with _tar_open(filename) as tf:
 90 |             for name in zonegroups:
 91 |                 tf.extract(name, tmpdir)
 92 |             filepaths = [os.path.join(tmpdir, n) for n in zonegroups]
 93 |             try:
 94 |                 check_call(["zic", "-d", zonedir] + filepaths)
 95 |             except OSError as e:
 96 |                 if e.errno == 2:
 97 |                     logging.error(
 98 |                         "Could not find zic. Perhaps you need to install "
 99 |                         "libc-bin or some other package that provides it, "
100 |                         "or it's not in your PATH?")
101 |                     raise
102 |         target = os.path.join(moduledir, _ZONEFILENAME)
103 |         with _tar_open(target, "w:%s" % format) as tf:
104 |             for entry in os.listdir(zonedir):
105 |                 entrypath = os.path.join(zonedir, entry)
106 |                 tf.add(entrypath, entry)
107 |     finally:
108 |         shutil.rmtree(tmpdir)
109 | 


--------------------------------------------------------------------------------
/lib/rebulk/introspector.py:
--------------------------------------------------------------------------------
  1 | #!/usr/bin/env python
  2 | # -*- coding: utf-8 -*-
  3 | """
  4 | Introspect rebulk object to retrieve capabilities.
  5 | """
  6 | from abc import ABCMeta, abstractproperty
  7 | from collections import defaultdict
  8 | 
  9 | import six
 10 | from .pattern import StringPattern, RePattern, FunctionalPattern
 11 | from .utils import extend_safe
 12 | 
 13 | 
 14 | @six.add_metaclass(ABCMeta)
 15 | class Description(object):
 16 |     """
 17 |     Abstract class for a description.
 18 |     """
 19 |     @abstractproperty
 20 |     def properties(self):  # pragma: no cover
 21 |         """
 22 |         Properties of described object.
 23 |         :return: all properties that described object can generate grouped by name.
 24 |         :rtype: dict
 25 |         """
 26 |         pass
 27 | 
 28 | 
 29 | class PatternDescription(Description):
 30 |     """
 31 |     Description of a pattern.
 32 |     """
 33 |     def __init__(self, pattern):  # pylint:disable=too-many-branches
 34 |         self.pattern = pattern
 35 |         self._properties = defaultdict(list)
 36 | 
 37 |         if pattern.properties:
 38 |             for key, values in pattern.properties.items():
 39 |                 extend_safe(self._properties[key], values)
 40 |         elif 'value' in pattern.match_options:
 41 |             self._properties[pattern.name].append(pattern.match_options['value'])
 42 |         elif isinstance(pattern, StringPattern):
 43 |             extend_safe(self._properties[pattern.name], pattern.patterns)
 44 |         elif isinstance(pattern, RePattern):
 45 |             if pattern.name and pattern.name not in pattern.private_names:
 46 |                 extend_safe(self._properties[pattern.name], [None])
 47 |             if not pattern.private_children:
 48 |                 for regex_pattern in pattern.patterns:
 49 |                     for group_name, values in regex_pattern.groupindex.items():
 50 |                         if group_name not in pattern.private_names:
 51 |                             extend_safe(self._properties[group_name], [None])
 52 |         elif isinstance(pattern, FunctionalPattern):
 53 |             if pattern.name and pattern.name not in pattern.private_names:
 54 |                 extend_safe(self._properties[pattern.name], [None])
 55 | 
 56 | 
 57 |     @property
 58 |     def properties(self):
 59 |         """
 60 |         Properties for this rule.
 61 |         :return:
 62 |         :rtype: dict
 63 |         """
 64 |         return self._properties
 65 | 
 66 | 
 67 | class RuleDescription(Description):
 68 |     """
 69 |     Description of a rule.
 70 |     """
 71 |     def __init__(self, rule):
 72 |         self.rule = rule
 73 | 
 74 |         self._properties = defaultdict(list)
 75 | 
 76 |         if rule.properties:
 77 |             for key, values in rule.properties.items():
 78 |                 extend_safe(self._properties[key], values)
 79 | 
 80 |     @property
 81 |     def properties(self):
 82 |         """
 83 |         Properties for this rule.
 84 |         :return:
 85 |         :rtype: dict
 86 |         """
 87 |         return self._properties
 88 | 
 89 | 
 90 | class Introspection(Description):
 91 |     """
 92 |     Introspection results.
 93 |     """
 94 |     def __init__(self, rebulk, context=None):
 95 |         self.patterns = [PatternDescription(pattern) for pattern in rebulk.effective_patterns(context)
 96 |                          if not pattern.private and not pattern.marker]
 97 |         self.rules = [RuleDescription(rule) for rule in rebulk.effective_rules(context)]
 98 | 
 99 |     @property
100 |     def properties(self):
101 |         """
102 |         Properties for Introspection results.
103 |         :return:
104 |         :rtype:
105 |         """
106 |         properties = defaultdict(list)
107 |         for pattern in self.patterns:
108 |             for key, values in pattern.properties.items():
109 |                 extend_safe(properties[key], values)
110 |         for rule in self.rules:
111 |             for key, values in rule.properties.items():
112 |                 extend_safe(properties[key], values)
113 |         return properties
114 | 
115 | 
def introspect(rebulk, context=None):
    """
    Build the introspection results of a Rebulk instance.
    :param rebulk: object whose effective patterns and rules are described
    :type rebulk: Rebulk
    :param context: passed through when the effective patterns and rules are requested
    :type context:
    :return: Introspection instance
    :rtype: Introspection
    """
    result = Introspection(rebulk, context)
    return result
127 | 


--------------------------------------------------------------------------------
/lib/rebulk/utils.py:
--------------------------------------------------------------------------------
  1 | #!/usr/bin/env python
  2 | # -*- coding: utf-8 -*-
  3 | """
  4 | Various utilities functions
  5 | """
try:
    from collections.abc import MutableSet
except ImportError:  # Python 2
    from collections import MutableSet

from types import GeneratorType
  9 | 
 10 | 
 11 | def find_all(string, sub, start=None, end=None, ignore_case=False, **kwargs):
 12 |     """
 13 |     Return all indices in string s where substring sub is
 14 |     found, such that sub is contained in the slice s[start:end].
 15 | 
 16 |     >>> list(find_all('The quick brown fox jumps over the lazy dog', 'fox'))
 17 |     [16]
 18 | 
 19 |     >>> list(find_all('The quick brown fox jumps over the lazy dog', 'mountain'))
 20 |     []
 21 | 
 22 |     >>> list(find_all('The quick brown fox jumps over the lazy dog', 'The'))
 23 |     [0]
 24 | 
 25 |     >>> list(find_all(
 26 |     ... 'Carved symbols in a mountain hollow on the bank of an inlet irritated an eccentric person',
 27 |     ... 'an'))
 28 |     [44, 51, 70]
 29 | 
 30 |     >>> list(find_all(
 31 |     ... 'Carved symbols in a mountain hollow on the bank of an inlet irritated an eccentric person',
 32 |     ... 'an',
 33 |     ... 50,
 34 |     ... 60))
 35 |     [51]
 36 | 
 37 |     :param string: the input string
 38 |     :type string: str
 39 |     :param sub: the substring
 40 |     :type sub: str
 41 |     :return: all indices in the input string
 42 |     :rtype: __generator[str]
 43 |     """
 44 |     #pylint: disable=unused-argument
 45 |     if ignore_case:
 46 |         sub = sub.lower()
 47 |         string = string.lower()
 48 |     while True:
 49 |         start = string.find(sub, start, end)
 50 |         if start == -1:
 51 |             return
 52 |         yield start
 53 |         start += len(sub)
 54 | 
 55 | 
 56 | def get_first_defined(data, keys, default_value=None):
 57 |     """
 58 |     Get the first defined key in data.
 59 |     :param data:
 60 |     :type data:
 61 |     :param keys:
 62 |     :type keys:
 63 |     :param default_value:
 64 |     :type default_value:
 65 |     :return:
 66 |     :rtype:
 67 |     """
 68 |     for key in keys:
 69 |         if key in data:
 70 |             return data[key]
 71 |     return default_value
 72 | 
 73 | 
 74 | def is_iterable(obj):
 75 |     """
 76 |     Are we being asked to look up a list of things, instead of a single thing?
 77 |     We check for the `__iter__` attribute so that this can cover types that
 78 |     don't have to be known by this module, such as NumPy arrays.
 79 | 
 80 |     Strings, however, should be considered as atomic values to look up, not
 81 |     iterables.
 82 | 
 83 |     We don't need to check for the Python 2 `unicode` type, because it doesn't
 84 |     have an `__iter__` attribute anyway.
 85 |     """
 86 |     # pylint: disable=consider-using-ternary
 87 |     return hasattr(obj, '__iter__') and not isinstance(obj, str) or isinstance(obj, GeneratorType)
 88 | 
 89 | 
 90 | def extend_safe(target, source):
 91 |     """
 92 |     Extends source list to target list only if elements doesn't exists in target list.
 93 |     :param target:
 94 |     :type target: list
 95 |     :param source:
 96 |     :type source: list
 97 |     """
 98 |     for elt in source:
 99 |         if elt not in target:
100 |             target.append(elt)
101 | 
102 | 
class _Ref(object):
    """
    Wrapper that compares and hashes a value by identity, used by IdentitySet.
    """
    def __init__(self, value):
        self.value = value

    def __eq__(self, other):
        # Two references are equal only when they wrap the very same object.
        wrapped = other.value
        return self.value is wrapped

    def __hash__(self):
        return id(self.value)
115 | 
116 | 
class IdentitySet(MutableSet):  # pragma: no cover
    """
    Set whose members are told apart by identity rather than by equality.
    """
    def __init__(self, items=None):  # pylint: disable=super-init-not-called
        wrapped = () if items is None else (_Ref(item) for item in items)
        self.refs = set(wrapped)

    def __contains__(self, elem):
        probe = _Ref(elem)
        return probe in self.refs

    def __iter__(self):
        return (ref.value for ref in self.refs)

    def __len__(self):
        return len(self.refs)

    def add(self, value):
        self.refs.add(_Ref(value))

    def discard(self, value):
        self.refs.discard(_Ref(value))

    def update(self, iterable):
        """
        Add every element of iterable to the set
        :param iterable: elements to add
        :type iterable:
        :return:
        :rtype:
        """
        for elem in iterable:
            self.add(elem)

    def __repr__(self):  # pragma: no cover
        return "%s(%s)" % (type(self).__name__, list(self))
154 | 


--------------------------------------------------------------------------------
/lib/guessit/rules/common/date.py:
--------------------------------------------------------------------------------
  1 | #!/usr/bin/env python
  2 | # -*- coding: utf-8 -*-
  3 | """
  4 | Date
  5 | """
  6 | from dateutil import parser
  7 | 
  8 | from rebulk.remodule import re
  9 | 
# Characters accepted between the parts of a date.
_dsep = r'[-/ \.]'
# Same characters plus "x"; used right next to the four-digit group below.
_dsep_bis = r'[-/ \.x]'

# Date patterns, tried in this order by search_date. In every pattern group 1
# spans the whole date; the groups after it hold the individual parts.
date_regexps = [
    # eight digits with a separator on both sides
    re.compile(r'%s((\d{8}))%s' % (_dsep, _dsep), re.IGNORECASE),
    # six digits with a separator on both sides
    re.compile(r'%s((\d{6}))%s' % (_dsep, _dsep), re.IGNORECASE),
    # two digits, then two groups of one or two digits
    re.compile(r'(?:^|[^\d])((\d{2})%s(\d{1,2})%s(\d{1,2}))(?:$|[^\d])' % (_dsep, _dsep), re.IGNORECASE),
    # two groups of one or two digits, then two digits
    re.compile(r'(?:^|[^\d])((\d{1,2})%s(\d{1,2})%s(\d{2}))(?:$|[^\d])' % (_dsep, _dsep), re.IGNORECASE),
    # four digits, then two groups of one or two digits
    re.compile(r'(?:^|[^\d])((\d{4})%s(\d{1,2})%s(\d{1,2}))(?:$|[^\d])' % (_dsep_bis, _dsep), re.IGNORECASE),
    # two groups of one or two digits, then four digits
    re.compile(r'(?:^|[^\d])((\d{1,2})%s(\d{1,2})%s(\d{4}))(?:$|[^\d])' % (_dsep, _dsep_bis), re.IGNORECASE),
    # one or two digits with an optional st/nd/rd/th suffix, a word of 3 to 10
    # letters, then four digits; captured as a single part
    re.compile(r'(?:^|[^\d])((\d{1,2}(?:st|nd|rd|th)?%s(?:[a-z]{3,10})%s\d{4}))(?:$|[^\d])' % (_dsep, _dsep),
               re.IGNORECASE)]
 22 | 
 23 | 
 24 | def valid_year(year):
 25 |     """Check if number is a valid year"""
 26 |     return 1920 <= year < 2030
 27 | 
 28 | 
 29 | def _is_int(string):
 30 |     """
 31 |     Check if the input string is an integer
 32 | 
 33 |     :param string:
 34 |     :type string:
 35 |     :return:
 36 |     :rtype:
 37 |     """
 38 |     try:
 39 |         int(string)
 40 |         return True
 41 |     except ValueError:
 42 |         return False
 43 | 
 44 | 
 45 | def _guess_day_first_parameter(groups):
 46 |     """
 47 |     If day_first is not defined, use some heuristic to fix it.
 48 |     It helps to solve issues with python dateutils 2.5.3 parser changes.
 49 | 
 50 |     :param groups: match groups found for the date
 51 |     :type groups: list of match objects
 52 |     :return: day_first option guessed value
 53 |     :rtype: bool
 54 |     """
 55 | 
 56 |     # If match starts with a long year, then day_first is force to false.
 57 |     if _is_int(groups[0]) and valid_year(int(groups[0][:4])):
 58 |         return False
 59 |     # If match ends with a long year, the day_first is forced to true.
 60 |     elif _is_int(groups[-1]) and valid_year(int(groups[-1][-4:])):
 61 |         return True
 62 |     # If match starts with a short year, then day_first is force to false.
 63 |     elif _is_int(groups[0]) and int(groups[0][:2]) > 31:
 64 |         return False
 65 |     # If match ends with a short year, then day_first is force to true.
 66 |     elif _is_int(groups[-1]) and int(groups[-1][-2:]) > 31:
 67 |         return True
 68 | 
 69 | 
 70 | def search_date(string, year_first=None, day_first=None):
 71 |     """Looks for date patterns, and if found return the date and group span.
 72 | 
 73 |     Assumes there are sentinels at the beginning and end of the string that
 74 |     always allow matching a non-digit delimiting the date.
 75 | 
 76 |     Year can be defined on two digit only. It will return the nearest possible
 77 |     date from today.
 78 | 
 79 |     >>> search_date(' This happened on 2002-04-22. ')
 80 |     (18, 28, datetime.date(2002, 4, 22))
 81 | 
 82 |     >>> search_date(' And this on 17-06-1998. ')
 83 |     (13, 23, datetime.date(1998, 6, 17))
 84 | 
 85 |     >>> search_date(' no date in here ')
 86 |     """
 87 |     for date_re in date_regexps:
 88 |         search_match = date_re.search(string)
 89 |         if not search_match:
 90 |             continue
 91 | 
 92 |         start, end = search_match.start(1), search_match.end(1)
 93 |         groups = search_match.groups()[1:]
 94 |         match = '-'.join(groups)
 95 | 
 96 |         if match is None:
 97 |             continue
 98 | 
 99 |         if year_first and day_first is None:
100 |             day_first = False
101 | 
102 |         if day_first is None:
103 |             day_first = _guess_day_first_parameter(groups)
104 | 
105 |         # If day_first/year_first is undefined, parse is made using both possible values.
106 |         yearfirst_opts = [False, True]
107 |         if year_first is not None:
108 |             yearfirst_opts = [year_first]
109 | 
110 |         dayfirst_opts = [True, False]
111 |         if day_first is not None:
112 |             dayfirst_opts = [day_first]
113 | 
114 |         kwargs_list = ({'dayfirst': d, 'yearfirst': y}
115 |                        for d in dayfirst_opts for y in yearfirst_opts)
116 |         for kwargs in kwargs_list:
117 |             try:
118 |                 date = parser.parse(match, **kwargs)
119 |             except (ValueError, TypeError):  # pragma: no cover
120 |                 # see https://bugs.launchpad.net/dateutil/+bug/1247643
121 |                 date = None
122 | 
123 |             # check date plausibility
124 |             if date and valid_year(date.year):  # pylint:disable=no-member
125 |                 return start, end, date.date()  # pylint:disable=no-member
126 | 


--------------------------------------------------------------------------------
/lib/guessit/rules/properties/screen_size.py:
--------------------------------------------------------------------------------
  1 | #!/usr/bin/env python
  2 | # -*- coding: utf-8 -*-
  3 | """
  4 | screen_size property
  5 | """
  6 | from rebulk.remodule import re
  7 | 
  8 | from rebulk import Rebulk, Rule, RemoveMatch
  9 | from ..common.validators import seps_surround
 10 | from ..common import dash, seps
 11 | 
 12 | 
 13 | def screen_size():
 14 |     """
 15 |     Builder for rebulk object.
 16 |     :return: Created Rebulk object
 17 |     :rtype: Rebulk
 18 |     """
 19 |     def conflict_solver(match, other):
 20 |         """
 21 |         Conflict solver for most screen_size.
 22 |         """
 23 |         if other.name == 'screen_size':
 24 |             if 'resolution' in other.tags:
 25 |                 # The chtouile to solve conflict in "720 x 432" string matching both 720p pattern
 26 |                 int_value = _digits_re.findall(match.raw)[-1]
 27 |                 if other.value.startswith(int_value):
 28 |                     return match
 29 |             return other
 30 |         return '__default__'
 31 | 
 32 |     rebulk = Rebulk().string_defaults(ignore_case=True).regex_defaults(flags=re.IGNORECASE)
 33 |     rebulk.defaults(name="screen_size", validator=seps_surround, conflict_solver=conflict_solver)
 34 | 
 35 |     rebulk.regex(r"(?:\d{3,}(?:x|\*))?360(?:i|p?x?)", value="360p")
 36 |     rebulk.regex(r"(?:\d{3,}(?:x|\*))?368(?:i|p?x?)", value="368p")
 37 |     rebulk.regex(r"(?:\d{3,}(?:x|\*))?480(?:i|p?x?)", value="480p")
 38 |     rebulk.regex(r"(?:\d{3,}(?:x|\*))?576(?:i|p?x?)", value="576p")
 39 |     rebulk.regex(r"(?:\d{3,}(?:x|\*))?720(?:i|p?(?:50|60)?x?)", value="720p")
 40 |     rebulk.regex(r"(?:\d{3,}(?:x|\*))?720(?:p(?:50|60)?x?)", value="720p")
 41 |     rebulk.regex(r"(?:\d{3,}(?:x|\*))?720p?hd", value="720p")
 42 |     rebulk.regex(r"(?:\d{3,}(?:x|\*))?900(?:i|p?x?)", value="900p")
 43 |     rebulk.regex(r"(?:\d{3,}(?:x|\*))?1080i", value="1080i")
 44 |     rebulk.regex(r"(?:\d{3,}(?:x|\*))?1080p?x?", value="1080p")
 45 |     rebulk.regex(r"(?:\d{3,}(?:x|\*))?1080(?:p(?:50|60)?x?)", value="1080p")
 46 |     rebulk.regex(r"(?:\d{3,}(?:x|\*))?1080p?hd", value="1080p")
 47 |     rebulk.regex(r"(?:\d{3,}(?:x|\*))?2160(?:i|p?x?)", value="4K")
 48 |     rebulk.string('4k', value='4K')
 49 | 
 50 |     _digits_re = re.compile(r'\d+')
 51 | 
 52 |     rebulk.defaults(name="screen_size", validator=seps_surround)
 53 |     rebulk.regex(r'\d{3,}-?(?:x|\*)-?\d{3,}',
 54 |                  formatter=lambda value: 'x'.join(_digits_re.findall(value)),
 55 |                  abbreviations=[dash],
 56 |                  tags=['resolution'],
 57 |                  conflict_solver=lambda match, other: '__default__' if other.name == 'screen_size' else other)
 58 | 
 59 |     rebulk.rules(ScreenSizeOnlyOne, RemoveScreenSizeConflicts)
 60 | 
 61 |     return rebulk
 62 | 
 63 | 
 64 | class ScreenSizeOnlyOne(Rule):
 65 |     """
 66 |     Keep a single screen_size pet filepath part.
 67 |     """
 68 |     consequence = RemoveMatch
 69 | 
 70 |     def when(self, matches, context):
 71 |         to_remove = []
 72 |         for filepart in matches.markers.named('path'):
 73 |             screensize = list(reversed(matches.range(filepart.start, filepart.end,
 74 |                                                      lambda match: match.name == 'screen_size')))
 75 |             if len(screensize) > 1:
 76 |                 to_remove.extend(screensize[1:])
 77 | 
 78 |         return to_remove
 79 | 
 80 | 
 81 | class RemoveScreenSizeConflicts(Rule):
 82 |     """
 83 |     Remove season and episode matches which conflicts with screen_size match.
 84 |     """
 85 |     consequence = RemoveMatch
 86 | 
 87 |     def when(self, matches, context):
 88 |         to_remove = []
 89 |         for filepart in matches.markers.named('path'):
 90 |             screensize = matches.range(filepart.start, filepart.end, lambda match: match.name == 'screen_size', 0)
 91 |             if not screensize:
 92 |                 continue
 93 | 
 94 |             conflicts = matches.conflicting(screensize, lambda match: match.name in ('season', 'episode'))
 95 |             if not conflicts:
 96 |                 continue
 97 | 
 98 |             video_profile = matches.range(screensize.end, filepart.end, lambda match: match.name == 'video_profile', 0)
 99 |             if video_profile and not matches.holes(screensize.end, video_profile.start,
100 |                                                    predicate=lambda h: h.value and h.value.strip(seps)):
101 |                 to_remove.extend(conflicts)
102 | 
103 |             date = matches.previous(screensize, lambda match: match.name == 'date', 0)
104 |             if date and not matches.holes(date.end, screensize.start,
105 |                                           predicate=lambda h: h.value and h.value.strip(seps)):
106 |                 to_remove.extend(conflicts)
107 | 
108 |         return to_remove
109 | 


--------------------------------------------------------------------------------
/lib/guessit/rules/properties/streaming_service.py:
--------------------------------------------------------------------------------
  1 | #!/usr/bin/env python
  2 | # -*- coding: utf-8 -*-
  3 | """
  4 | streaming_service property
  5 | """
  6 | import re
  7 | 
  8 | from rebulk import Rebulk
  9 | from rebulk.rules import Rule, RemoveMatch
 10 | 
 11 | from ...rules.common import seps, dash
 12 | 
 13 | 
def streaming_service():
    """Streaming service property.

    Registers, for every known service, the abbreviations and spellings that
    are recognised together with the value reported for them. String patterns
    ignore case; regex patterns ignore case and use the ``dash`` abbreviation.
    Every match is named ``streaming_service`` and tagged ``format-prefix``.

    :return: the configured Rebulk object
    :rtype: Rebulk
    """
    rebulk = Rebulk().string_defaults(ignore_case=True).regex_defaults(flags=re.IGNORECASE, abbreviations=[dash])
    rebulk.defaults(name='streaming_service', tags=['format-prefix'])

    # Multi-word names are registered twice: joined, as a string pattern, and
    # dash-separated, as a regex pattern.
    rebulk.string('AE', 'A&E', value='A&E')
    rebulk.string('AMBC', value='ABC')
    rebulk.string('AMC', value='AMC')
    rebulk.string('AMZN', 'AmazonPrime', value='Amazon Prime')
    rebulk.regex('Amazon-Prime', value='Amazon Prime')
    rebulk.string('AS', 'AdultSwim', value='Adult Swim')
    rebulk.regex('Adult-Swim', value='Adult Swim')
    rebulk.string('iP', 'BBCiPlayer', value='BBC iPlayer')
    rebulk.regex('BBC-iPlayer', value='BBC iPlayer')
    rebulk.string('CBS', value='CBS')
    rebulk.string('CC', 'ComedyCentral', value='Comedy Central')
    rebulk.regex('Comedy-Central', value='Comedy Central')
    rebulk.string('CR', 'CrunchyRoll', value='Crunchy Roll')
    rebulk.regex('Crunchy-Roll', value='Crunchy Roll')
    rebulk.string('CW', 'TheCW', value='The CW')
    rebulk.regex('The-CW', value='The CW')
    rebulk.string('DISC', 'Discovery', value='Discovery')
    rebulk.string('DIY', value='DIY Network')
    rebulk.string('DSNY', 'Disney', value='Disney')
    rebulk.string('EPIX', 'ePix', value='ePix')
    rebulk.string('HBO', 'HBOGo', value='HBO Go')
    rebulk.regex('HBO-Go', value='HBO Go')
    rebulk.string('HIST', 'History', value='History')
    rebulk.string('ID', value='Investigation Discovery')
    # NOTE(review): the next two calls pass the same string twice -- confirm
    # whether a second spelling was meant.
    rebulk.string('IFC', 'IFC', value='IFC')
    rebulk.string('PBS', 'PBS', value='PBS')
    rebulk.string('NATG', 'NationalGeographic', value='National Geographic')
    rebulk.regex('National-Geographic', value='National Geographic')
    rebulk.string('NBA', 'NBATV', value='NBA TV')
    rebulk.regex('NBA-TV', value='NBA TV')
    rebulk.string('NBC', value='NBC')
    rebulk.string('NFL', value='NFL')
    rebulk.string('NICK', 'Nickelodeon', value='Nickelodeon')
    rebulk.string('NF', 'Netflix', value='Netflix')
    rebulk.string('iTunes', value='iTunes')
    rebulk.string('RTE', value='RTÉ One')
    rebulk.string('SESO', 'SeeSo', value='SeeSo')
    rebulk.string('SPKE', 'SpikeTV', 'Spike TV', value='Spike TV')
    rebulk.string('SYFY', 'Syfy', value='Syfy')
    rebulk.string('TFOU', 'TFou', value='TFou')
    rebulk.string('TLC', value='TLC')
    rebulk.string('TV3', value='TV3 Ireland')
    # NOTE(review): "Sweeden" looks like a misspelling of "Sweden"; it is the
    # reported value, so check its consumers before changing it.
    rebulk.string('TV4', value='TV4 Sweeden')
    rebulk.string('TVL', 'TVLand', 'TV Land', value='TV Land')
    rebulk.string('UFC', value='UFC')
    rebulk.string('USAN', value='USA Network')

    rebulk.rules(ValidateStreamingService)

    return rebulk
 73 | 
 74 | 
 75 | class ValidateStreamingService(Rule):
 76 |     """Validate streaming service matches."""
 77 | 
 78 |     priority = 32
 79 |     consequence = RemoveMatch
 80 | 
 81 |     def when(self, matches, context):
 82 |         """Streaming service is always before format.
 83 | 
 84 |         :param matches:
 85 |         :type matches: rebulk.match.Matches
 86 |         :param context:
 87 |         :type context: dict
 88 |         :return:
 89 |         """
 90 |         to_remove = []
 91 |         for service in matches.named('streaming_service'):
 92 |             next_match = matches.next(service, lambda match: 'streaming_service.suffix' in match.tags, 0)
 93 |             previous_match = matches.previous(service, lambda match: 'streaming_service.prefix' in match.tags, 0)
 94 |             has_other = service.initiator and service.initiator.children.named('other')
 95 | 
 96 |             if not has_other and \
 97 |                 (not next_match or matches.holes(service.end, next_match.start,
 98 |                                                  predicate=lambda match: match.value.strip(seps))) and \
 99 |                 (not previous_match or matches.holes(previous_match.end, service.start,
100 |                                                      predicate=lambda match: match.value.strip(seps))):
101 |                 to_remove.append(service)
102 |                 continue
103 | 
104 |             if service.value == 'Comedy Central':
105 |                 # Current match is a valid streaming service, removing invalid closed caption (CC) matches
106 |                 to_remove.extend(matches.named('other', predicate=lambda match: match.value == 'CC'))
107 | 
108 |         return to_remove
109 | 


--------------------------------------------------------------------------------
/lib/guessit/rules/common/numeral.py:
--------------------------------------------------------------------------------
  1 | #!/usr/bin/env python
  2 | # -*- coding: utf-8 -*-
  3 | """
  4 | parse numeral from various formats
  5 | """
  6 | from rebulk.remodule import re
  7 | 
# Regex fragment matching a decimal number made of one to four digits.
digital_numeral = r'\d{1,4}'

# Regex fragment matching an uppercase Roman numeral. Every group after the lookahead is optional,
# so the leading lookahead is what forces at least one Roman letter to be present.
roman_numeral = r'(?=[MCDLXVI]+)M{0,4}(?:CM|CD|D?C{0,3})(?:XC|XL|L?X{0,3})(?:IX|IV|V?I{0,3})'

# Number words from zero to twenty. __parse_word() returns the position of a word inside its list,
# so the word stored at index N must be the word for the number N.
english_word_numeral_list = [
    'zero', 'one', 'two', 'three', 'four', 'five', 'six', 'seven', 'eight', 'nine', 'ten',
    'eleven', 'twelve', 'thirteen', 'fourteen', 'fifteen', 'sixteen', 'seventeen', 'eighteen', 'nineteen', 'twenty'
]

# French number words, written with accents and hyphens.
french_word_numeral_list = [
    'zéro', 'un', 'deux', 'trois', 'quatre', 'cinq', 'six', 'sept', 'huit', 'neuf', 'dix',
    'onze', 'douze', 'treize', 'quatorze', 'quinze', 'seize', 'dix-sept', 'dix-huit', 'dix-neuf', 'vingt'
]

# Alternative French spellings: no accent, no hyphen, and 'une' instead of 'un'.
french_alt_word_numeral_list = [
    'zero', 'une', 'deux', 'trois', 'quatre', 'cinq', 'six', 'sept', 'huit', 'neuf', 'dix',
    'onze', 'douze', 'treize', 'quatorze', 'quinze', 'seize', 'dixsept', 'dixhuit', 'dixneuf', 'vingt'
]
 26 | 
 27 | 
 28 | def __build_word_numeral(*args):
 29 |     """
 30 |     Build word numeral regexp from list.
 31 | 
 32 |     :param args:
 33 |     :type args:
 34 |     :param kwargs:
 35 |     :type kwargs:
 36 |     :return:
 37 |     :rtype:
 38 |     """
 39 |     re_ = None
 40 |     for word_list in args:
 41 |         for word in word_list:
 42 |             if not re_:
 43 |                 re_ = r'(?:(?=\w+)'
 44 |             else:
 45 |                 re_ += '|'
 46 |             re_ += word
 47 |     re_ += ')'
 48 |     return re_
 49 | 
 50 | 
# Regex fragment matching any English or French number word from the three lists.
word_numeral = __build_word_numeral(english_word_numeral_list, french_word_numeral_list, french_alt_word_numeral_list)

# Regex fragment matching any supported numeral: digits, Roman numeral or number word.
numeral = '(?:' + digital_numeral + '|' + roman_numeral + '|' + word_numeral + ')'

# Roman symbols (and subtractive pairs) with their values. __parse_roman() walks this table
# in the order written here, so the order is part of the algorithm.
__romanNumeralMap = (
    ('M', 1000),
    ('CM', 900),
    ('D', 500),
    ('CD', 400),
    ('C', 100),
    ('XC', 90),
    ('L', 50),
    ('XL', 40),
    ('X', 10),
    ('IX', 9),
    ('V', 5),
    ('IV', 4),
    ('I', 1)
)

# roman_numeral anchored at both ends, used to validate a whole string.
__romanNumeralPattern = re.compile('^' + roman_numeral + '$')
 72 | 
 73 | 
 74 | def __parse_roman(value):
 75 |     """
 76 |     convert Roman numeral to integer
 77 | 
 78 |     :param value: Value to parse
 79 |     :type value: string
 80 |     :return:
 81 |     :rtype:
 82 |     """
 83 |     if not __romanNumeralPattern.search(value):
 84 |         raise ValueError('Invalid Roman numeral: %s' % value)
 85 | 
 86 |     result = 0
 87 |     index = 0
 88 |     for num, integer in __romanNumeralMap:
 89 |         while value[index:index + len(num)] == num:
 90 |             result += integer
 91 |             index += len(num)
 92 |     return result
 93 | 
 94 | 
 95 | def __parse_word(value):
 96 |     """
 97 |     Convert Word numeral to integer
 98 | 
 99 |     :param value: Value to parse
100 |     :type value: string
101 |     :return:
102 |     :rtype:
103 |     """
104 |     for word_list in [english_word_numeral_list, french_word_numeral_list, french_alt_word_numeral_list]:
105 |         try:
106 |             return word_list.index(value.lower())
107 |         except ValueError:
108 |             pass
109 |     raise ValueError  # pragma: no cover
110 | 
111 | 
# First run of digits in a string, with any non-digit characters around it.
_clean_re = re.compile(r'[^\d]*(\d+)[^\d]*')


def parse_numeral(value, int_enabled=True, roman_enabled=True, word_enabled=True, clean=True):
    """
    Parse a numeric value into integer.

    The enabled notations are tried in this order: digits, Roman numeral, number word. The first
    one that succeeds wins.

    :param value: Value to parse. Can be an integer, roman numeral or word.
    :type value: string
    :param int_enabled: try to read the value as decimal digits
    :type int_enabled: bool
    :param roman_enabled: try to read the value as a Roman numeral
    :type roman_enabled: bool
    :param word_enabled: try to read the value as a number word
    :type word_enabled: bool
    :param clean: when true, the first run of digits found in value is used for the integer
        notation, and each whitespace-separated word of value is tried (uppercased for Roman
        numerals) before the value as a whole
    :type clean: bool
    :return: Numeric value
    :rtype: int
    :raise ValueError: if value can't be parsed with any enabled notation
    """
    if int_enabled:
        digits = _clean_re.match(value) if clean else None
        try:
            return int(digits.group(1) if digits else value)
        except ValueError:
            pass

    if roman_enabled:
        candidates = [word.upper() for word in value.split()] if clean else []
        for candidate in candidates + [value]:
            try:
                return __parse_roman(candidate)
            except ValueError:
                pass

    if word_enabled:
        candidates = value.split() if clean else []
        for candidate in candidates + [value]:
            try:
                return __parse_word(candidate)
            except ValueError:  # pragma: no cover
                pass

    raise ValueError('Invalid numeral: ' + value)   # pragma: no cover
166 | 


--------------------------------------------------------------------------------
/testsort.py:
--------------------------------------------------------------------------------
  1 | #!/usr/bin/env python
  2 | #
  3 | # Test for VideoSort post-processing script for NZBGet.
  4 | #
  5 | # Copyright (C) 2014-2017 Andrey Prygunkov <hugbug@users.sourceforge.net>
  6 | #
  7 | # This program is free software; you can redistribute it and/or modify it
  8 | # under the terms of the GNU Lesser General Public License as published by
  9 | # the Free Software Foundation; either version 3 of the License, or
 10 | # (at your option) any later version.
 11 | #
 12 | # This program is distributed in the hope that it will be useful,
 13 | # but WITHOUT ANY WARRANTY; without even the implied warranty of
 14 | # MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.    See the
 15 | # GNU Lesser General Public License for more details.
 16 | #
 17 | # You should have received a copy of the GNU Lesser General Public License
 18 | # along with the program.  If not, see <http://www.gnu.org/licenses/>.
 19 | #
 20 | 
 21 | import sys
 22 | from os.path import dirname
 23 | import os
 24 | import traceback
 25 | import re
 26 | import shutil
 27 | import subprocess
 28 | import json
 29 | import getopt
 30 | 
 31 | print('Test script for VideoSort')
 32 | 
 33 | root_dir = dirname(__file__)
 34 | test_dir = root_dir + '/__'
 35 | verbose = False
 36 | test_ids = []
 37 | 
 38 | options, _ = getopt.getopt(sys.argv[1:], 't:v', ['testid=', 'verbose'])
 39 | for opt, arg in options:
 40 | 	if opt in ('-v', '--verbose'):
 41 | 		verbose = True
 42 | 	elif opt in ('-t', '--testid'):
 43 | 		test_ids.append(arg)
 44 | 
 45 | def set_defaults():
 46 | 	# NZBGet global options
 47 | 	os.environ['NZBOP_SCRIPTDIR'] = 'test'
 48 | 
 49 | 	# script options
 50 | 	os.environ['NZBPO_MOVIESDIR'] = root_dir + '/movies'
 51 | 	os.environ['NZBPO_SERIESDIR'] = root_dir + '/series'
 52 | 	os.environ['NZBPO_DATEDDIR'] = root_dir + '/dated'
 53 | 	os.environ['NZBPO_OTHERTVDIR'] = root_dir + '/tv'
 54 | 	os.environ['NZBPO_VIDEOEXTENSIONS'] = '.mkv,.mp4,.avi'
 55 | 	os.environ['NZBPO_SATELLITEEXTENSIONS'] = '.srt'
 56 | 	os.environ['NZBPO_MULTIPLEEPISODES'] = 'list'
 57 | 	os.environ['NZBPO_EPISODESEPARATOR'] = '-'
 58 | 	os.environ['NZBPO_MINSIZE'] = '0'
 59 | 	os.environ['NZBPO_TVCATEGORIES'] = 'tv'
 60 | 	os.environ['NZBPO_MOVIESFORMAT'] = '%fn'
 61 | 	os.environ['NZBPO_OTHERTVFORMAT'] = '%fn'
 62 | 	os.environ['NZBPO_SERIESFORMAT'] = '%fn'
 63 | 	os.environ['NZBPO_DATEDFORMAT'] = '%fn'
 64 | 	os.environ['NZBPO_LOWERWORDS'] = 'the,of,and,at,vs,a,an,but,nor,for,on,so,yet'
 65 | 	os.environ['NZBPO_UPPERWORDS'] = 'III,II,IV'
 66 | 	os.environ['NZBPO_SERIESYEAR'] = 'yes'
 67 | 	os.environ['NZBPO_OVERWRITE'] = 'no'
 68 | 	os.environ['NZBPO_CLEANUP'] = 'no'
 69 | 	os.environ['NZBPO_PREVIEW'] = 'yes'
 70 | 	os.environ['NZBPO_VERBOSE'] = 'yes'
 71 | 
 72 | 	# properties of nzb-file
 73 | 	os.environ['NZBPP_DIRECTORY'] = test_dir
 74 | 	os.environ['NZBPP_NZBNAME'] = 'test'
 75 | 	os.environ['NZBPP_PARSTATUS'] = '2'
 76 | 	os.environ['NZBPP_UNPACKSTATUS'] = '2'
 77 | 	os.environ['NZBPP_CATEGORY'] = ''
 78 | 
 79 | 	# pp-parameters of nzb-file, including DNZB-headers
 80 | 	os.environ['NZBPR__DNZB_USENZBNAME'] = 'no'
 81 | 	os.environ['NZBPR__DNZB_PROPERNAME'] = ''
 82 | 	os.environ['NZBPR__DNZB_EPISODENAME'] = ''
 83 | 
 84 | def run_test(testobj):
 85 | 	set_defaults()
 86 | 	for prop_name in testobj:
 87 | 		os.environ[str(prop_name)] = str(testobj[prop_name])
 88 | 		if verbose:
 89 | 			print('%s: %s' % (prop_name, os.environ[prop_name]))
 90 | 	input_file = testobj['INPUTFILE']
 91 | 	output_file = testobj['OUTPUTFILE']
 92 | 	shutil.rmtree(test_dir, True)
 93 | 	os.mkdir(test_dir)
 94 | 	dir_name, file_name = os.path.split(input_file)
 95 | 	if dir_name != '':
 96 | 		os.mkdir(test_dir + '/' + dir_name)
 97 | 	full_file_name = test_dir + '/' + input_file
 98 | 	out_file = open(full_file_name, 'w')
 99 | 	out_file.write('empty file')
100 | 	out_file.close()
101 | 
102 | 	if verbose:
103 | 		print('Executing...')
104 | 	sys.stdout.flush()
105 | 	proc = subprocess.Popen(['python', root_dir + '/VideoSort.py'], stdout=subprocess.PIPE, stderr=subprocess.PIPE, env=os.environ.copy())
106 | 	out, err = proc.communicate()
107 | 	out += err
108 | 	ret = proc.returncode
109 | 	if verbose:
110 | 		print('Return code: %i' % ret)
111 | 	success = False
112 | 	dest = ''
113 | 
114 | 	if ret == 93:
115 | 		for line in out.split(b'\n'):
116 | 			if line.startswith(b'destination path: '):
117 | 				line = line[len(b'destination path: '):]
118 | 				if line.startswith(root_dir.encode()):
119 | 					line = line[len(root_dir.encode()):]
120 | 				dest = line.replace(b'\\', b'/').decode()
121 | 		success = dest == output_file and output_file != ''
122 | 
123 | 	if success:
124 | 		print('%s: SUCCESS' % testobj['id'])
125 | 	if not success:
126 | 		if verbose:
127 | 			print('********************************************************')
128 | 			print('*** FAILURE')
129 | 			print(out)
130 | 			print('*** FAILURE')
131 | 			print('id: %s' % testobj['id'])
132 | 			print('expected   : %s' % output_file)
133 | 			print('destination: %s' % dest)
134 | 			print('********************************************************')
135 | 			sys.exit(1)
136 | 		else:
137 | 			print('%s: FAILED' % testobj['id'])
138 | 			if output_file == '':
139 | 				print('destination: %s' % dest)
140 | 	elif verbose:
141 | 		print('expected   : %s' % output_file)
142 | 		print('destination: %s' % dest)
143 | 
# Load the test cases and run the selected ones (all of them when no --testid was given).
# The file is opened in a with-block so the handle is closed as soon as the JSON is parsed.
with open(root_dir + '/testdata.json') as testdata_file:
	testdata = json.load(testdata_file)
for testobj in testdata:
	if not test_ids or testobj['id'] in test_ids:
		run_test(testobj)
148 | 


--------------------------------------------------------------------------------
/lib/babelfish/data/iso-3166-1.txt:
--------------------------------------------------------------------------------
  1 | Country Name;ISO 3166-1-alpha-2 code
  2 | AFGHANISTAN;AF
  3 | ÅLAND ISLANDS;AX
  4 | ALBANIA;AL
  5 | ALGERIA;DZ
  6 | AMERICAN SAMOA;AS
  7 | ANDORRA;AD
  8 | ANGOLA;AO
  9 | ANGUILLA;AI
 10 | ANTARCTICA;AQ
 11 | ANTIGUA AND BARBUDA;AG
 12 | ARGENTINA;AR
 13 | ARMENIA;AM
 14 | ARUBA;AW
 15 | AUSTRALIA;AU
 16 | AUSTRIA;AT
 17 | AZERBAIJAN;AZ
 18 | BAHAMAS;BS
 19 | BAHRAIN;BH
 20 | BANGLADESH;BD
 21 | BARBADOS;BB
 22 | BELARUS;BY
 23 | BELGIUM;BE
 24 | BELIZE;BZ
 25 | BENIN;BJ
 26 | BERMUDA;BM
 27 | BHUTAN;BT
 28 | BOLIVIA, PLURINATIONAL STATE OF;BO
 29 | BONAIRE, SINT EUSTATIUS AND SABA;BQ
 30 | BOSNIA AND HERZEGOVINA;BA
 31 | BOTSWANA;BW
 32 | BOUVET ISLAND;BV
 33 | BRAZIL;BR
 34 | BRITISH INDIAN OCEAN TERRITORY;IO
 35 | BRUNEI DARUSSALAM;BN
 36 | BULGARIA;BG
 37 | BURKINA FASO;BF
 38 | BURUNDI;BI
 39 | CAMBODIA;KH
 40 | CAMEROON;CM
 41 | CANADA;CA
 42 | CAPE VERDE;CV
 43 | CAYMAN ISLANDS;KY
 44 | CENTRAL AFRICAN REPUBLIC;CF
 45 | CHAD;TD
 46 | CHILE;CL
 47 | CHINA;CN
 48 | CHRISTMAS ISLAND;CX
 49 | COCOS (KEELING) ISLANDS;CC
 50 | COLOMBIA;CO
 51 | COMOROS;KM
 52 | CONGO;CG
 53 | CONGO, THE DEMOCRATIC REPUBLIC OF THE;CD
 54 | COOK ISLANDS;CK
 55 | COSTA RICA;CR
 56 | CÔTE D'IVOIRE;CI
 57 | CROATIA;HR
 58 | CUBA;CU
 59 | CURAÇAO;CW
 60 | CYPRUS;CY
 61 | CZECH REPUBLIC;CZ
 62 | DENMARK;DK
 63 | DJIBOUTI;DJ
 64 | DOMINICA;DM
 65 | DOMINICAN REPUBLIC;DO
 66 | ECUADOR;EC
 67 | EGYPT;EG
 68 | EL SALVADOR;SV
 69 | EQUATORIAL GUINEA;GQ
 70 | ERITREA;ER
 71 | ESTONIA;EE
 72 | ETHIOPIA;ET
 73 | FALKLAND ISLANDS (MALVINAS);FK
 74 | FAROE ISLANDS;FO
 75 | FIJI;FJ
 76 | FINLAND;FI
 77 | FRANCE;FR
 78 | FRENCH GUIANA;GF
 79 | FRENCH POLYNESIA;PF
 80 | FRENCH SOUTHERN TERRITORIES;TF
 81 | GABON;GA
 82 | GAMBIA;GM
 83 | GEORGIA;GE
 84 | GERMANY;DE
 85 | GHANA;GH
 86 | GIBRALTAR;GI
 87 | GREECE;GR
 88 | GREENLAND;GL
 89 | GRENADA;GD
 90 | GUADELOUPE;GP
 91 | GUAM;GU
 92 | GUATEMALA;GT
 93 | GUERNSEY;GG
 94 | GUINEA;GN
 95 | GUINEA-BISSAU;GW
 96 | GUYANA;GY
 97 | HAITI;HT
 98 | HEARD ISLAND AND MCDONALD ISLANDS;HM
 99 | HOLY SEE (VATICAN CITY STATE);VA
100 | HONDURAS;HN
101 | HONG KONG;HK
102 | HUNGARY;HU
103 | ICELAND;IS
104 | INDIA;IN
105 | INDONESIA;ID
106 | IRAN, ISLAMIC REPUBLIC OF;IR
107 | IRAQ;IQ
108 | IRELAND;IE
109 | ISLE OF MAN;IM
110 | ISRAEL;IL
111 | ITALY;IT
112 | JAMAICA;JM
113 | JAPAN;JP
114 | JERSEY;JE
115 | JORDAN;JO
116 | KAZAKHSTAN;KZ
117 | KENYA;KE
118 | KIRIBATI;KI
119 | KOREA, DEMOCRATIC PEOPLE'S REPUBLIC OF;KP
120 | KOREA, REPUBLIC OF;KR
121 | KUWAIT;KW
122 | KYRGYZSTAN;KG
123 | LAO PEOPLE'S DEMOCRATIC REPUBLIC;LA
124 | LATVIA;LV
125 | LEBANON;LB
126 | LESOTHO;LS
127 | LIBERIA;LR
128 | LIBYA;LY
129 | LIECHTENSTEIN;LI
130 | LITHUANIA;LT
131 | LUXEMBOURG;LU
132 | MACAO;MO
133 | MACEDONIA, THE FORMER YUGOSLAV REPUBLIC OF;MK
134 | MADAGASCAR;MG
135 | MALAWI;MW
136 | MALAYSIA;MY
137 | MALDIVES;MV
138 | MALI;ML
139 | MALTA;MT
140 | MARSHALL ISLANDS;MH
141 | MARTINIQUE;MQ
142 | MAURITANIA;MR
143 | MAURITIUS;MU
144 | MAYOTTE;YT
145 | MEXICO;MX
146 | MICRONESIA, FEDERATED STATES OF;FM
147 | MOLDOVA, REPUBLIC OF;MD
148 | MONACO;MC
149 | MONGOLIA;MN
150 | MONTENEGRO;ME
151 | MONTSERRAT;MS
152 | MOROCCO;MA
153 | MOZAMBIQUE;MZ
154 | MYANMAR;MM
155 | NAMIBIA;NA
156 | NAURU;NR
157 | NEPAL;NP
158 | NETHERLANDS;NL
159 | NEW CALEDONIA;NC
160 | NEW ZEALAND;NZ
161 | NICARAGUA;NI
162 | NIGER;NE
163 | NIGERIA;NG
164 | NIUE;NU
165 | NORFOLK ISLAND;NF
166 | NORTHERN MARIANA ISLANDS;MP
167 | NORWAY;NO
168 | OMAN;OM
169 | PAKISTAN;PK
170 | PALAU;PW
171 | PALESTINE, STATE OF;PS
172 | PANAMA;PA
173 | PAPUA NEW GUINEA;PG
174 | PARAGUAY;PY
175 | PERU;PE
176 | PHILIPPINES;PH
177 | PITCAIRN;PN
178 | POLAND;PL
179 | PORTUGAL;PT
180 | PUERTO RICO;PR
181 | QATAR;QA
182 | RÉUNION;RE
183 | ROMANIA;RO
184 | RUSSIAN FEDERATION;RU
185 | RWANDA;RW
186 | SAINT BARTHÉLEMY;BL
187 | SAINT HELENA, ASCENSION AND TRISTAN DA CUNHA;SH
188 | SAINT KITTS AND NEVIS;KN
189 | SAINT LUCIA;LC
190 | SAINT MARTIN (FRENCH PART);MF
191 | SAINT PIERRE AND MIQUELON;PM
192 | SAINT VINCENT AND THE GRENADINES;VC
193 | SAMOA;WS
194 | SAN MARINO;SM
195 | SAO TOME AND PRINCIPE;ST
196 | SAUDI ARABIA;SA
197 | SENEGAL;SN
198 | SERBIA;RS
199 | SEYCHELLES;SC
200 | SIERRA LEONE;SL
201 | SINGAPORE;SG
202 | SINT MAARTEN (DUTCH PART);SX
203 | SLOVAKIA;SK
204 | SLOVENIA;SI
205 | SOLOMON ISLANDS;SB
206 | SOMALIA;SO
207 | SOUTH AFRICA;ZA
208 | SOUTH GEORGIA AND THE SOUTH SANDWICH ISLANDS;GS
209 | SOUTH SUDAN;SS
210 | SPAIN;ES
211 | SRI LANKA;LK
212 | SUDAN;SD
213 | SURINAME;SR
214 | SVALBARD AND JAN MAYEN;SJ
215 | SWAZILAND;SZ
216 | SWEDEN;SE
217 | SWITZERLAND;CH
218 | SYRIAN ARAB REPUBLIC;SY
219 | TAIWAN, PROVINCE OF CHINA;TW
220 | TAJIKISTAN;TJ
221 | TANZANIA, UNITED REPUBLIC OF;TZ
222 | THAILAND;TH
223 | TIMOR-LESTE;TL
224 | TOGO;TG
225 | TOKELAU;TK
226 | TONGA;TO
227 | TRINIDAD AND TOBAGO;TT
228 | TUNISIA;TN
229 | TURKEY;TR
230 | TURKMENISTAN;TM
231 | TURKS AND CAICOS ISLANDS;TC
232 | TUVALU;TV
233 | UGANDA;UG
234 | UKRAINE;UA
235 | UNITED ARAB EMIRATES;AE
236 | UNITED KINGDOM;GB
237 | UNITED STATES;US
238 | UNITED STATES MINOR OUTLYING ISLANDS;UM
239 | URUGUAY;UY
240 | UZBEKISTAN;UZ
241 | VANUATU;VU
242 | VENEZUELA, BOLIVARIAN REPUBLIC OF;VE
243 | VIET NAM;VN
244 | VIRGIN ISLANDS, BRITISH;VG
245 | VIRGIN ISLANDS, U.S.;VI
246 | WALLIS AND FUTUNA;WF
247 | WESTERN SAHARA;EH
248 | YEMEN;YE
249 | ZAMBIA;ZM
250 | ZIMBABWE;ZW


--------------------------------------------------------------------------------
/lib/guessit/api.py:
--------------------------------------------------------------------------------
  1 | #!/usr/bin/env python
  2 | # -*- coding: utf-8 -*-
  3 | """
  4 | API functions that can be used by external software
  5 | """
  6 | try:
  7 |     from collections import OrderedDict
  8 | except ImportError:  # pragma: no-cover
  9 |     from ordereddict import OrderedDict  # pylint:disable=import-error
 10 | 
 11 | import traceback
 12 | 
 13 | import six
 14 | 
 15 | from rebulk.introspector import introspect
 16 | 
 17 | from .rules import rebulk_builder
 18 | from .options import parse_options
 19 | from .__version__ import __version__
 20 | 
 21 | 
 22 | class GuessitException(Exception):
 23 |     """
 24 |     Exception raised when guessit fails to perform a guess because of an internal error.
 25 |     """
 26 |     def __init__(self, string, options):
 27 |         super(GuessitException, self).__init__("An internal error has occured in guessit.\n"
 28 |                                                "===================== Guessit Exception Report =====================\n"
 29 |                                                "version=%s\n"
 30 |                                                "string=%s\n"
 31 |                                                "options=%s\n"
 32 |                                                "--------------------------------------------------------------------\n"
 33 |                                                "%s"
 34 |                                                "--------------------------------------------------------------------\n"
 35 |                                                "Please report at "
 36 |                                                "https://github.com/guessit-io/guessit/issues.\n"
 37 |                                                "====================================================================" %
 38 |                                                (__version__, str(string), str(options), traceback.format_exc()))
 39 | 
 40 |         self.string = string
 41 |         self.options = options
 42 | 
 43 | 
def guessit(string, options=None):
    """
    Retrieves all matches from string as a dict, using the module-level default API instance.

    :param string: the filename or release name
    :type string: str
    :param options: guessing options, forwarded unchanged to ``default_api.guessit``
    :type options: str|dict
    :return: whatever ``default_api.guessit`` returns for this input
    :rtype: dict
    """
    return default_api.guessit(string, options)
 55 | 
 56 | 
def properties(options=None):
    """
    Retrieves all properties with possible values that can be guessed, using the module-level
    default API instance.

    :param options: options forwarded unchanged to ``default_api.properties``
    :type options: str|dict
    :return: whatever ``default_api.properties`` returns (property names mapped to their values)
    :rtype: dict
    """
    return default_api.properties(options)
 66 | 
 67 | 
 68 | class GuessItApi(object):
 69 |     """
 70 |     An api class that can be configured with custom Rebulk configuration.
 71 |     """
 72 | 
 73 |     def __init__(self, rebulk):
 74 |         """
 75 |         :param rebulk: Rebulk instance to use.
 76 |         :type rebulk: Rebulk
 77 |         :return:
 78 |         :rtype:
 79 |         """
 80 |         self.rebulk = rebulk
 81 | 
 82 |     @staticmethod
 83 |     def _fix_option_encoding(value):
 84 |         if isinstance(value, list):
 85 |             return [GuessItApi._fix_option_encoding(item) for item in value]
 86 |         if six.PY2 and isinstance(value, six.text_type):
 87 |             return value.encode("utf-8")
 88 |         if six.PY3 and isinstance(value, six.binary_type):
 89 |             return value.decode('ascii')
 90 |         return value
 91 | 
 92 |     def guessit(self, string, options=None):
 93 |         """
 94 |         Retrieves all matches from string as a dict
 95 |         :param string: the filename or release name
 96 |         :type string: str
 97 |         :param options: the filename or release name
 98 |         :type options: str|dict
 99 |         :return:
100 |         :rtype:
101 |         """
102 |         try:
103 |             options = parse_options(options, True)
104 |             result_decode = False
105 |             result_encode = False
106 | 
107 |             fixed_options = {}
108 |             for (key, value) in options.items():
109 |                 key = GuessItApi._fix_option_encoding(key)
110 |                 value = GuessItApi._fix_option_encoding(value)
111 |                 fixed_options[key] = value
112 |             options = fixed_options
113 | 
114 |             if six.PY2 and isinstance(string, six.text_type):
115 |                 string = string.encode("utf-8")
116 |                 result_decode = True
117 |             if six.PY3 and isinstance(string, six.binary_type):
118 |                 string = string.decode('ascii')
119 |                 result_encode = True
120 |             matches = self.rebulk.matches(string, options)
121 |             if result_decode:
122 |                 for match in matches:
123 |                     if isinstance(match.value, six.binary_type):
124 |                         match.value = match.value.decode("utf-8")
125 |             if result_encode:
126 |                 for match in matches:
127 |                     if isinstance(match.value, six.text_type):
128 |                         match.value = match.value.encode("ascii")
129 |             return matches.to_dict(options.get('advanced', False), options.get('single_value', False),
130 |                                    options.get('enforce_list', False))
131 |         except:
132 |             raise GuessitException(string, options)
133 | 
134 |     def properties(self, options=None):
135 |         """
136 |         Grab properties and values that can be generated.
137 |         :param options:
138 |         :type options:
139 |         :return:
140 |         :rtype:
141 |         """
142 |         unordered = introspect(self.rebulk, options).properties
143 |         ordered = OrderedDict()
144 |         for k in sorted(unordered.keys(), key=six.text_type):
145 |             ordered[k] = list(sorted(unordered[k], key=six.text_type))
146 |         if hasattr(self.rebulk, 'customize_properties'):
147 |             ordered = self.rebulk.customize_properties(ordered)
148 |         return ordered
149 | 
150 | 
151 | default_api = GuessItApi(rebulk_builder())
152 | 


--------------------------------------------------------------------------------
/lib/guessit/__main__.py:
--------------------------------------------------------------------------------
  1 | #!/usr/bin/env python
  2 | # -*- coding: utf-8 -*-
  3 | """
  4 | Entry point module
  5 | """
  6 | # pragma: no cover
  7 | from __future__ import print_function
  8 | 
  9 | import json
 10 | import logging
 11 | import os
 12 | import sys
 13 | 
 14 | import six
 15 | from rebulk.__version__ import __version__ as __rebulk_version__
 16 | 
 17 | from guessit import api
 18 | from guessit.__version__ import __version__
 19 | from guessit.jsonutils import GuessitEncoder
 20 | from guessit.options import argument_parser, parse_options, load_config
 21 | 
 22 | 
 23 | def guess_filename(filename, options):
 24 |     """
 25 |     Guess a single filename using given options
 26 |     :param filename: filename to parse
 27 |     :type filename: str
 28 |     :param options:
 29 |     :type options: dict
 30 |     :return:
 31 |     :rtype:
 32 |     """
 33 |     if not options.get('yaml') and not options.get('json') and not options.get('show_property'):
 34 |         print('For:', filename)
 35 | 
 36 |     guess = api.guessit(filename, options)
 37 | 
 38 |     if options.get('show_property'):
 39 |         print(guess.get(options.get('show_property'), ''))
 40 |         return
 41 | 
 42 |     if options.get('json'):
 43 |         print(json.dumps(guess, cls=GuessitEncoder, ensure_ascii=False))
 44 |     elif options.get('yaml'):
 45 |         import yaml
 46 |         from guessit import yamlutils
 47 | 
 48 |         ystr = yaml.dump({filename: dict(guess)}, Dumper=yamlutils.CustomDumper, default_flow_style=False,
 49 |                          allow_unicode=True)
 50 |         i = 0
 51 |         for yline in ystr.splitlines():
 52 |             if i == 0:
 53 |                 print("? " + yline[:-1])
 54 |             elif i == 1:
 55 |                 print(":" + yline[1:])
 56 |             else:
 57 |                 print(yline)
 58 |             i += 1
 59 |     else:
 60 |         print('GuessIt found:', json.dumps(guess, cls=GuessitEncoder, indent=4, ensure_ascii=False))
 61 | 
 62 | 
 63 | def display_properties(options):
 64 |     """
 65 |     Display properties
 66 |     """
 67 |     properties = api.properties(options)
 68 | 
 69 |     if options.get('json'):
 70 |         if options.get('values'):
 71 |             print(json.dumps(properties, cls=GuessitEncoder, ensure_ascii=False))
 72 |         else:
 73 |             print(json.dumps(list(properties.keys()), cls=GuessitEncoder, ensure_ascii=False))
 74 |     elif options.get('yaml'):
 75 |         import yaml
 76 |         from guessit import yamlutils
 77 |         if options.get('values'):
 78 |             print(yaml.dump(properties, Dumper=yamlutils.CustomDumper, default_flow_style=False, allow_unicode=True))
 79 |         else:
 80 |             print(yaml.dump(list(properties.keys()), Dumper=yamlutils.CustomDumper, default_flow_style=False,
 81 |                             allow_unicode=True))
 82 |     else:
 83 |         print('GuessIt properties:')
 84 | 
 85 |         properties_list = list(sorted(properties.keys()))
 86 |         for property_name in properties_list:
 87 |             property_values = properties.get(property_name)
 88 |             print(2 * ' ' + '[+] %s' % (property_name,))
 89 |             if property_values and options.get('values'):
 90 |                 for property_value in property_values:
 91 |                     print(4 * ' ' + '[!] %s' % (property_value,))
 92 | 
 93 | 
 94 | def main(args=None):  # pylint:disable=too-many-branches
 95 |     """
 96 |     Main function for entry point
 97 |     """
 98 |     if six.PY2 and os.name == 'nt':  # pragma: no cover
 99 |         # see http://bugs.python.org/issue2128
100 |         import locale
101 | 
102 |         for i, j in enumerate(sys.argv):
103 |             sys.argv[i] = j.decode(locale.getpreferredencoding())
104 | 
105 |     if args is None:  # pragma: no cover
106 |         options = parse_options()
107 |     else:
108 |         options = parse_options(args)
109 |     options = load_config(options)
110 |     if options.get('verbose'):
111 |         logging.basicConfig(stream=sys.stdout, format='%(message)s')
112 |         logging.getLogger().setLevel(logging.DEBUG)
113 | 
114 |     help_required = True
115 | 
116 |     if options.get('version'):
117 |         print('+-------------------------------------------------------+')
118 |         print('+                   GuessIt ' + __version__ + (28 - len(__version__)) * ' ' + '+')
119 |         print('+-------------------------------------------------------+')
120 |         print('+                   Rebulk ' + __rebulk_version__ + (29 - len(__rebulk_version__)) * ' ' + '+')
121 |         print('+-------------------------------------------------------+')
122 |         print('|      Please report any bug or feature request at      |')
123 |         print('|     https://github.com/guessit-io/guessit/issues.     |')
124 |         print('+-------------------------------------------------------+')
125 |         help_required = False
126 | 
127 |     if options.get('yaml'):
128 |         try:
129 |             import yaml  # pylint:disable=unused-variable
130 |         except ImportError:  # pragma: no cover
131 |             del options['yaml']
132 |             print('PyYAML is not installed. \'--yaml\' option will be ignored ...', file=sys.stderr)
133 | 
134 |     if options.get('properties') or options.get('values'):
135 |         display_properties(options)
136 |         help_required = False
137 | 
138 |     filenames = []
139 |     if options.get('filename'):
140 |         for filename in options.get('filename'):
141 |             filenames.append(filename)
142 |     if options.get('input_file'):
143 |         if six.PY2:
144 |             input_file = open(options.get('input_file'), 'r')
145 |         else:
146 |             input_file = open(options.get('input_file'), 'r', encoding='utf-8')
147 |         try:
148 |             filenames.extend([line.strip() for line in input_file.readlines()])
149 |         finally:
150 |             input_file.close()
151 | 
152 |     filenames = list(filter(lambda f: f, filenames))
153 | 
154 |     if filenames:
155 |         for filename in filenames:
156 |             help_required = False
157 |             guess_filename(filename, options)
158 | 
159 |     if help_required:  # pragma: no cover
160 |         argument_parser.print_help()
161 | 
162 | 
# Run the command-line entry point only when this file is executed as a script,
# not when it is imported as a module.
if __name__ == '__main__':  # pragma: no cover
    main()
165 | 


--------------------------------------------------------------------------------
/lib/rebulk/loose.py:
--------------------------------------------------------------------------------
  1 | #!/usr/bin/env python
  2 | # -*- coding: utf-8 -*-
  3 | """
  4 | Various utilities functions
  5 | """
  6 | import inspect
  7 | import sys
  8 | from .utils import is_iterable
  9 | 
 10 | if sys.version_info < (3, 4, 0):  # pragma: no cover
 11 |     def _constructor(class_):
 12 |         """
 13 |         Retrieves constructor from given class
 14 | 
 15 |         :param class_:
 16 |         :type class_: class
 17 |         :return: constructor from given class
 18 |         :rtype: callable
 19 |         """
 20 |         return class_.__init__
 21 | else:  # pragma: no cover
 22 |     def _constructor(class_):
 23 |         """
 24 |         Retrieves constructor from given class
 25 | 
 26 |         :param class_:
 27 |         :type class_: class
 28 |         :return: constructor from given class
 29 |         :rtype: callable
 30 |         """
 31 |         return class_
 32 | 
 33 | 
 34 | def call(function, *args, **kwargs):
 35 |     """
 36 |     Call a function or constructor with given args and kwargs after removing args and kwargs that doesn't match
 37 |     function or constructor signature
 38 | 
 39 |     :param function: Function or constructor to call
 40 |     :type function: callable
 41 |     :param args:
 42 |     :type args:
 43 |     :param kwargs:
 44 |     :type kwargs:
 45 |     :return: sale vakye as default function call
 46 |     :rtype: object
 47 |     """
 48 |     func = constructor_args if inspect.isclass(function) else function_args
 49 |     call_args, call_kwargs = func(function, *args, **kwargs)
 50 |     return function(*call_args, **call_kwargs)
 51 | 
 52 | 
 53 | def function_args(callable_, *args, **kwargs):
 54 |     """
 55 |     Return (args, kwargs) matching the function signature
 56 | 
 57 |     :param callable: callable to inspect
 58 |     :type callable: callable
 59 |     :param args:
 60 |     :type args:
 61 |     :param kwargs:
 62 |     :type kwargs:
 63 |     :return: (args, kwargs) matching the function signature
 64 |     :rtype: tuple
 65 |     """
 66 |     argspec = inspect.getargspec(callable_)  # pylint:disable=deprecated-method
 67 |     return argspec_args(argspec, False, *args, **kwargs)
 68 | 
 69 | 
 70 | def constructor_args(class_, *args, **kwargs):
 71 |     """
 72 |     Return (args, kwargs) matching the function signature
 73 | 
 74 |     :param callable: callable to inspect
 75 |     :type callable: Callable
 76 |     :param args:
 77 |     :type args:
 78 |     :param kwargs:
 79 |     :type kwargs:
 80 |     :return: (args, kwargs) matching the function signature
 81 |     :rtype: tuple
 82 |     """
 83 |     argspec = inspect.getargspec(_constructor(class_))  # pylint:disable=deprecated-method
 84 |     return argspec_args(argspec, True, *args, **kwargs)
 85 | 
 86 | 
 87 | def argspec_args(argspec, constructor, *args, **kwargs):
 88 |     """
 89 |     Return (args, kwargs) matching the argspec object
 90 | 
 91 |     :param argspec: argspec to use
 92 |     :type argspec: argspec
 93 |     :param constructor: is it a constructor ?
 94 |     :type constructor: bool
 95 |     :param args:
 96 |     :type args:
 97 |     :param kwargs:
 98 |     :type kwargs:
 99 |     :return: (args, kwargs) matching the function signature
100 |     :rtype: tuple
101 |     """
102 |     if argspec.keywords:
103 |         call_kwarg = kwargs
104 |     else:
105 |         call_kwarg = dict((k, kwargs[k]) for k in kwargs if k in argspec.args)  # Python 2.6 dict comprehension
106 |     if argspec.varargs:
107 |         call_args = args
108 |     else:
109 |         call_args = args[:len(argspec.args) - (1 if constructor else 0)]
110 |     return call_args, call_kwarg
111 | 
112 | 
def ensure_list(param):
    """
    Coerce the given parameter into something list-like.

    Any falsy value yields a new empty list, an iterable value (as decided by ``is_iterable``) is returned
    unchanged, and any other value is wrapped in a single-element list.

    :param param: value to coerce
    :type param:
    :return: list-like version of the parameter
    :rtype:
    """
    if not param:
        return []
    if is_iterable(param):
        return param
    return [param]
127 | 
128 | 
def ensure_dict(param, default_value, default_key=None):
    """
    Build a (dict, default value) pair from the given parameter.

    A falsy parameter is first replaced by ``default_value``. A dict is then returned as is, together with
    ``default_value``. Anything else is stored in a new dict under ``default_key`` and, when truthy, also
    becomes the returned default value.

    :param param: dict, or single value to promote
    :type param:
    :param default_value: value used when param is falsy
    :type default_value:
    :param default_key: key used to store a non-dict param
    :type default_key:
    :return: (dict, default value)
    :rtype: tuple
    """
    value = param or default_value
    if isinstance(value, dict):
        return value, default_value
    promoted_default = value or default_value
    return {default_key: value}, promoted_default
151 | 
152 | 
def filter_index(collection, predicate=None, index=None):
    """
    Apply an optional predicate to a collection, then optionally pick one element by position.

    When only an int is given as second argument, it is taken as the index rather than as a predicate.
    An index falling outside of the (filtered) collection yields None.

    :param collection:
    :type collection: collection supporting iteration and slicing
    :param predicate: function to filter the collection with
    :type predicate: function
    :param index: position of a single element to retrieve
    :type index: int
    :return: filtered list, or single element of filtered list if index is defined
    :rtype: list or object
    """
    if index is None and isinstance(predicate, int):
        predicate, index = None, predicate
    # The filtered result is rebuilt with the class of the input collection.
    filtered = collection.__class__(filter(predicate, collection)) if predicate else collection
    if index is None:
        return filtered
    try:
        return filtered[index]
    except IndexError:
        return None
178 | 
179 | 
def set_defaults(defaults, kwargs):
    """
    Merge the defaults dict into the kwargs dict, in place.

    For each default: a missing key is added unless the default is None, a None default resets an existing key
    to None, a list default is prepended to an existing list, and a dict default is merged recursively into an
    existing dict. Any other existing value is left untouched.

    :param defaults: default values
    :type defaults: dict
    :param kwargs: dict to update
    :type kwargs: dict
    :return:
    :rtype:
    """
    for key, value in defaults.items():
        if key not in kwargs:
            if value is not None:
                kwargs[key] = value
        elif value is None:
            kwargs[key] = None
        elif isinstance(value, list):
            if isinstance(kwargs[key], list):
                kwargs[key] = list(value) + kwargs[key]
        elif isinstance(value, dict):
            if isinstance(kwargs[key], dict):
                set_defaults(value, kwargs[key])
199 | 


--------------------------------------------------------------------------------
/lib/dateutil/tzwin.py:
--------------------------------------------------------------------------------
  1 | # This code was originally contributed by Jeffrey Harris.
  2 | import datetime
  3 | import struct
  4 | 
  5 | from six.moves import winreg
  6 | 
# Names exported by ``from ... import *``.
__all__ = ["tzwin", "tzwinlocal"]

# A seven-day timedelta.
ONEWEEK = datetime.timedelta(7)

# Registry paths opened under HKEY_LOCAL_MACHINE by the code below:
# the two candidate locations of the time zone list, and the local time zone settings.
TZKEYNAMENT = r"SOFTWARE\Microsoft\Windows NT\CurrentVersion\Time Zones"
TZKEYNAME9X = r"SOFTWARE\Microsoft\Windows\CurrentVersion\Time Zones"
TZLOCALKEYNAME = r"SYSTEM\CurrentControlSet\Control\TimeZoneInformation"
 14 | 
 15 | 
 16 | def _settzkeyname():
 17 |     handle = winreg.ConnectRegistry(None, winreg.HKEY_LOCAL_MACHINE)
 18 |     try:
 19 |         winreg.OpenKey(handle, TZKEYNAMENT).Close()
 20 |         TZKEYNAME = TZKEYNAMENT
 21 |     except WindowsError:
 22 |         TZKEYNAME = TZKEYNAME9X
 23 |     handle.Close()
 24 |     return TZKEYNAME
 25 | 
 26 | TZKEYNAME = _settzkeyname()
 27 | 
 28 | 
 29 | class tzwinbase(datetime.tzinfo):
 30 |     """tzinfo class based on win32's timezones available in the registry."""
 31 | 
 32 |     def utcoffset(self, dt):
 33 |         if self._isdst(dt):
 34 |             return datetime.timedelta(minutes=self._dstoffset)
 35 |         else:
 36 |             return datetime.timedelta(minutes=self._stdoffset)
 37 | 
 38 |     def dst(self, dt):
 39 |         if self._isdst(dt):
 40 |             minutes = self._dstoffset - self._stdoffset
 41 |             return datetime.timedelta(minutes=minutes)
 42 |         else:
 43 |             return datetime.timedelta(0)
 44 | 
 45 |     def tzname(self, dt):
 46 |         if self._isdst(dt):
 47 |             return self._dstname
 48 |         else:
 49 |             return self._stdname
 50 | 
 51 |     def list():
 52 |         """Return a list of all time zones known to the system."""
 53 |         handle = winreg.ConnectRegistry(None, winreg.HKEY_LOCAL_MACHINE)
 54 |         tzkey = winreg.OpenKey(handle, TZKEYNAME)
 55 |         result = [winreg.EnumKey(tzkey, i)
 56 |                   for i in range(winreg.QueryInfoKey(tzkey)[0])]
 57 |         tzkey.Close()
 58 |         handle.Close()
 59 |         return result
 60 |     list = staticmethod(list)
 61 | 
 62 |     def display(self):
 63 |         return self._display
 64 | 
 65 |     def _isdst(self, dt):
 66 |         dston = picknthweekday(dt.year, self._dstmonth, self._dstdayofweek,
 67 |                                self._dsthour, self._dstminute,
 68 |                                self._dstweeknumber)
 69 |         dstoff = picknthweekday(dt.year, self._stdmonth, self._stddayofweek,
 70 |                                 self._stdhour, self._stdminute,
 71 |                                 self._stdweeknumber)
 72 |         if dston < dstoff:
 73 |             return dston <= dt.replace(tzinfo=None) < dstoff
 74 |         else:
 75 |             return not dstoff <= dt.replace(tzinfo=None) < dston
 76 | 
 77 | 
 78 | class tzwin(tzwinbase):
 79 | 
 80 |     def __init__(self, name):
 81 |         self._name = name
 82 | 
 83 |         handle = winreg.ConnectRegistry(None, winreg.HKEY_LOCAL_MACHINE)
 84 |         tzkey = winreg.OpenKey(handle, "%s\%s" % (TZKEYNAME, name))
 85 |         keydict = valuestodict(tzkey)
 86 |         tzkey.Close()
 87 |         handle.Close()
 88 | 
 89 |         self._stdname = keydict["Std"].encode("iso-8859-1")
 90 |         self._dstname = keydict["Dlt"].encode("iso-8859-1")
 91 | 
 92 |         self._display = keydict["Display"]
 93 | 
 94 |         # See http://ww_winreg.jsiinc.com/SUBA/tip0300/rh0398.htm
 95 |         tup = struct.unpack("=3l16h", keydict["TZI"])
 96 |         self._stdoffset = -tup[0]-tup[1]          # Bias + StandardBias * -1
 97 |         self._dstoffset = self._stdoffset-tup[2]  # + DaylightBias * -1
 98 | 
 99 |         (self._stdmonth,
100 |          self._stddayofweek,   # Sunday = 0
101 |          self._stdweeknumber,  # Last = 5
102 |          self._stdhour,
103 |          self._stdminute) = tup[4:9]
104 | 
105 |         (self._dstmonth,
106 |          self._dstdayofweek,   # Sunday = 0
107 |          self._dstweeknumber,  # Last = 5
108 |          self._dsthour,
109 |          self._dstminute) = tup[12:17]
110 | 
111 |     def __repr__(self):
112 |         return "tzwin(%s)" % repr(self._name)
113 | 
114 |     def __reduce__(self):
115 |         return (self.__class__, (self._name,))
116 | 
117 | 
class tzwinlocal(tzwinbase):
    """Time zone loaded from the local time zone settings of the registry."""

    def __init__(self):
        """Read the local time zone settings from the registry."""

        handle = winreg.ConnectRegistry(None, winreg.HKEY_LOCAL_MACHINE)
        try:
            tzlocalkey = winreg.OpenKey(handle, TZLOCALKEYNAME)
            try:
                keydict = valuestodict(tzlocalkey)
            finally:
                tzlocalkey.Close()

            stdname = keydict["StandardName"]
            dstname = keydict["DaylightName"]
            if str is bytes:
                # Python 2 only: store byte strings. On Python 3 the names stay
                # as text, since tzname() has to return a str there and the
                # registry path built below needs the plain name.
                stdname = stdname.encode("iso-8859-1")
                dstname = dstname.encode("iso-8859-1")
            self._stdname = stdname
            self._dstname = dstname

            try:
                # Raw string: "\%" is not a valid escape sequence.
                tzkey = winreg.OpenKey(handle, r"%s\%s" % (TZKEYNAME,
                                                           self._stdname))
                try:
                    self._display = valuestodict(tzkey)["Display"]
                finally:
                    tzkey.Close()
            except OSError:
                # Best effort: no display name when the zone entry can't be read.
                self._display = None
        finally:
            handle.Close()

        self._stdoffset = -keydict["Bias"]-keydict["StandardBias"]
        self._dstoffset = self._stdoffset-keydict["DaylightBias"]

        # See http://ww_winreg.jsiinc.com/SUBA/tip0300/rh0398.htm
        tup = struct.unpack("=8h", keydict["StandardStart"])

        (self._stdmonth,
         self._stddayofweek,   # Sunday = 0
         self._stdweeknumber,  # Last = 5
         self._stdhour,
         self._stdminute) = tup[1:6]

        tup = struct.unpack("=8h", keydict["DaylightStart"])

        (self._dstmonth,
         self._dstdayofweek,   # Sunday = 0
         self._dstweeknumber,  # Last = 5
         self._dsthour,
         self._dstminute) = tup[1:6]

    def __reduce__(self):
        # Pickle as "rebuild from the current registry settings".
        return (self.__class__, ())
164 | 
165 | 
def picknthweekday(year, month, dayofweek, hour, minute, whichweek):
    """Return the datetime of the ``whichweek``-th ``dayofweek`` of a month.

    dayofweek == 0 means Sunday, whichweek 5 means last instance

    ``whichweek`` is 1-based and expected in the 1..5 range: 1 is the first
    occurrence of the weekday in the month. When the requested occurrence
    falls in the following month, the previous one (the last of the month)
    is returned.
    """
    first = datetime.datetime(year, month, 1, hour, minute)
    # isoweekday() goes from 1 (Monday) to 7 (Sunday); the modulo gives the
    # distance from the first of the month to the first requested weekday.
    weekdayone = first.replace(day=((dayofweek - first.isoweekday()) % 7 + 1))
    # weekdayone already is occurrence number 1, hence "whichweek - 1" weeks.
    wd = weekdayone + datetime.timedelta(weeks=whichweek - 1)
    if wd.month != month:
        wd -= datetime.timedelta(weeks=1)
    return wd
174 | 
175 | 
def valuestodict(key):
    """Return the values of an open registry key as a {name: data} dictionary."""
    value_count = winreg.QueryInfoKey(key)[1]
    values = {}
    for index in range(value_count):
        # Only the first two fields of each enumerated value are kept.
        name, data = winreg.EnumValue(key, index)[:2]
        values[name] = data
    return values
184 | 


--------------------------------------------------------------------------------
/lib/guessit/rules/properties/audio_codec.py:
--------------------------------------------------------------------------------
  1 | #!/usr/bin/env python
  2 | # -*- coding: utf-8 -*-
  3 | """
  4 | audio_codec, audio_profile and audio_channels property
  5 | """
  6 | from rebulk.remodule import re
  7 | 
  8 | from rebulk import Rebulk, Rule, RemoveMatch
  9 | from ..common import dash
 10 | from ..common.validators import seps_before, seps_after
 11 | 
# Names of the matches handled as audio properties by the rules of this module.
audio_properties = ['audio_codec', 'audio_profile', 'audio_channels']
 13 | 
 14 | 
def audio_codec():
    """
    Builder for rebulk object.

    Registers the audio_codec, audio_profile and audio_channels patterns, then the validation rules
    defined in this module.

    :return: Created Rebulk object
    :rtype: Rebulk
    """
    # Regex patterns ignore case and use the ``dash`` abbreviation; string patterns ignore case.
    rebulk = Rebulk().regex_defaults(flags=re.IGNORECASE, abbreviations=[dash]).string_defaults(ignore_case=True)

    def audio_codec_priority(match1, match2):
        """
        Gives priority to audio_codec

        When one match is an audio_codec and the other an audio_profile or audio_channels, the non-codec
        match is returned (presumably the one to discard -- confirm against the rebulk conflict_solver
        contract). Any other pair returns '__default__'.

        :param match1:
        :type match1:
        :param match2:
        :type match2:
        :return: the audio_profile/audio_channels match of the pair, or '__default__'
        :rtype:
        """
        if match1.name == 'audio_codec' and match2.name in ['audio_profile', 'audio_channels']:
            return match2
        if match1.name in ['audio_profile', 'audio_channels'] and match2.name == 'audio_codec':
            return match1
        return '__default__'

    # --- audio_codec patterns ---
    rebulk.defaults(name="audio_codec", conflict_solver=audio_codec_priority)

    rebulk.regex("MP3", "LAME", r"LAME(?:\d)+-?(?:\d)+", value="MP3")
    rebulk.regex('Dolby', 'DolbyDigital', 'Dolby-Digital', 'DD', 'AC3D?', value='AC3')
    rebulk.regex("DolbyAtmos", "Dolby-Atmos", "Atmos", value="DolbyAtmos")
    rebulk.string("AAC", value="AAC")
    rebulk.string('EAC3', 'DDP', 'DD+', value="EAC3")
    rebulk.string("Flac", value="FLAC")
    rebulk.string("DTS", value="DTS")
    rebulk.regex("True-?HD", value="TrueHD")

    # --- audio_profile patterns ---
    # The tag names the codec value the profile is checked against (see AudioProfileRule).
    rebulk.defaults(name="audio_profile")
    rebulk.string("HD", value="HD", tags="DTS")
    rebulk.regex("HD-?MA", value="HDMA", tags="DTS")
    rebulk.string("HE", value="HE", tags="AAC")
    rebulk.string("LC", value="LC", tags="AAC")
    rebulk.string("HQ", value="HQ", tags="AC3")

    # --- audio_channels patterns ---
    rebulk.defaults(name="audio_channels")
    # Digit, one separator character, digit (e.g. "5.1"), optionally followed by "ch".
    rebulk.regex(r'(7[\W_][01](?:ch)?)(?:[^\d]|$)', value='7.1', children=True)
    rebulk.regex(r'(5[\W_][01](?:ch)?)(?:[^\d]|$)', value='5.1', children=True)
    rebulk.regex(r'(2[\W_]0(?:ch)?)(?:[^\d]|$)', value='2.0', children=True)
    # Bare digits (e.g. "51"): tagged weak, checked later by AudioChannelsValidatorRule.
    rebulk.regex('7[01]', value='7.1', validator=seps_after, tags='weak-audio_channels')
    rebulk.regex('5[01]', value='5.1', validator=seps_after, tags='weak-audio_channels')
    rebulk.string('20', value='2.0', validator=seps_after, tags='weak-audio_channels')
    rebulk.string('7ch', '8ch', value='7.1')
    rebulk.string('5ch', '6ch', value='5.1')
    rebulk.string('2ch', 'stereo', value='2.0')
    rebulk.string('1ch', 'mono', value='1.0')

    rebulk.rules(DtsRule, AacRule, Ac3Rule, AudioValidatorRule, HqConflictRule, AudioChannelsValidatorRule)

    return rebulk
 72 | 
 73 | 
 74 | class AudioValidatorRule(Rule):
 75 |     """
 76 |     Remove audio properties if not surrounded by separators and not next each others
 77 |     """
 78 |     priority = 64
 79 |     consequence = RemoveMatch
 80 | 
 81 |     def when(self, matches, context):
 82 |         ret = []
 83 | 
 84 |         audio_list = matches.range(predicate=lambda match: match.name in audio_properties)
 85 |         for audio in audio_list:
 86 |             if not seps_before(audio):
 87 |                 valid_before = matches.range(audio.start - 1, audio.start,
 88 |                                              lambda match: match.name in audio_properties)
 89 |                 if not valid_before:
 90 |                     ret.append(audio)
 91 |                     continue
 92 |             if not seps_after(audio):
 93 |                 valid_after = matches.range(audio.end, audio.end + 1,
 94 |                                             lambda match: match.name in audio_properties)
 95 |                 if not valid_after:
 96 |                     ret.append(audio)
 97 |                     continue
 98 | 
 99 |         return ret
100 | 
101 | 
class AudioProfileRule(Rule):
    """
    Abstract rule to validate audio profiles

    An audio_profile tagged with the codec of the rule is kept only when an audio_codec match having that
    value is found before it, or else after it.
    """
    priority = 64
    dependency = AudioValidatorRule
    consequence = RemoveMatch

    def __init__(self, codec):
        super(AudioProfileRule, self).__init__()
        self.codec = codec

    def when(self, matches, context):
        def is_rule_codec(match):
            """
            Tell whether the match is an audio_codec with the value of this rule.
            """
            return match.name == 'audio_codec' and match.value == self.codec

        orphans = []
        for profile in matches.named('audio_profile', lambda match: self.codec in match.tags):
            if matches.previous(profile, is_rule_codec) or matches.next(profile, is_rule_codec):
                continue
            orphans.append(profile)
        return orphans
124 | 
125 | 
class DtsRule(AudioProfileRule):
    """
    Audio profile validation bound to the DTS codec
    """

    def __init__(self):
        super(DtsRule, self).__init__(codec="DTS")
133 | 
134 | 
class AacRule(AudioProfileRule):
    """
    Audio profile validation bound to the AAC codec
    """

    def __init__(self):
        super(AacRule, self).__init__(codec="AAC")
142 | 
143 | 
class Ac3Rule(AudioProfileRule):
    """
    Audio profile validation bound to the AC3 codec
    """

    def __init__(self):
        super(Ac3Rule, self).__init__(codec="AC3")
151 | 
152 | 
class HqConflictRule(Rule):
    """
    Solve conflict between HQ from other property and from audio_profile.

    'other' matches sharing their span with an HQ audio_profile are the ones returned.
    """

    dependency = [DtsRule, AacRule, Ac3Rule]
    consequence = RemoveMatch

    def when(self, matches, context):
        hq_spans = [profile.span
                    for profile in matches.named('audio_profile', lambda match: match.value == 'HQ')]
        conflicting = matches.named('other', lambda match: match.span in hq_spans)
        # Nothing (None) is returned when there is no conflict.
        return conflicting if conflicting else None
168 | 
169 | 
class AudioChannelsValidatorRule(Rule):
    """
    Remove weak audio_channels matches that don't directly follow an audio_codec match.
    """
    priority = 128
    consequence = RemoveMatch

    def when(self, matches, context):
        # A weak match is kept only when an audio_codec covers the character right before it.
        return [channels for channels in matches.tagged('weak-audio_channels')
                if not matches.range(channels.start - 1, channels.start,
                                     lambda match: match.name == 'audio_codec')]
187 | 


--------------------------------------------------------------------------------
/lib/babelfish/language.py:
--------------------------------------------------------------------------------
  1 | # -*- coding: utf-8 -*-
  2 | #
  3 | # Copyright (c) 2013 the BabelFish authors. All rights reserved.
  4 | # Use of this source code is governed by the 3-clause BSD license
  5 | # that can be found in the LICENSE file.
  6 | #
  7 | from __future__ import unicode_literals
  8 | from collections import namedtuple
  9 | from functools import partial
 10 | from pkg_resources import resource_stream  # @UnresolvedImport
 11 | from .converters import ConverterManager
 12 | from .country import Country
 13 | from .exceptions import LanguageConvertError
 14 | from .script import Script
 15 | from . import basestr
 16 | 
 17 | 
#: Set of the alpha3 codes found in the data file read below
LANGUAGES = set()
#: List of :class:`IsoLanguage` entries, one per line of the data file, in file order
LANGUAGE_MATRIX = []

#: The namedtuple used in the :data:`LANGUAGE_MATRIX`
IsoLanguage = namedtuple('IsoLanguage', ['alpha3', 'alpha3b', 'alpha3t', 'alpha2', 'scope', 'type', 'name', 'comment'])

# Fill both containers at import time from the tab-separated file shipped with the package.
f = resource_stream('babelfish', 'data/iso-639-3.tab')
f.readline()  # the first line is skipped (presumably a header row -- confirm against the data file)
for l in f:
    # Each line is decoded as UTF-8 and split on tabs, without any stripping.
    iso_language = IsoLanguage(*l.decode('utf-8').split('\t'))
    LANGUAGES.add(iso_language.alpha3)
    LANGUAGE_MATRIX.append(iso_language)
f.close()
 31 | 
 32 | 
class LanguageConverterManager(ConverterManager):
    """:class:`~babelfish.converters.ConverterManager` for language converters"""
    # Entry point group name for language converters.
    entry_point = 'babelfish.language_converters'
    # Converters shipped with the package, written as ``name = module:class``.
    internal_converters = ['alpha2 = babelfish.converters.alpha2:Alpha2Converter',
                           'alpha3b = babelfish.converters.alpha3b:Alpha3BConverter',
                           'alpha3t = babelfish.converters.alpha3t:Alpha3TConverter',
                           'name = babelfish.converters.name:NameConverter',
                           'scope = babelfish.converters.scope:ScopeConverter',
                           'type = babelfish.converters.type:LanguageTypeConverter',
                           'opensubtitles = babelfish.converters.opensubtitles:OpenSubtitlesConverter']

# Module-level manager instance, looked up by converter name in :class:`Language`.
language_converters = LanguageConverterManager()
 45 | 
 46 | 
 47 | class LanguageMeta(type):
 48 |     """The :class:`Language` metaclass
 49 | 
 50 |     Dynamically redirect :meth:`Language.frommycode` to :meth:`Language.fromcode` with the ``mycode`` `converter`
 51 | 
 52 |     """
 53 |     def __getattr__(cls, name):
 54 |         if name.startswith('from'):
 55 |             return partial(cls.fromcode, converter=name[4:])
 56 |         return type.__getattribute__(cls, name)
 57 | 
 58 | 
 59 | class Language(LanguageMeta(str('LanguageBase'), (object,), {})):
 60 |     """A human language
 61 | 
 62 |     A human language is composed of a language part following the ISO-639
 63 |     standard and can be country-specific when a :class:`~babelfish.country.Country`
 64 |     is specified.
 65 | 
 66 |     The :class:`Language` is extensible with custom converters (see :ref:`custom_converters`)
 67 | 
 68 |     :param string language: the language as a 3-letter ISO-639-3 code
 69 |     :param country: the country (if any) as a 2-letter ISO-3166 code or :class:`~babelfish.country.Country` instance
 70 |     :type country: string or :class:`~babelfish.country.Country` or None
 71 |     :param script: the script (if any) as a 4-letter ISO-15924 code or :class:`~babelfish.script.Script` instance
 72 |     :type script: string or :class:`~babelfish.script.Script` or None
 73 |     :param unknown: the unknown language as a three-letters ISO-639-3 code to use as fallback
 74 |     :type unknown: string or None
 75 |     :raise: ValueError if the language could not be recognized and `unknown` is ``None``
 76 | 
 77 |     """
 78 |     def __init__(self, language, country=None, script=None, unknown=None):
 79 |         if unknown is not None and language not in LANGUAGES:
 80 |             language = unknown
 81 |         if language not in LANGUAGES:
 82 |             raise ValueError('%r is not a valid language' % language)
 83 |         self.alpha3 = language
 84 |         self.country = None
 85 |         if isinstance(country, Country):
 86 |             self.country = country
 87 |         elif country is None:
 88 |             self.country = None
 89 |         else:
 90 |             self.country = Country(country)
 91 |         self.script = None
 92 |         if isinstance(script, Script):
 93 |             self.script = script
 94 |         elif script is None:
 95 |             self.script = None
 96 |         else:
 97 |             self.script = Script(script)
 98 | 
 99 |     @classmethod
100 |     def fromcode(cls, code, converter):
101 |         """Create a :class:`Language` by its `code` using `converter` to
102 |         :meth:`~babelfish.converters.LanguageReverseConverter.reverse` it
103 | 
104 |         :param string code: the code to reverse
105 |         :param string converter: name of the :class:`~babelfish.converters.LanguageReverseConverter` to use
106 |         :return: the corresponding :class:`Language` instance
107 |         :rtype: :class:`Language`
108 | 
109 |         """
110 |         return cls(*language_converters[converter].reverse(code))
111 | 
112 |     @classmethod
113 |     def fromietf(cls, ietf):
114 |         """Create a :class:`Language` by from an IETF language code
115 | 
116 |         :param string ietf: the ietf code
117 |         :return: the corresponding :class:`Language` instance
118 |         :rtype: :class:`Language`
119 | 
120 |         """
121 |         subtags = ietf.split('-')
122 |         language_subtag = subtags.pop(0).lower()
123 |         if len(language_subtag) == 2:
124 |             language = cls.fromalpha2(language_subtag)
125 |         else:
126 |             language = cls(language_subtag)
127 |         while subtags:
128 |             subtag = subtags.pop(0)
129 |             if len(subtag) == 2:
130 |                 language.country = Country(subtag.upper())
131 |             else:
132 |                 language.script = Script(subtag.capitalize())
133 |             if language.script is not None:
134 |                 if subtags:
135 |                     raise ValueError('Wrong IETF format. Unmatched subtags: %r' % subtags)
136 |                 break
137 |         return language
138 | 
139 |     def __getstate__(self):
140 |         return self.alpha3, self.country, self.script
141 | 
142 |     def __setstate__(self, state):
143 |         self.alpha3, self.country, self.script = state
144 | 
145 |     def __getattr__(self, name):
146 |         alpha3 = self.alpha3
147 |         country = self.country.alpha2 if self.country is not None else None
148 |         script = self.script.code if self.script is not None else None
149 |         try:
150 |             return language_converters[name].convert(alpha3, country, script)
151 |         except KeyError:
152 |             raise AttributeError(name)
153 | 
154 |     def __hash__(self):
155 |         return hash(str(self))
156 | 
157 |     def __eq__(self, other):
158 |         if isinstance(other, basestr):
159 |             return str(self) == other
160 |         if not isinstance(other, Language):
161 |             return False
162 |         return (self.alpha3 == other.alpha3 and
163 |                 self.country == other.country and
164 |                 self.script == other.script)
165 | 
166 |     def __ne__(self, other):
167 |         return not self == other
168 | 
169 |     def __bool__(self):
170 |         return self.alpha3 != 'und'
171 |     __nonzero__ = __bool__
172 | 
173 |     def __repr__(self):
174 |         return '<Language [%s]>' % self
175 | 
176 |     def __str__(self):
177 |         try:
178 |             s = self.alpha2
179 |         except LanguageConvertError:
180 |             s = self.alpha3
181 |         if self.country is not None:
182 |             s += '-' + str(self.country)
183 |         if self.script is not None:
184 |             s += '-' + str(self.script)
185 |         return s
186 | 


--------------------------------------------------------------------------------
/lib/guessit/rules/properties/release_group.py:
--------------------------------------------------------------------------------
  1 | #!/usr/bin/env python
  2 | # -*- coding: utf-8 -*-
  3 | """
  4 | release_group property
  5 | """
  6 | import copy
  7 | 
  8 | from rebulk import Rebulk, Rule, AppendMatch, RemoveMatch
  9 | 
 10 | from ..common import seps
 11 | from ..common.expected import build_expected_function
 12 | from ..common.comparators import marker_sorted
 13 | from ..common.formatters import cleanup
 14 | from ..common.validators import int_coercable, seps_surround
 15 | from ..properties.title import TitleFromPosition
 16 | 
 17 | 
 18 | def release_group():
 19 |     """
 20 |     Builder for rebulk object.
 21 |     :return: Created Rebulk object
 22 |     :rtype: Rebulk
 23 |     """
 24 |     rebulk = Rebulk()
 25 | 
 26 |     expected_group = build_expected_function('expected_group')
 27 | 
 28 |     rebulk.functional(expected_group, name='release_group', tags=['expected'],
 29 |                       validator=seps_surround,
 30 |                       conflict_solver=lambda match, other: other,
 31 |                       disabled=lambda context: not context.get('expected_group'))
 32 | 
 33 |     return rebulk.rules(SceneReleaseGroup, AnimeReleaseGroup)
 34 | 
 35 | 
# Lowercase words removed by clean_groupname() from the start or the end of a group name.
forbidden_groupnames = ['rip', 'by', 'for', 'par', 'pour', 'bonus']

# Bracket characters left out of the group name separators.
groupname_ignore_seps = '[]{}()'
# Every separator of ``seps`` except the bracket characters above.
groupname_seps = ''.join([c for c in seps if c not in groupname_ignore_seps])
 40 | 
 41 | 
 42 | def clean_groupname(string):
 43 |     """
 44 |     Removes and strip separators from input_string
 45 |     :param string:
 46 |     :type string:
 47 |     :return:
 48 |     :rtype:
 49 |     """
 50 |     string = string.strip(groupname_seps)
 51 |     if not (string.endswith(tuple(groupname_ignore_seps)) and string.startswith(tuple(groupname_ignore_seps))) \
 52 |             and not any(i in string.strip(groupname_ignore_seps) for i in groupname_ignore_seps):
 53 |         string = string.strip(groupname_ignore_seps)
 54 |     for forbidden in forbidden_groupnames:
 55 |         if string.lower().startswith(forbidden) and string[len(forbidden):len(forbidden)+1] in seps:
 56 |             string = string[len(forbidden):]
 57 |             string = string.strip(groupname_seps)
 58 |         if string.lower().endswith(forbidden) and string[-len(forbidden)-1:-len(forbidden)] in seps:
 59 |             string = string[:len(forbidden)]
 60 |             string = string.strip(groupname_seps)
 61 |     return string
 62 | 
 63 | 
# Names of matches that SceneReleaseGroup accepts as the match found right
# before a scene-style release group.
_scene_previous_names = ['video_codec', 'format', 'video_api', 'audio_codec', 'audio_profile', 'video_profile',
                         'audio_channels', 'screen_size', 'other', 'container', 'language', 'subtitle_language',
                         'subtitle_language.suffix', 'subtitle_language.prefix', 'language.suffix']

# Tags that make a preceding match acceptable in the same way as the names above.
_scene_previous_tags = ['release-group-prefix']
 69 | 
 70 | 
class SceneReleaseGroup(Rule):
    """
    Add release_group match in existing matches (scene format).

    Something.XViD-ReleaseGroup.mkv
    """
    dependency = [TitleFromPosition]
    consequence = AppendMatch

    properties = {'release_group': [None]}

    def when(self, matches, context):
        """
        Collect, for each path marker, a hole that qualifies as a scene release group.

        :param matches: current matches; title matches covered by an accepted hole are removed from it here
        :param context: unused by this rule
        :return: holes renamed to 'release_group'
        :rtype: list
        """
        # NOTE(review): no check for an already existing release_group is performed in this method.

        ret = []

        for filepart in marker_sorted(matches.markers.named('path'), matches):
            # pylint:disable=cell-var-from-loop
            start, end = filepart.span

            # Title matches fully contained in this filepart.
            titles = matches.named('title', predicate=lambda m: m.start >= start and m.end <= end)

            def keep_only_first_title(match):
                """
                Tell whether match is a title of this filepart other than the first one.

                Keep only first title from this filepart, as other ones are most likely release group.

                :param match: match to test
                :type match: Match
                :return: True when match is one of titles[1:]
                :rtype: bool
                """
                return match in titles[1:]

            # Hole selected with index=-1 among those whose cleaned-up value is not empty;
            # extra titles are ignored so they can be part of the hole.
            last_hole = matches.holes(start, end + 1, formatter=clean_groupname,
                                      ignore=keep_only_first_title,
                                      predicate=lambda hole: cleanup(hole.value), index=-1)

            if last_hole:
                def previous_match_filter(match):
                    """
                    Filter to apply to find previous match

                    :param match: match to test
                    :type match: Match
                    :return: False for matches starting before the filepart, otherwise True for
                             non-private matches and for matches named in _scene_previous_names
                    :rtype: bool
                    """

                    if match.start < filepart.start:
                        return False
                    return not match.private or match.name in _scene_previous_names

                previous_match = matches.previous(last_hole,
                                                  previous_match_filter,
                                                  index=0)
                # Accept the hole only if the previous match has an allowed name or tag, nothing but
                # separators lies between that match and the hole, and the hole is not a plain number.
                if previous_match and (previous_match.name in _scene_previous_names or
                                       any(tag in previous_match.tags for tag in _scene_previous_tags)) and \
                        not matches.input_string[previous_match.end:last_hole.start].strip(seps) \
                        and not int_coercable(last_hole.value.strip(seps)):

                    last_hole.name = 'release_group'
                    last_hole.tags = ['scene']

                    # if hole is inside a group marker with same value, remove [](){} ...
                    group = matches.markers.at_match(last_hole, lambda marker: marker.name == 'group', 0)
                    if group:
                        group.formatter = clean_groupname
                        if group.value == last_hole.value:
                            # Shrink the hole to the inside of the group marker and retag it.
                            last_hole.start = group.start + 1
                            last_hole.end = group.end - 1
                            last_hole.tags = ['anime']

                    # Extra titles covered by the release group are dropped from matches.
                    ignored_matches = matches.range(last_hole.start, last_hole.end, keep_only_first_title)

                    for ignored_match in ignored_matches:
                        matches.remove(ignored_match)

                    ret.append(last_hole)
        return ret
150 | 
151 | 
class AnimeReleaseGroup(Rule):
    """
    Add release_group match in existing matches (anime format)
    ...[ReleaseGroup] Something.mkv
    """
    dependency = [SceneReleaseGroup, TitleFromPosition]
    consequence = [RemoveMatch, AppendMatch]

    properties = {'release_group': [None]}

    def when(self, matches, context):
        """
        Turn a qualifying group marker of each filepart into a release_group match.

        :param matches: current matches and markers
        :param context: unused by this rule
        :return: ``(to_remove, to_append)`` lists, or None when a release_group already exists
        :rtype: tuple or None
        """
        # If a release_group is found before, ignore this kind of release_group rule.
        # (A second guard that also required an existing release_group used to
        # follow; it could never be reached after this return and was removed.)
        if matches.named('release_group'):
            return None

        to_remove = []
        to_append = []

        def is_weak_language(match):
            """True for matches tagged 'weak-language'."""
            return 'weak-language' in match.tags

        def is_candidate_group(marker):
            """
            Group marker holding nothing but weak-language matches, whose value
            is neither empty once separators are stripped nor a plain number.
            """
            return (marker.name == 'group'
                    and not matches.range(marker.start, marker.end,
                                          lambda m: 'weak-language' not in m.tags)
                    and marker.value.strip(seps)
                    and not int_coercable(marker.value.strip(seps)))

        for filepart in marker_sorted(matches.markers.named('path'), matches):
            empty_group = matches.markers.range(filepart.start, filepart.end, is_candidate_group, 0)

            if empty_group:
                # Work on a copy so the marker itself is left untouched.
                group = copy.copy(empty_group)
                group.marker = False
                # Exclude the enclosing bracket characters from the raw span.
                group.raw_start += 1
                group.raw_end -= 1
                group.tags = ['anime']
                group.name = 'release_group'
                to_append.append(group)
                # Weak-language matches inside the group are superseded by the release group.
                to_remove.extend(matches.range(empty_group.start, empty_group.end, is_weak_language))
        return to_remove, to_append
197 | 


--------------------------------------------------------------------------------
/lib/guessit/rules/processors.py:
--------------------------------------------------------------------------------
  1 | #!/usr/bin/env python
  2 | # -*- coding: utf-8 -*-
  3 | """
  4 | Processors
  5 | """
  6 | from collections import defaultdict
  7 | import copy
  8 | 
  9 | import six
 10 | 
 11 | from rebulk import Rebulk, Rule, CustomRule, POST_PROCESS, PRE_PROCESS, AppendMatch, RemoveMatch
 12 | 
 13 | from .common import seps_no_groups
 14 | from .common.formatters import cleanup
 15 | from .common.comparators import marker_sorted
 16 | from .common.date import valid_year
 17 | from .common.words import iter_words
 18 | 
 19 | 
 20 | class EnlargeGroupMatches(CustomRule):
 21 |     """
 22 |     Enlarge matches that are starting and/or ending group to include brackets in their span.
 23 |     """
 24 |     priority = PRE_PROCESS
 25 | 
 26 |     def when(self, matches, context):
 27 |         starting = []
 28 |         ending = []
 29 | 
 30 |         for group in matches.markers.named('group'):
 31 |             for match in matches.starting(group.start + 1):
 32 |                 starting.append(match)
 33 | 
 34 |             for match in matches.ending(group.end - 1):
 35 |                 ending.append(match)
 36 | 
 37 |         if starting or ending:
 38 |             return starting, ending
 39 | 
 40 |     def then(self, matches, when_response, context):
 41 |         starting, ending = when_response
 42 |         for match in starting:
 43 |             matches.remove(match)
 44 |             match.start -= 1
 45 |             match.raw_start += 1
 46 |             matches.append(match)
 47 | 
 48 |         for match in ending:
 49 |             matches.remove(match)
 50 |             match.end += 1
 51 |             match.raw_end -= 1
 52 |             matches.append(match)
 53 | 
 54 | 
 55 | class EquivalentHoles(Rule):
 56 |     """
 57 |     Creates equivalent matches for holes that have same values than existing (case insensitive)
 58 |     """
 59 |     priority = POST_PROCESS
 60 |     consequence = AppendMatch
 61 | 
 62 |     def when(self, matches, context):
 63 |         new_matches = []
 64 | 
 65 |         for filepath in marker_sorted(matches.markers.named('path'), matches):
 66 |             holes = matches.holes(start=filepath.start, end=filepath.end, formatter=cleanup)
 67 |             for name in matches.names:
 68 |                 for hole in list(holes):
 69 |                     for current_match in matches.named(name):
 70 |                         if isinstance(current_match.value, six.string_types) and \
 71 |                                         hole.value.lower() == current_match.value.lower():
 72 |                             if 'equivalent-ignore' in current_match.tags:
 73 |                                 continue
 74 |                             new_value = _preferred_string(hole.value, current_match.value)
 75 |                             if hole.value != new_value:
 76 |                                 hole.value = new_value
 77 |                             if current_match.value != new_value:
 78 |                                 current_match.value = new_value
 79 |                             hole.name = name
 80 |                             hole.tags = ['equivalent']
 81 |                             new_matches.append(hole)
 82 |                             if hole in holes:
 83 |                                 holes.remove(hole)
 84 | 
 85 |         return new_matches
 86 | 
 87 | 
 88 | class RemoveAmbiguous(Rule):
 89 |     """
 90 |     If multiple matches are found with same name and different values, keep the one in the most valuable filepart.
 91 |     Also keep others match with same name and values than those kept ones.
 92 |     """
 93 | 
 94 |     priority = POST_PROCESS
 95 |     consequence = RemoveMatch
 96 | 
 97 |     def __init__(self, sort_function=marker_sorted, predicate=None):
 98 |         super(RemoveAmbiguous, self).__init__()
 99 |         self.sort_function = sort_function
100 |         self.predicate = predicate
101 | 
102 |     def when(self, matches, context):
103 |         fileparts = self.sort_function(matches.markers.named('path'), matches)
104 | 
105 |         previous_fileparts_names = set()
106 |         values = defaultdict(list)
107 | 
108 |         to_remove = []
109 |         for filepart in fileparts:
110 |             filepart_matches = matches.range(filepart.start, filepart.end, predicate=self.predicate)
111 | 
112 |             filepart_names = set()
113 |             for match in filepart_matches:
114 |                 filepart_names.add(match.name)
115 |                 if match.name in previous_fileparts_names:
116 |                     if match.value not in values[match.name]:
117 |                         to_remove.append(match)
118 |                 else:
119 |                     if match.value not in values[match.name]:
120 |                         values[match.name].append(match.value)
121 | 
122 |             previous_fileparts_names.update(filepart_names)
123 | 
124 |         return to_remove
125 | 
126 | 
class RemoveLessSpecificSeasonEpisode(RemoveAmbiguous):
    """
    When several season/episode matches disagree on their value,
    keep the one tagged as 'SxxExx' or found in the rightmost filepart.
    """
    def __init__(self, name):
        def has_name(match):
            """Restrict the rule to matches called ``name``."""
            return match.name == name

        def is_sxxexx(match):
            """Matches called ``name`` that carry the 'SxxExx' tag."""
            return match.name == name and 'SxxExx' in match.tags

        def sort_markers(markers, matches):
            """Sort the reversed markers, passing the SxxExx predicate to marker_sorted."""
            return marker_sorted(list(reversed(markers)), matches, is_sxxexx)

        super(RemoveLessSpecificSeasonEpisode, self).__init__(sort_function=sort_markers,
                                                              predicate=has_name)
138 | 
139 | 
140 | def _preferred_string(value1, value2):  # pylint:disable=too-many-return-statements
141 |     """
142 |     Retrieves preferred title from both values.
143 |     :param value1:
144 |     :type value1: str
145 |     :param value2:
146 |     :type value2: str
147 |     :return: The preferred title
148 |     :rtype: str
149 |     """
150 |     if value1 == value2:
151 |         return value1
152 |     if value1.istitle() and not value2.istitle():
153 |         return value1
154 |     if not value1.isupper() and value2.isupper():
155 |         return value1
156 |     if not value1.isupper() and value1[0].isupper() and not value2[0].isupper():
157 |         return value1
158 |     if _count_title_words(value1) > _count_title_words(value2):
159 |         return value1
160 |     return value2
161 | 
162 | 
def _count_title_words(value):
    """
    Count how many words of value are title-cased.

    :param value: string to inspect
    :type value: str
    :return: number of words for which ``istitle()`` is true
    :rtype: int
    """
    return sum(1 for word in iter_words(value) if word.value.istitle())
176 | 
177 | 
class SeasonYear(Rule):
    """
    When a season is a valid year and no year match exists, create a year match from it.
    """
    priority = POST_PROCESS
    consequence = AppendMatch

    def when(self, matches, context):
        """Return copies of the year-like season matches, renamed to 'year'."""
        if matches.named('year'):
            return []

        years = []
        for season in matches.named('season'):
            if not valid_year(season.value):
                continue
            as_year = copy.copy(season)
            as_year.name = 'year'
            years.append(as_year)
        return years
194 | 
195 | 
class Processors(CustomRule):
    """
    No-op rule that only exists so post-processing rules can be ordered properly.
    """
    priority = POST_PROCESS

    def when(self, matches, context):
        """Never triggers: always answers None."""
        return None

    def then(self, matches, when_response, context):  # pragma: no cover
        """Nothing to do."""
        return None
207 | 
208 | 
class StripSeparators(CustomRule):
    """
    Strip separators from matches. Keep separators if they are from acronyms, like in ".S.H.I.E.L.D."
    """
    priority = POST_PROCESS

    def when(self, matches, context):
        # Always hand the whole matches object to then().
        return matches

    def then(self, matches, when_response, context):  # pragma: no cover
        """
        Move raw_start/raw_end inwards for each match whose raw value starts or ends with a separator.

        :param matches: matches to adjust in place
        :param when_response: value returned by when(), unused here
        :param context: unused by this rule
        """
        for match in matches:
            # NOTE(review): span is unpacked as (start, end) elsewhere in this project, so
            # len(match.span) presumably bounds each loop to two passes -- confirm this is intended.
            for _ in range(0, len(match.span)):
                # Drop a leading separator, unless the character two positions further is a
                # separator as well (the acronym case from the class docstring).
                if match.raw[0] in seps_no_groups and (len(match.raw) < 3 or match.raw[2] not in seps_no_groups):
                    match.raw_start += 1

            for _ in reversed(range(0, len(match.span))):
                # Same logic, mirrored for the trailing end of the raw value.
                if match.raw[-1] in seps_no_groups and (len(match.raw) < 3 or match.raw[-3] not in seps_no_groups):
                    match.raw_end -= 1
227 | 
228 | 
def processors():
    """
    Build the Rebulk object that registers every processor rule of this module.

    :return: Created Rebulk object
    :rtype: Rebulk
    """
    builder = Rebulk()
    ordered_rules = (
        EnlargeGroupMatches,
        EquivalentHoles,
        RemoveLessSpecificSeasonEpisode('season'),
        RemoveLessSpecificSeasonEpisode('episode'),
        RemoveAmbiguous,
        SeasonYear,
        Processors,
        StripSeparators,
    )
    return builder.rules(*ordered_rules)
239 | 


--------------------------------------------------------------------------------
/testdata.json:
--------------------------------------------------------------------------------
  1 | [
  2 |   {
  3 |     "id": "up-ext-1",
  4 |     "INPUTFILE": "71.2014.720p.Web-Dl.x264.Aac-IFT.MKV",
  5 |     "OUTPUTFILE": "/movies/71 (2014).MKV",
  6 |     "NZBPO_MOVIESFORMAT": "%t (%y).%ext"
  7 |   },
  8 |   {
  9 |     "id": "lead-num-2",
 10 |     "INPUTFILE": "71.2014.720p.Web-Dl.x264.Aac-IFT.mkv",
 11 |     "OUTPUTFILE": "/movies/71 (2014).mkv",
 12 |     "NZBPO_MOVIESFORMAT": "%t (%y).%ext"
 13 |   },
 14 |   {
 15 |     "id": "lead-num-3",
 16 |     "INPUTFILE": "22 Jump Street 2014 1080p BluRay x264 YIFY.mkv",
 17 |     "OUTPUTFILE": "/movies/22 Jump Street (2014).mkv",
 18 |     "NZBPO_MOVIESFORMAT": "%t (%y).%ext"
 19 |   },
 20 |   {
 21 |     "id": "lead-num-4",
 22 |     "INPUTFILE": "2001 A Space Odyssey (1968).mkv",
 23 |     "OUTPUTFILE": "/movies/2001 a Space Odyssey (1968)/2001 a Space Odyssey (1968).mkv",
 24 |     "NZBPO_MOVIESFORMAT": "%title (%y)/%title (%y).%ext"
 25 |   },
 26 |   {
 27 |     "id": "up-1",
 28 |     "INPUTFILE": "Cartoon.2014.720p.Web-Dl.x264.Aac-IFT.mkv",
 29 |     "OUTPUTFILE": "/Cartoon (2014).mkv",
 30 |     "NZBPO_MOVIESFORMAT": "%up/%t (%y).%ext",
 31 |     "NZBPO_MOVIESDIR": "",
 32 |     "NZBPP_CATEGORY": "Kids cartoons"
 33 |   },
 34 |   {
 35 |     "id": "cat-2",
 36 |     "INPUTFILE": "Cartoon.2014.720p.Web-Dl.x264.Aac-IFT.mkv",
 37 |     "OUTPUTFILE": "/Kids.Cartoons/Cartoon (2014).mkv",
 38 |     "NZBPO_MOVIESFORMAT": "%.cat/%t (%y).%ext",
 39 |     "NZBPO_MOVIESDIR": "",
 40 |     "NZBPP_CATEGORY": "Kids cartoons"
 41 |   },
 42 |   {
 43 |     "id": "movies-title-case-correct",
 44 |     "INPUTFILE": "the.silence.of.the.lambs.1991.1080p.bluray.custom.plus.criterion.comm.dts.x264-mag.mkv",
 45 |     "OUTPUTFILE": "/movies/The Silence of the Lambs 1991.mkv",
 46 |     "NZBPO_MOVIESFORMAT": "%t %y.%ext"
 47 |   },
 48 |   {
 49 |     "id": "movies-title-case-correct-_",
 50 |     "INPUTFILE": "the.silence.of.the.lambs.1991.1080p.bluray.custom.plus.criterion.comm.dts.x264-mag.mkv",
 51 |     "OUTPUTFILE": "/movies/The_Silence_of_the_Lambs 1991.mkv",
 52 |     "NZBPO_MOVIESFORMAT": "%_t %y.%ext"
 53 |   },
 54 |   {
 55 |     "id": "movies-title-case-preserve",
 56 |     "INPUTFILE": "the.silence.of.the.lambs.1991.1080p.bluray.custom.plus.criterion.comm.dts.x264-mag.mkv",
 57 |     "OUTPUTFILE": "/movies/the silence of the lambs 1991.mkv",
 58 |     "NZBPO_MOVIESFORMAT": "%tT %y.%ext"
 59 |   },
 60 |   {
 61 |     "id": "mini-1",
 62 |     "INPUTFILE": "Band.of.Brothers.E10.Points.720p.BRRip.mkv",
 63 |     "OUTPUTFILE": "/series/Mkv/Band of Brothers/Season 1/Band_of_Brothers - S01E10 - Points - 720p.BluRay.mkv",
 64 |     "NZBPO_SERIESFORMAT": "%Ext/%sn %y/Season %s/- %s_n - S%0sE%0e - %en - %qss.%qf.%ext"
 65 |   },
 66 |   {
 67 |     "id": "mini-2",
 68 |     "INPUTFILE": "Band.of.Brothers.EP10.Points.720p.BRRip.mkv",
 69 |     "OUTPUTFILE": "/series/Mkv/Band of Brothers/Season 1/Band_of_Brothers - S01E10 - Points - 720p.BluRay.mkv",
 70 |     "NZBPO_SERIESFORMAT": "%Ext/%sn %y/Season %s/- %s_n - S%0sE%0e - %en - %qss.%qf.%ext"
 71 |   },
 72 |   {
 73 |     "id": "mini-3",
 74 |     "INPUTFILE": "The.Pacific.2010.EP09.BluRay.720p.DTS.x264-CHD.mkv",
 75 |     "OUTPUTFILE": "/series/Mkv/The Pacific 2010/Season 1/The_Pacific_2010 - S01E09 - 720p.BluRay.mkv",
 76 |     "NZBPO_SERIESFORMAT": "%Ext/%sn/Season %s/- %s_n - S%0sE%0e - %en - %qss.%qf.%ext"
 77 |   },
 78 |   {
 79 |     "id": "mini-4",
 80 |     "INPUTFILE": "Ascension.Part.3.HDTV.x264-SYS.mkv",
 81 |     "OUTPUTFILE": "/series/Mkv/Ascension/Season 1/Ascension - S01E03.mkv",
 82 |     "NZBPO_SERIESFORMAT": "%Ext/%sn %y/Season %s/- %s_n - S%0sE%0e.%ext"
 83 |   },
 84 |   {
 85 |     "id": "mini-5",
 86 |     "INPUTFILE": "The.Pacific.Pt.II.720p.HDTV.x264-IMMERSE.mkv",
 87 |     "OUTPUTFILE": "/series/Mkv/The Pacific/Season 1/The_Pacific - S01E02.mkv",
 88 |     "NZBPO_SERIESFORMAT": "%Ext/%sn %y/Season %s/- %s_n - S%0sE%0e.%ext"
 89 |   },
 90 |   {
 91 |     "id": "series-1",
 92 |     "INPUTFILE": "The.Walking.Dead.2010.S01E04.BluRay.1080p.DD5.1.x264-CHD/fdlasdflkjghfklgsdfl.mkv",
 93 |     "OUTPUTFILE": "/series/Mkv/The Walking Dead 2010/Season 1/The_Walking_Dead_2010 - S01E04 - 1080p.BluRay.mkv",
 94 |     "NZBPO_SERIESFORMAT": "%Ext/%sn/Season %s/- %s_n - S%0sE%0e - %en - %qss.%qf.%ext"
 95 |   },
 96 |   {
 97 |     "id": "series-2",
 98 |     "INPUTFILE": "Doctor Who 2005 S00E09 Christmas Special 720p BluRay x264-SHORTBREHD.mkv",
 99 |     "OUTPUTFILE": "/series/Mkv/Doctor Who 2005 2005/Season 0/Doctor_Who_2005 - S00E09 - Christmas Special - 720p.BluRay.mkv",
100 |     "NZBPO_SERIESYEAR": "yes",
101 |     "NZBPO_SERIESFORMAT": "%Ext/%sn %y/Season %s/- %s_n - S%0sE%0e - %en - %qss.%qf.%ext"
102 |   },
103 |   {
104 |     "id": "series-3",
105 |     "INPUTFILE": "Doctor Who 2005 S00E09 Christmas Special 2008 720p BluRay x264-SHORTBREHD.mkv",
106 |     "OUTPUTFILE": "/series/Mkv/Doctor Who 2008/Season 0/Doctor_Who - S00E09 - Christmas Special - 720p.BluRay.mkv",
107 |     "NZBPO_SERIESYEAR": "no",
108 |     "NZBPO_SERIESFORMAT": "%Ext/%sn %y/Season %s/- %s_n - S%0sE%0e - %en - %qss.%qf.%ext"
109 |   },
110 |   {
111 |     "id": "series-4",
112 |     "INPUTFILE": "Castle.S01E02.720p.BluRay.x264-SiNNERS.mkv",
113 |     "OUTPUTFILE": "/series/Mkv/Castle/Season 1/Castle - S01E02 - 720p.BluRay.mkv",
114 |     "NZBPO_SERIESFORMAT": "%Ext/%sn %y/Season %s/- %s_n - S%0sE%0e - %en - %qss.%qf.%ext"
115 |   },
116 |   {
117 |     "id": "series-5",
118 |     "INPUTFILE": "Doctor Who 2005 S00E09 Christmas Special 2008 720p BluRay x264-SHORTBREHD.mkv",
119 |     "OUTPUTFILE": "/series/Mkv/Doctor Who 2005 2008/Season 0/Doctor_Who_2005 - S00E09 - Christmas Special - 720p.BluRay.mkv",
120 |     "NZBPO_SERIESYEAR": "yes",
121 |     "NZBPO_SERIESFORMAT": "%Ext/%sn %y/Season %s/- %s_n - S%0sE%0e - %en - %qss.%qf.%ext"
122 |   },
123 |   {
124 |     "id": "series-6",
125 |     "INPUTFILE": "Orphan.Black.S05E04.720p.HDTV.x264-AVS.mkv",
126 |     "OUTPUTFILE": "/series/Orphan Black/Season 05/Orphan.Black.S05E04.Let.the.Children.the.Childbearers.Toil.mkv",
127 |     "NZBPO_SERIESYEAR": "yes",
128 |     "NZBPO_SERIESFORMAT": "%sn/Season %0s/%s.n.S%0sE%0e.%e.n",
129 |     "NZBPO_DNZBHEADERS": "yes",
130 |     "NZBPR__DNZB_PROPERNAME": "Orphan Black",
131 |     "NZBPR__DNZB_EPISODENAME": "Let the Children the Childbearers Toil"
132 |   },
133 |   {
134 |     "id": "dated-deprecated-t-1",
135 |     "INPUTFILE": "The.Daily.Show.2013.06.27.Tom.Goldstein.HDTV.x264-FQM.mkv",
136 |     "OUTPUTFILE": "/dated/2013-06/The Daily Show - 2013-6-27.mkv",
137 |     "NZBPO_DATEDFORMAT": "%y-%0m/%t - %y-%m-%0d.%ext"
138 |   },
139 |   {
140 |     "id": "dated-deprecated-t-2",
141 |     "INPUTFILE": "Real.Time.with.Bill.Maher.2014.10.31.720p.HDTV.x264-BATV.mkv",
142 |     "OUTPUTFILE": "/dated/2014-10/Real Time With Bill Maher - 2014-10-31.mkv",
143 |     "NZBPO_DATEDFORMAT": "%y-%0m/%t - %y-%m-%0d.%ext"
144 |   },
145 |   {
146 |     "id": "dated-case-correct",
147 |     "INPUTFILE": "the.daily.show.2013.6.2.tom.goldstein.HDTV.x264-FQM.mkv",
148 |     "OUTPUTFILE": "/dated/2013-06/The Daily Show - Tom Goldstein - 2013-6-02.mkv",
149 |     "NZBPO_DATEDFORMAT": "%y-%0m/%sn - %en - %y-%m-%0d.%ext"
150 |   },
151 |   {
152 |     "id": "dated-case-correct-_",
153 |     "INPUTFILE": "the.daily.show.2013.6.2.tom.goldstein.HDTV.x264-FQM.mkv",
154 |     "OUTPUTFILE": "/dated/2013-06/The_Daily_Show - Tom_Goldstein - 2013-6-02.mkv",
155 |     "NZBPO_DATEDFORMAT": "%y-%0m/%s_n - %e_n - %y-%m-%0d.%ext"
156 |   },
157 |   {
158 |     "id": "dated-case-preserve",
159 |     "INPUTFILE": "the.daily.show.2013.6.2.tom.goldstein.HDTV.x264-FQM.mkv",
160 |     "OUTPUTFILE": "/dated/2013-06/the daily show - tom goldstein - 2013-6-02.mkv",
161 |     "NZBPO_DATEDFORMAT": "%y-%0m/%sN - %eN - %y-%m-%0d.%ext"
162 |   },
163 |   {
164 |     "id": "multi-1",
165 |     "INPUTFILE": "Castle.S01E02E03.720p.BluRay.x264-SiNNERS.mkv",
166 |     "OUTPUTFILE": "/series/Mkv/Castle/Season 1/Castle - S01E02-03 - 720p.BluRay.mkv",
167 |     "NZBPO_SERIESFORMAT": "%Ext/%sn %y/Season %s/- %s_n - S%0sE%0e - %en - %qss.%qf.%ext"
168 |   },
169 |   {
170 |     "id": "multi-2",
171 |     "INPUTFILE": "Castle.S01E02E03E04.720p.BluRay.x264-SiNNERS.mkv",
172 |     "OUTPUTFILE": "/series/Mkv/Castle/Season 1/Castle - S01E02-03-04 - 720p.BluRay.mkv",
173 |     "NZBPO_SERIESFORMAT": "%Ext/%sn %y/Season %s/- %s_n - S%0sE%0e - %en - %qss.%qf.%ext"
174 |   },
175 |   {
176 |     "id": "multi-3",
177 |     "INPUTFILE": "Castle.S01E02E03E04.720p.BluRay.x264-SiNNERS.mkv",
178 |     "OUTPUTFILE": "/series/Mkv/Castle/Season 1/Castle - S01E02-E04 - 720p.BluRay.mkv",
179 |     "NZBPO_SERIESFORMAT": "%Ext/%sn %y/Season %s/- %s_n - S%0sE%0e - %en - %qss.%qf.%ext",
180 |     "NZBPO_MULTIPLEEPISODES": "range",
181 |     "NZBPO_EPISODESEPARATOR": "-E"
182 |   },
183 |   {
184 |     "id": "group-20-40",
185 |     "INPUTFILE": "Fargo.1996.REMASTERED.BluRay.720p.H264-20-40.mp4",
186 |     "OUTPUTFILE": "/movies/Fargo (1996).mp4",
187 |     "NZBPO_MOVIESFORMAT": "%t (%y).%ext"
188 |   },
189 |   {
190 |     "id": "multi-cat",
191 |     "INPUTFILE": "Bohemian.Rhapsody.2018.REMUX.2160p.(10bit).BluRay.UHD.HDR.HEVC.TrueHD.DTS-HD.MA.7.1-LEGi0N.mkv",
192 |     "OUTPUTFILE": "/movies/Bohemian Rhapsody (2018) BluRay-4K h265 7.1 TrueHD.DTS.mkv",
193 |     "NZBPO_MOVIESFORMAT": "/movies/%t (%y) %qf-%qss %qvc %qah %qac.%ext"
194 |   }
195 | ]
196 | 


--------------------------------------------------------------------------------