├── tests
    ├── __init__.py
    ├── test_core.py
    ├── test_utils.py
    ├── test_processors.py
    ├── test_summary.py
    └── test_expressions.py
├── .coveragerc
├── docs
    ├── example.png
    ├── utils.rst
    ├── index.rst
    ├── get_started.rst
    ├── processors.rst
    └── conf.py
├── .gitignore
├── setup.cfg
├── MANIFEST.in
├── .isort.cfg
├── test_requirements.txt
├── pytest.ini
├── tox.ini
├── .pylintrc
├── .travis.yml
├── typus
    ├── processors
    │   ├── __init__.py
    │   ├── base.py
    │   ├── escapes.py
    │   ├── quotes.py
    │   └── expressions.py
    ├── __init__.py
    ├── chars.py
    ├── core.py
    └── utils.py
├── setup.py
├── LICENSE
└── README.rst


/tests/__init__.py:
--------------------------------------------------------------------------------
1 | 


--------------------------------------------------------------------------------
/.coveragerc:
--------------------------------------------------------------------------------
1 | [run]
2 | source = typus 
3 | 
4 | [report]
5 | omit = *tests*
6 | 


--------------------------------------------------------------------------------
/docs/example.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/byashimov/typus/HEAD/docs/example.png


--------------------------------------------------------------------------------
/.gitignore:
--------------------------------------------------------------------------------
1 | *.egg-info
2 | *.py[co]
3 | .coverage
4 | __pycache__
5 | build/
6 | dist/
7 | 


--------------------------------------------------------------------------------
/setup.cfg:
--------------------------------------------------------------------------------
1 | [bdist_wheel]
2 | universal = 1
3 | 
4 | [metadata]
5 | description-file = README.rst


--------------------------------------------------------------------------------
/docs/utils.rst:
--------------------------------------------------------------------------------
1 | .. _Utils:
2 | 
3 | Utils
4 | ======
5 | 
6 | .. automodule:: typus.utils
7 |     :members:


--------------------------------------------------------------------------------
/MANIFEST.in:
--------------------------------------------------------------------------------
1 | include LICENSE
2 | include MANIFEST.in
3 | graft typus
4 | graft tests
5 | global-exclude __pycache__
6 | global-exclude *.py[co]


--------------------------------------------------------------------------------
/.isort.cfg:
--------------------------------------------------------------------------------
1 | [settings]
2 | skip=.tox
3 | not_skip=__init__.py
4 | multi_line_output = 3
5 | balanced_wrapping = 1
6 | include_trailing_comma = 1
7 | 


--------------------------------------------------------------------------------
/test_requirements.txt:
--------------------------------------------------------------------------------
1 | pytest==3.6.3
2 | pytest-cov==2.5.1
3 | pytest-pylint==0.11.0
4 | pytest-mock==1.10.0
5 | pytest-isort==0.2.0
6 | Sphinx==1.7.6
7 | 


--------------------------------------------------------------------------------
/pytest.ini:
--------------------------------------------------------------------------------
 1 | [pytest]
 2 | addopts =
 3 |     -v
 4 |     -rs
 5 |     --cov=typus
 6 |     --cov-report=term-missing
 7 |     --pylint
 8 |     --doctest-modules
 9 |     --isort
10 | 


--------------------------------------------------------------------------------
/tox.ini:
--------------------------------------------------------------------------------
1 | [testenv]
2 | deps = -rtest_requirements.txt
3 | commands =
4 |     pytest --cache-clear
5 |     sphinx-build -b doctest docs build
6 |     python -m doctest README.rst
7 | 


--------------------------------------------------------------------------------
/.pylintrc:
--------------------------------------------------------------------------------
 1 | [MASTER]
 2 | ignore=docs
 3 | disable=
 4 |     missing-docstring,
 5 |     wildcard-import,
 6 |     unused-wildcard-import,
 7 |     too-few-public-methods,
 8 |     invalid-name,
 9 |     arguments-differ,
10 |     too-many-instance-attributes,
11 | 


--------------------------------------------------------------------------------
/.travis.yml:
--------------------------------------------------------------------------------
 1 | language: python
 2 | sudo: required
 3 | dist: xenial
 4 | python:
 5 |   - "3.6"
 6 |   - "3.7"
 7 | cache:
 8 |   directories:
 9 |     - $HOME/.cache/pip
10 | install:
11 |   - travis_retry pip install tox-travis codecov
12 | script:
13 |   - tox
14 | after_success:
15 |   - coverage report
16 |   - codecov
17 | 


--------------------------------------------------------------------------------
/typus/processors/__init__.py:
--------------------------------------------------------------------------------
 1 | from .base import BaseProcessor
 2 | from .escapes import BaseEscapeProcessor, EscapeHtml, EscapePhrases
 3 | from .expressions import BaseExpressions, EnRuExpressions
 4 | from .quotes import BaseQuotes, EnQuotes, RuQuotes
 5 | 
 6 | __all__ = (
 7 |     'BaseProcessor',
 8 |     'BaseEscapeProcessor',
 9 |     'EscapeHtml',
10 |     'EscapePhrases',
11 |     'BaseExpressions',
12 |     'EnRuExpressions',
13 |     'BaseQuotes',
14 |     'EnQuotes',
15 |     'RuQuotes',
16 | )
17 | 


--------------------------------------------------------------------------------
/tests/test_core.py:
--------------------------------------------------------------------------------
 1 | import pytest
 2 | 
 3 | from typus import TypusCore, ru_typus
 4 | 
 5 | 
 6 | def test_empty_string(mocker):
 7 |     mocker.patch('typus.ru_typus.procs')
 8 |     assert ru_typus('') == ''
 9 |     ru_typus.procs.run.assert_not_called()
10 | 
11 | 
12 | def test_debug_true():
13 |     assert ru_typus('2mm', debug=True) == '2_mm'
14 | 
15 | 
16 | def test_no_processors():
17 |     class Testus(TypusCore):
18 |         pass
19 | 
20 |     with pytest.raises(AssertionError):
21 |         Testus()
22 | 


--------------------------------------------------------------------------------
/docs/index.rst:
--------------------------------------------------------------------------------
 1 | .. Typus documentation master file, created by
 2 |    sphinx-quickstart on Tue Jul 12 22:26:26 2016.
 3 |    You can adapt this file completely to your liking, but it should at least
 4 |    contain the root `toctree` directive.
 5 | 
 6 | .. include:: ../README.rst
 7 | 
 8 | Contents
 9 | --------
10 | 
11 | .. toctree::
12 |    :maxdepth: 2
13 | 
14 |    get_started
15 |    processors
16 |    utils
17 | 
18 | 
19 | Indices and tables
20 | ------------------
21 | 
22 | * :ref:`genindex`
23 | * :ref:`modindex`
24 | * :ref:`search`
25 | 


--------------------------------------------------------------------------------
/typus/__init__.py:
--------------------------------------------------------------------------------
 1 | # pylint: disable=invalid-name
 2 | 
 3 | from .core import TypusCore
 4 | from .processors import (
 5 |     EnQuotes,
 6 |     EnRuExpressions,
 7 |     EscapeHtml,
 8 |     EscapePhrases,
 9 |     RuQuotes,
10 | )
11 | 
12 | 
13 | class EnTypus(TypusCore):
14 |     processors = (
15 |         EscapePhrases,
16 |         EscapeHtml,
17 |         EnQuotes,
18 |         EnRuExpressions,
19 |     )
20 | 
21 | 
22 | class RuTypus(TypusCore):
23 |     processors = (
24 |         EscapePhrases,
25 |         EscapeHtml,
26 |         RuQuotes,
27 |         EnRuExpressions,
28 |     )
29 | 
30 | 
31 | en_typus, ru_typus = EnTypus(), RuTypus()
32 | 


--------------------------------------------------------------------------------
/setup.py:
--------------------------------------------------------------------------------
 1 | from setuptools import setup
 2 | 
 3 | setup(
 4 |     name='typus',
 5 |     version='0.2.2',
 6 |     description='Multilanguage language typograph',
 7 |     url='https://github.com/byashimov/typus',
 8 |     author='Murad Byashimov',
 9 |     author_email='byashimov@gmail.com',
10 |     packages=['typus', 'typus.processors'],
11 |     license='BSD',
12 |     classifiers=[
13 |         'Development Status :: 4 - Beta',
14 |         'Intended Audience :: Developers',
15 |         'Topic :: Software Development :: Libraries :: Python Modules',
16 |         'License :: OSI Approved :: BSD License',
17 |         'Operating System :: OS Independent',
18 |         'Programming Language :: Python',
19 |         'Programming Language :: Python :: 3',
20 |         'Programming Language :: Python :: 3.6',
21 |         'Programming Language :: Python :: 3.7',
22 |     ],
23 | )
24 | 


--------------------------------------------------------------------------------
/typus/processors/base.py:
--------------------------------------------------------------------------------
 1 | from abc import ABC, abstractmethod
 2 | from typing import Type
 3 | 
 4 | from typus.core import TypusCore
 5 | 
 6 | 
 7 | class BaseProcessor(ABC):
 8 |     """
 9 |     Processors are the workers of Typus. See subclasses for examples.
10 |     """
11 | 
12 |     other: 'BaseProcessor' = None  # next processor in the chain, set by __radd__
13 | 
14 |     def __init__(self, typus: TypusCore):
15 |         # Stores Typus to access its configuration
16 |         self.typus = typus
17 | 
18 |     def __radd__(self, other: Type['BaseProcessor']):  # used by sum() in TypusCore
19 |         self.other = other
20 |         return self
21 | 
22 |     @abstractmethod
23 |     def run(self, text: str, **kwargs) -> str:
24 |         """
25 |         :param text: Input text
26 |         :param kwargs: Optional settings for the current call
27 |         :return: Output text
28 |         """
29 | 
30 |     def run_other(self, text: str, **kwargs) -> str:
31 |         if self.other:  # falsy at the end of the chain
32 |             return self.other.run(text, **kwargs)
33 |         return text
34 | 


--------------------------------------------------------------------------------
/typus/chars.py:
--------------------------------------------------------------------------------
 1 | __all__ = (
 2 |     'ANYSP',
 3 |     'DLQUO',
 4 |     'DPRIME',
 5 |     'LAQUO',
 6 |     'LDQUO',
 7 |     'LSQUO',
 8 |     'MDASH',
 9 |     'MDASH_PAIR',
10 |     'MINUS',
11 |     'NBSP',
12 |     'NDASH',
13 |     'NNBSP',
14 |     'RAQUO',
15 |     'RDQUO',
16 |     'RSQUO',
17 |     'SPRIME',
18 |     'THNSP',
19 |     'TIMES',
20 |     'WHSP',
21 | )
22 | 
23 | NBSP = '\u00A0'
24 | NNBSP = '\u202F'
25 | THNSP = '\u2009'
26 | WHSP = ' '
27 | ANYSP = r'[{}{}{}{}]'.format(WHSP, NBSP, NNBSP, THNSP)
28 | 
29 | NDASH = '–'
30 | MDASH = '—'
31 | MDASH_PAIR = NNBSP + MDASH + THNSP
32 | HYPHEN = ''
33 | 
34 | MINUS = '−'
35 | TIMES = '×'
36 | 
37 | LSQUO = '‘'  # left curly quote mark
38 | RSQUO = '’'  # right curly quote mark/apostrophe
39 | LDQUO = '“'  # left curly quote marks
40 | RDQUO = '”'  # right curly quote marks
41 | DLQUO = '„'  # double low curly quote mark
42 | LAQUO = '«'  # left angle quote marks
43 | RAQUO = '»'  # right angle quote marks
44 | 
45 | SPRIME = '′'
46 | DPRIME = '″'
47 | 


--------------------------------------------------------------------------------
/typus/core.py:
--------------------------------------------------------------------------------
 1 | # pylint: disable=unused-argument, method-hidden
 2 | 
 3 | from functools import update_wrapper
 4 | 
 5 | from .chars import NBSP, NNBSP
 6 | from .utils import re_compile
 7 | 
 8 | __all__ = ('TypusCore', )
 9 | 
10 | 
11 | class TypusCore:
12 |     """
13 |     This class runs :mod:`typus.processors` chained together.
14 |     """
15 | 
16 |     processors = ()  # processor classes, the first one is the outermost
17 |     re_nbsp = re_compile('[{}{}]'.format(NBSP, NNBSP))
18 | 
19 |     def __init__(self):
20 |         assert self.processors, 'Empty typus. Set processors'
21 | 
22 |         # Makes it possible to decorate Typus.
23 |         # updated=() skips the __dict__ attribute
24 |         update_wrapper(self, self.__class__, updated=())
25 | 
26 |         # Chains all processors into one single object via BaseProcessor.__radd__
27 |         self.procs = sum(p(self) for p in reversed(self.processors))
28 | 
29 |     def __call__(self, source: str, *, debug=False, **kwargs):
30 |         text = source.strip()
31 |         if not text:
32 |             return ''
33 | 
34 |         # Runs the whole processor chain
35 |         processed = self.procs.run(text, debug=debug, **kwargs)
36 | 
37 |         # Makes nbsp and narrow nbsp visible as underscores
38 |         if debug:
39 |             return self.re_nbsp.sub('_', processed)
40 |         return processed
41 | 


--------------------------------------------------------------------------------
/docs/get_started.rst:
--------------------------------------------------------------------------------
 1 | What is it for?
 2 | ===============
 3 | 
 4 | Well, when you write text you make sure it's grammatically correct.
 5 | Typography is *an aesthetic* grammar. Everything you type should be typographied
 6 | in order to respect the reader. For instance, when you write *“you’re”* you
 7 | put an *apostrophe* instead of a *single quote*, for the same reason you
 8 | place a dot at the end of a sentence instead of a comma, even though they look
 9 | similar.
10 | 
11 | Unfortunately all typographic characters are well hidden in your keyboard
12 | layout which makes them almost impossible to use. Fortunately Typus can do
13 | that for you.
14 | 
15 | 
16 | The anatomy
17 | -----------
18 | 
19 | :py:class:`typus.core.TypusCore` runs :ref:`Processors` to do the job.
20 | They can be plugged in to get the desired configuration.
21 | Here is a quick example:
22 | 
23 | .. testcode::
24 | 
25 |     from typus.core import TypusCore
26 |     from typus.processors import EnQuotes
27 | 
28 |     class MyTypus(TypusCore):
29 |         processors = (EnQuotes, )
30 | 
31 |     my_typus = MyTypus()
32 |     assert my_typus('"quoted text"') == '“quoted text”'
33 | 
34 | :py:class:`typus.core.TypusCore` runs :py:class:`typus.processors.EnQuotes`
35 | processor which improves *quotes* only.
36 | 


--------------------------------------------------------------------------------
/LICENSE:
--------------------------------------------------------------------------------
 1 | Copyright (c) Murad Byashimov and other contributors.
 2 | All rights reserved.
 3 | 
 4 | Redistribution and use in source and binary forms, with or without modification,
 5 | are permitted provided that the following conditions are met:
 6 | 
 7 |     1. Redistributions of source code must retain the above copyright notice,
 8 |        this list of conditions and the following disclaimer.
 9 | 
10 |     2. Redistributions in binary form must reproduce the above copyright
11 |        notice, this list of conditions and the following disclaimer in the
12 |        documentation and/or other materials provided with the distribution.
13 | 
14 |     3. Neither the name of Django nor the names of its contributors may be used
15 |        to endorse or promote products derived from this software without
16 |        specific prior written permission.
17 | 
18 | THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" AND
19 | ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED
20 | WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE
21 | DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE LIABLE FOR
22 | ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES
23 | (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES;
24 | LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON
25 | ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
26 | (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS
27 | SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.


--------------------------------------------------------------------------------
/tests/test_utils.py:
--------------------------------------------------------------------------------
 1 | # pylint: disable=anomalous-backslash-in-string
 2 | 
 3 | import pytest
 4 | 
 5 | from typus.utils import idict, splinter
 6 | 
 7 | 
 8 | @pytest.mark.parametrize('source, expected', (
 9 |     ({'A': 0, 'b': 1, 'BAr': 2}, {'a': 0, 'b': 1, 'bar': 2}),
10 | ))
11 | def test_idict(source, expected):
12 |     result = idict(source)
13 |     assert result == expected
14 |     assert source != result
15 | 
16 | 
17 | @pytest.mark.parametrize('source, expected', (
18 |     ('a, b,c', ['a', 'b', 'c']),
19 |     ('a, b\,c', ['a', 'b,c']),
20 | ))
21 | def test_splinter_basic(source, expected):
22 |     split = splinter(',')
23 |     assert split(source) == expected
24 | 
25 | 
26 | @pytest.mark.parametrize('source', (
27 |     '\\', '\\  ', '  ',
28 | ))
29 | def test_splinter_junk_delimiter(source):
30 |     with pytest.raises(ValueError):
31 |         splinter(source)
32 | 
33 | 
34 | @pytest.mark.parametrize('source, expected', (
35 |     (' a; b;c', ['a', 'b', 'c']),
36 |     (' a; b ;c', ['a', 'b', 'c']),
37 |     (' a; b ;c ', ['a', 'b', 'c']),
38 | ))
39 | def test_splinter_positional_spaces(source, expected):
40 |     split = splinter(';')
41 |     assert split(source) == expected
42 | 
43 | 
44 | def test_splinter_delimiter_with_spaces():
45 |     split = splinter(' @  ')
46 |     assert split('a@ b@ c ') == ['a', 'b', 'c']
47 | 
48 | 
49 | def test_splinter_regex_delimiter():
50 |     split = splinter('$')
51 |     assert split('a$b$c') == ['a', 'b', 'c']
52 | 
53 | 
54 | def test_splinter_doesnt_remove_other_slashes():
55 |     split = splinter('*')
56 |     assert split('a * b * c\*c \\b') == ['a', 'b', 'c*c \\b']
57 | 


--------------------------------------------------------------------------------
/docs/processors.rst:
--------------------------------------------------------------------------------
 1 | .. _Processors:
 2 | 
 3 | Processors
 4 | ==========
 5 | 
 6 | Processors are the core of Typus. Multiple processors are nested and chained
 7 | in one single function to do things which may depend on the result returned by
 8 | inner processors. Say, we set ``EscapeHtml`` and ``MyTrimProcessor``,
 9 | this is how it works:
10 | 
11 | ::
12 | 
13 |     extract html tags
14 |         pass text further if condition is true
15 |             do something and return
16 |         return the text
17 |     put tags back and return
18 | 
19 | In python:
20 | 
21 | .. testcode::
22 | 
23 |     from typus.core import TypusCore
24 |     from typus.processors import BaseProcessor, EscapeHtml
25 | 
26 |     class MyTrimProcessor(BaseProcessor):
27 |         def run(self, text, **kwargs):
28 |             # When a processor is initiated it gets the typus instance
29 |             # as the first argument so you can access its configuration
30 |             # at any time
31 |             if self.typus.trim:
32 |                 trimmed = text.strip()
33 |             else:
34 |                 trimmed = text
35 |             return self.run_other(trimmed, **kwargs)
36 | 
37 |     class MyTypus(TypusCore):
38 |         # This becomes a single function. EscapeHtml goes first
39 |         processors = (EscapeHtml, MyTrimProcessor)
40 | 
41 |         # Set it to `False` to disable trimming
42 |         trim = True
43 | 
44 |     my_typus = MyTypus()
45 |     assert my_typus('    test    ') == 'test'
46 | 
47 | 
48 | Built-in processors
49 | -------------------
50 | 
51 | .. automodule:: typus.processors
52 |     :members: EnQuotes, RuQuotes, EnRuExpressions, EscapeHtml, EscapePhrases
53 | 


--------------------------------------------------------------------------------
/typus/processors/escapes.py:
--------------------------------------------------------------------------------
 1 | from abc import abstractmethod
 2 | from itertools import count
 3 | 
 4 | from ..utils import re_compile
 5 | from .base import BaseProcessor
 6 | 
 7 | 
 8 | class BaseEscapeProcessor(BaseProcessor):
 9 |     def run(self, text: str, **kwargs) -> str:
10 |         storage = []  # (placeholder key, original value) pairs from _save_values
11 |         counter = count()  # numbers the placeholder keys
12 |         escaped = self._save_values(text, storage, counter, **kwargs)
13 | 
14 |         # Runs the rest of the processor chain on the escaped text
15 |         processed = self.run_other(escaped, **kwargs)
16 |         if not storage:  # nothing was escaped, nothing to restore
17 |             return processed
18 | 
19 |         restored = self._restore_values(processed, storage)
20 |         return restored
21 | 
22 |     @abstractmethod
23 |     def _save_values(self, *args, **kwargs):
24 |         pass  # pragma: nocover
25 | 
26 |     @staticmethod
27 |     def _restore_values(text, storage):
28 |         """
29 |         Puts the stored values back into the text in reversed order.
30 |         It's important to loop over and restore text step by step
31 |         because some 'stored' chunks may contain keys to other ones.
32 |         """
33 |         for key, value in reversed(storage):
34 |             text = text.replace(key, value)
35 |         return text
36 | 
37 | 
38 | class EscapePhrases(BaseEscapeProcessor):
39 |     """
40 |     Escapes phrases which should never be processed.
41 | 
42 |     >>> from typus import en_typus
43 |     >>> en_typus('Typus turns `(c)` into "(c)"', escape_phrases=['`(c)`'])
44 |     'Typus turns `(c)` into “©”'
45 | 
46 |     There is also a little helper :func:`typus.utils.splinter` which should
47 |     help you split a string into phrases.
48 |     """
49 | 
50 |     placeholder = '{{#phrase{0}#}}'  # formats to ``{#phraseN#}``
51 | 
52 |     def _save_values(
53 |             self, text, storage, counter, escape_phrases=(), **kwargs):
54 |         for phrase in escape_phrases:
55 |             if not phrase.strip():  # skips empty and blank phrases
56 |                 continue
57 |             key = self.placeholder.format(next(counter))
58 |             text = text.replace(phrase, key)
59 |             storage.append((key, phrase))
60 |         return text
61 | 
62 | 
63 | class EscapeHtml(BaseEscapeProcessor):
64 |     """
65 |     Extracts html tags and puts them back afterwards.
66 | 
67 |     >>> from typus import en_typus
68 |     >>> en_typus('Typus turns <code>(c)</code> into "(c)"')
69 |     'Typus turns <code>(c)</code> into “©”'
70 | 
71 |     .. caution::
72 |         Doesn't support nested ``<code>`` tags.
73 |     """
74 | 
75 |     placeholder = '{{#html{0}#}}'
76 |     skiptags = 'head|iframe|pre|code|script|style|video|audio|canvas'
77 |     patterns = (  # applied in order; the first takes whole skiptags elements
78 |         re_compile(r'(<)({0})(.*?>.*?</\2>)'.format(skiptags)),
79 |         # Doctype, xml, closing tag, any other tag
80 |         re_compile(r'(<[\!\?/]?[a-z]+.*?>)'),
81 |         # Html comments
82 |         re_compile(r'(<\!\-\-.*?\-\->)'),
83 |     )
84 | 
85 |     def _save_values(self, text, storage, counter, **kwargs):
86 |         for pattern in self.patterns:
87 |             text = pattern.sub(self._replace(storage, counter), text)
88 |         return text
89 | 
90 |     def _replace(self, storage, counter):
91 |         def inner(match):
92 |             key = self.placeholder.format(next(counter))
93 |             html = ''.join(match.groups())  # groups cover the whole match
94 |             storage.append((key, html))
95 |             return key
96 |         return inner
97 | 


--------------------------------------------------------------------------------
/typus/processors/quotes.py:
--------------------------------------------------------------------------------
  1 | from itertools import cycle
  2 | from typing import Match
  3 | 
  4 | from ..chars import DLQUO, LAQUO, LDQUO, LSQUO, RAQUO, RDQUO, RSQUO
  5 | from ..utils import re_compile
  6 | from .base import BaseProcessor
  7 | 
  8 | 
  9 | class BaseQuotes(BaseProcessor):
 10 |     """
 11 |     Replaces regular quotes with typographic ones.
 12 |     Supports nesting of any depth, but doesn't work well with minutes ``1'``
 13 |     and inches ``1"`` within the quotes; such cases are ignored.
 14 |     Please provide ``loq, roq, leq, req`` attributes with custom quotes.
 15 | 
 16 |     >>> from typus import en_typus
 17 |     >>> en_typus('Say "what" again!')
 18 |     'Say “what” again!'
 19 |     """
 20 | 
 21 |     loq = roq = leq = req = NotImplemented
 22 | 
 23 |     def __init__(self, *args, **kwargs):
 24 |         super().__init__(*args, **kwargs)
 25 | 
 26 |         # Pairs of odd and even quotes. Already *switched* in one dimension.
 27 |         # See :meth:`_switch_nested` for more help.
 28 |         self.switch = (self.loq + self.req, self.leq + self.roq)
 29 | 
 30 |         # Matches every typographic quote; :meth:`run` replaces them with `'`
 31 |         quotes = ''.join((LSQUO, RSQUO, LDQUO, RDQUO, DLQUO, LAQUO, RAQUO))
 32 |         self.re_normalize = re_compile(r'[{0}]'.format(quotes))
 33 | 
 34 |         # Matches nested quotes (with no quotes within)
 35 |         # which are then replaced with odd level quotes
 36 |         self.re_normal = re_compile(
 37 |             # No word characters before
 38 |             r'(?<!\w)'
 39 |             # Starts with a quote
 40 |             r'(["\'])'
 41 |             r'(?!\s)'
 42 |             # Everything but quote inside
 43 |             r'((?!\1).+?)'
 44 |             r'(?!\s)'
 45 |             # Ends with the same quote it started with
 46 |             r'\1'
 47 |             # No word characters afterwards
 48 |             r'(?!\w)'
 49 |         )
 50 |         self.re_normal_replace = r'{0}\2{1}'.format(self.loq, self.roq)
 51 | 
 52 |         # Matches odd level typographic quotes
 53 |         self.re_nested = re_compile(r'({0}|{1})'.format(self.loq, self.roq))
 54 | 
 55 |     def run(self, text: str, **kwargs) -> str:
 56 |         # Normalizes typographic quotes to a plain single one
 57 |         normalized = self.re_normalize.sub('\'', text)
 58 | 
 59 |         # Replaces normalized quotes with first level ones, starting
 60 |         # from the inner pairs and moving outwards
 61 |         nested = 0
 62 |         while True:
 63 |             normalized, replaced = self.re_normal.subn(
 64 |                 self.re_normal_replace, normalized)
 65 |             if not replaced:
 66 |                 break
 67 |             nested += 1
 68 | 
 69 |         # Saves some cpu :)
 70 |         # Most cases are about just one level of quoting
 71 |         if nested < 2:
 72 |             return self.run_other(normalized, **kwargs)
 73 | 
 74 |         # At this point all quotes are of the odd type, so it has to be fixed
 75 |         switched = self._switch_nested(normalized)
 76 |         return self.run_other(switched, **kwargs)
 77 | 
 78 |     def _switch_nested(self, text: str):
 79 |         """
 80 |         Switches nested quotes to another type.
 81 |         This logic lives in a separate method so that tests can mock it
 82 |         and make sure it isn't called without a special need.
 83 |         """
 84 | 
 85 |         # Cycles over the pairs of possible quotes. Every match takes the
 86 |         # next pair, which provides the *next* type of a given quote
 87 |         quotes = cycle(self.switch)
 88 | 
 89 |         def replace(match: Match):
 90 |             # Only odd quotes are matched, so comparing the match with the
 91 |             # left odd quote tells which side of the pair should be returned:
 92 |             # the inequality is False, i.e. index zero, for a left quote
 93 |             # and True, i.e. index one, for a right quote
 94 |             return next(quotes)[match.group() != self.loq]
 95 |         return self.re_nested.sub(replace, text)
 96 | 
 97 | 
 98 | class EnQuotes(BaseQuotes):
 99 |     r"""
100 |     Provides English quotes configuration for :class:`typus.processors.BaseQuotes`
101 |     processor.
102 | 
103 |     >>> from typus import en_typus
104 |     >>> en_typus('He said "\'Winnie-the-Pooh\' is my favorite book!".')
105 |     'He said “‘Winnie-the-Pooh’ is my favorite book!”.'
106 |     """
107 | 
108 |     # Left odd, right odd, left even, right even
109 |     loq = LDQUO
110 |     roq = RDQUO
111 |     leq = LSQUO
112 |     req = RSQUO
113 | 
114 | 
115 | class RuQuotes(BaseQuotes):
116 |     r"""
117 |     Provides Russian quotes configuration for :class:`typus.processors.BaseQuotes`
118 |     processor.
119 | 
120 |     >>> from typus import ru_typus
121 |     >>> ru_typus('Он сказал: "\'Винни-Пух\' -- моя любимая книга!".')
122 |     'Он\xa0сказал: «„Винни-Пух“\u202f—\u2009моя любимая книга!».'
123 |     """
124 | 
125 |     # Left odd, right odd, left even, right even
126 |     loq = LAQUO
127 |     roq = RAQUO
128 |     leq = DLQUO
129 |     req = LDQUO  # the inner closing quote looks like the English opening one
130 | 


--------------------------------------------------------------------------------
/README.rst:
--------------------------------------------------------------------------------
  1 | Welcome to Typus
  2 | ================
  3 | 
  4 | Typus is a typography tool. It means you can write text the way you are used to
  5 | and let it handle all that formatting headache:
  6 | 
  7 | ::
  8 | 
  9 |     "I don't feel very much like Pooh today..." said Pooh.
 10 |     "There there," said Piglet. "I'll bring you tea and honey until you do."
 11 |     - A.A. Milne, Winnie-the-Pooh
 12 | 
 13 |     “I don’t feel very much like Pooh today…” said Pooh.
 14 |     “There there,” said Piglet. “I’ll bring you tea and honey until you do.”
 15 |     — A. A. Milne, Winnie-the-Pooh
 16 | 
 17 | Copy & paste this example to your rich text editor. Result may depend on
 18 | the font of your choice.
 19 | For instance, there is a tiny non-breaking space between ``A. A.`` you
 20 | can see with Helvetica:
 21 | 
 22 | .. image:: https://raw.githubusercontent.com/byashimov/typus/develop/docs/example.png
 23 | 
 24 | Try out the demo_.
 25 | 
 26 | 
 27 | Web API
 28 | -------
 29 | 
 30 | A tiny `web-service`_ for whatever legal purpose it may serve.
 31 | 
 32 | 
 33 | Installation
 34 | ------------
 35 | 
 36 | .. code-block:: console
 37 | 
 38 |     $ pip install typus 
 39 | 
 40 | 
 41 | Usage
 42 | -----
 43 | 
 44 | Currently Typus supports English and Russian languages only.
 45 | But it doesn't mean it can't handle more. I'm quite sure it covers Serbian
 46 | and Turkmen.
 47 | 
 48 | In fact, Typus doesn't make a difference between languages. It works with text.
 49 | If you use Cyrillic then only relevant processors will affect that text.
 50 | In other words, give it a try if your language is not on the list.
 51 | 
 52 | Here is a short example:
 53 | 
 54 | .. code-block:: python
 55 | 
 56 |     >>> from typus import en_typus, ru_typus
 57 |     ...
 58 |     >>> # Underscore is for nbsp in debug mode
 59 |     >>> en_typus('"Beautiful is better than ugly." (c) Tim Peters.', debug=True)
 60 |     '“Beautiful is_better than ugly.” ©_Tim Peters.'
 61 |     >>> # Cyrillic 'с' in '(с)'
 62 |     >>> ru_typus('"Красивое лучше, чем уродливое." (с) Тим Петерс.', debug=True)
 63 |     '«Красивое лучше, чем уродливое.» ©_Тим Петерс.'
 64 | 
 65 | 
 66 | The only difference between ``en_typus`` and ``ru_typus``
 67 | is in the quotes they set: ``“‘’”`` for English and ``«„“»`` for Russian. Both of
 68 | them handle mixed text and that is pretty awesome.
 69 | 
 70 | Typus is highly customizable. Not only quotes can be replaced but almost
 71 | everything. For instance, if you don't use html tags you can skip
 72 | ``EscapeHtml`` processor which makes your Typus a little
 73 | faster.
 74 | 
 75 | 
 76 | What it does
 77 | ------------
 78 | 
 79 | - Replaces regular quotes ``"foo 'bar' baz"`` with typographic pairs:
 80 |   ``“foo ‘bar’ baz”``. Quotes style depends on language and your Typus configuration.
 81 | - Replaces regular dash ``foo - bar`` with mdash or ndash or minus.
 82 |   Depends on case: plain text, digit range, math, etc.
 83 | - Replaces complex symbols such as ``(c)`` with unicode characters: ``©``.
 84 |   Cyrillic analogs are supported too.
 85 | - Replaces vulgar fractions ``1/2`` with unicode characters: ``½``.
 86 | - Turns multiply symbol to a real one: ``3x3`` becomes ``3×3``.
 87 | - Replaces quotes with primes: ``2' 4"`` becomes ``2′ 4″``.
 88 | - Puts non-breaking spaces.
 89 | - Puts ruble symbol.
 90 | - Trims spaces at the end of lines.
 91 | - and much more.
 92 | 
 93 | 
 94 | Documentation
 95 | -------------
 96 | 
 97 | Docs are hosted on `readthedocs.org`_.
 98 | 
 99 | .. seealso::
100 | 
101 |     Oh, there is also an outdated Russian article I probably should not
102 |     suggest, but since all docs are in English, this link_ might be
103 |     quite helpful.
104 | 
105 | 
106 | Compatibility
107 | -------------
108 | 
109 | .. image:: https://travis-ci.org/byashimov/typus.svg?branch=develop
110 |     :alt: Build Status
111 |     :target: https://travis-ci.org/byashimov/typus
112 | 
113 | .. image:: https://codecov.io/gh/byashimov/typus/branch/develop/graph/badge.svg
114 |     :alt: Codecov
115 |     :target: https://codecov.io/gh/byashimov/typus
116 | 
117 | Tested on Python 3.6, 3.7.
118 | 
119 | 
120 | Changelog
121 | ---------
122 | 
123 | 0.2.2
124 | ~~~~~
125 | 
126 | - Improved ``mdash``: narrow spaces are used instead of regular ones.
127 | - Improved ``range``: ``mdash`` is replaced with ``ndash``.
128 | - Dropped ``phone`` processing. Using regular hyphen-minus is ok,
129 |   because there is no valuable visual difference between that and hyphen.
130 | 
131 | Thanks to @danaksim for the help.
132 | 
133 | 0.2
134 | ~~~
135 | 
136 | - Only Python 3.6 and higher are supported.
137 |   That's because 3.6 string formatting is used in tests to make them easier
138 |   to read and write.
139 | - ``EnRuExpressions`` is no longer a mixin but processor.
140 | - Better, cleaner tests with pytest.
141 | - Minor fixes and improvements.
142 | 
143 | 0.1
144 | ~~~
145 | 
146 | - Initial release.
147 | 
148 | 
149 | .. _demo: https://byashimov.com/typus/
150 | .. _web-service: https://byashimov.com/typus/api/
151 | .. _readthedocs.org: http://py-typus.readthedocs.io/en/latest/
152 | .. _link: https://habrahabr.ru/post/303608/
153 | 


--------------------------------------------------------------------------------
/typus/utils.py:
--------------------------------------------------------------------------------
  1 | # pylint: disable=anomalous-backslash-in-string
  2 | 
  3 | import re
  4 | from functools import wraps
  5 | from typing import Callable, Iterable, List
  6 | 
# Names exported by ``from typus.utils import *``
__all__ = (
    'RE_SCASE',
    'RE_ICASE',
    'doc_map',
    'idict',
    'map_choices',
    're_choices',
    're_compile',
    'splinter',
)
 17 | 
 18 | 
 19 | RE_SCASE = re.U | re.M | re.S  # sensitive case
 20 | RE_ICASE = re.I | RE_SCASE  # insensitive case
 21 | 
 22 | 
 23 | def re_compile(pattern: str, flags: int = RE_ICASE):
 24 |     """
 25 |     A shortcut to compile regex with predefined flags:
 26 |     :const:`re.I`, :const:`re.U`, :const:`re.M`, :const:`re.S`.
 27 | 
 28 |     :param str pattern: A string to compile pattern from.
 29 |     :param int flags: Python :mod:`re` module flags.
 30 | 
 31 |     >>> foo = re_compile('[a-z]')  # matches with 'test' and 'TEST'
 32 |     >>> bool(foo.match('TEST'))
 33 |     True
 34 |     >>> bar = re_compile('[a-z]', flags=0)  # doesn't match with 'TEST'
 35 |     >>> bool(bar.match('TEST'))
 36 |     False
 37 |     """
 38 | 
 39 |     return re.compile(pattern, flags)
 40 | 
 41 | 
 42 | def re_choices(choices: Iterable[str], group: str = r'({})') -> str:
 43 |     """
 44 |     Returns regex group of escaped choices.
 45 | 
 46 |     :param choices: Iterable of strings.
 47 |     :param group: A string to format the group with.
 48 | 
 49 |     >>> re_choices(('foo', 'bar'))
 50 |     '(foo|bar)'
 51 |     """
 52 |     return group.format('|'.join(map(re.escape, choices)))
 53 | 
 54 | 
 55 | class idict(dict):
 56 |     """
 57 |     Case-insensitive dictionary.
 58 | 
 59 |     :param mapping/iterable obj: An object to initialize new dictionary from
 60 |     :param `**kwargs`: ``key=value`` pairs to put in the new dictionary
 61 |     :returns: A regex non-compiled pattern
 62 |     :rtype: str
 63 | 
 64 |     >>> foo = idict({'A': 0, 'b': 1, 'bar': 2})
 65 |     >>> foo['a'], foo['B'], foo['bAr']
 66 |     (0, 1, 2)
 67 | 
 68 |     .. caution::
 69 |         :class:`idict` is not a full-featured case-insensitive dictionary.
 70 |         As it's made for :func:`map_choices` and has limited functionality.
 71 |     """
 72 | 
 73 |     def __init__(self, obj: dict):
 74 |         lowered = ((key.lower(), value) for key, value in obj.items())
 75 |         super().__init__(lowered)
 76 | 
 77 |     def __getitem__(self, key):
 78 |         return super().__getitem__(key.lower())
 79 | 
 80 | 
 81 | def map_choices(data: dict, group: str = r'({})', dict_class=idict) -> tuple:
 82 |     """
 83 |     :class:`typus.processors.Expressions` helper.
 84 |     Builds regex pattern from the dictionary keys and maps them to values via
 85 |     replace function.
 86 | 
 87 |     :param mapping/iterable data: A pairs of (find, replace with) strings
 88 |     :param str group: A string to format in choices.
 89 |     :param class dict_class: A dictionary class to convert source data.
 90 |         By default :class:`idict` is used which is case-insensitive.
 91 |         In instance, to map  ``(c)`` and ``(C)`` to different values pass
 92 |         regular python :class:`dict`. Or if the order matters use
 93 |         :class:`collections.OrderedDict`
 94 | 
 95 |     :returns: A regex non-compiled pattern and replace function
 96 |     :rtype: tuple
 97 | 
 98 |     >>> import re
 99 |     >>> pattern, replace = map_choices({'a': 0, 'b': 1})
100 |     >>> re.sub(pattern, replace, 'abc')
101 |     '01c'
102 |     """
103 | 
104 |     options = dict_class(data)
105 |     pattern = re_choices(options, group=group)
106 | 
107 |     def replace(match):
108 |         return str(options[match.group()])
109 |     return pattern, replace
110 | 
111 | 
def doc_map(data: dict, keys='Before', values='After', delim='|'):
    """
    Returns a decorator which appends a reStructuredText ``csv-table``
    built from ``data`` to the docstring of the decorated object.

    :param dict data: A mapping, every item becomes a row of the table
    :param str keys: A header of the first column
    :param str values: A header of the second column
    :param str delim: A delimiter of the table columns

    :returns: A decorator which updates ``__doc__`` and returns
        the decorated object itself
    """
    rows = '\n'.join(f'\t``{k}`` {delim} ``{v}``' for k, v in data.items())
    table = (
        '\n.. csv-table::'
        f'\n\t:delim: {delim}'
        f'\n\t:header: "{keys}", "{values}"\n'
        f'\n{rows}'
    )

    def updater(func):
        # ``__doc__`` is None if there is no docstring, i.e. when python
        # runs with ``-OO``, so it can't be concatenated directly
        func.__doc__ = (func.__doc__ or '') + table
        return func
    return updater
125 | 
126 | 
def splinter(delimiter: str) -> Callable[[str], List[str]]:
    r"""
    :class:`typus.processors.EscapePhrases` helper.
    Almost like ``str.split()`` but handles delimiter escaping and strips
    spaces.

    :param str delimiter: String delimiter
    :raises ValueError: If delimiter is a slash or an empty space

    :returns: A function which takes a string and returns a list of
        stripped phrases splitted by the delimiter
    :rtype: callable

    >>> split = splinter(',  ')  # strips this spaces
    >>> split(r'a, b,c ,  d\,e')  # and this ones too
    ['a', 'b', 'c', 'd,e']
    """

    # Spaces and backslashes around the delimiter are not a part of it
    delim = delimiter.strip(' \\')
    if not delim:
        raise ValueError('Delimiter can not be a slash or an empty space.')

    # Doesn't split escaped delimiters
    pattern = re.compile(r'(?<!\\){0}\s*'.format(re.escape(delim)))
    replace = '\\' + delim

    @wraps(splinter)
    def inner(phrases: str):
        # Deletes delimiter escaping and strips spaces
        return [
            x.replace(replace, delim).strip()
            for x in pattern.split(phrases)
        ]
    return inner
160 | 


--------------------------------------------------------------------------------
/tests/test_processors.py:
--------------------------------------------------------------------------------
  1 | from unittest import mock
  2 | 
  3 | import pytest
  4 | 
  5 | from typus import EscapeHtml, EscapePhrases, RuQuotes, TypusCore, ru_typus
  6 | 
  7 | 
@pytest.mark.parametrize('source, expected, escape_phrases', (
    ('"foo 2""', '«foo 2"»', ['2"']),
    ('"foo (c) (r) (tm)"', '«foo (c) (r) (tm)»', ['(c)', '(r)', '(tm)']),

    # Doesn't assert like the same one in EscapeHtmlTest
    (
        '<code>dsfsdf <code>"test"</code> "sdfdf"</code>',
        '<code>dsfsdf <code>"test"</code> "sdfdf"</code>',
        ['<code>"test"</code>'],
    ),

    # Empty string, nothing to escape
    ('"foo"', '«foo»', ['']),
))
def test_escape_phrases(source, expected, escape_phrases):
    """Runs ``ru_typus`` with ``escape_phrases`` and checks the output."""
    assert ru_typus(source, escape_phrases=escape_phrases) == expected
 24 | 
 25 | 
@mock.patch('typus.processors.EscapeHtml._restore_values', return_value='test')
def test_restore_html_call(mock_restore_values):
    """Checks when the patched ``EscapeHtml._restore_values`` is called."""
    # Plain text: restoring is not called
    ru_typus('test')
    mock_restore_values.assert_not_called()

    # Text with a code block: restoring is called exactly once
    ru_typus('<code>test</code>')
    mock_restore_values.assert_called_once()
 33 | 
 34 | 
@pytest.mark.parametrize('source', (
    '<pre>"test"</pre>',
    '<code>"test"</code>',

    # Nested code in pre
    '<pre><code>"test"</code></pre>',
    '<pre><code><code>"test"</code></code></pre>',

    # Script tag
    '<script>"test"</script>',
    '<script type="text/javascript" src="/test/">"test"</script>',
))
def test_codeblocks(source):
    """``ru_typus`` has to return pre, code and script blocks unchanged."""
    assert ru_typus(source) == source
 49 | 
 50 | 
@pytest.mark.parametrize('source, expected', (
    (
        '<code>dsfsdf <code>"test"</code> "sdfdf"</code>',
        '<code>dsfsdf <code>"test"</code> «sdfdf»</code>',
    ),
))
def test_nested_codeblocks(typus, source, expected):
    """Uses the ``typus`` fixture: quotes after the inner block are replaced."""
    # No nested codeblocks
    assert typus(source) == expected
 60 | 
 61 | 
 62 | @pytest.mark.parametrize('source, expected', (
 63 |     ('<b>"test"</b>', '<b>«test»</b>'),
 64 |     ('<b id="test">"test"</b>', '<b id="test">«test»</b>'),
 65 |     ('<b>"test"</b>', '<b>«test»</b>'),
 66 | 
 67 |     # Image: html + xhtml
 68 |     ('<img>"test"', '<img>«test»'),
 69 |     ('<img alt="test">"test"', '<img alt="test">«test»'),
 70 |     ('<img alt="test"/>"test"', '<img alt="test"/>«test»'),
 71 | ))
 72 | def test_tags(source, expected):
 73 |     assert ru_typus(source) == expected
 74 | 
 75 | 
@pytest.mark.parametrize('source', (
    '<!-- "(c)" -->',
    '<!--"(c)"-->',
    '<!---->',
))
def test_comments(source):
    """``ru_typus`` has to return html comments unchanged."""
    assert ru_typus(source) == source
 83 | 
 84 | 
@pytest.mark.parametrize('source', (
    '<!DOCTYPE html>',
    '<?xml version="1.0" encoding="UTF-8"?>',
))
def test_doctype(source):
    """``ru_typus`` has to return doctype and xml declarations unchanged."""
    assert ru_typus(source) == source
 91 | 
 92 | 
@pytest.mark.parametrize('source', (
    '<head><title>(c)</title></head>',
))
def test_head(source):
    """``ru_typus`` has to return the head block unchanged."""
    assert ru_typus(source) == source
 98 | 
 99 | 
@pytest.mark.parametrize('source', (
    '<iframe height="500" width="500">(c)</iframe>',
))
def test_iframe(source):
    """``ru_typus`` has to return the iframe block unchanged."""
    assert ru_typus(source) == source
105 | 
106 | 
@pytest.fixture(name='typus')
def get_typus():
    """
    Provides a Typus instance limited to the ``EscapePhrases``,
    ``EscapeHtml`` and ``RuQuotes`` processors.
    """
    class Typus(TypusCore):
        processors = (EscapePhrases, EscapeHtml, RuQuotes)

    instance = Typus()
    return instance
117 | 
118 | 
@mock.patch('typus.processors.BaseQuotes._switch_nested', return_value='test')
def test_switch_nested_call(mock_switch_nested, typus):
    """Checks when the patched ``BaseQuotes._switch_nested`` is called."""
    # No quotes
    typus('00 11 00')
    mock_switch_nested.assert_not_called()

    # Odd only
    typus('00 "11" 00')
    mock_switch_nested.assert_not_called()

    # Both
    typus('"00 "11" 00"')
    mock_switch_nested.assert_called_once()
132 | 
133 | 
@pytest.mark.parametrize('source, expected', (
    # Levels
    ('00 "11" 00', '00 «11» 00'),  # One
    ('"00 "11" 00"', '«00 „11“ 00»'),  # Two
    ('00" "11 "22" 11"', '00" «11 „22“ 11»'),  # Three

    # Hardcore
    ('00 ""22"" 00', '00 «„22“» 00'),
    ('00 ""22..."" 00', '00 «„22...“» 00'),
    ('00 ""22"..." 00', '00 «„22“...» 00'),
    ('"© test"', '«© test»'),
    ('("test")', '(«test»)'),
    ('"test"*', '«test»*'),
    ('"test"®', '«test»®'),
    ('"""test"""', '«„«test»“»'),
    ('""""test""""', '«„«„test“»“»'),
    ('"""""""test"""""""', '«„«„«„«test»“»“»“»'),
    ('" test"', '" test"'),
    ('" "test""', '" «test»"'),
    ('"foo 2\'"', '«foo 2\'»'),

    # False positive
    ('"foo 2""', '«foo 2»"'),

    # Weird cases
    ('00 "... "22"" 00', '00 «... „22“» 00'),
    ('00 "..."22"" 00', '00 «...„22“» 00'),

    # Punctuation
    ('00 "...11 "22!"" 00', '00 «...11 „22!“» 00'),
    ('00 "11 "22!"..." 00', '00 «11 „22!“...» 00'),
    ('00 "11 "22!"?!." 00', '00 «11 „22!“?!.» 00'),
    ('00 "11 "22!"?!."? 00', '00 «11 „22!“?!.»? 00'),

    # Nested on side
    ('00 ""22!" 11" 00', '00 «„22!“ 11» 00'),
    ('00 "11 "22?"" 00', '00 «11 „22?“» 00'),

    # Different quotes
    ('00 "“22”" 00', '00 «„22“» 00'),
    ('00 "‘22’" 00', '00 «„22“» 00'),

    # Inches, minutes within quotes
    ('00 "11\'" 00 "11"', '00 «11\'» 00 «11»'),
    ('00" "11" 00 "11"', '00" «11» 00 «11»'),

    # Fire them all!
    (
        '''00" "11 '22' 11"? "11 '22 "33 33"' 11" 00' "11 '22' 11" 00"''',
        '00" «11 „22“ 11»? «11 „22 «33 33»“ 11» 00\' «11 „22“ 11» 00"',
    ),
))
def test_quotes(typus, source, expected):
    """Checks quotes replacement done by the ``typus`` fixture."""
    assert typus(source) == expected
188 | 
189 | 
@pytest.mark.parametrize('source, expected', (
    # Html test
    ('<span>"11"</span>', '<span>«11»</span>'),
    ('"<span>11</span>"', '«<span>11</span>»'),
))
def test_me(typus, source, expected):
    """Checks quotes placed inside and around an html tag."""
    assert typus(source) == expected
197 | 


--------------------------------------------------------------------------------
/tests/test_summary.py:
--------------------------------------------------------------------------------
  1 | import pytest
  2 | 
  3 | from typus import en_typus, ru_typus
  4 | from typus.chars import *
  5 | 
# Two strings of quote characters of the same length. They are passed to
# ``str.maketrans`` below, so every character of the first string is mapped
# to the character at the same position of the second one.
QUOTES = (
    ''.join((LAQUO, RAQUO, DLQUO, LDQUO)),
    ''.join((LDQUO, RDQUO, LSQUO, RSQUO)),
)
# Pairs of (typus, translation table applied to the expected string).
# Expected strings are written for ``ru_typus``, hence its table is empty.
TYPUSES = (
    (ru_typus, {}),
    (en_typus, str.maketrans(*QUOTES)),
)
 14 | 
 15 | 
 16 | @pytest.fixture(name='assert_typus', scope='module', params=TYPUSES)
 17 | def get_assert_typus(request):
 18 |     typus, charmap = request.param
 19 | 
 20 |     def assert_typus(source, expected):
 21 |         assert typus(source) == expected.translate(charmap)
 22 |     return assert_typus
 23 | 
 24 | 
 25 | def test_debug():
 26 |     assert ru_typus('1m', debug=True) == '1_m'
 27 | 
 28 | 
@pytest.mark.parametrize('source, expected', (
    ('00 "11" 00', '00 «11» 00'),
    # clashes with digit_spaces
    (
        '''00" "11 '22' 11"? "11 '22 "33 33?"' 11" 00 "11 '22' 11" 0"''',
        f'00{DPRIME} «11 „22“ 11»? «11 „22 «33{NBSP}33?»“ 11» '
        f'00 «11 „22“ 11» 0{DPRIME}'
    ),
))
def test_quotes(assert_typus, source, expected):
    """Checks quotes with the whole set of processors applied."""
    assert_typus(source, expected)
 40 | 
 41 | 
@pytest.mark.parametrize('source, expected', (
    ('--', '--'),
    ('foo - foo', f'foo{MDASH_PAIR}foo'),
    # Leading comma case
    (', - foo', f',{MDASH}{THNSP}foo'),
    (', -- foo', f',{MDASH}{THNSP}foo'),
    # if line begins, adds nbsp after mdash
    ('-- foo', f'{MDASH}{NBSP}foo'),
    # if line ends, adds nbsp before mdash
    ('foo --', f'foo{NBSP}{MDASH}'),
    ('foo -- bar', f'foo{MDASH_PAIR}bar'),
    # Python markdown replaces dash with ndash, don't know why
    (f'foo {NDASH} foo', f'foo{MDASH_PAIR}foo'),

    # This one for ru_typus
    ('foo - "11" 00', f'foo{MDASH_PAIR}«11» 00'),
    ('2 - 2foo', f'2{MDASH_PAIR}2foo'),  # no units clash
    ('2 - 2', f'2{NBSP}{MINUS}{NBSP}2'),  # + minus
    ('Winnie-the-Pooh', 'Winnie-the-Pooh'),
))
def test_mdash(assert_typus, source, expected):
    """Checks dashes with the whole set of processors applied."""
    assert_typus(source, expected)
 64 | 
 65 | 
@pytest.mark.parametrize('source, expected', (
    ('"4"', '«4»'),
    ('4\'', '4' + SPRIME),
    ('4"', '4' + DPRIME),
    ('" 22"', '" 22' + DPRIME),
))
def test_primes(assert_typus, source, expected):
    """Checks primes with the whole set of processors applied."""
    assert_typus(source, expected)
 74 | 
 75 | 
@pytest.mark.parametrize('source, expected', (
    ('25-foo', '25-foo'),
    ('2-3', f'2{NDASH}3'),
    ('2,5-3', f'2,5{NDASH}3'),
    ('0.5-3', f'0.5{NDASH}3'),
    ('2-3 foo', f'2{NDASH}3{NBSP}foo'),  # + ranges
    ('(15-20 items)', f'(15{NDASH}20{NBSP}items)'),

    # Float
    ('0,5-3', f'0,5{NDASH}3'),
    ('-0,5-3', f'{MINUS}0,5{NDASH}3'),
    ('-5.5-3', f'{MINUS}5.5{NDASH}3'),
    ('-5,5-3', f'{MINUS}5,5{NDASH}3'),
    ('-5,5-3.5', f'{MINUS}5,5{NDASH}3.5'),
    ('2 - 3', f'2{NBSP}{MINUS}{NBSP}3'),
    ('2-3 x 4', f'2{MINUS}3{NBSP}{TIMES}{NBSP}4'),
    ('2-3 * 4', f'2{MINUS}3{NBSP}{TIMES}{NBSP}4'),
    ('2-3 - 4', f'2{MINUS}3{NBSP}{MINUS}{NBSP}4'),
))
def test_ranges(assert_typus, source, expected):
    """Checks digit ranges with the whole set of processors applied."""
    assert_typus(source, expected)
 97 | 
 98 | 
@pytest.mark.parametrize('source, expected', (
    # Minus
    (f'3{NBSP}-{NBSP}2', f'3{NBSP}{MINUS}{NBSP}2'),
    # This one clashes with range
    ('2-3', f'2{NDASH}3'),
    # This one clashes with mdash
    (f'x{NBSP}-{NBSP}3', f'x{NNBSP}{MDASH}{THNSP}3'),
    ('-3', f'{MINUS}3'),

    # Star
    ('3*2', f'3{TIMES}2'),
    ('*3', f'{TIMES}3'),
    (f'3{NBSP}*{NBSP}2', f'3{NBSP}{TIMES}{NBSP}2'),
    (f'x{NBSP}*{NBSP}2', f'x{NBSP}{TIMES}{NBSP}2'),

    # 'x'
    ('3x2', f'3{TIMES}2'),
    ('x3', f'{TIMES}3'),
    (f'3{NBSP}x{NBSP}2', f'3{NBSP}{TIMES}{NBSP}2'),
    (f'x{NBSP}x{NBSP}2', f'x{NBSP}{TIMES}{NBSP}2'),

    # and Russian "х"
    ('3х2', f'3{TIMES}2'),
    ('х3', f'{TIMES}3'),
    (f'3{NBSP}х{NBSP}2', f'3{NBSP}{TIMES}{NBSP}2'),
    (f'x{NBSP}х{NBSP}2', f'x{NBSP}{TIMES}{NBSP}2'),
))
def test_math(assert_typus, source, expected):
    """Checks math symbols with the whole set of processors applied."""
    assert_typus(source, expected)
128 | 
129 | 
@pytest.mark.parametrize('source, expected', (
    ('aaa 2a', 'aaa 2a'),  # doesn't clash with units
))
def test_pairs(assert_typus, source, expected):
    """Checks that the text is left as it is by the whole set of processors."""
    assert_typus(source, expected)
135 | 
136 | 
@pytest.mark.parametrize('source, expected', (
    ('4444444 fooo', '4444444 fooo'),
    ('444 foo', f'444{NBSP}foo'),
    ('444 +', f'444{NBSP}+'),
    ('444 4444 bucks', f'444{NBSP}4444 bucks'),
    ('4444444 foo', '4444444 foo'),  # no units clash
    ('444 -', f'444{NBSP}{MDASH}'),
))
def test_digit_spaces(assert_typus, source, expected):
    """Checks spaces after digits with the whole set of processors applied."""
    assert_typus(source, expected)
147 | 
148 | 
def test_example(assert_typus):
    """Processes a long mixed text with the whole set of processors."""
    source = (
        'Излучение, как следует из вышесказанного, концентрирует '
        'внутримолекулярный предмет - деятельности . "...ff \'Можно?\' '
        'предположить, что силовое - "поле "мент "d" ально" отклоняет" '
        'сенсибельный \'квазар !..\' cc", не учитывая мнения авторитетов. '
        'Искусство испускает данный электрон, учитывая опасность, '
        '<code> "d" test -- test(c)</code> которую    представляли '
        'собой писания Дюринга для не окрепшего еще немецкого рабочего '
        'движения. Смысл жизни -- амбивалентно (с) дискредитирует '
        'закон (r) исключённого(tm) третьего (тм)...      \n\n\n'
        '1500 мА*ч\n\n'
        '1-2=4\n'
        '- Химическое соединение (p) ненаблюдаемо контролирует экран-ый '
        'квазар (р). Идеи 3/4  гедонизма занимают b & b центральное место '
        'в утилитаризме(sm) "Милля и Бентама", однако <- гравитирующая -> '
        'сфера масштабирует фотон, +-2мм изменяя привычную == реальность. '
        'Силовое *3 поле -3 реально 3 * 2 /= 6   3x3 восстанавливает '
        'трансцендентальный 3" 2\' принцип 1000р. восприятия.'
        '"...\'test\'" (c) m&m\'s\n\n\n'
    )
    # Underscores stand for nbsp here, they are replaced right below
    expected = (
        'Излучение, как следует из_вышесказанного, концентрирует '
        'внутримолекулярный предмет\u202f—\u2009деятельности. «…ff „Можно?“ '
        'предположить, что силовое\u202f—\u2009„поле «мент „d“ ально» '
        'отклоняет“ '
        'сенсибельный „квазар!..“ cc», не_учитывая мнения авторитетов. '
        'Искусство испускает данный электрон, учитывая опасность, '
        '<code> "d" test -- test(c)</code> которую представляли собой '
        'писания Дюринга для не_окрепшего еще немецкого рабочего '
        'движения. Смысл жизни\u202f—\u2009амбивалентно ©_дискредитирует '
        'закон® исключённого™ третьего™…\n\n'
        '1500_мА•ч\n\n'
        '1−2=4\n'
        '—_Химическое соединение℗ ненаблюдаемо контролирует экран-ый '
        'квазар℗. Идеи ¾_гедонизма занимают b_&_b_центральное место '
        'в_утилитаризме℠ «Милля и_Бентама», однако ←_гравитирующая_→ '
        'сфера масштабирует фотон, ±2_мм изменяя привычную_≡_реальность. '
        'Силовое ×3_поле −3_реально 3_×_2_≠_6 3×3 восстанавливает '
        'трансцендентальный 3″ 2′ принцип 1000_₽ восприятия.'
        '«…„test“» ©_m&m’s'
    ).replace('_', NBSP)
    assert_typus(source, expected)
192 | 


--------------------------------------------------------------------------------
/tests/test_expressions.py:
--------------------------------------------------------------------------------
  1 | """
  2 | Tests expressions one by one.
  3 | Some of them may return different results depending on which was
  4 | applied earlier, so order matters. But that also means it's important
  5 | to be sure they don't affect each other more than expected. This case
  6 | tests every expression as if it was the only one to apply.
  7 | """
  8 | import pytest
  9 | 
 10 | from typus.chars import *
 11 | from typus.core import TypusCore
 12 | from typus.processors import EnRuExpressions
 13 | 
 14 | 
 15 | @pytest.fixture(name='factory')
 16 | def get_factory():
 17 |     def factory(*exps):
 18 |         class MyExpressions(EnRuExpressions):
 19 |             expressions = exps
 20 | 
 21 |         class Typus(TypusCore):
 22 |             processors = (MyExpressions, )
 23 |         return Typus()
 24 |     return factory
 25 | 
 26 | 
@pytest.mark.parametrize('source, expected', (
    ('110 р', f'110{NBSP}₽'),
    ('111 р.', f'111{NBSP}₽'),
    ('112 руб', f'112{NBSP}₽'),
    ('113 руб.', f'113{NBSP}₽'),
    # With comma
    ('114,00 р', f'114,00{NBSP}₽'),
    ('115.00 р', f'115.00{NBSP}₽'),
    # Ignores
    ('116 рубчиков', '116 рубчиков'),
    ('117 ру', '117 ру'),
    # Case sensitivity
    ('117 Р', '117 Р'),
))
def test_ruble(factory, source, expected):
    """Applies the ``ruble`` expression only."""
    typus = factory('ruble')
    assert expected == typus(source)
 44 | 
 45 | 
@pytest.mark.parametrize('source, expected', (
    ('foo{}bar'.format(' ' * 30), 'foo bar'),
))
def test_spaces(factory, source, expected):
    """Applies the ``spaces`` expression only."""
    typus = factory('spaces')
    assert expected == typus(source)
 52 | 
 53 | 
@pytest.mark.parametrize('source, expected', (
    ('a\nb', 'a\nb'),
    ('a\r\nb', 'a\nb'),
    ('a{0}b'.format('\n' * 5), 'a\n\nb'),
    ('a\n\n\r\nb', 'a\n\nb'),
))
def test_linebreaks(factory, source, expected):
    """Applies the ``linebreaks`` expression only."""
    typus = factory('linebreaks')
    assert expected == typus(source)
 63 | 
 64 | 
@pytest.mark.parametrize('source, expected', (
    ("She'd", f'She{RSQUO}d'),
    ("I'm", f'I{RSQUO}m'),
    ("it's", f'it{RSQUO}s'),
    ("don't", f'don{RSQUO}t'),
    ("you're", f'you{RSQUO}re'),
    ("he'll", f'he{RSQUO}ll'),
    ("90's", f'90{RSQUO}s'),
    ("Карло's", f'Карло{RSQUO}s'),
))
def test_apostrophe(factory, source, expected):
    """Applies the ``apostrophe`` expression only."""
    typus = factory('apostrophe')
    assert expected == typus(source)
 78 | 
 79 | 
@pytest.mark.parametrize('source, expected', (
    ('--', '--'),
    ('foo - foo', f'foo{MDASH_PAIR}foo'),
    # Leading comma case
    (', - foo', f',{MDASH}{THNSP}foo'),
    (', -- foo', f',{MDASH}{THNSP}foo'),
    # if line begins, adds nbsp after mdash
    ('-- foo', f'{MDASH}{NBSP}foo'),
    # if line ends, adds nbsp before mdash
    ('foo --', f'foo{NBSP}{MDASH}'),
    ('foo -- bar', f'foo{MDASH_PAIR}bar'),
    # Python markdown replaces dash with ndash, don't know why
    (f'foo {NDASH} foo', f'foo{MDASH_PAIR}foo'),
    ('foo - "11" 00', f'foo{MDASH_PAIR}"11" 00'),
    ('2 - 2foo', f'2{MDASH_PAIR}2foo'),
    ('2 - 2', '2 - 2'),  # Doesn't clash with minus
))
def test_mdash(factory, source, expected):
    """Applies the ``mdash`` expression only."""
    typus = factory('mdash')
    assert expected == typus(source)
100 | 
101 | 
@pytest.mark.parametrize('source, expected', (
    ('4\'', '4' + SPRIME),
    ('4"', '4' + DPRIME),
    ('" 22"', '" 22' + DPRIME),
    ('"4"', '"4"'),
))
def test_primes(factory, source, expected):
    """Applies the ``primes`` expression only."""
    typus = factory('primes')
    assert expected == typus(source)
111 | 
112 | 
@pytest.mark.parametrize('source, expected', (
    ('4444444 fooo', '4444444 fooo'),
    ('444 foo', f'444{NBSP}foo'),
    ('444 +', f'444{NBSP}+'),
    ('444 4444 bucks', f'444{NBSP}4444 bucks'),
    ('444 -', f'444{NBSP}-'),
    ('4444444 foo', '4444444 foo'),
))
def test_digit_spaces(factory, source, expected):
    """Applies the ``digit_spaces`` expression only."""
    typus = factory('digit_spaces')
    assert expected == typus(source)
124 | 
125 | 
@pytest.mark.parametrize('source, expected', (
    ('aaa aaa', 'aaa aaa'),
    ('aaa-aa aa', 'aaa-aa aa'),  # important check -- dash and 2 letters
    ('aaa aa', 'aaa aa'),
    ('I’ll check', 'I’ll check'),
    ('a aa a', f'a{NBSP}aa{NBSP}a'),
    ('aaa 2a', 'aaa 2a')  # letters only, no digits,
))
def test_pairs(factory, source, expected):
    """Applies the ``pairs`` expression only."""
    typus = factory('pairs')
    assert expected == typus(source)
137 | 
138 | 
@pytest.mark.parametrize('source, expected', (
    # Latin
    ('1mm', f'1{NBSP}mm'),
    ('1cm', f'1{NBSP}cm'),
    ('1dm', f'1{NBSP}dm'),
    ('1m', f'1{NBSP}m'),
    ('1km', f'1{NBSP}km'),
    ('1mg', f'1{NBSP}mg'),
    ('1kg', f'1{NBSP}kg'),
    ('1ml', f'1{NBSP}ml'),
    ('1mA•h', f'1{NBSP}mA•h'),
    ('1dpi', f'1{NBSP}dpi'),
    # Cyrillic
    ('1мм', f'1{NBSP}мм'),
    ('1см', f'1{NBSP}см'),
    ('1дм', f'1{NBSP}дм'),
    ('1м', f'1{NBSP}м'),
    ('1км', f'1{NBSP}км'),
    ('1мг', f'1{NBSP}мг'),
    ('1г', f'1{NBSP}г'),
    ('1кг', f'1{NBSP}кг'),
    ('1мл', f'1{NBSP}мл'),
    ('1л', f'1{NBSP}л'),
    ('1т', f'1{NBSP}т'),
    ('1мА•ч', f'1{NBSP}мА•ч'),
    # Skips
    ('1foobar', '1foobar'),
    # Exceptions
    ('3g', '3g'),  # 4G lte
    ('3d', '3d'),  # 3D movie
    ('2nd', '2nd'),  # floor
    ('3rd', '3rd'),  # floor
    ('4th', '4th'),  # floor
    ('1px', '1px'),
    ('1000A', '1000A'),
    # Case sensitivity
    ('1000ML', '1000ML'),
))
def test_units(factory, source, expected):
    """Applies the ``units`` expression only."""
    typus = factory('units')
    assert expected == typus(source)
180 | 
181 | 
@pytest.mark.parametrize('source, expected', (
    ('25-foo', '25-foo'),
    ('2-3', f'2{NDASH}3'),
    ('2,5-3', f'2,5{NDASH}3'),
    ('0.5-3', f'0.5{NDASH}3'),

    ('2-3 foo', f'2{NDASH}3 foo'),
    ('(15-20 items)', f'(15{NDASH}20 items)'),

    # Float
    ('0,5-3', f'0,5{NDASH}3'),
    ('-0,5-3', f'-0,5{NDASH}3'),
    ('-5.5-3', f'-5.5{NDASH}3'),
    ('-5,5-3', f'-5,5{NDASH}3'),
    ('-5,5-3.5', f'-5,5{NDASH}3.5'),

    # Skips
    ('2 - 3', '2 - 3'),
    ('2-3 x 4', '2-3 x 4'),
    ('2-3 * 4', '2-3 * 4'),
    ('2-3 - 4', '2-3 - 4'),

    # Left is greater than or equal to right, so it's not a range
    ('3-2', '3-2'),
    ('3-3', '3-3'),

    # Doesn't affect math
    ('1-2=4', '1-2=4'),
))
def test_ranges(factory, source, expected):
    """Applies the ``ranges`` expression only."""
    typus = factory('ranges')
    assert expected == typus(source)
214 | 
215 | 
@pytest.mark.parametrize('source, expected', (
    ('(C)', '©'),  # Case insensitive test
    ('...', '…'),
    ('<-', '←'),
    ('->', '→'),
    ('+-', '±'),
    ('+' + MINUS, '±'),
    ('<=', '≤'),
    ('>=', '≥'),
    ('/=', '≠'),
    ('==', '≡'),
    ('(r)', '®'),
    ('(c)', '©'),
    ('(p)', '℗'),
    ('(tm)', '™'),
    ('(sm)', '℠'),
    ('mA*h', 'mA•h'),
    # cyrillic
    ('(с)', '©'),
    ('(р)', '℗'),
    ('(тм)', '™',),
    ('мА*ч', 'мА•ч'),
))
def test_complex_symbols(factory, source, expected):
    """Applies the ``complex_symbols`` expression only."""
    typus = factory('complex_symbols')
    assert expected == typus(source)
242 | 
243 | 
@pytest.mark.parametrize('source, expected', (
    ('1/2', '½'),
    ('1/3', '⅓'),
    # NOTE(review): the expected value starts with a zero-width space
    # (U+200B) which was invisible in the source. It's kept, but written
    # explicitly -- confirm it's intended and not a copy-paste artifact.
    ('1/4', '\u200b¼'),
    ('1/5', '⅕'),
    ('1/6', '⅙'),
    ('1/8', '⅛'),
    ('2/3', '⅔'),
    ('2/5', '⅖'),
    ('3/4', '¾'),
    ('3/5', '⅗'),
    ('3/8', '⅜'),
    ('4/5', '⅘'),
    ('5/6', '⅚'),
    ('5/8', '⅝'),
    ('7/8', '⅞'),
    # False positive
    ('11/22', '11/22'),
))
def test_vulgar_fractions(factory, source, expected):
    """Applies the ``vulgar_fractions`` expression only."""
    typus = factory('vulgar_fractions')
    assert expected == typus(source)
266 | 
267 | 
@pytest.mark.parametrize('source, expected', (
    ('-', MINUS),
    ('*', TIMES),
    ('x', TIMES),
    ('х', TIMES),
))
def test_math(factory, source, expected):
    """Applies the ``math`` expression only, in different surroundings."""
    typus = factory('math')
    # -3, 3-3, 3 - 3, x - 3
    assert typus(source + '3') == expected + '3'
    # The symbol attached to a word is expected to stay as it is
    assert typus(f'word{source} 3') == f'word{source} 3'
    assert typus(f'3{source}3') == f'3{expected}3'
    assert typus(f'3 {source} 3') == f'3 {expected} 3'
    assert typus(f'x {source} 3') == f'x {expected} 3'
    assert typus(f'3{source}3=3') == f'3{expected}3=3'
283 | 
284 | 
@pytest.mark.parametrize('source, expected', (
    ('т. д.', f'т.{NNBSP}д.'),
    ('т.д.', f'т.{NNBSP}д.'),
    ('т.п.', f'т.{NNBSP}п.'),
    ('т. ч.', f'т.{NNBSP}ч.'),
    ('т.е.', f'т.{NNBSP}е.'),
    ('Пушкин А.С.', f'Пушкин А.{NNBSP}С.'),
    ('А.С. Пушкин', f'А.{NNBSP}С.{NBSP}Пушкин'),
))
def test_abbrs(factory, source, expected):
    """Applies the ``abbrs`` expression only."""
    typus = factory('abbrs')
    assert expected == typus(source)
297 | 
298 | 
@pytest.mark.parametrize('char', f'←$€£%±{MINUS}{TIMES}©§¶№')
def test_rep_positional_spaces_after(factory, char):
    """Only the space following the symbol becomes non-breaking."""
    process = factory('rep_positional_spaces')
    wanted = f'foo {char}{NBSP}bar'
    assert process(f'foo {char} bar') == wanted
303 | 
304 | 
@pytest.mark.parametrize('char', '&≡≤≥≠')
def test_rep_positional_spaces_both(factory, char):
    """Spaces on both sides of the symbol become non-breaking."""
    process = factory('rep_positional_spaces')
    wanted = f'foo{NBSP}{char}{NBSP}bar'
    assert process(f'foo {char} bar') == wanted
309 | 
310 | 
@pytest.mark.parametrize('char', '₽→' + MDASH)
def test_rep_positional_spaces_before(factory, char):
    """Only the space preceding the symbol becomes non-breaking."""
    process = factory('rep_positional_spaces')
    wanted = f'foo{NBSP}{char} bar'
    assert process(f'foo {char} bar') == wanted
315 | 
316 | 
@pytest.mark.parametrize('char', '®℗™℠:,.?!…')
def test_rdel_positional_spaces_before(factory, char):
    """The space preceding the symbol is dropped, the one after is kept."""
    process = factory('del_positional_spaces')
    wanted = f'foo{char} bar'
    assert process(f'foo {char} bar') == wanted
321 | 


--------------------------------------------------------------------------------
/docs/conf.py:
--------------------------------------------------------------------------------
  1 | # -*- coding: utf-8 -*-
  2 | #
  3 | # Typus documentation build configuration file, created by
  4 | # sphinx-quickstart on Tue Jul 12 22:26:26 2016.
  5 | #
  6 | # This file is execfile()d with the current directory set to its
  7 | # containing dir.
  8 | #
  9 | # Note that not all possible configuration values are present in this
 10 | # autogenerated file.
 11 | #
 12 | # All configuration values have a default; values that are commented out
 13 | # serve to show the default.
 14 | 
 15 | # If extensions (or modules to document with autodoc) are in another directory,
 16 | # add these directories to sys.path here. If the directory is relative to the
 17 | # documentation root, use os.path.abspath to make it absolute, like shown here.
 18 | #
 19 | # import os
 20 | # import sys
 21 | # sys.path.insert(0, os.path.abspath('.'))
 22 | 
 23 | # -- General configuration ------------------------------------------------
 24 | 
 25 | # If your documentation needs a minimal Sphinx version, state it here.
 26 | #
 27 | # needs_sphinx = '1.0'
 28 | 
 29 | # Add any Sphinx extension module names here, as strings. They can be
 30 | # extensions coming with Sphinx (named 'sphinx.ext.*') or your custom
 31 | # ones.
 32 | extensions = [
 33 |     'sphinx.ext.autodoc',
 34 |     'sphinx.ext.doctest',
 35 |     'sphinx.ext.coverage',
 36 | ]
 37 | 
 38 | # Add any paths that contain templates here, relative to this directory.
 39 | templates_path = ['_templates']
 40 | 
 41 | # The suffix(es) of source filenames.
 42 | # You can specify multiple suffix as a list of string:
 43 | #
 44 | # source_suffix = ['.rst', '.md']
 45 | source_suffix = '.rst'
 46 | 
 47 | # The encoding of source files.
 48 | #
 49 | # source_encoding = 'utf-8-sig'
 50 | 
 51 | # The master toctree document.
 52 | master_doc = 'index'
 53 | 
 54 | # General information about the project.
 55 | project = 'Typus'
 56 | copyright = '2016, Murad Byashimov'
 57 | author = 'Murad Byashimov'
 58 | 
 59 | # The version info for the project you're documenting, acts as replacement for
 60 | # |version| and |release|, also used in various other places throughout the
 61 | # built documents.
 62 | #
 63 | # The short X.Y version.
 64 | version = '0.2.2'
 65 | # The full version, including alpha/beta/rc tags.
 66 | release = '0.2.2'
 67 | 
 68 | # The language for content autogenerated by Sphinx. Refer to documentation
 69 | # for a list of supported languages.
 70 | #
 71 | # This is also used if you do content translation via gettext catalogs.
 72 | # Usually you set "language" from the command line for these cases.
 73 | language = None
 74 | 
 75 | # There are two options for replacing |today|: either, you set today to some
 76 | # non-false value, then it is used:
 77 | #
 78 | # today = ''
 79 | #
 80 | # Else, today_fmt is used as the format for a strftime call.
 81 | #
 82 | # today_fmt = '%B %d, %Y'
 83 | 
 84 | # List of patterns, relative to source directory, that match files and
 85 | # directories to ignore when looking for source files.
 86 | # This patterns also effect to html_static_path and html_extra_path
 87 | exclude_patterns = []
 88 | 
 89 | # The reST default role (used for this markup: `text`) to use for all
 90 | # documents.
 91 | #
 92 | # default_role = None
 93 | 
 94 | # If true, '()' will be appended to :func: etc. cross-reference text.
 95 | #
 96 | # add_function_parentheses = True
 97 | 
 98 | # If true, the current module name will be prepended to all description
 99 | # unit titles (such as .. function::).
100 | #
101 | # add_module_names = True
102 | 
103 | # If true, sectionauthor and moduleauthor directives will be shown in the
104 | # output. They are ignored by default.
105 | #
106 | # show_authors = False
107 | 
108 | # The name of the Pygments (syntax highlighting) style to use.
109 | pygments_style = 'sphinx'
110 | 
111 | # A list of ignored prefixes for module index sorting.
112 | # modindex_common_prefix = []
113 | 
114 | # If true, keep warnings as "system message" paragraphs in the built documents.
115 | # keep_warnings = False
116 | 
117 | # If true, `todo` and `todoList` produce output, else they produce nothing.
118 | todo_include_todos = True
119 | 
120 | 
121 | # -- Options for HTML output ----------------------------------------------
122 | 
123 | # The theme to use for HTML and HTML Help pages.  See the documentation for
124 | # a list of builtin themes.
125 | #
126 | html_theme = 'sphinx_rtd_theme'
127 | 
128 | # Theme options are theme-specific and customize the look and feel of a theme
129 | # further.  For a list of options available for each theme, see the
130 | # documentation.
131 | #
132 | # html_theme_options = {}
133 | 
134 | # Add any paths that contain custom themes here, relative to this directory.
135 | # html_theme_path = []
136 | 
137 | # The name for this set of Sphinx documents.
138 | # "<project> v<release> documentation" by default.
139 | #
140 | # html_title = 'Typus v0.0.4'
141 | 
142 | # A shorter title for the navigation bar.  Default is the same as html_title.
143 | #
144 | # html_short_title = None
145 | 
146 | # The name of an image file (relative to this directory) to place at the top
147 | # of the sidebar.
148 | #
149 | # html_logo = None
150 | 
151 | # The name of an image file (relative to this directory) to use as a favicon of
152 | # the docs.  This file should be a Windows icon file (.ico) being 16x16 or 32x32
153 | # pixels large.
154 | #
155 | # html_favicon = None
156 | 
157 | # Add any paths that contain custom static files (such as style sheets) here,
158 | # relative to this directory. They are copied after the builtin static files,
159 | # so a file named "default.css" will overwrite the builtin "default.css".
160 | html_static_path = ['_static']
161 | 
162 | # Add any extra paths that contain custom files (such as robots.txt or
163 | # .htaccess) here, relative to this directory. These files are copied
164 | # directly to the root of the documentation.
165 | #
166 | # html_extra_path = []
167 | 
168 | # If not None, a 'Last updated on:' timestamp is inserted at every page
169 | # bottom, using the given strftime format.
170 | # The empty string is equivalent to '%b %d, %Y'.
171 | #
172 | # html_last_updated_fmt = None
173 | 
174 | # If true, SmartyPants will be used to convert quotes and dashes to
175 | # typographically correct entities.
176 | #
177 | # html_use_smartypants = True
178 | 
179 | # Custom sidebar templates, maps document names to template names.
180 | #
181 | # html_sidebars = {}
182 | 
183 | # Additional templates that should be rendered to pages, maps page names to
184 | # template names.
185 | #
186 | # html_additional_pages = {}
187 | 
188 | # If false, no module index is generated.
189 | #
190 | # html_domain_indices = True
191 | 
192 | # If false, no index is generated.
193 | #
194 | # html_use_index = True
195 | 
196 | # If true, the index is split into individual pages for each letter.
197 | #
198 | # html_split_index = False
199 | 
200 | # If true, links to the reST sources are added to the pages.
201 | #
202 | # html_show_sourcelink = True
203 | 
204 | # If true, "Created using Sphinx" is shown in the HTML footer. Default is True.
205 | #
206 | # html_show_sphinx = True
207 | 
208 | # If true, "(C) Copyright ..." is shown in the HTML footer. Default is True.
209 | #
210 | # html_show_copyright = True
211 | 
212 | # If true, an OpenSearch description file will be output, and all pages will
213 | # contain a <link> tag referring to it.  The value of this option must be the
214 | # base URL from which the finished HTML is served.
215 | #
216 | # html_use_opensearch = ''
217 | 
218 | # This is the file name suffix for HTML files (e.g. ".xhtml").
219 | # html_file_suffix = None
220 | 
221 | # Language to be used for generating the HTML full-text search index.
222 | # Sphinx supports the following languages:
223 | #   'da', 'de', 'en', 'es', 'fi', 'fr', 'hu', 'it', 'ja'
224 | #   'nl', 'no', 'pt', 'ro', 'ru', 'sv', 'tr', 'zh'
225 | #
226 | # html_search_language = 'en'
227 | 
228 | # A dictionary with options for the search language support, empty by default.
229 | # 'ja' uses this config value.
230 | # 'zh' user can custom change `jieba` dictionary path.
231 | #
232 | # html_search_options = {'type': 'default'}
233 | 
234 | # The name of a javascript file (relative to the configuration directory) that
235 | # implements a search results scorer. If empty, the default will be used.
236 | #
237 | # html_search_scorer = 'scorer.js'
238 | 
239 | # Output file base name for HTML help builder.
240 | htmlhelp_basename = 'Typusdoc'
241 | 
242 | # -- Options for LaTeX output ---------------------------------------------
243 | 
244 | latex_elements = {
245 |      # The paper size ('letterpaper' or 'a4paper').
246 |      #
247 |      # 'papersize': 'letterpaper',
248 | 
249 |      # The font size ('10pt', '11pt' or '12pt').
250 |      #
251 |      # 'pointsize': '10pt',
252 | 
253 |      # Additional stuff for the LaTeX preamble.
254 |      #
255 |      # 'preamble': '',
256 | 
257 |      # Latex figure (float) alignment
258 |      #
259 |      # 'figure_align': 'htbp',
260 | }
261 | 
262 | # Grouping the document tree into LaTeX files. List of tuples
263 | # (source start file, target name, title,
264 | #  author, documentclass [howto, manual, or own class]).
265 | latex_documents = [
266 |     (master_doc, 'Typus.tex', 'Typus Documentation',
267 |      'Murad Byashimov', 'manual'),
268 | ]
269 | 
270 | # The name of an image file (relative to this directory) to place at the top of
271 | # the title page.
272 | #
273 | # latex_logo = None
274 | 
275 | # For "manual" documents, if this is true, then toplevel headings are parts,
276 | # not chapters.
277 | #
278 | # latex_use_parts = False
279 | 
280 | # If true, show page references after internal links.
281 | #
282 | # latex_show_pagerefs = False
283 | 
284 | # If true, show URL addresses after external links.
285 | #
286 | # latex_show_urls = False
287 | 
288 | # Documents to append as an appendix to all manuals.
289 | #
290 | # latex_appendices = []
291 | 
292 | # If false, no module index is generated.
293 | #
294 | # latex_domain_indices = True
295 | 
296 | 
297 | # -- Options for manual page output ---------------------------------------
298 | 
299 | # One entry per manual page. List of tuples
300 | # (source start file, name, description, authors, manual section).
301 | man_pages = [
302 |     (master_doc, 'typus', 'Typus Documentation',
303 |      [author], 1)
304 | ]
305 | 
306 | # If true, show URL addresses after external links.
307 | #
308 | # man_show_urls = False
309 | 
310 | 
311 | # -- Options for Texinfo output -------------------------------------------
312 | 
313 | # Grouping the document tree into Texinfo files. List of tuples
314 | # (source start file, target name, title, author,
315 | #  dir menu entry, description, category)
316 | texinfo_documents = [
317 |     (master_doc, 'Typus', 'Typus Documentation',
318 |      author, 'Typus', 'One line description of project.',
319 |      'Miscellaneous'),
320 | ]
321 | 
322 | # Documents to append as an appendix to all manuals.
323 | #
324 | # texinfo_appendices = []
325 | 
326 | # If false, no module index is generated.
327 | #
328 | # texinfo_domain_indices = True
329 | 
330 | # How to display URL addresses: 'footnote', 'no', or 'inline'.
331 | #
332 | # texinfo_show_urls = 'footnote'
333 | 
334 | # If true, do not generate a @detailmenu in the "Top" node's menu.
335 | #
336 | # texinfo_no_detailmenu = False
337 | 
338 | doctest_global_setup = """
339 | from typus import *
340 | from typus.utils import *
341 | """
342 | 


--------------------------------------------------------------------------------
/typus/processors/expressions.py:
--------------------------------------------------------------------------------
  1 | import re
  2 | from functools import partial
  3 | 
  4 | from ..chars import *
  5 | from ..utils import RE_SCASE, doc_map, map_choices, re_choices, re_compile
  6 | from .base import BaseProcessor
  7 | 
  8 | 
  9 | class BaseExpressions(BaseProcessor):
 10 |     r"""
 11 |     Provides regular expressions support. Looks for ``expressions`` list
 12 |     attribute in Typus with expressions name, compiles and runs them on every
 13 |     Typus call.
 14 | 
 15 |     >>> from typus.core import TypusCore
 16 |     >>> from typus.processors import BaseExpressions
 17 |     ...
 18 |     >>> class MyExpressions(BaseExpressions):
 19 |     ...     expressions = ('bold_price', )  # no prefix `expr_`!
 20 |     ...     def expr_bold_price(self):
 21 |     ...         expr = (
 22 |     ...             (r'(\$\d+)', r'<b>\1</b>'),
 23 |     ...         )
 24 |     ...         return expr
 25 |     ...
 26 |     >>> class MyTypus(TypusCore):
 27 |     ...     processors = (MyExpressions, )
 28 |     ...
 29 |     >>> my_typus = MyTypus()  # `expr_bold_price` is compiled and stored
 30 |     >>> my_typus('Get now just for $1000!')
 31 |     'Get now just for <b>$1000</b>!'
 32 | 
 33 |     .. note::
 34 |         *Expression* is a pair of regex and replace strings. Regex strings are
 35 |         compiled with :func:`typus.utils.re_compile` with a bunch of flags:
 36 |         unicode, case-insensitive, etc. If that doesn't suit for you pass your
 37 |         own flags as a third member of the tuple: ``(regex, replace, re.I)``.
 38 |     """
 39 | 
 40 |     expressions = NotImplemented
 41 | 
 42 |     def __init__(self, *args, **kwargs):
 43 |         super().__init__(*args, **kwargs)
 44 | 
 45 |         # Compiles expressions
 46 |         self.compiled = tuple(
 47 |             partial(re_compile(*expr[::2]).sub, expr[1])
 48 |             for name in self.expressions
 49 |             for expr in getattr(self, 'expr_' + name)()
 50 |         )
 51 | 
 52 |     def run(self, text: str, **kwargs) -> str:
 53 |         for expression in self.compiled:
 54 |             text = expression(text)
 55 |         return self.run_other(text, **kwargs)
 56 | 
 57 | 
 58 | class EnRuExpressions(BaseExpressions):
 59 |     """
 60 |     This class holds most of Typus functionality for English and Russian
 61 |     languages.
 62 |     """
 63 | 
 64 |     expressions = (
 65 |         'spaces linebreaks apostrophe complex_symbols mdash primes '
 66 |         'digit_spaces pairs units ranges vulgar_fractions math ruble abbrs '
 67 |         'rep_positional_spaces del_positional_spaces'
 68 |     ).split()
 69 | 
 70 |     # Any unicode word
 71 |     words = r'[^\W\d_]'
 72 | 
 73 |     complex_symbols = {
 74 |         '...': '…',
 75 |         '<-': '←',
 76 |         '->': '→',
 77 |         '+-': '±',
 78 |         '+' + MINUS: '±',
 79 |         '<=': '≤',
 80 |         '>=': '≥',
 81 |         '/=': '≠',
 82 |         '==': '≡',
 83 |         '(r)': '®',
 84 |         '(c)': '©',
 85 |         '(p)': '℗',
 86 |         '(tm)': '™',
 87 |         '(sm)': '℠',
 88 |         'mA*h': 'mA•h',
 89 |         # cyrillic
 90 |         '(с)': '©',
 91 |         '(р)': '℗',
 92 |         '(тм)': '™',
 93 |         'мА*ч': 'мА•ч',
 94 |     }
 95 | 
 96 |     units = (
 97 |         'mm',
 98 |         'cm',
 99 |         'dm',
100 |         'm',
101 |         'km',
102 |         'mg',
103 |         'kg',
104 |         'ml',
105 |         'dpi',
106 |         'mA•h',
107 |         'мм',
108 |         'см',
109 |         'дм',
110 |         'м',
111 |         'км',
112 |         'мг',
113 |         'г',
114 |         'кг',
115 |         'т',
116 |         'мл',
117 |         'л',
118 |         'мА•ч',
119 |     )
120 | 
121 |     # This is for docs
122 |     units_doc_map = {'1' + k: '1{}{}'.format(NBSP, k) for k in units}
123 | 
124 |     vulgar_fractions = {
125 |         '1/2': '½',
126 |         '1/3': '⅓',
127 |         '1/4': '​¼',
128 |         '1/5': '⅕',
129 |         '1/6': '⅙',
130 |         '1/8': '⅛',
131 |         '2/3': '⅔',
132 |         '2/5': '⅖',
133 |         '3/4': '¾',
134 |         '3/5': '⅗',
135 |         '3/8': '⅜',
136 |         '4/5': '⅘',
137 |         '5/6': '⅚',
138 |         '5/8': '⅝',
139 |         '7/8': '⅞',
140 |     }
141 | 
142 |     math = {
143 |         '-': MINUS,
144 |         '*xх': TIMES,
145 |     }
146 | 
147 |     # No need to put >=, +-, etc, after expr_complex_symbols
148 |     math_operators = r'[\-{0}\*xх{1}\+\=±≤≥≠÷\/]'.format(MINUS, TIMES)
149 | 
150 |     rep_positional_spaces = {
151 |         # No need to put vulgar fractions in here because of expr_digit_spaces
152 |         # which joins digits and words afterward
153 |         'after': '←$€£%±{0}{1}©§¶№'.format(MINUS, TIMES),
154 |         'both': '&≡≤≥≠',
155 |         'before': '₽→' + MDASH,
156 |     }
157 | 
158 |     del_positional_spaces = {
159 |         'before': '®℗™℠:,.?!…',
160 |     }
161 | 
162 |     ruble = (
163 |         'руб',
164 |         'р',
165 |     )
166 | 
167 |     @staticmethod
168 |     def expr_spaces():
169 |         """
170 |         Trims spaces at the beginning and end of the line and removes extra
171 |         spaces within.
172 | 
173 |         >>> from typus import en_typus
174 |         >>> en_typus('   foo bar  ')
175 |         'foo bar'
176 | 
177 |         .. caution::
178 |             Doesn't work correctly with nbsp (replaces with whitespace).
179 |         """
180 | 
181 |         expr = (
182 |             (r'{0}{{2,}}'.format(ANYSP), WHSP),
183 |             (r'(?:^{0}+|{0}+$)'.format(ANYSP), ''),
184 |         )
185 |         return expr
186 | 
187 |     @staticmethod
188 |     def expr_linebreaks():
189 |         r"""
190 |         Converts line breaks to unix-style and removes extra breaks
191 |         if found more than two in a row.
192 | 
193 |         >>> from typus import en_typus
194 |         >>> en_typus('foo\r\nbar\n\n\nbaz')
195 |         'foo\nbar\n\nbaz'
196 |         """
197 | 
198 |         expr = (
199 |             (r'\r\n', '\n'),
200 |             (r'\n{2,}', '\n' * 2),
201 |         )
202 |         return expr
203 | 
204 |     def expr_apostrophe(self):
205 |         """
206 |         Replaces single quote with apostrophe.
207 | 
208 |         >>> from typus import en_typus
209 |         >>> en_typus("She'd, I'm, it's, don't, you're, he'll, 90's")
210 |         'She’d, I’m, it’s, don’t, you’re, he’ll, 90’s'
211 | 
212 |         .. note::
213 |             By the way it works with any omitted word. But then again, why not?
214 |         """
215 | 
216 |         expr = (
217 |             (r'(?<={0}|[0-9])\'(?={0})'.format(self.words), RSQUO),
218 |         )
219 |         return expr
220 | 
221 |     @doc_map(complex_symbols)
222 |     def expr_complex_symbols(self):
223 |         """
224 |         Replaces complex symbols with Unicode characters. Doesn't care
225 |         about case-sensitivity and handles Cyrillic-Latin twins
226 |         like ``c`` and ``с``.
227 | 
228 |         >>> from typus import en_typus
229 |         >>> en_typus('(c)(с)(C)(r)(R)...')
230 |         '©©©®®…'
231 |         """
232 | 
233 |         expr = (
234 |             map_choices(self.complex_symbols),
235 |         )
236 |         return expr
237 | 
238 |     @staticmethod
239 |     def expr_mdash():
240 |         """
241 |         Replaces dash with mdash.
242 | 
243 |         >>> from typus import en_typus
244 |         >>> en_typus('foo -- bar')  # adds non-breaking space after `foo`
245 |         'foo\u202f—\u2009bar'
246 |         """
247 | 
248 |         expr = (
249 |             # Double dash guarantees to be replaced with mdash
250 |             (r'{0}--{0}'.format(WHSP), MDASH_PAIR),
251 | 
252 |             # Dash can be between anything except digits
253 |             # because in that case it's not obvious
254 |             (r'{0}+[\-|{1}]{0}+(?!\d\b)'.format(ANYSP, NDASH), MDASH_PAIR),
255 | 
256 |             # Same but backwards
257 |             # It joins non-digit with digit or word
258 |             (r'(\b\D+){0}+[\-|{1}]{0}+'.format(ANYSP, NDASH),
259 |              r'\1{0}'.format(MDASH_PAIR)),
260 | 
261 |             # Line beginning adds nbsp after dash
262 |             (r'^\-{{1,2}}{0}+'.format(ANYSP),
263 |              r'{0}{1}'.format(MDASH, NBSP)),
264 | 
265 |             # Also mdash can be at the end of the line in poems
266 |             (r'{0}+\-{{1,2}}{0}*(?=$|<br/?>)'.format(ANYSP),
267 |              r'{0}{1}'.format(NBSP, MDASH)),
268 | 
269 |             # Special case with leading comma
270 |             (',' + MDASH_PAIR, f',{MDASH}{THNSP}'),
271 |         )
272 |         return expr
273 | 
274 |     @staticmethod
275 |     def expr_primes():
276 |         r"""
277 |         Replaces quotes with prime after digits.
278 | 
279 |         >>> from typus import en_typus
280 |         >>> en_typus('3\' 5" long')
281 |         '3′ 5″ long'
282 | 
283 |         .. caution::
284 |             Won't break ``"4"``, but fails with ``" 4"``.
285 |         """
286 | 
287 |         expr = (
288 |             (r'(^|{0})(\d+)\''.format(ANYSP), r'\1\2' + SPRIME),
289 |             (r'(^|{0})(\d+)"'.format(ANYSP), r'\1\2' + DPRIME),
290 |         )
291 |         return expr
292 | 
293 |     def expr_digit_spaces(self):
294 |         """
295 |         Replaces whitespace with non-breaking space after 4 (and less)
296 |         length digits if word or digit without comma or math operators
297 |         found afterwards:
298 |         3 apples
299 |         40 000 bucks
300 |         400 + 3
301 |         Skips:
302 |         4000 bucks
303 |         40 000,00 bucks
304 |         """
305 | 
306 |         expr = (
307 |             (r'\b(\d{{1,3}}){0}(?=[0-9]+\b|{1}|{2})'
308 |              .format(WHSP, self.words, self.math_operators), r'\1' + NBSP),
309 |         )
310 |         return expr
311 | 
312 |     def expr_pairs(self):
313 |         """
314 |         Replaces whitespace with non-breaking space after 1-2 length words.
315 |         """
316 | 
317 |         expr = (
318 |             # Unions, units and all that small staff
319 |             (r'\b({1}{{1,2}}){0}+'.format(WHSP, self.words), r'\1' + NBSP),
320 |             # Fixes previous with leading dash, ellipsis or apostrophe
321 |             (r'([-…’]{1}{{1,2}}){0}'.format(NBSP, self.words), r'\1' + WHSP),
322 |         )
323 |         return expr
324 | 
325 |     @doc_map(units_doc_map)
326 |     def expr_units(self):
327 |         """
328 |         Puts narrow non-breaking space between digits and units.
329 |         Case sensitive.
330 | 
331 |         >>> from typus import en_typus
332 |         >>> en_typus('1mm', debug=True), en_typus('1mm')
333 |         ('1_mm', '1 mm')
334 |         """
335 | 
336 |         expr = (
337 |             (r'\b(\d+){0}*{1}\b'.format(WHSP, re_choices(self.units)),
338 |              r'\1{0}\2'.format(NBSP), RE_SCASE),
339 |         )
340 |         return expr
341 | 
342 |     def expr_ranges(self):
343 |         """
344 |         Replaces dash with ndash in ranges.
345 |         Supports float and negative values.
346 |         Tries to not mess with minus: skips if any math operator or word
347 |         was found after dash: 3-2=1, 24-pin.
348 |         **NOTE**: _range_ should not have spaces between dash: `2-3` and
349 |         left side should be less than right side.
350 |         """
351 | 
352 |         def ufloat(string):
353 |             return float(string.replace(',', '.'))
354 | 
355 |         def replace(match):
356 |             left, dash, right = match.groups()
357 |             if ufloat(left) < ufloat(right):
358 |                 dash = NDASH
359 |             return '{0}{1}{2}'.format(left, dash, right)
360 | 
361 |         expr = (
362 |             (r'(-?(?:[0-9]+[\.,][0-9]+|[0-9]+))(-)'
363 |              r'([0-9]+[\.,][0-9]+|[0-9]+)'
364 |              r'(?!{0}*{1}|{2})'
365 |              .format(ANYSP, self.math_operators, self.words),
366 |              replace),
367 |         )
368 |         return expr
369 | 
370 |     @doc_map(vulgar_fractions)
371 |     def expr_vulgar_fractions(self):
372 |         """
373 |         Replaces vulgar fractions with appropriate unicode characters.
374 | 
375 |         >>> from typus import en_typus
376 |         >>> en_typus('1/2')
377 |         '½'
378 |         """
379 | 
380 |         expr = (
381 |             # \b to excludes digits which are not on map, like `11/22`
382 |             map_choices(self.vulgar_fractions, r'\b({0})\b'),
383 |         )
384 |         return expr
385 | 
386 |     @doc_map(math)
387 |     def expr_math(self):
388 |         """
389 |         Puts minus and multiplication symbols between pair and before
390 |         single digits.
391 | 
392 |         >>> from typus import en_typus
393 |         >>> en_typus('3 - 3 = 0')
394 |         '3 − 3 = 0'
395 |         >>> en_typus('-3 degrees')
396 |         '−3 degrees'
397 |         >>> en_typus('3 x 3 = 9')
398 |         '3 × 3 = 9'
399 |         >>> en_typus('x3 better!')
400 |         '×3 better!'
401 |         """
402 | 
403 |         expr = (
404 |             (r'(^|{0}|\d)[{1}]({0}*\d)'.format(ANYSP, re.escape(x)),
405 |              r'\1{0}\2'.format(y)) for x, y in self.math.items()
406 |         )
407 |         return expr
408 | 
409 |     def expr_abbrs(self):
410 |         """
411 |         Adds narrow non-breaking space and replaces whitespaces between
412 |         shorten words.
413 |         """
414 | 
415 |         expr = (
416 |             (r'\b({1}\.){0}*({1}\.)'.format(ANYSP, self.words),
417 |              r'\1{0}\2'.format(NNBSP)),
418 |             (r'\b({1}\.){0}*(?={1})'.format(WHSP, self.words),
419 |              r'\1{0}'.format(NBSP)),
420 |         )
421 |         return expr
422 | 
423 |     def expr_ruble(self):
424 |         """
425 |         Replaces `руб` and `р` (with or without dot) after digits
426 |         with ruble symbol. Case sensitive.
427 | 
428 |         >>> from typus import en_typus
429 |         >>> en_typus('1000 р.')
430 |         '1000 ₽'
431 | 
432 |         .. caution::
433 | 
434 |             Drops the dot at the end of sentence if match found in there.
435 |         """
436 | 
437 |         choices = re_choices(self.ruble, r'(?:{0})')
438 |         expr = (
439 |             (r'(\d){0}*{1}\b\.?'.format(ANYSP, choices),
440 |              r'\1{0}₽'.format(NBSP), RE_SCASE),  # case matters
441 |         )
442 |         return expr
443 | 
444 |     @staticmethod
445 |     def _positional_spaces(data, find, replace):
446 |         """
447 |         Helper method for `rep_positional_spaces` and `del_positional_spaces`
448 |         expressions.
449 |         """
450 | 
451 |         both = data.get('both', '')
452 |         before = re.escape(data.get('before', '') + both)
453 |         after = re.escape(data.get('after', '') + both)
454 |         if before:
455 |             yield r'{0}+(?=[{1}])'.format(find, before), replace
456 |         if after:
457 |             yield r'(?<=[{1}]){0}+'.format(find, after), replace
458 | 
459 |     @doc_map(rep_positional_spaces, keys='Direction', values='Characters')
460 |     def expr_rep_positional_spaces(self):
461 |         """
462 |         Replaces whitespaces after and before certain symbols
463 |         with non-breaking space.
464 |         """
465 | 
466 |         expr = self._positional_spaces(self.rep_positional_spaces, WHSP, NBSP)
467 |         return tuple(expr)
468 | 
469 |     @doc_map(del_positional_spaces, keys='Direction', values='Characters')
470 |     def expr_del_positional_spaces(self):
471 |         """
472 |         Removes spaces before and after certain symbols.
473 |         """
474 | 
475 |         expr = self._positional_spaces(self.del_positional_spaces, ANYSP, '')
476 |         return tuple(expr)
477 | 


--------------------------------------------------------------------------------