├── tests ├── __init__.py ├── test_core.py ├── test_utils.py ├── test_processors.py ├── test_summary.py └── test_expressions.py ├── .coveragerc ├── docs ├── example.png ├── utils.rst ├── index.rst ├── get_started.rst ├── processors.rst └── conf.py ├── .gitignore ├── setup.cfg ├── MANIFEST.in ├── .isort.cfg ├── test_requirements.txt ├── pytest.ini ├── tox.ini ├── .pylintrc ├── .travis.yml ├── typus ├── processors │ ├── __init__.py │ ├── base.py │ ├── escapes.py │ ├── quotes.py │ └── expressions.py ├── __init__.py ├── chars.py ├── core.py └── utils.py ├── setup.py ├── LICENSE └── README.rst /tests/__init__.py: -------------------------------------------------------------------------------- 1 | -------------------------------------------------------------------------------- /.coveragerc: -------------------------------------------------------------------------------- 1 | [run] 2 | source = typus 3 | 4 | [report] 5 | omit = *tests* 6 | -------------------------------------------------------------------------------- /docs/example.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/byashimov/typus/HEAD/docs/example.png -------------------------------------------------------------------------------- /.gitignore: -------------------------------------------------------------------------------- 1 | *.egg-info 2 | *.py[co] 3 | .coverage 4 | __pycache__ 5 | build/ 6 | dist/ 7 | -------------------------------------------------------------------------------- /setup.cfg: -------------------------------------------------------------------------------- 1 | [bdist_wheel] 2 | universal = 1 3 | 4 | [metadata] 5 | description-file = README.rst -------------------------------------------------------------------------------- /docs/utils.rst: -------------------------------------------------------------------------------- 1 | .. _Utils: 2 | 3 | Utils 4 | ====== 5 | 6 | .. 
automodule:: typus.utils 7 | :members: -------------------------------------------------------------------------------- /MANIFEST.in: -------------------------------------------------------------------------------- 1 | include LICENSE 2 | include MANIFEST.in 3 | graft typus 4 | graft tests 5 | global-exclude __pycache__ 6 | global-exclude *.py[co] -------------------------------------------------------------------------------- /.isort.cfg: -------------------------------------------------------------------------------- 1 | [settings] 2 | skip=.tox 3 | not_skip=__init__.py 4 | multi_line_output = 3 5 | balanced_wrapping = 1 6 | include_trailing_comma = 1 7 | -------------------------------------------------------------------------------- /test_requirements.txt: -------------------------------------------------------------------------------- 1 | pytest==3.6.3 2 | pytest-cov==2.5.1 3 | pytest-pylint==0.11.0 4 | pytest-mock==1.10.0 5 | pytest-isort==0.2.0 6 | Sphinx==1.7.6 7 | -------------------------------------------------------------------------------- /pytest.ini: -------------------------------------------------------------------------------- 1 | [pytest] 2 | addopts = 3 | -v 4 | -rs 5 | --cov=typus 6 | --cov-report=term-missing 7 | --pylint 8 | --doctest-modules 9 | --isort 10 | -------------------------------------------------------------------------------- /tox.ini: -------------------------------------------------------------------------------- 1 | [testenv] 2 | deps = -rtest_requirements.txt 3 | commands = 4 | pytest --cache-clear 5 | sphinx-build -b doctest docs build 6 | python -m doctest README.rst 7 | -------------------------------------------------------------------------------- /.pylintrc: -------------------------------------------------------------------------------- 1 | [MASTER] 2 | ignore=docs 3 | disable= 4 | missing-docstring, 5 | wildcard-import, 6 | unused-wildcard-import, 7 | too-few-public-methods, 8 | invalid-name, 9 | arguments-differ, 10 
| too-many-instance-attributes, 11 | -------------------------------------------------------------------------------- /.travis.yml: -------------------------------------------------------------------------------- 1 | language: python 2 | sudo: required 3 | dist: xenial 4 | python: 5 | - "3.6" 6 | - "3.7" 7 | cache: 8 | directories: 9 | - $HOME/.cache/pip 10 | install: 11 | - travis_retry pip install tox-travis codecov 12 | script: 13 | - tox 14 | after_success: 15 | - coverage report 16 | - codecov 17 | -------------------------------------------------------------------------------- /typus/processors/__init__.py: -------------------------------------------------------------------------------- 1 | from .base import BaseProcessor 2 | from .escapes import BaseEscapeProcessor, EscapeHtml, EscapePhrases 3 | from .expressions import BaseExpressions, EnRuExpressions 4 | from .quotes import BaseQuotes, EnQuotes, RuQuotes 5 | 6 | __all__ = ( 7 | 'BaseProcessor', 8 | 'BaseEscapeProcessor', 9 | 'EscapeHtml', 10 | 'EscapePhrases', 11 | 'BaseExpressions', 12 | 'EnRuExpressions', 13 | 'BaseQuotes', 14 | 'EnQuotes', 15 | 'RuQuotes', 16 | ) 17 | -------------------------------------------------------------------------------- /tests/test_core.py: -------------------------------------------------------------------------------- 1 | import pytest 2 | 3 | from typus import TypusCore, ru_typus 4 | 5 | 6 | def test_empty_string(mocker): 7 | mocker.patch('typus.ru_typus.procs') 8 | assert ru_typus('') == '' 9 | ru_typus.procs.run.assert_not_called() 10 | 11 | 12 | def test_debug_true(): 13 | assert ru_typus('2mm', debug=True) == '2_mm' 14 | 15 | 16 | def test_no_processors(): 17 | class Testus(TypusCore): 18 | pass 19 | 20 | with pytest.raises(AssertionError): 21 | Testus() 22 | -------------------------------------------------------------------------------- /docs/index.rst: -------------------------------------------------------------------------------- 1 | .. 
Typus documentation master file, created by 2 | sphinx-quickstart on Tue Jul 12 22:26:26 2016. 3 | You can adapt this file completely to your liking, but it should at least 4 | contain the root `toctree` directive. 5 | 6 | .. include:: ../README.rst 7 | 8 | Contents 9 | -------- 10 | 11 | .. toctree:: 12 | :maxdepth: 2 13 | 14 | get_started 15 | processors 16 | utils 17 | 18 | 19 | Indices and tables 20 | ------------------ 21 | 22 | * :ref:`genindex` 23 | * :ref:`modindex` 24 | * :ref:`search` 25 | -------------------------------------------------------------------------------- /typus/__init__.py: -------------------------------------------------------------------------------- 1 | # pylint: disable=invalid-name 2 | 3 | from .core import TypusCore 4 | from .processors import ( 5 | EnQuotes, 6 | EnRuExpressions, 7 | EscapeHtml, 8 | EscapePhrases, 9 | RuQuotes, 10 | ) 11 | 12 | 13 | class EnTypus(TypusCore): 14 | processors = ( 15 | EscapePhrases, 16 | EscapeHtml, 17 | EnQuotes, 18 | EnRuExpressions, 19 | ) 20 | 21 | 22 | class RuTypus(TypusCore): 23 | processors = ( 24 | EscapePhrases, 25 | EscapeHtml, 26 | RuQuotes, 27 | EnRuExpressions, 28 | ) 29 | 30 | 31 | en_typus, ru_typus = EnTypus(), RuTypus() 32 | -------------------------------------------------------------------------------- /setup.py: -------------------------------------------------------------------------------- 1 | from setuptools import setup 2 | 3 | setup( 4 | name='typus', 5 | version='0.2.2', 6 | description='Multilanguage language typograph', 7 | url='https://github.com/byashimov/typus', 8 | author='Murad Byashimov', 9 | author_email='byashimov@gmail.com', 10 | packages=['typus', 'typus.processors'], 11 | license='BSD', 12 | classifiers=[ 13 | 'Development Status :: 4 - Beta', 14 | 'Intended Audience :: Developers', 15 | 'Topic :: Software Development :: Libraries :: Python Modules', 16 | 'License :: OSI Approved :: BSD License', 17 | 'Operating System :: OS Independent', 18 | 'Programming 
Language :: Python', 19 | 'Programming Language :: Python :: 3', 20 | 'Programming Language :: Python :: 3.6', 21 | 'Programming Language :: Python :: 3.7', 22 | ], 23 | ) 24 | -------------------------------------------------------------------------------- /typus/processors/base.py: -------------------------------------------------------------------------------- 1 | from abc import ABC, abstractmethod 2 | from typing import Type 3 | 4 | from typus.core import TypusCore 5 | 6 | 7 | class BaseProcessor(ABC): 8 | """ 9 | Processors are the workers of Typus. See subclasses for examples. 10 | """ 11 | 12 | other: 'BaseProcessor' = None 13 | 14 | def __init__(self, typus: TypusCore): 15 | # Stores Typus to access it's configuration 16 | self.typus = typus 17 | 18 | def __radd__(self, other: Type['BaseProcessor']): 19 | self.other = other 20 | return self 21 | 22 | @abstractmethod 23 | def run(self, text: str, **kwargs) -> str: 24 | """ 25 | :param text: Input text 26 | :param kwargs: Optional settings for the current call 27 | :return: Output text 28 | """ 29 | 30 | def run_other(self, text: str, **kwargs) -> str: 31 | if self.other: 32 | return self.other.run(text, **kwargs) 33 | return text 34 | -------------------------------------------------------------------------------- /typus/chars.py: -------------------------------------------------------------------------------- 1 | __all__ = ( 2 | 'ANYSP', 3 | 'DLQUO', 4 | 'DPRIME', 5 | 'LAQUO', 6 | 'LDQUO', 7 | 'LSQUO', 8 | 'MDASH', 9 | 'MDASH_PAIR', 10 | 'MINUS', 11 | 'NBSP', 12 | 'NDASH', 13 | 'NNBSP', 14 | 'RAQUO', 15 | 'RDQUO', 16 | 'RSQUO', 17 | 'SPRIME', 18 | 'THNSP', 19 | 'TIMES', 20 | 'WHSP', 21 | ) 22 | 23 | NBSP = '\u00A0' 24 | NNBSP = '\u202F' 25 | THNSP = '\u2009' 26 | WHSP = ' ' 27 | ANYSP = r'[{}{}{}{}]'.format(WHSP, NBSP, NNBSP, THNSP) 28 | 29 | NDASH = '–' 30 | MDASH = '—' 31 | MDASH_PAIR = NNBSP + MDASH + THNSP 32 | HYPHEN = '' 33 | 34 | MINUS = '−' 35 | TIMES = '×' 36 | 37 | LSQUO = '‘' # left curly quote 
mark 38 | RSQUO = '’' # right curly quote mark/apostrophe 39 | LDQUO = '“' # left curly quote marks 40 | RDQUO = '”' # right curly quote marks 41 | DLQUO = '„' # double low curly quote mark 42 | LAQUO = '«' # left angle quote marks 43 | RAQUO = '»' # right angle quote marks 44 | 45 | SPRIME = '′' 46 | DPRIME = '″' 47 | -------------------------------------------------------------------------------- /typus/core.py: -------------------------------------------------------------------------------- 1 | # pylint: disable=unused-argument, method-hidden 2 | 3 | from functools import update_wrapper 4 | 5 | from .chars import NBSP, NNBSP 6 | from .utils import re_compile 7 | 8 | __all__ = ('TypusCore', ) 9 | 10 | 11 | class TypusCore: 12 | """ 13 | This class runs :mod:`typus.processors` chained together. 14 | """ 15 | 16 | processors = () 17 | re_nbsp = re_compile('[{}{}]'.format(NBSP, NNBSP)) 18 | 19 | def __init__(self): 20 | assert self.processors, 'Empty typus. Set processors' 21 | 22 | # Makes possible to decorate Typus. 23 | # updated=() skips __dict__ attribute 24 | update_wrapper(self, self.__class__, updated=()) 25 | 26 | # Chains all processors into one single function 27 | self.procs = sum(p(self) for p in reversed(self.processors)) 28 | 29 | def __call__(self, source: str, *, debug=False, **kwargs): 30 | text = source.strip() 31 | if not text: 32 | return '' 33 | 34 | # All the magic 35 | processed = self.procs.run(text, debug=debug, **kwargs) 36 | 37 | # Makes nbsp visible 38 | if debug: 39 | return self.re_nbsp.sub('_', processed) 40 | return processed 41 | -------------------------------------------------------------------------------- /docs/get_started.rst: -------------------------------------------------------------------------------- 1 | What it's for? 2 | ============== 3 | 4 | Well, when you write text you make sure it's grammatically correct. 5 | Typography is *an aesthetic* grammar. 
Everything you type should be typeset properly 6 | in order to respect the reader. For instance, when you write *“you’re”* you 7 | put an *apostrophe* instead of a *single quote*, for the same reason you 8 | place a dot at the end of a sentence instead of a comma, even though they look 9 | similar. 10 | 11 | Unfortunately, all typographic characters are well hidden in your keyboard 12 | layout, which makes them almost impossible to use. Fortunately, Typus can do 13 | that for you. 14 | 15 | 16 | The anatomy 17 | ----------- 18 | 19 | :py:class:`typus.core.TypusCore` runs :ref:`Processors` to do the job, 20 | and they can be plugged in to get the desired configuration. 21 | Here is a quick example: 22 | 23 | .. testcode:: 24 | 25 | from typus.core import TypusCore 26 | from typus.processors import EnQuotes 27 | 28 | class MyTypus(TypusCore): 29 | processors = (EnQuotes, ) 30 | 31 | my_typus = MyTypus() 32 | assert my_typus('"quoted text"') == '“quoted text”' 33 | 34 | :py:class:`typus.core.TypusCore` runs :py:class:`typus.processors.EnQuotes` 35 | processor which improves *quotes* only. 36 | -------------------------------------------------------------------------------- /LICENSE: -------------------------------------------------------------------------------- 1 | Copyright (c) Murad Byashimov and other contributors. 2 | All rights reserved. 3 | 4 | Redistribution and use in source and binary forms, with or without modification, 5 | are permitted provided that the following conditions are met: 6 | 7 | 1. Redistributions of source code must retain the above copyright notice, 8 | this list of conditions and the following disclaimer. 9 | 10 | 2. Redistributions in binary form must reproduce the above copyright 11 | notice, this list of conditions and the following disclaimer in the 12 | documentation and/or other materials provided with the distribution. 13 | 14 | 3. 
Neither the name of Django nor the names of its contributors may be used 15 | to endorse or promote products derived from this software without 16 | specific prior written permission. 17 | 18 | THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" AND 19 | ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED 20 | WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE 21 | DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE LIABLE FOR 22 | ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES 23 | (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; 24 | LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON 25 | ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT 26 | (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS 27 | SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. -------------------------------------------------------------------------------- /tests/test_utils.py: -------------------------------------------------------------------------------- 1 | # pylint: disable=anomalous-backslash-in-string 2 | 3 | import pytest 4 | 5 | from typus.utils import idict, splinter 6 | 7 | 8 | @pytest.mark.parametrize('source, expected', ( 9 | ({'A': 0, 'b': 1, 'BAr': 2}, {'a': 0, 'b': 1, 'bar': 2}), 10 | )) 11 | def test_idict(source, expected): 12 | result = idict(source) 13 | assert result == expected 14 | assert source != result 15 | 16 | 17 | @pytest.mark.parametrize('source, expected', ( 18 | ('a, b,c', ['a', 'b', 'c']), 19 | ('a, b\,c', ['a', 'b,c']), 20 | )) 21 | def test_splinter_basic(source, expected): 22 | split = splinter(',') 23 | assert split(source) == expected 24 | 25 | 26 | @pytest.mark.parametrize('source', ( 27 | '\\', '\\ ', ' ', 28 | )) 29 | def test_splinter_junk_delimiter(source): 30 | with pytest.raises(ValueError): 31 | splinter(source) 32 | 
33 | 34 | @pytest.mark.parametrize('source, expected', ( 35 | (' a; b;c', ['a', 'b', 'c']), 36 | (' a; b ;c', ['a', 'b', 'c']), 37 | (' a; b ;c ', ['a', 'b', 'c']), 38 | )) 39 | def test_splinter_positional_spaces(source, expected): 40 | split = splinter(';') 41 | assert split(source) == expected 42 | 43 | 44 | def test_splinter_delimiter_with_spaces(): 45 | split = splinter(' @ ') 46 | assert split('a@ b@ c ') == ['a', 'b', 'c'] 47 | 48 | 49 | def test_splinter_regex_delimiter(): 50 | split = splinter('$') 51 | assert split('a$b$c') == ['a', 'b', 'c'] 52 | 53 | 54 | def test_splinter_doesnt_remove_other_slashes(): 55 | split = splinter('*') 56 | assert split('a * b * c\*c \\b') == ['a', 'b', 'c*c \\b'] 57 | -------------------------------------------------------------------------------- /docs/processors.rst: -------------------------------------------------------------------------------- 1 | .. _Processors: 2 | 3 | Processors 4 | ========== 5 | 6 | Processors are the core of Typus. Multiple processors are nested and chained 7 | in one single function to do things which may depend on the result returned by 8 | inner processors. Say, we set ``EscapeHtml`` and ``MyTrimProcessor``, 9 | this is how it works: 10 | 11 | :: 12 | 13 | extract html tags 14 | pass text further if condition is true 15 | do something and return 16 | return the text 17 | put tags back and return 18 | 19 | In python: 20 | 21 | .. testcode:: 22 | 23 | from typus.core import TypusCore 24 | from typus.processors import BaseProcessor, EscapeHtml 25 | 26 | class MyTrimProcessor(BaseProcessor): 27 | def run(self, text, **kwargs): 28 | # When processor is initiated it gets typus instance 29 | # as the first argument so you can access to it's configuration 30 | # any time 31 | if self.typus.trim: 32 | trimmed = text.strip() 33 | else: 34 | trimmed = text 35 | return self.run_other(trimmed, **kwargs) 36 | 37 | class MyTypus(TypusCore): 38 | # This becomes a single function. 
EscapeHtml goes first 39 | processors = (EscapeHtml, MyTrimProcessor) 40 | 41 | # Set it to `False` to disable trimming 42 | trim = True 43 | 44 | my_typus = MyTypus() 45 | assert my_typus(' test ') == 'test' 46 | 47 | 48 | Built-in processors 49 | ------------------- 50 | 51 | .. automodule:: typus.processors 52 | :members: EnQuotes, RuQuotes, EnRuExpressions, EscapeHtml, EscapePhrases 53 | -------------------------------------------------------------------------------- /typus/processors/escapes.py: -------------------------------------------------------------------------------- 1 | from abc import abstractmethod 2 | from itertools import count 3 | 4 | from ..utils import re_compile 5 | from .base import BaseProcessor 6 | 7 | 8 | class BaseEscapeProcessor(BaseProcessor): 9 | def run(self, text: str, **kwargs) -> str: 10 | storage = [] 11 | counter = count() 12 | escaped = self._save_values(text, storage, counter, **kwargs) 13 | 14 | # Runs typus 15 | processed = self.run_other(escaped, **kwargs) 16 | if not storage: 17 | return processed 18 | 19 | restored = self._restore_values(processed, storage) 20 | return restored 21 | 22 | @abstractmethod 23 | def _save_values(self, *args, **kwargs): 24 | pass # pragma: nocover 25 | 26 | @staticmethod 27 | def _restore_values(text, storage): 28 | """ 29 | Puts data into the text in reversed order. 30 | It's important to loop over and restore text step by step 31 | because some 'stored' chunks may contain keys to other ones. 32 | """ 33 | for key, value in reversed(storage): 34 | text = text.replace(key, value) 35 | return text 36 | 37 | 38 | class EscapePhrases(BaseEscapeProcessor): 39 | """ 40 | Escapes phrases which should never be processed. 41 | 42 | >>> from typus import en_typus 43 | >>> en_typus('Typus turns `(c)` into "(c)"', escape_phrases=['`(c)`']) 44 | 'Typus turns `(c)` into “©”' 45 | 46 | Also there is a little helper :func:`typus.utils.splinter` which should 47 | help you to split string into the phrases. 
48 | """ 49 | 50 | placeholder = '{{#phrase{0}#}}' 51 | 52 | def _save_values( 53 | self, text, storage, counter, escape_phrases=(), **kwargs): 54 | for phrase in escape_phrases: 55 | if not phrase.strip(): 56 | continue 57 | key = self.placeholder.format(next(counter)) 58 | text = text.replace(phrase, key) 59 | storage.append((key, phrase)) 60 | return text 61 | 62 | 63 | class EscapeHtml(BaseEscapeProcessor): 64 | """ 65 | Extracts html tags and puts them back after. 66 | 67 | >>> from typus import en_typus 68 | >>> en_typus('Typus turns (c) into "(c)"') 69 | 'Typus turns (c) into “©”' 70 | 71 | .. caution:: 72 | Doesn't support nested ```` tags. 73 | """ 74 | 75 | placeholder = '{{#html{0}#}}' 76 | skiptags = 'head|iframe|pre|code|script|style|video|audio|canvas' 77 | patterns = ( 78 | re_compile(r'(<)({0})(.*?>.*?)'.format(skiptags)), 79 | # Doctype, xml, closing tag, any tag 80 | re_compile(r'(<[\!\?/]?[a-z]+.*?>)'), 81 | # Comments 82 | re_compile(r'(<\!\-\-.*?\-\->)'), 83 | ) 84 | 85 | def _save_values(self, text, storage, counter, **kwargs): 86 | for pattern in self.patterns: 87 | text = pattern.sub(self._replace(storage, counter), text) 88 | return text 89 | 90 | def _replace(self, storage, counter): 91 | def inner(match): 92 | key = self.placeholder.format(next(counter)) 93 | html = ''.join(match.groups()) 94 | storage.append((key, html)) 95 | return key 96 | return inner 97 | -------------------------------------------------------------------------------- /typus/processors/quotes.py: -------------------------------------------------------------------------------- 1 | from itertools import cycle 2 | from typing import Match 3 | 4 | from ..chars import DLQUO, LAQUO, LDQUO, LSQUO, RAQUO, RDQUO, RSQUO 5 | from ..utils import re_compile 6 | from .base import BaseProcessor 7 | 8 | 9 | class BaseQuotes(BaseProcessor): 10 | """ 11 | Replaces regular quotes with typographic ones. 
12 | Supports any level nesting, but doesn't work well with minutes ``1'`` 13 | and inches ``1"`` within the quotes, that kind of cases are ignored. 14 | Please, provide ``loq, roq, leq, req`` attributes with custom quotes. 15 | 16 | >>> from typus import en_typus 17 | >>> en_typus('Say "what" again!') 18 | 'Say “what” again!' 19 | """ 20 | 21 | loq = roq = leq = req = NotImplemented 22 | 23 | def __init__(self, *args, **kwargs): 24 | super().__init__(*args, **kwargs) 25 | 26 | # Pairs of odd and even quotes. Already *switched* in one dimension. 27 | # See :meth:`_switch_nested` for more help. 28 | self.switch = (self.loq + self.req, self.leq + self.roq) 29 | 30 | # Replaces all quotes with `'` 31 | quotes = ''.join((LSQUO, RSQUO, LDQUO, RDQUO, DLQUO, LAQUO, RAQUO)) 32 | self.re_normalize = re_compile(r'[{0}]'.format(quotes)) 33 | 34 | # Matches nested quotes (with no quotes within) 35 | # and replaces with odd level quotes 36 | self.re_normal = re_compile( 37 | # No words before 38 | r'(? str: 56 | # Normalizes editor's quotes to double one 57 | normalized = self.re_normalize.sub('\'', text) 58 | 59 | # Replaces normalized quotes with first level ones, starting 60 | # from inner pairs, moves to sides 61 | nested = 0 62 | while True: 63 | normalized, replaced = self.re_normal.subn( 64 | self.re_normal_replace, normalized) 65 | if not replaced: 66 | break 67 | nested += 1 68 | 69 | # Saves some cpu :) 70 | # Most cases are about just one level quoting 71 | if nested < 2: 72 | return self.run_other(normalized, **kwargs) 73 | 74 | # At this point all quotes are of odd type, have to fix it 75 | switched = self._switch_nested(normalized) 76 | return self.run_other(switched, **kwargs) 77 | 78 | def _switch_nested(self, text: str): 79 | """ 80 | Switches nested quotes to another type. 81 | This function stored in a separate method to make possible to mock it 82 | in tests to make sure it doesn't called without special need. 
83 | """ 84 | 85 | # Stores a cycled pairs of possible quotes. Every other loop it's 86 | # switched to provide *next* type of a given quote 87 | quotes = cycle(self.switch) 88 | 89 | def replace(match: Match): 90 | # Since only odd quotes are matched, comparison is the way to 91 | # choose whether it's left or right one of type should be returned. 92 | # As the first quote is the left one, makes negative equal which 93 | # return false, i.e. zero index 94 | return next(quotes)[match.group() != self.loq] 95 | return self.re_nested.sub(replace, text) 96 | 97 | 98 | class EnQuotes(BaseQuotes): 99 | r""" 100 | Provides English quotes configutation for :class:`typus.processors.Quotes` 101 | processor. 102 | 103 | >>> from typus import en_typus 104 | >>> en_typus('He said "\'Winnie-the-Pooh\' is my favorite book!".') 105 | 'He said “‘Winnie-the-Pooh’ is my favorite book!”.' 106 | """ 107 | 108 | # Left odd, right odd, left even, right even 109 | loq = LDQUO 110 | roq = RDQUO 111 | leq = LSQUO 112 | req = RSQUO 113 | 114 | 115 | class RuQuotes(BaseQuotes): 116 | r""" 117 | Provides Russian quotes configutation for :class:`typus.processors.Quotes` 118 | processor. 119 | 120 | >>> from typus import ru_typus 121 | >>> ru_typus('Он сказал: "\'Винни-Пух\' -- моя любимая книга!".') 122 | 'Он\xa0сказал: «„Винни-Пух“\u202f—\u2009моя любимая книга!».' 123 | """ 124 | 125 | # Left odd, right odd, left even, right even 126 | loq = LAQUO 127 | roq = RAQUO 128 | leq = DLQUO 129 | req = LDQUO 130 | -------------------------------------------------------------------------------- /README.rst: -------------------------------------------------------------------------------- 1 | Welcome to Typus 2 | ================ 3 | 4 | Typus is a typography tool. It means you can write text the way you are used to 5 | and let it handle all that formatting headache: 6 | 7 | :: 8 | 9 | "I don't feel very much like Pooh today..." said Pooh. 10 | "There there," said Piglet. 
"I'll bring you tea and honey until you do." 11 | - A.A. Milne, Winnie-the-Pooh 12 | 13 | “I don’t feel very much like Pooh today…” said Pooh. 14 | “There there,” said Piglet. “I’ll bring you tea and honey until you do.” 15 | — A. A. Milne, Winnie-the-Pooh 16 | 17 | Copy & paste this example to your rich text editor. Result may depend on 18 | the font of your choice. 19 | For instance, there is a tiny non-breaking space between ``A. A.`` you 20 | can see with Helvetica: 21 | 22 | .. image:: https://raw.githubusercontent.com/byashimov/typus/develop/docs/example.png 23 | 24 | Try out the demo_. 25 | 26 | 27 | Web API 28 | ------- 29 | 30 | A tiny `web-service`_ for whatever legal purpose it may serve. 31 | 32 | 33 | Installation 34 | ------------ 35 | 36 | .. code-block:: console 37 | 38 | $ pip install typus 39 | 40 | 41 | Usage 42 | ----- 43 | 44 | Currently Typus supports English and Russian languages only. 45 | But it doesn't mean it can't handle more. I'm quite sure it covers Serbian 46 | and Turkmen. 47 | 48 | In fact, Typus doesn't make difference between languages. It works with text. 49 | If you use Cyrillic then only relative processors will affect that text. 50 | In another words, give it a try if your language is not on the list 51 | 52 | Here is a short example: 53 | 54 | .. code-block:: python 55 | 56 | >>> from typus import en_typus, ru_typus 57 | ... 58 | >>> # Underscore is for nbsp in debug mode 59 | >>> en_typus('"Beautiful is better than ugly." (c) Tim Peters.', debug=True) 60 | '“Beautiful is_better than ugly.” ©_Tim Peters.' 61 | >>> # Cyrillic 'с' in '(с)' 62 | >>> ru_typus('"Красивое лучше, чем уродливое." (с) Тим Петерс.', debug=True) 63 | '«Красивое лучше, чем уродливое.» ©_Тим Петерс.' 64 | 65 | 66 | The only difference between ``en_typus`` and ``ru_typus`` 67 | are in quotes they set: ``“‘’”`` for English and ``«„“»`` for Russian. Both of 68 | them handle mixed text and that is pretty awesome. 69 | 70 | Typus is highly customizable. 
Not only quotes can be replaced but almost 71 | everything. For instance, if you don't use html tags you can skip 72 | ``EscapeHtml`` processor which makes your Typus a little 73 | faster. 74 | 75 | 76 | What it does 77 | ------------ 78 | 79 | - Replaces regular quotes ``"foo 'bar' baz"`` with typographic pairs: 80 | ``“foo ‘bar’ baz”``. Quotes style depends on language and your Typus configuration. 81 | - Replaces regular dash ``foo - bar`` with mdash or ndash or minus. 82 | Depends on case: plain text, digit range, math, etc. 83 | - Replaces complex symbols such as ``(c)`` with unicode characters: ``©``. 84 | Cyrillic analogs are supported too. 85 | - Replaces vulgar fractions ``1/2`` with unicode characters: ``½``. 86 | - Turns multiply symbol to a real one: ``3x3`` becomes ``3×3``. 87 | - Replaces quotes with primes: ``2' 4"`` becomes ``2′ 4″``. 88 | - Puts non-breaking spaces. 89 | - Puts ruble symbol. 90 | - Trims spaces at the end of lines. 91 | - and much more. 92 | 93 | 94 | Documentation 95 | ------------- 96 | 97 | Docs are hosted on `readthedocs.org`_. 98 | 99 | .. seealso:: 100 | 101 | Oh, there is also an outdated Russian article I should not 102 | probably suggest, but since all docs are in English, this link_ might be 103 | quite helpful. 104 | 105 | 106 | Compatibility 107 | ------------- 108 | 109 | .. image:: https://travis-ci.org/byashimov/typus.svg?branch=develop 110 | :alt: Build Status 111 | :target: https://travis-ci.org/byashimov/typus 112 | 113 | .. image:: https://codecov.io/gh/byashimov/typus/branch/develop/graph/badge.svg 114 | :alt: Codecov 115 | :target: https://codecov.io/gh/byashimov/typus 116 | 117 | Tested on Python 3.6, 3.7. 118 | 119 | 120 | Changelog 121 | --------- 122 | 123 | 0.2.2 124 | ~~~~~ 125 | 126 | - Improved ``mdash``: narrow spaces are used instead of regular ones. 127 | - Improved ``range``: ``mdash`` is replaced with ``ndash``. 128 | - Dropped ``phone`` processing. 
Using regular hyphen-minus is ok, 129 | because there is no valuable visual difference between that and hyphen. 130 | 131 | Thanks to @danaksim for the help. 132 | 133 | 0.2 134 | ~~~ 135 | 136 | - Python 3.6 and higher are supported only. 137 | That's because 3.6 string formatting is used in tests to make them easier 138 | to read and write. 139 | - ``EnRuExpressions`` is no longer a mixin but processor. 140 | - Better, cleaner tests with pytest. 141 | - Minor fixes and improvements. 142 | 143 | 0.1 144 | ~~~ 145 | 146 | - Initial release. 147 | 148 | 149 | .. _demo: https://byashimov.com/typus/ 150 | .. _web-service: https://byashimov.com/typus/api/ 151 | .. _readthedocs.org: http://py-typus.readthedocs.io/en/latest/ 152 | .. _link: https://habrahabr.ru/post/303608/ 153 | -------------------------------------------------------------------------------- /typus/utils.py: -------------------------------------------------------------------------------- 1 | # pylint: disable=anomalous-backslash-in-string 2 | 3 | import re 4 | from functools import wraps 5 | from typing import Callable, Iterable, List 6 | 7 | __all__ = ( 8 | 'RE_SCASE', 9 | 'RE_ICASE', 10 | 'doc_map', 11 | 'idict', 12 | 'map_choices', 13 | 're_choices', 14 | 're_compile', 15 | 'splinter', 16 | ) 17 | 18 | 19 | RE_SCASE = re.U | re.M | re.S # sensitive case 20 | RE_ICASE = re.I | RE_SCASE # insensitive case 21 | 22 | 23 | def re_compile(pattern: str, flags: int = RE_ICASE): 24 | """ 25 | A shortcut to compile regex with predefined flags: 26 | :const:`re.I`, :const:`re.U`, :const:`re.M`, :const:`re.S`. 27 | 28 | :param str pattern: A string to compile pattern from. 29 | :param int flags: Python :mod:`re` module flags. 
30 | 31 | >>> foo = re_compile('[a-z]') # matches with 'test' and 'TEST' 32 | >>> bool(foo.match('TEST')) 33 | True 34 | >>> bar = re_compile('[a-z]', flags=0) # doesn't match with 'TEST' 35 | >>> bool(bar.match('TEST')) 36 | False 37 | """ 38 | 39 | return re.compile(pattern, flags) 40 | 41 | 42 | def re_choices(choices: Iterable[str], group: str = r'({})') -> str: 43 | """ 44 | Returns regex group of escaped choices. 45 | 46 | :param choices: Iterable of strings. 47 | :param group: A string to format the group with. 48 | 49 | >>> re_choices(('foo', 'bar')) 50 | '(foo|bar)' 51 | """ 52 | return group.format('|'.join(map(re.escape, choices))) 53 | 54 | 55 | class idict(dict): 56 | """ 57 | Case-insensitive dictionary. 58 | 59 | :param mapping/iterable obj: An object to initialize new dictionary from 60 | :param `**kwargs`: ``key=value`` pairs to put in the new dictionary 61 | :returns: A regex non-compiled pattern 62 | :rtype: str 63 | 64 | >>> foo = idict({'A': 0, 'b': 1, 'bar': 2}) 65 | >>> foo['a'], foo['B'], foo['bAr'] 66 | (0, 1, 2) 67 | 68 | .. caution:: 69 | :class:`idict` is not a full-featured case-insensitive dictionary. 70 | As it's made for :func:`map_choices` and has limited functionality. 71 | """ 72 | 73 | def __init__(self, obj: dict): 74 | lowered = ((key.lower(), value) for key, value in obj.items()) 75 | super().__init__(lowered) 76 | 77 | def __getitem__(self, key): 78 | return super().__getitem__(key.lower()) 79 | 80 | 81 | def map_choices(data: dict, group: str = r'({})', dict_class=idict) -> tuple: 82 | """ 83 | :class:`typus.processors.Expressions` helper. 84 | Builds regex pattern from the dictionary keys and maps them to values via 85 | replace function. 86 | 87 | :param mapping/iterable data: A pairs of (find, replace with) strings 88 | :param str group: A string to format in choices. 89 | :param class dict_class: A dictionary class to convert source data. 90 | By default :class:`idict` is used which is case-insensitive. 
91 | For instance, to map ``(c)`` and ``(C)`` to different values pass 92 | a regular Python :class:`dict`. Or, if the order matters, use 93 | :class:`collections.OrderedDict` 94 | 95 | :returns: A regex non-compiled pattern and replace function 96 | :rtype: tuple 97 | 98 | >>> import re 99 | >>> pattern, replace = map_choices({'a': 0, 'b': 1}) 100 | >>> re.sub(pattern, replace, 'abc') 101 | '01c' 102 | """ 103 | 104 | options = dict_class(data) 105 | pattern = re_choices(options, group=group) 106 | 107 | def replace(match): 108 | return str(options[match.group()]) 109 | return pattern, replace 110 | 111 | 112 | def doc_map(data: dict, keys='Before', values='After', delim='|'): 113 | rows = '\n'.join(f'\t``{k}`` {delim} ``{v}``' for k, v in data.items()) 114 | table = ( 115 | f'\n.. csv-table::' 116 | f'\n\t:delim: {delim}' 117 | f'\n\t:header: "{keys}", "{values}"\n' 118 | f'\n{rows}' 119 | ) 120 | 121 | def updater(func): 122 | func.__doc__ += table 123 | return func 124 | return updater 125 | 126 | 127 | def splinter(delimiter: str) -> Callable[[str], List[str]]: 128 | """ 129 | :class:`typus.processors.EscapePhrases` helper. 130 | Almost like ``str.split()`` but handles delimiter escaping and strips 131 | spaces. 
132 | 133 | :param str delimiter: String delimiter 134 | :raises ValueError: If delimiter is a slash or an empty space 135 | 136 | :returns: A list of stripped phrases splitted by the delimiter 137 | :rtype: list 138 | 139 | >>> split = splinter(', ') # strips this spaces 140 | >>> split('a, b,c , d\,e') # and this ones too 141 | ['a', 'b', 'c', 'd,e'] 142 | """ 143 | 144 | delim = delimiter.strip(' \\') 145 | if not delim: 146 | raise ValueError('Delimiter can not be a slash or an empty space.') 147 | 148 | # Doesn't split escaped delimiters 149 | pattern = re.compile(r'(?dsfsdf "test" "sdfdf"', 15 | 'dsfsdf "test" "sdfdf"', 16 | ['"test"'], 17 | ), 18 | 19 | # Empty string, nothing to escape 20 | ('"foo"', '«foo»', ['']), 21 | )) 22 | def test_escape_phrases(source, expected, escape_phrases): 23 | assert ru_typus(source, escape_phrases=escape_phrases) == expected 24 | 25 | 26 | @mock.patch('typus.processors.EscapeHtml._restore_values', return_value='test') 27 | def test_restore_html_call(mock_restore_values): 28 | ru_typus('test') 29 | mock_restore_values.assert_not_called() 30 | 31 | ru_typus('test') 32 | mock_restore_values.assert_called_once() 33 | 34 | 35 | @pytest.mark.parametrize('source', ( 36 | '
"test"
', 37 | '"test"', 38 | 39 | # Nested code in pre 40 | '
"test"
', 41 | '
"test"
', 42 | 43 | # Script tag 44 | '', 45 | '', 46 | )) 47 | def test_codeblocks(source): 48 | assert ru_typus(source) == source 49 | 50 | 51 | @pytest.mark.parametrize('source, expected', ( 52 | ( 53 | 'dsfsdf "test" "sdfdf"', 54 | 'dsfsdf "test" «sdfdf»', 55 | ), 56 | )) 57 | def test_nested_codeblocks(typus, source, expected): 58 | # No nested codeblocks 59 | assert typus(source) == expected 60 | 61 | 62 | @pytest.mark.parametrize('source, expected', ( 63 | ('"test"', '«test»'), 64 | ('"test"', '«test»'), 65 | ('"test"', '«test»'), 66 | 67 | # Image: html + xhtml 68 | ('"test"', '«test»'), 69 | ('test"test"', 'test«test»'), 70 | ('test"test"', 'test«test»'), 71 | )) 72 | def test_tags(source, expected): 73 | assert ru_typus(source) == expected 74 | 75 | 76 | @pytest.mark.parametrize('source', ( 77 | '', 78 | '', 79 | '', 80 | )) 81 | def test_comments(source): 82 | assert ru_typus(source) == source 83 | 84 | 85 | @pytest.mark.parametrize('source', ( 86 | '', 87 | '', 88 | )) 89 | def test_doctype(source): 90 | assert ru_typus(source) == source 91 | 92 | 93 | @pytest.mark.parametrize('source', ( 94 | '(c)', 95 | )) 96 | def test_head(source): 97 | assert ru_typus(source) == source 98 | 99 | 100 | @pytest.mark.parametrize('source', ( 101 | '', 102 | )) 103 | def test_iframe(source): 104 | assert ru_typus(source) == source 105 | 106 | 107 | @pytest.fixture(name='typus') 108 | def get_typus(): 109 | class Typus(TypusCore): 110 | processors = ( 111 | EscapePhrases, 112 | EscapeHtml, 113 | RuQuotes, 114 | ) 115 | 116 | return Typus() 117 | 118 | 119 | @mock.patch('typus.processors.BaseQuotes._switch_nested', return_value='test') 120 | def test_switch_nested_call(mock_switch_nested, typus): 121 | # No quotes 122 | typus('00 11 00') 123 | mock_switch_nested.assert_not_called() 124 | 125 | # Odd only 126 | typus('00 "11" 00') 127 | mock_switch_nested.assert_not_called() 128 | 129 | # Both 130 | typus('"00 "11" 00"') 131 | mock_switch_nested.assert_called_once() 132 | 133 | 
134 | @pytest.mark.parametrize('source, expected', ( 135 | # Levels 136 | ('00 "11" 00', '00 «11» 00'), # One 137 | ('"00 "11" 00"', '«00 „11“ 00»'), # Two 138 | ('00" "11 "22" 11"', '00" «11 „22“ 11»'), # Tree 139 | 140 | # Hardcore 141 | ('00 ""22"" 00', '00 «„22“» 00'), 142 | ('00 ""22..."" 00', '00 «„22...“» 00'), 143 | ('00 ""22"..." 00', '00 «„22“...» 00'), 144 | ('"© test"', '«© test»'), 145 | ('("test")', '(«test»)'), 146 | ('"test"*', '«test»*'), 147 | ('"test"®', '«test»®'), 148 | ('"""test"""', '«„«test»“»'), 149 | ('""""test""""', '«„«„test“»“»'), 150 | ('"""""""test"""""""', '«„«„«„«test»“»“»“»'), 151 | ('" test"', '" test"'), 152 | ('" "test""', '" «test»"'), 153 | ('"foo 2\'"', '«foo 2\'»'), 154 | 155 | # False positive 156 | ('"foo 2""', '«foo 2»"'), 157 | 158 | # Weired cases 159 | ('00 "... "22"" 00', '00 «... „22“» 00'), 160 | ('00 "..."22"" 00', '00 «...„22“» 00'), 161 | 162 | # Punctuation 163 | ('00 "...11 "22!"" 00', '00 «...11 „22!“» 00'), 164 | ('00 "11 "22!"..." 00', '00 «11 „22!“...» 00'), 165 | ('00 "11 "22!"?!." 00', '00 «11 „22!“?!.» 00'), 166 | ('00 "11 "22!"?!."? 00', '00 «11 „22!“?!.»? 00'), 167 | 168 | # Nested on side 169 | ('00 ""22!" 11" 00', '00 «„22!“ 11» 00'), 170 | ('00 "11 "22?"" 00', '00 «11 „22?“» 00'), 171 | 172 | # Different quotes 173 | ('00 "“22”" 00', '00 «„22“» 00'), 174 | ('00 "‘22’" 00', '00 «„22“» 00'), 175 | 176 | # Inches, minutes within quotes 177 | ('00 "11\'" 00 "11"', '00 «11\'» 00 «11»'), 178 | ('00" "11" 00 "11"', '00" «11» 00 «11»'), 179 | 180 | # Fire them all! 181 | ( 182 | '''00" "11 '22' 11"? "11 '22 "33 33"' 11" 00' "11 '22' 11" 00"''', 183 | '00" «11 „22“ 11»? 
«11 „22 «33 33»“ 11» 00\' «11 „22“ 11» 00"', 184 | ), 185 | )) 186 | def test_quotes(typus, source, expected): 187 | assert typus(source) == expected 188 | 189 | 190 | @pytest.mark.parametrize('source, expected', ( 191 | # Html test 192 | ('"11"', '«11»'), 193 | ('"11"', '«11»'), 194 | )) 195 | def test_me(typus, source, expected): 196 | assert typus(source) == expected 197 | -------------------------------------------------------------------------------- /tests/test_summary.py: -------------------------------------------------------------------------------- 1 | import pytest 2 | 3 | from typus import en_typus, ru_typus 4 | from typus.chars import * 5 | 6 | QUOTES = ( 7 | ''.join((LAQUO, RAQUO, DLQUO, LDQUO)), 8 | ''.join((LDQUO, RDQUO, LSQUO, RSQUO)), 9 | ) 10 | TYPUSES = ( 11 | (ru_typus, {}), 12 | (en_typus, str.maketrans(*QUOTES)), 13 | ) 14 | 15 | 16 | @pytest.fixture(name='assert_typus', scope='module', params=TYPUSES) 17 | def get_assert_typus(request): 18 | typus, charmap = request.param 19 | 20 | def assert_typus(source, expected): 21 | assert typus(source) == expected.translate(charmap) 22 | return assert_typus 23 | 24 | 25 | def test_debug(): 26 | assert ru_typus('1m', debug=True) == '1_m' 27 | 28 | 29 | @pytest.mark.parametrize('source, expected', ( 30 | ('00 "11" 00', '00 «11» 00'), 31 | # clashes with digit_spaces 32 | ( 33 | '''00" "11 '22' 11"? "11 '22 "33 33?"' 11" 00 "11 '22' 11" 0"''', 34 | f'00{DPRIME} «11 „22“ 11»? 
«11 „22 «33{NBSP}33?»“ 11» ' 35 | f'00 «11 „22“ 11» 0{DPRIME}' 36 | ), 37 | )) 38 | def test_quotes(assert_typus, source, expected): 39 | assert_typus(source, expected) 40 | 41 | 42 | @pytest.mark.parametrize('source, expected', ( 43 | ('--', '--'), 44 | ('foo - foo', f'foo{MDASH_PAIR}foo'), 45 | # Leading comma case 46 | (', - foo', f',{MDASH}{THNSP}foo'), 47 | (', -- foo', f',{MDASH}{THNSP}foo'), 48 | # if line begins, adds nbsp after mdash 49 | ('-- foo', f'{MDASH}{NBSP}foo'), 50 | # if line ends, adds nbsp before mdash 51 | ('foo --', f'foo{NBSP}{MDASH}'), 52 | ('foo -- bar', f'foo{MDASH_PAIR}bar'), 53 | # Python markdown replaces dash with ndash, don't know why 54 | (f'foo {NDASH} foo', f'foo{MDASH_PAIR}foo'), 55 | 56 | # This one for ru_typus 57 | ('foo - "11" 00', f'foo{MDASH_PAIR}«11» 00'), 58 | ('2 - 2foo', f'2{MDASH_PAIR}2foo'), # no units clash 59 | ('2 - 2', f'2{NBSP}{MINUS}{NBSP}2'), # + minus 60 | ('Winnie-the-Pooh', 'Winnie-the-Pooh'), 61 | )) 62 | def test_mdash(assert_typus, source, expected): 63 | assert_typus(source, expected) 64 | 65 | 66 | @pytest.mark.parametrize('source, expected', ( 67 | ('"4"', '«4»'), 68 | ('4\'', '4' + SPRIME), 69 | ('4"', '4' + DPRIME), 70 | ('" 22"', '" 22' + DPRIME), 71 | )) 72 | def test_primes(assert_typus, source, expected): 73 | assert_typus(source, expected) 74 | 75 | 76 | @pytest.mark.parametrize('source, expected', ( 77 | ('25-foo', '25-foo'), 78 | ('2-3', f'2{NDASH}3'), 79 | ('2,5-3', f'2,5{NDASH}3'), 80 | ('0.5-3', f'0.5{NDASH}3'), 81 | ('2-3 foo', f'2{NDASH}3{NBSP}foo'), # + ranges 82 | ('(15-20 items)', f'(15{NDASH}20{NBSP}items)'), 83 | 84 | # Float 85 | ('0,5-3', f'0,5{NDASH}3'), 86 | ('-0,5-3', f'{MINUS}0,5{NDASH}3'), 87 | ('-5.5-3', f'{MINUS}5.5{NDASH}3'), 88 | ('-5,5-3', f'{MINUS}5,5{NDASH}3'), 89 | ('-5,5-3.5', f'{MINUS}5,5{NDASH}3.5'), 90 | ('2 - 3', f'2{NBSP}{MINUS}{NBSP}3'), 91 | ('2-3 x 4', f'2{MINUS}3{NBSP}{TIMES}{NBSP}4'), 92 | ('2-3 * 4', f'2{MINUS}3{NBSP}{TIMES}{NBSP}4'), 93 | ('2-3 - 4', 
f'2{MINUS}3{NBSP}{MINUS}{NBSP}4'), 94 | )) 95 | def test_ranges(assert_typus, source, expected): 96 | assert_typus(source, expected) 97 | 98 | 99 | @pytest.mark.parametrize('source, expected', ( 100 | # Minus 101 | (f'3{NBSP}-{NBSP}2', f'3{NBSP}{MINUS}{NBSP}2'), 102 | # This one clashes with range 103 | ('2-3', f'2{NDASH}3'), 104 | # This one clashes with mdash 105 | (f'x{NBSP}-{NBSP}3', f'x{NNBSP}{MDASH}{THNSP}3'), 106 | ('-3', f'{MINUS}3'), 107 | 108 | # Star 109 | ('3*2', f'3{TIMES}2'), 110 | ('*3', f'{TIMES}3'), 111 | (f'3{NBSP}*{NBSP}2', f'3{NBSP}{TIMES}{NBSP}2'), 112 | (f'x{NBSP}*{NBSP}2', f'x{NBSP}{TIMES}{NBSP}2'), 113 | 114 | # 'x' 115 | ('3x2', f'3{TIMES}2'), 116 | ('x3', f'{TIMES}3'), 117 | (f'3{NBSP}x{NBSP}2', f'3{NBSP}{TIMES}{NBSP}2'), 118 | (f'x{NBSP}x{NBSP}2', f'x{NBSP}{TIMES}{NBSP}2'), 119 | 120 | # and Russian "х" 121 | ('3х2', f'3{TIMES}2'), 122 | ('х3', f'{TIMES}3'), 123 | (f'3{NBSP}х{NBSP}2', f'3{NBSP}{TIMES}{NBSP}2'), 124 | (f'x{NBSP}х{NBSP}2', f'x{NBSP}{TIMES}{NBSP}2'), 125 | )) 126 | def test_math(assert_typus, source, expected): 127 | assert_typus(source, expected) 128 | 129 | 130 | @pytest.mark.parametrize('source, expected', ( 131 | ('aaa 2a', f'aaa 2a'), # doesnt clash with units 132 | )) 133 | def test_pairs(assert_typus, source, expected): 134 | assert_typus(source, expected) 135 | 136 | 137 | @pytest.mark.parametrize('source, expected', ( 138 | ('4444444 fooo', '4444444 fooo'), 139 | ('444 foo', f'444{NBSP}foo'), 140 | ('444 +', f'444{NBSP}+'), 141 | ('444 4444 bucks', f'444{NBSP}4444 bucks'), 142 | ('4444444 foo', f'4444444 foo'), # no untis clash 143 | ('444 -', f'444{NBSP}{MDASH}'), 144 | )) 145 | def test_digit_spaces(assert_typus, source, expected): 146 | assert_typus(source, expected) 147 | 148 | 149 | def test_example(assert_typus): 150 | source = ( 151 | 'Излучение, как следует из вышесказанного, концентрирует ' 152 | 'внутримолекулярный предмет - деятельности . 
"...ff \'Можно?\' ' 153 | 'предположить, что силовое - "поле "мент "d" ально" отклоняет" ' 154 | 'сенсибельный \'квазар !..\' cc", не учитывая мнения авторитетов. ' 155 | 'Искусство испускает данный электрон, учитывая опасность, ' 156 | ' "d" test -- test(c) которую представляли ' 157 | 'собой писания Дюринга для не окрепшего еще немецкого рабочего ' 158 | 'движения. Смысл жизни -- амбивалентно (с) дискредитирует ' 159 | 'закон (r) исключённого(tm) третьего (тм)... \n\n\n' 160 | '1500 мА*ч\n\n' 161 | '1-2=4\n' 162 | '- Химическое соединение (p) ненаблюдаемо контролирует экран-ый ' 163 | 'квазар (р). Идеи 3/4 гедонизма занимают b & b центральное место ' 164 | 'в утилитаризме(sm) "Милля и Бентама", однако <- гравитирующая -> ' 165 | 'сфера масштабирует фотон, +-2мм изменяя привычную == реальность. ' 166 | 'Силовое *3 поле -3 реально 3 * 2 /= 6 3x3 восстанавливает ' 167 | 'трансцендентальный 3" 2\' принцип 1000р. восприятия.' 168 | '"...\'test\'" (c) m&m\'s\n\n\n' 169 | ) 170 | expected = ( 171 | 'Излучение, как следует из_вышесказанного, концентрирует ' 172 | 'внутримолекулярный предмет\u202f—\u2009деятельности. «…ff „Можно?“ ' 173 | 'предположить, что силовое\u202f—\u2009„поле «мент „d“ ально» ' 174 | 'отклоняет“ ' 175 | 'сенсибельный „квазар!..“ cc», не_учитывая мнения авторитетов. ' 176 | 'Искусство испускает данный электрон, учитывая опасность, ' 177 | ' "d" test -- test(c) которую представляли собой ' 178 | 'писания Дюринга для не_окрепшего еще немецкого рабочего ' 179 | 'движения. Смысл жизни\u202f—\u2009амбивалентно ©_дискредитирует ' 180 | 'закон® исключённого™ третьего™…\n\n' 181 | '1500_мА•ч\n\n' 182 | '1−2=4\n' 183 | '—_Химическое соединение℗ ненаблюдаемо контролирует экран-ый ' 184 | 'квазар℗. Идеи ¾_гедонизма занимают b_&_b_центральное место ' 185 | 'в_утилитаризме℠ «Милля и_Бентама», однако ←_гравитирующая_→ ' 186 | 'сфера масштабирует фотон, ±2_мм изменяя привычную_≡_реальность. 
' 187 | 'Силовое ×3_поле −3_реально 3_×_2_≠_6 3×3 восстанавливает ' 188 | 'трансцендентальный 3″ 2′ принцип 1000_₽ восприятия.' 189 | '«…„test“» ©_m&m’s' 190 | ).replace('_', NBSP) 191 | assert_typus(source, expected) 192 | -------------------------------------------------------------------------------- /tests/test_expressions.py: -------------------------------------------------------------------------------- 1 | """ 2 | Tests expressions one by one. 3 | Some of them may return different results depending on which was 4 | applied earlier, so order matters. But that also means it's important 5 | to be sure they don't affect each other more than expected. This case 6 | tests every expression as if it was the only one to apply. 7 | """ 8 | import pytest 9 | 10 | from typus.chars import * 11 | from typus.core import TypusCore 12 | from typus.processors import EnRuExpressions 13 | 14 | 15 | @pytest.fixture(name='factory') 16 | def get_factory(): 17 | def factory(*exps): 18 | class MyExpressions(EnRuExpressions): 19 | expressions = exps 20 | 21 | class Typus(TypusCore): 22 | processors = (MyExpressions, ) 23 | return Typus() 24 | return factory 25 | 26 | 27 | @pytest.mark.parametrize('source, expected', ( 28 | ('110 р', f'110{NBSP}₽'), 29 | ('111 р.', f'111{NBSP}₽'), 30 | ('112 руб', f'112{NBSP}₽'), 31 | ('113 руб.', f'113{NBSP}₽'), 32 | # With comma 33 | ('114,00 р', f'114,00{NBSP}₽'), 34 | ('115.00 р', f'115.00{NBSP}₽'), 35 | # Ignores 36 | ('116 рубчиков', '116 рубчиков'), 37 | ('117 ру', '117 ру'), 38 | # Case sensivity 39 | ('117 Р', '117 Р'), 40 | )) 41 | def test_ruble(factory, source, expected): 42 | typus = factory('ruble') 43 | assert expected == typus(source) 44 | 45 | 46 | @pytest.mark.parametrize('source, expected', ( 47 | ('foo{}bar'.format(' ' * 30), 'foo bar'), 48 | )) 49 | def test_spaces(factory, source, expected): 50 | typus = factory('spaces') 51 | assert expected == typus(source) 52 | 53 | 54 | @pytest.mark.parametrize('source, expected', ( 55 | 
('a\nb', 'a\nb'), 56 | ('a\r\nb', 'a\nb'), 57 | ('a{0}b'.format('\n' * 5), 'a\n\nb'), 58 | ('a\n\n\r\nb', 'a\n\nb'), 59 | )) 60 | def test_linebreaks(factory, source, expected): 61 | typus = factory('linebreaks') 62 | assert expected == typus(source) 63 | 64 | 65 | @pytest.mark.parametrize('source, expected', ( 66 | ("She'd", f'She{RSQUO}d'), 67 | ("I'm", f'I{RSQUO}m'), 68 | ("it's", f'it{RSQUO}s'), 69 | ("don't", f'don{RSQUO}t'), 70 | ("you're", f'you{RSQUO}re'), 71 | ("he'll", f'he{RSQUO}ll'), 72 | ("90's", f'90{RSQUO}s'), 73 | ("Карло's", f'Карло{RSQUO}s'), 74 | )) 75 | def test_apostrophe(factory, source, expected): 76 | typus = factory('apostrophe') 77 | assert expected == typus(source) 78 | 79 | 80 | @pytest.mark.parametrize('source, expected', ( 81 | ('--', '--'), 82 | ('foo - foo', f'foo{MDASH_PAIR}foo'), 83 | # Leading comma case 84 | (', - foo', f',{MDASH}{THNSP}foo'), 85 | (', -- foo', f',{MDASH}{THNSP}foo'), 86 | # if line begins, adds nbsp after mdash 87 | ('-- foo', f'{MDASH}{NBSP}foo'), 88 | # if line ends, adds nbsp before mdash 89 | ('foo --', f'foo{NBSP}{MDASH}'), 90 | ('foo -- bar', f'foo{MDASH_PAIR}bar'), 91 | # Python markdown replaces dash with ndash, don't know why 92 | (f'foo {NDASH} foo', f'foo{MDASH_PAIR}foo'), 93 | ('foo - "11" 00', f'foo{MDASH_PAIR}"11" 00'), 94 | ('2 - 2foo', f'2{MDASH_PAIR}2foo'), 95 | ('2 - 2', '2 - 2'), # Doesn't clash with minus 96 | )) 97 | def test_mdash(factory, source, expected): 98 | typus = factory('mdash') 99 | assert expected == typus(source) 100 | 101 | 102 | @pytest.mark.parametrize('source, expected', ( 103 | ('4\'', '4' + SPRIME), 104 | ('4"', '4' + DPRIME), 105 | ('" 22"', '" 22' + DPRIME), 106 | ('"4"', '"4"'), 107 | )) 108 | def test_primes(factory, source, expected): 109 | typus = factory('primes') 110 | assert expected == typus(source) 111 | 112 | 113 | @pytest.mark.parametrize('source, expected', ( 114 | ('4444444 fooo', '4444444 fooo'), 115 | ('444 foo', f'444{NBSP}foo'), 116 | ('444 +', 
f'444{NBSP}+'), 117 | ('444 4444 bucks', f'444{NBSP}4444 bucks'), 118 | ('444 -', f'444{NBSP}-'), 119 | ('4444444 foo', '4444444 foo'), 120 | )) 121 | def test_digit_spaces(factory, source, expected): 122 | typus = factory('digit_spaces') 123 | assert expected == typus(source) 124 | 125 | 126 | @pytest.mark.parametrize('source, expected', ( 127 | ('aaa aaa', 'aaa aaa'), 128 | ('aaa-aa aa', 'aaa-aa aa'), # important check -- dash and 2 letters 129 | ('aaa aa', 'aaa aa'), 130 | ('I’ll check', 'I’ll check'), 131 | ('a aa a', f'a{NBSP}aa{NBSP}a'), 132 | ('aaa 2a', 'aaa 2a') # letters only, no digits, 133 | )) 134 | def test_pairs(factory, source, expected): 135 | typus = factory('pairs') 136 | assert expected == typus(source) 137 | 138 | 139 | @pytest.mark.parametrize('source, expected', ( 140 | # Latin 141 | ('1mm', f'1{NBSP}mm'), 142 | ('1cm', f'1{NBSP}cm'), 143 | ('1dm', f'1{NBSP}dm'), 144 | ('1m', f'1{NBSP}m'), 145 | ('1km', f'1{NBSP}km'), 146 | ('1mg', f'1{NBSP}mg'), 147 | ('1kg', f'1{NBSP}kg'), 148 | ('1ml', f'1{NBSP}ml'), 149 | ('1mA•h', f'1{NBSP}mA•h'), 150 | ('1dpi', f'1{NBSP}dpi'), 151 | # Cyrillic 152 | ('1мм', f'1{NBSP}мм'), 153 | ('1см', f'1{NBSP}см'), 154 | ('1дм', f'1{NBSP}дм'), 155 | ('1м', f'1{NBSP}м'), 156 | ('1км', f'1{NBSP}км'), 157 | ('1мг', f'1{NBSP}мг'), 158 | ('1г', f'1{NBSP}г'), 159 | ('1кг', f'1{NBSP}кг'), 160 | ('1мл', f'1{NBSP}мл'), 161 | ('1л', f'1{NBSP}л'), 162 | ('1т', f'1{NBSP}т'), 163 | ('1мА•ч', f'1{NBSP}мА•ч'), 164 | # Skips 165 | ('1foobar', '1foobar'), 166 | # Exceptions 167 | ('3g', '3g'), # 4G lte 168 | ('3d', '3d'), # 3D movie 169 | ('2nd', '2nd'), # floor 170 | ('3rd', '3rd'), # floor 171 | ('4th', '4th'), # floor 172 | ('1px', '1px'), 173 | ('1000A', '1000A'), 174 | # Case sensivity 175 | ('1000ML', '1000ML'), 176 | )) 177 | def test_units(factory, source, expected): 178 | typus = factory('units') 179 | assert expected == typus(source) 180 | 181 | 182 | @pytest.mark.parametrize('source, expected', ( 183 | ('25-foo', '25-foo'), 
184 | ('2-3', f'2{NDASH}3'), 185 | ('2,5-3', f'2,5{NDASH}3'), 186 | ('0.5-3', f'0.5{NDASH}3'), 187 | 188 | ('2-3 foo', f'2{NDASH}3 foo'), 189 | ('(15-20 items)', f'(15{NDASH}20 items)'), 190 | 191 | # Float 192 | ('0,5-3', f'0,5{NDASH}3'), 193 | ('-0,5-3', f'-0,5{NDASH}3'), 194 | ('-5.5-3', f'-5.5{NDASH}3'), 195 | ('-5,5-3', f'-5,5{NDASH}3'), 196 | ('-5,5-3.5', f'-5,5{NDASH}3.5'), 197 | 198 | # Skips 199 | ('2 - 3', '2 - 3'), 200 | ('2-3 x 4', '2-3 x 4'), 201 | ('2-3 * 4', '2-3 * 4'), 202 | ('2-3 - 4', '2-3 - 4'), 203 | 204 | # Left is less than or equal to right 205 | ('3-2', '3-2'), 206 | ('3-3', '3-3'), 207 | 208 | # Doesn't affect math 209 | ('1-2=4', f'1-2=4'), 210 | )) 211 | def test_ranges(factory, source, expected): 212 | typus = factory('ranges') 213 | assert expected == typus(source) 214 | 215 | 216 | @pytest.mark.parametrize('source, expected', ( 217 | ('(C)', '©'), # Case insensitive test 218 | ('...', '…'), 219 | ('<-', '←'), 220 | ('->', '→'), 221 | ('+-', '±'), 222 | ('+' + MINUS, '±'), 223 | ('<=', '≤'), 224 | ('>=', '≥'), 225 | ('/=', '≠'), 226 | ('==', '≡'), 227 | ('(r)', '®'), 228 | ('(c)', '©'), 229 | ('(p)', '℗'), 230 | ('(tm)', '™'), 231 | ('(sm)', '℠'), 232 | ('mA*h', 'mA•h'), 233 | # cyrillic 234 | ('(с)', '©'), 235 | ('(р)', '℗'), 236 | ('(тм)', '™',), 237 | ('мА*ч', 'мА•ч'), 238 | )) 239 | def test_complex_symbols(factory, source, expected): 240 | typus = factory('complex_symbols') 241 | assert expected == typus(source) 242 | 243 | 244 | @pytest.mark.parametrize('source, expected', ( 245 | ('1/2', '½'), 246 | ('1/3', '⅓'), 247 | ('1/4', '​¼'), 248 | ('1/5', '⅕'), 249 | ('1/6', '⅙'), 250 | ('1/8', '⅛'), 251 | ('2/3', '⅔'), 252 | ('2/5', '⅖'), 253 | ('3/4', '¾'), 254 | ('3/5', '⅗'), 255 | ('3/8', '⅜'), 256 | ('4/5', '⅘'), 257 | ('5/6', '⅚'), 258 | ('5/8', '⅝'), 259 | ('7/8', '⅞'), 260 | # False positive 261 | ('11/22', '11/22'), 262 | )) 263 | def test_vulgar_fractions(factory, source, expected): 264 | typus = factory('vulgar_fractions') 265 
| assert expected == typus(source) 266 | 267 | 268 | @pytest.mark.parametrize('source, expected', ( 269 | ('-', MINUS), 270 | ('*', TIMES), 271 | ('x', TIMES), 272 | ('х', TIMES), 273 | )) 274 | def test_math(factory, source, expected): 275 | typus = factory('math') 276 | # -3, 3-3, 3 - 3, x - 3 277 | assert typus(source + '3') == expected + '3' 278 | assert typus(f'word{source} 3') == f'word{source} 3' 279 | assert typus(f'3{source}3') == f'3{expected}3' 280 | assert typus(f'3 {source} 3') == f'3 {expected} 3' 281 | assert typus(f'x {source} 3') == f'x {expected} 3' 282 | assert typus(f'3{source}3=3') == f'3{expected}3=3' 283 | 284 | 285 | @pytest.mark.parametrize('source, expected', ( 286 | ('т. д.', f'т.{NNBSP}д.'), 287 | ('т.д.', f'т.{NNBSP}д.'), 288 | ('т.п.', f'т.{NNBSP}п.'), 289 | ('т. ч.', f'т.{NNBSP}ч.'), 290 | ('т.е.', f'т.{NNBSP}е.'), 291 | ('Пушкин А.С.', f'Пушкин А.{NNBSP}С.'), 292 | ('А.С. Пушкин', f'А.{NNBSP}С.{NBSP}Пушкин'), 293 | )) 294 | def test_abbrs(factory, source, expected): 295 | typus = factory('abbrs') 296 | assert expected == typus(source) 297 | 298 | 299 | @pytest.mark.parametrize('char', f'←$€£%±{MINUS}{TIMES}©§¶№') 300 | def test_rep_positional_spaces_after(factory, char): 301 | typus = factory('rep_positional_spaces') 302 | assert typus(f'foo {char} bar') == f'foo {char}{NBSP}bar' 303 | 304 | 305 | @pytest.mark.parametrize('char', '&≡≤≥≠') 306 | def test_rep_positional_spaces_both(factory, char): 307 | typus = factory('rep_positional_spaces') 308 | assert typus(f'foo {char} bar') == f'foo{NBSP}{char}{NBSP}bar' 309 | 310 | 311 | @pytest.mark.parametrize('char', '₽→' + MDASH) 312 | def test_rep_positional_spaces_before(factory, char): 313 | typus = factory('rep_positional_spaces') 314 | assert typus(f'foo {char} bar') == f'foo{NBSP}{char} bar' 315 | 316 | 317 | @pytest.mark.parametrize('char', '®℗™℠:,.?!…') 318 | def test_rdel_positional_spaces_before(factory, char): 319 | typus = factory('del_positional_spaces') 320 | assert 
typus(f'foo {char} bar') == f'foo{char} bar' 321 | -------------------------------------------------------------------------------- /docs/conf.py: -------------------------------------------------------------------------------- 1 | # -*- coding: utf-8 -*- 2 | # 3 | # Typus documentation build configuration file, created by 4 | # sphinx-quickstart on Tue Jul 12 22:26:26 2016. 5 | # 6 | # This file is execfile()d with the current directory set to its 7 | # containing dir. 8 | # 9 | # Note that not all possible configuration values are present in this 10 | # autogenerated file. 11 | # 12 | # All configuration values have a default; values that are commented out 13 | # serve to show the default. 14 | 15 | # If extensions (or modules to document with autodoc) are in another directory, 16 | # add these directories to sys.path here. If the directory is relative to the 17 | # documentation root, use os.path.abspath to make it absolute, like shown here. 18 | # 19 | # import os 20 | # import sys 21 | # sys.path.insert(0, os.path.abspath('.')) 22 | 23 | # -- General configuration ------------------------------------------------ 24 | 25 | # If your documentation needs a minimal Sphinx version, state it here. 26 | # 27 | # needs_sphinx = '1.0' 28 | 29 | # Add any Sphinx extension module names here, as strings. They can be 30 | # extensions coming with Sphinx (named 'sphinx.ext.*') or your custom 31 | # ones. 32 | extensions = [ 33 | 'sphinx.ext.autodoc', 34 | 'sphinx.ext.doctest', 35 | 'sphinx.ext.coverage', 36 | ] 37 | 38 | # Add any paths that contain templates here, relative to this directory. 39 | templates_path = ['_templates'] 40 | 41 | # The suffix(es) of source filenames. 42 | # You can specify multiple suffix as a list of string: 43 | # 44 | # source_suffix = ['.rst', '.md'] 45 | source_suffix = '.rst' 46 | 47 | # The encoding of source files. 48 | # 49 | # source_encoding = 'utf-8-sig' 50 | 51 | # The master toctree document. 
52 | master_doc = 'index' 53 | 54 | # General information about the project. 55 | project = 'Typus' 56 | copyright = '2016, Murad Byashimov' 57 | author = 'Murad Byashimov' 58 | 59 | # The version info for the project you're documenting, acts as replacement for 60 | # |version| and |release|, also used in various other places throughout the 61 | # built documents. 62 | # 63 | # The short X.Y version. 64 | version = '0.2.2' 65 | # The full version, including alpha/beta/rc tags. 66 | release = '0.2.2' 67 | 68 | # The language for content autogenerated by Sphinx. Refer to documentation 69 | # for a list of supported languages. 70 | # 71 | # This is also used if you do content translation via gettext catalogs. 72 | # Usually you set "language" from the command line for these cases. 73 | language = None 74 | 75 | # There are two options for replacing |today|: either, you set today to some 76 | # non-false value, then it is used: 77 | # 78 | # today = '' 79 | # 80 | # Else, today_fmt is used as the format for a strftime call. 81 | # 82 | # today_fmt = '%B %d, %Y' 83 | 84 | # List of patterns, relative to source directory, that match files and 85 | # directories to ignore when looking for source files. 86 | # This patterns also effect to html_static_path and html_extra_path 87 | exclude_patterns = [] 88 | 89 | # The reST default role (used for this markup: `text`) to use for all 90 | # documents. 91 | # 92 | # default_role = None 93 | 94 | # If true, '()' will be appended to :func: etc. cross-reference text. 95 | # 96 | # add_function_parentheses = True 97 | 98 | # If true, the current module name will be prepended to all description 99 | # unit titles (such as .. function::). 100 | # 101 | # add_module_names = True 102 | 103 | # If true, sectionauthor and moduleauthor directives will be shown in the 104 | # output. They are ignored by default. 105 | # 106 | # show_authors = False 107 | 108 | # The name of the Pygments (syntax highlighting) style to use. 
109 | pygments_style = 'sphinx' 110 | 111 | # A list of ignored prefixes for module index sorting. 112 | # modindex_common_prefix = [] 113 | 114 | # If true, keep warnings as "system message" paragraphs in the built documents. 115 | # keep_warnings = False 116 | 117 | # If true, `todo` and `todoList` produce output, else they produce nothing. 118 | todo_include_todos = True 119 | 120 | 121 | # -- Options for HTML output ---------------------------------------------- 122 | 123 | # The theme to use for HTML and HTML Help pages. See the documentation for 124 | # a list of builtin themes. 125 | # 126 | html_theme = 'sphinx_rtd_theme' 127 | 128 | # Theme options are theme-specific and customize the look and feel of a theme 129 | # further. For a list of options available for each theme, see the 130 | # documentation. 131 | # 132 | # html_theme_options = {} 133 | 134 | # Add any paths that contain custom themes here, relative to this directory. 135 | # html_theme_path = [] 136 | 137 | # The name for this set of Sphinx documents. 138 | # " v documentation" by default. 139 | # 140 | # html_title = 'Typus v0.0.4' 141 | 142 | # A shorter title for the navigation bar. Default is the same as html_title. 143 | # 144 | # html_short_title = None 145 | 146 | # The name of an image file (relative to this directory) to place at the top 147 | # of the sidebar. 148 | # 149 | # html_logo = None 150 | 151 | # The name of an image file (relative to this directory) to use as a favicon of 152 | # the docs. This file should be a Windows icon file (.ico) being 16x16 or 32x32 153 | # pixels large. 154 | # 155 | # html_favicon = None 156 | 157 | # Add any paths that contain custom static files (such as style sheets) here, 158 | # relative to this directory. They are copied after the builtin static files, 159 | # so a file named "default.css" will overwrite the builtin "default.css". 
160 | html_static_path = ['_static'] 161 | 162 | # Add any extra paths that contain custom files (such as robots.txt or 163 | # .htaccess) here, relative to this directory. These files are copied 164 | # directly to the root of the documentation. 165 | # 166 | # html_extra_path = [] 167 | 168 | # If not None, a 'Last updated on:' timestamp is inserted at every page 169 | # bottom, using the given strftime format. 170 | # The empty string is equivalent to '%b %d, %Y'. 171 | # 172 | # html_last_updated_fmt = None 173 | 174 | # If true, SmartyPants will be used to convert quotes and dashes to 175 | # typographically correct entities. 176 | # 177 | # html_use_smartypants = True 178 | 179 | # Custom sidebar templates, maps document names to template names. 180 | # 181 | # html_sidebars = {} 182 | 183 | # Additional templates that should be rendered to pages, maps page names to 184 | # template names. 185 | # 186 | # html_additional_pages = {} 187 | 188 | # If false, no module index is generated. 189 | # 190 | # html_domain_indices = True 191 | 192 | # If false, no index is generated. 193 | # 194 | # html_use_index = True 195 | 196 | # If true, the index is split into individual pages for each letter. 197 | # 198 | # html_split_index = False 199 | 200 | # If true, links to the reST sources are added to the pages. 201 | # 202 | # html_show_sourcelink = True 203 | 204 | # If true, "Created using Sphinx" is shown in the HTML footer. Default is True. 205 | # 206 | # html_show_sphinx = True 207 | 208 | # If true, "(C) Copyright ..." is shown in the HTML footer. Default is True. 209 | # 210 | # html_show_copyright = True 211 | 212 | # If true, an OpenSearch description file will be output, and all pages will 213 | # contain a tag referring to it. The value of this option must be the 214 | # base URL from which the finished HTML is served. 215 | # 216 | # html_use_opensearch = '' 217 | 218 | # This is the file name suffix for HTML files (e.g. ".xhtml"). 
219 | # html_file_suffix = None 220 | 221 | # Language to be used for generating the HTML full-text search index. 222 | # Sphinx supports the following languages: 223 | # 'da', 'de', 'en', 'es', 'fi', 'fr', 'hu', 'it', 'ja' 224 | # 'nl', 'no', 'pt', 'ro', 'ru', 'sv', 'tr', 'zh' 225 | # 226 | # html_search_language = 'en' 227 | 228 | # A dictionary with options for the search language support, empty by default. 229 | # 'ja' uses this config value. 230 | # 'zh' user can custom change `jieba` dictionary path. 231 | # 232 | # html_search_options = {'type': 'default'} 233 | 234 | # The name of a javascript file (relative to the configuration directory) that 235 | # implements a search results scorer. If empty, the default will be used. 236 | # 237 | # html_search_scorer = 'scorer.js' 238 | 239 | # Output file base name for HTML help builder. 240 | htmlhelp_basename = 'Typusdoc' 241 | 242 | # -- Options for LaTeX output --------------------------------------------- 243 | 244 | latex_elements = { 245 | # The paper size ('letterpaper' or 'a4paper'). 246 | # 247 | # 'papersize': 'letterpaper', 248 | 249 | # The font size ('10pt', '11pt' or '12pt'). 250 | # 251 | # 'pointsize': '10pt', 252 | 253 | # Additional stuff for the LaTeX preamble. 254 | # 255 | # 'preamble': '', 256 | 257 | # Latex figure (float) alignment 258 | # 259 | # 'figure_align': 'htbp', 260 | } 261 | 262 | # Grouping the document tree into LaTeX files. List of tuples 263 | # (source start file, target name, title, 264 | # author, documentclass [howto, manual, or own class]). 265 | latex_documents = [ 266 | (master_doc, 'Typus.tex', 'Typus Documentation', 267 | 'Murad Byashimov', 'manual'), 268 | ] 269 | 270 | # The name of an image file (relative to this directory) to place at the top of 271 | # the title page. 272 | # 273 | # latex_logo = None 274 | 275 | # For "manual" documents, if this is true, then toplevel headings are parts, 276 | # not chapters. 
277 | # 278 | # latex_use_parts = False 279 | 280 | # If true, show page references after internal links. 281 | # 282 | # latex_show_pagerefs = False 283 | 284 | # If true, show URL addresses after external links. 285 | # 286 | # latex_show_urls = False 287 | 288 | # Documents to append as an appendix to all manuals. 289 | # 290 | # latex_appendices = [] 291 | 292 | # If false, no module index is generated. 293 | # 294 | # latex_domain_indices = True 295 | 296 | 297 | # -- Options for manual page output --------------------------------------- 298 | 299 | # One entry per manual page. List of tuples 300 | # (source start file, name, description, authors, manual section). 301 | man_pages = [ 302 | (master_doc, 'typus', 'Typus Documentation', 303 | [author], 1) 304 | ] 305 | 306 | # If true, show URL addresses after external links. 307 | # 308 | # man_show_urls = False 309 | 310 | 311 | # -- Options for Texinfo output ------------------------------------------- 312 | 313 | # Grouping the document tree into Texinfo files. List of tuples 314 | # (source start file, target name, title, author, 315 | # dir menu entry, description, category) 316 | texinfo_documents = [ 317 | (master_doc, 'Typus', 'Typus Documentation', 318 | author, 'Typus', 'One line description of project.', 319 | 'Miscellaneous'), 320 | ] 321 | 322 | # Documents to append as an appendix to all manuals. 323 | # 324 | # texinfo_appendices = [] 325 | 326 | # If false, no module index is generated. 327 | # 328 | # texinfo_domain_indices = True 329 | 330 | # How to display URL addresses: 'footnote', 'no', or 'inline'. 331 | # 332 | # texinfo_show_urls = 'footnote' 333 | 334 | # If true, do not generate a @detailmenu in the "Top" node's menu. 
import re
from functools import partial

from ..chars import *
from ..utils import RE_SCASE, doc_map, map_choices, re_choices, re_compile
from .base import BaseProcessor


class BaseExpressions(BaseProcessor):
    r"""
    Provides regular expressions support. Looks for the ``expressions``
    attribute (an iterable of expression names) on the processor class,
    calls the matching ``expr_<name>`` methods, compiles what they return
    once at construction time and applies the result on every Typus call.

    >>> from typus.core import TypusCore
    >>> from typus.processors import BaseExpressions
    ...
    >>> class MyExpressions(BaseExpressions):
    ...     expressions = ('bold_price', )  # no prefix `expr_`!
    ...     def expr_bold_price(self):
    ...         expr = (
    ...             (r'(\$\d+)', r'<b>\1</b>'),
    ...         )
    ...         return expr
    ...
    >>> class MyTypus(TypusCore):
    ...     processors = (MyExpressions, )
    ...
    >>> my_typus = MyTypus()  # `expr_bold_price` is compiled and stored
    >>> my_typus('Get now just for $1000!')
    'Get now just for <b>$1000</b>!'

    .. note::
        *Expression* is a pair of regex and replace strings. Regex strings are
        compiled with :func:`typus.utils.re_compile` with a bunch of flags:
        unicode, case-insensitive, etc. If that doesn't suit you, pass your
        own flags as a third member of the tuple: ``(regex, replace, re.I)``.
    """

    # Names of the expressions to run, in order, without the `expr_` prefix.
    # Subclasses must override this.
    expressions = NotImplemented

    def __init__(self, *args, **kwargs):
        super().__init__(*args, **kwargs)

        # Fail with a readable message instead of the opaque
        # "'NotImplementedType' object is not iterable" (same exception type).
        if self.expressions is NotImplemented:
            raise TypeError(
                '{0}.expressions must be an iterable of expression names '
                '(without the `expr_` prefix)'.format(type(self).__name__))

        # Compiles expressions.
        # Each `expr` is `(regex, replace)` or `(regex, replace, flags)`:
        # `expr[::2]` picks the regex and the optional flags for `re_compile`,
        # `expr[1]` is bound as the replacement of the compiled `sub`.
        self.compiled = tuple(
            partial(re_compile(*expr[::2]).sub, expr[1])
            for name in self.expressions
            for expr in getattr(self, 'expr_' + name)()
        )

    def run(self, text: str, **kwargs) -> str:
        """
        Applies every compiled expression to ``text`` in declaration order
        and hands the result over to ``run_other``.
        """

        for expression in self.compiled:
            text = expression(text)
        return self.run_other(text, **kwargs)


class EnRuExpressions(BaseExpressions):
    """
    This class holds most of Typus functionality for English and Russian
    languages.
    """

    # Order matters: later expressions rely on the output of earlier ones
    # (see the comments on `math_operators` and `rep_positional_spaces`).
    expressions = (
        'spaces linebreaks apostrophe complex_symbols mdash primes '
        'digit_spaces pairs units ranges vulgar_fractions math ruble abbrs '
        'rep_positional_spaces del_positional_spaces'
    ).split()

    # Any unicode word character: a letter, but neither a digit
    # nor an underscore
    words = r'[^\W\d_]'

    complex_symbols = {
        '...': '…',
        '<-': '←',
        '->': '→',
        '+-': '±',
        '+' + MINUS: '±',
        '<=': '≤',
        '>=': '≥',
        '/=': '≠',
        '==': '≡',
        '(r)': '®',
        '(c)': '©',
        '(p)': '℗',
        '(tm)': '™',
        '(sm)': '℠',
        'mA*h': 'mA•h',
        # cyrillic
        '(с)': '©',
        '(р)': '℗',
        '(тм)': '™',
        'мА*ч': 'мА•ч',
    }

    units = (
        'mm',
        'cm',
        'dm',
        'm',
        'km',
        'mg',
        'kg',
        'ml',
        'dpi',
        'mA•h',
        'мм',
        'см',
        'дм',
        'м',
        'км',
        'мг',
        'г',
        'кг',
        'т',
        'мл',
        'л',
        'мА•ч',
    )

    # This is for docs
    units_doc_map = {'1' + k: '1{}{}'.format(NBSP, k) for k in units}

    vulgar_fractions = {
        '1/2': '½',
        '1/3': '⅓',
        # Plain U+00BC: the value must not carry any invisible characters
        # (e.g. a zero width space), they would leak into the output
        '1/4': '¼',
        '1/5': '⅕',
        '1/6': '⅙',
        '1/8': '⅛',
        '2/3': '⅔',
        '2/5': '⅖',
        '3/4': '¾',
        '3/5': '⅗',
        '3/8': '⅜',
        '4/5': '⅘',
        '5/6': '⅚',
        '5/8': '⅝',
        '7/8': '⅞',
    }

    # Keys are sets of characters (they end up inside a regex character
    # class), values are their replacements
    math = {
        '-': MINUS,
        '*xх': TIMES,
    }

    # No need to put >=, +-, etc, after expr_complex_symbols
    math_operators = r'[\-{0}\*xх{1}\+\=±≤≥≠÷\/]'.format(MINUS, TIMES)

    rep_positional_spaces = {
        # No need to put vulgar fractions in here because of expr_digit_spaces
        # which joins digits and words afterward
        'after': '←$€£%±{0}{1}©§¶№'.format(MINUS, TIMES),
        'both': '&≡≤≥≠',
        'before': '₽→' + MDASH,
    }

    del_positional_spaces = {
        'before': '®℗™℠:,.?!…',
    }

    ruble = (
        'руб',
        'р',
    )

    @staticmethod
    def expr_spaces():
        """
        Trims spaces at the beginning and end of the line and removes extra
        spaces within.

        >>> from typus import en_typus
        >>> en_typus('  foo   bar  ')
        'foo bar'

        .. caution::
            Doesn't work correctly with nbsp (replaces with whitespace).
        """

        expr = (
            (r'{0}{{2,}}'.format(ANYSP), WHSP),
            (r'(?:^{0}+|{0}+$)'.format(ANYSP), ''),
        )
        return expr

    @staticmethod
    def expr_linebreaks():
        r"""
        Converts line breaks to unix-style and removes extra breaks
        if found more than two in a row.

        >>> from typus import en_typus
        >>> en_typus('foo\r\nbar\n\n\nbaz')
        'foo\nbar\n\nbaz'
        """

        expr = (
            (r'\r\n', '\n'),
            (r'\n{2,}', '\n' * 2),
        )
        return expr

    def expr_apostrophe(self):
        """
        Replaces single quote with apostrophe when it follows a letter or
        a digit and is followed by a letter.

        >>> from typus import en_typus
        >>> en_typus("She'd, I'm, it's, don't, you're, he'll, 90's")
        'She’d, I’m, it’s, don’t, you’re, he’ll, 90’s'

        .. note::
            By the way it works with any omitted word. But then again, why not?
        """

        expr = (
            (r'(?<={0}|[0-9])\'(?={0})'.format(self.words), RSQUO),
        )
        return expr

    @doc_map(complex_symbols)
    def expr_complex_symbols(self):
        """
        Replaces complex symbols with Unicode characters. Doesn't care
        about case-sensitivity and handles Cyrillic-Latin twins
        like ``c`` and ``с``.

        >>> from typus import en_typus
        >>> en_typus('(c)(с)(C)(r)(R)...')
        '©©©®®…'
        """

        expr = (
            map_choices(self.complex_symbols),
        )
        return expr

    @staticmethod
    def expr_mdash():
        """
        Replaces dash with mdash.

        >>> from typus import en_typus
        >>> en_typus('foo -- bar')  # adds non-breaking space after `foo`
        'foo\u202f—\u2009bar'
        """

        expr = (
            # Double dash guarantees to be replaced with mdash
            (r'{0}--{0}'.format(WHSP), MDASH_PAIR),

            # Dash can be between anything except digits
            # because in that case it's not obvious.
            # The character class holds a hyphen and an ndash only: `|` is
            # a literal inside `[]` and would turn `foo | bar` into a dash
            (r'{0}+[\-{1}]{0}+(?!\d\b)'.format(ANYSP, NDASH), MDASH_PAIR),

            # Same but backwards
            # It joins non-digit with digit or word
            (r'(\b\D+){0}+[\-{1}]{0}+'.format(ANYSP, NDASH),
             r'\1{0}'.format(MDASH_PAIR)),

            # Line beginning adds nbsp after dash
            (r'^\-{{1,2}}{0}+'.format(ANYSP),
             r'{0}{1}'.format(MDASH, NBSP)),

            # Also mdash can be at the end of the line in poems
            (r'{0}+\-{{1,2}}{0}*(?=$|\n)'.format(ANYSP),
             r'{0}{1}'.format(NBSP, MDASH)),

            # Special case with leading comma
            (',' + MDASH_PAIR, f',{MDASH}{THNSP}'),
        )
        return expr

    @staticmethod
    def expr_primes():
        r"""
        Replaces quotes with prime after digits.

        >>> from typus import en_typus
        >>> en_typus('3\' 5" long')
        '3′ 5″ long'

        .. caution::
            Won't break ``"4"``, but fails with ``" 4"``.
        """

        expr = (
            (r'(^|{0})(\d+)\''.format(ANYSP), r'\1\2' + SPRIME),
            (r'(^|{0})(\d+)"'.format(ANYSP), r'\1\2' + DPRIME),
        )
        return expr

    def expr_digit_spaces(self):
        """
        Replaces whitespace with non-breaking space after a group of
        3 (or fewer) digits if a word, a digit group without comma or
        a math operator is found afterwards:
            3 apples
            40 000 bucks
            400 + 3
        Skips:
            4000 bucks
            40 000,00 bucks
        """

        expr = (
            (r'\b(\d{{1,3}}){0}(?=[0-9]+\b|{1}|{2})'
             .format(WHSP, self.words, self.math_operators), r'\1' + NBSP),
        )
        return expr

    def expr_pairs(self):
        """
        Replaces whitespace with non-breaking space after 1-2 length words.
        """

        expr = (
            # Unions, units and all that small stuff
            (r'\b({1}{{1,2}}){0}+'.format(WHSP, self.words), r'\1' + NBSP),
            # Fixes previous with leading dash, ellipsis or apostrophe
            (r'([-…’]{1}{{1,2}}){0}'.format(NBSP, self.words), r'\1' + WHSP),
        )
        return expr

    @doc_map(units_doc_map)
    def expr_units(self):
        """
        Puts non-breaking space (``NBSP``) between digits and units,
        replacing any whitespace that was there. Case sensitive.

        >>> from typus import en_typus
        >>> en_typus('1mm', debug=True), en_typus('1mm')
        ('1_mm', '1 mm')
        """

        expr = (
            (r'\b(\d+){0}*{1}\b'.format(WHSP, re_choices(self.units)),
             r'\1{0}\2'.format(NBSP), RE_SCASE),  # case matters
        )
        return expr

    def expr_ranges(self):
        """
        Replaces dash with ndash in ranges.
        Supports float and negative values.
        Tries to not mess with minus: skips if any math operator or word
        was found after dash: 3-2=1, 24-pin.
        **NOTE**: _range_ should not have spaces between dash: `2-3` and
        left side should be less than right side.
        """

        def ufloat(string):
            # Accepts both `1.5` and `1,5`
            return float(string.replace(',', '.'))

        def replace(match):
            left, dash, right = match.groups()
            # Only an ascending pair is a range, otherwise keep the hyphen
            if ufloat(left) < ufloat(right):
                dash = NDASH
            return '{0}{1}{2}'.format(left, dash, right)

        expr = (
            (r'(-?(?:[0-9]+[\.,][0-9]+|[0-9]+))(-)'
             r'([0-9]+[\.,][0-9]+|[0-9]+)'
             r'(?!{0}*{1}|{2})'
             .format(ANYSP, self.math_operators, self.words),
             replace),
        )
        return expr

    @doc_map(vulgar_fractions)
    def expr_vulgar_fractions(self):
        """
        Replaces vulgar fractions with appropriate unicode characters.

        >>> from typus import en_typus
        >>> en_typus('1/2')
        '½'
        """

        expr = (
            # \b excludes digits which are not on the map, like `11/22`
            map_choices(self.vulgar_fractions, r'\b({0})\b'),
        )
        return expr

    @doc_map(math)
    def expr_math(self):
        """
        Puts minus and multiplication symbols between pair and before
        single digits.

        >>> from typus import en_typus
        >>> en_typus('3 - 3 = 0')
        '3 − 3 = 0'
        >>> en_typus('-3 degrees')
        '−3 degrees'
        >>> en_typus('3 x 3 = 9')
        '3 × 3 = 9'
        >>> en_typus('x3 better!')
        '×3 better!'
        """

        # A tuple, like every other expression, rather than a one-shot
        # generator
        expr = tuple(
            (r'(^|{0}|\d)[{1}]({0}*\d)'.format(ANYSP, re.escape(x)),
             r'\1{0}\2'.format(y)) for x, y in self.math.items()
        )
        return expr

    def expr_abbrs(self):
        """
        Joins single-letter abbreviations (a letter followed by a dot):
        puts ``NNBSP`` between two abbreviations in a row and ``NBSP``
        between an abbreviation and the word that follows it, replacing
        whatever whitespace was in between.
        """

        expr = (
            (r'\b({1}\.){0}*({1}\.)'.format(ANYSP, self.words),
             r'\1{0}\2'.format(NNBSP)),
            (r'\b({1}\.){0}*(?={1})'.format(WHSP, self.words),
             r'\1{0}'.format(NBSP)),
        )
        return expr

    def expr_ruble(self):
        """
        Replaces `руб` and `р` (with or without dot) after digits
        with ruble symbol. Case sensitive.

        >>> from typus import en_typus
        >>> en_typus('1000 р.')
        '1000 ₽'

        .. caution::

            Drops the dot at the end of sentence if match found in there.
        """

        choices = re_choices(self.ruble, r'(?:{0})')
        expr = (
            (r'(\d){0}*{1}\b\.?'.format(ANYSP, choices),
             r'\1{0}₽'.format(NBSP), RE_SCASE),  # case matters
        )
        return expr

    @staticmethod
    def _positional_spaces(data, find, replace):
        """
        Helper method for `rep_positional_spaces` and `del_positional_spaces`
        expressions.

        ``data`` maps a direction (``before``, ``after``, ``both``) to
        a string of characters. Yields ``(regex, replace)`` pairs matching
        one or more ``find`` before and/or after these characters;
        ``both`` characters are added to either direction.
        """

        both = data.get('both', '')
        # Characters go into a regex character class, hence the escaping
        before = re.escape(data.get('before', '') + both)
        after = re.escape(data.get('after', '') + both)
        if before:
            yield r'{0}+(?=[{1}])'.format(find, before), replace
        if after:
            yield r'(?<=[{1}]){0}+'.format(find, after), replace

    @doc_map(rep_positional_spaces, keys='Direction', values='Characters')
    def expr_rep_positional_spaces(self):
        """
        Replaces whitespaces after and before certain symbols
        with non-breaking space.
        """

        expr = self._positional_spaces(self.rep_positional_spaces, WHSP, NBSP)
        return tuple(expr)

    @doc_map(del_positional_spaces, keys='Direction', values='Characters')
    def expr_del_positional_spaces(self):
        """
        Removes spaces next to the symbols listed in
        ``del_positional_spaces`` (only the ``before`` direction is
        configured here).
        """

        expr = self._positional_spaces(self.del_positional_spaces, ANYSP, '')
        return tuple(expr)