├── .coveragerc ├── .flake8 ├── .github └── workflows │ └── lint_and_test.yml ├── .gitignore ├── CHANGELOG.textile ├── CONTRIBUTORS.txt ├── LICENSE.txt ├── Makefile ├── README.textile ├── TODO.textile ├── pyproject.toml ├── pytest.ini ├── tests ├── __init__.py ├── fixtures │ └── README.txt ├── test_attributes.py ├── test_block.py ├── test_cli.py ├── test_footnoteRef.py ├── test_getRefs.py ├── test_getimagesize.py ├── test_github_issues.py ├── test_glyphs.py ├── test_image.py ├── test_imagesize.py ├── test_lists.py ├── test_retrieve.py ├── test_span.py ├── test_subclassing.py ├── test_table.py ├── test_textile.py ├── test_textilefactory.py ├── test_urls.py ├── test_utils.py └── test_values.py └── textile ├── __init__.py ├── __main__.py ├── core.py ├── objects ├── __init__.py ├── block.py └── table.py ├── regex_strings.py ├── textilefactory.py ├── utils.py └── version.py /.coveragerc: -------------------------------------------------------------------------------- 1 | [run] 2 | branch = True 3 | source = textile 4 | parallel = True 5 | 6 | [report] 7 | show_missing = True 8 | omit = 9 | textile/tests/* -------------------------------------------------------------------------------- /.flake8: -------------------------------------------------------------------------------- 1 | [flake8] 2 | ignore = 3 | # line too long 4 | E501 5 | exclude = 6 | build/ 7 | -------------------------------------------------------------------------------- /.github/workflows/lint_and_test.yml: -------------------------------------------------------------------------------- 1 | --- 2 | name: python-textile 3 | 4 | on: [push] 5 | 6 | jobs: 7 | lint_and_test: 8 | runs-on: ubuntu-latest 9 | strategy: 10 | matrix: 11 | python-version: ["3.8", "3.9", "3.10", "3.11", "3.12", "pypy3.10"] 12 | image_size: ['true', 'false'] 13 | steps: 14 | - uses: actions/checkout@v4 15 | - name: Set up Python ${{ matrix.python-version }} 16 | uses: actions/setup-python@v5 17 | with: 18 | python-version: ${{ 
matrix.python-version }} 19 | - name: Python flake8 Lint 20 | uses: py-actions/flake8@v2.3.0 21 | - name: Install dependencies 22 | run: | 23 | imagesize='' 24 | pip install -U pytest pytest-cov coverage codecov 25 | if [[ ${{ matrix.image_size }} == true ]] ; then imagesize='[imagesize]' ; fi 26 | pip install -e ".${imagesize}" 27 | - name: run tests 28 | run: | 29 | pytest 30 | - name: Codecov 31 | uses: codecov/codecov-action@v4 32 | env: 33 | CODECOV_TOKEN: ${{ secrets.CODECOV_TOKEN }} 34 | -------------------------------------------------------------------------------- /.gitignore: -------------------------------------------------------------------------------- 1 | *.pyc 2 | *.orig 3 | *.rej 4 | *~ 5 | *.pyo 6 | *.egg-info 7 | .cache/ 8 | .coverage 9 | .eggs/ 10 | .noseids* 11 | .pytest_cache 12 | docs/build 13 | docs/coverage 14 | build 15 | bin 16 | dist 17 | eggs 18 | htmlcov 19 | parts 20 | develop-eggs 21 | .DS_Store 22 | *.swp 23 | .tox 24 | README.txt 25 | -------------------------------------------------------------------------------- /CHANGELOG.textile: -------------------------------------------------------------------------------- 1 | h1. Textile Changelog 2 | 3 | h2. Version 4.0.3 4 | * Update supported Python versions to 3.8 - 3.12 ("#83":https://github.com/textile/python-textile/issues/83) 5 | * Replace html5lib with nh3 for html sanitization 6 | * General code cleanup 7 | * Bugfixes: 8 | ** Wrong HTML output when "bc.." is the very last in the document ("#81":https://github.com/textile/python-textile/issues/81) 9 | * Other: 10 | ** Use github actions instead of travis for automated testing 11 | 12 | h2. Version 4.0.2 13 | * Bugfixes: 14 | ** Support non-http schemas in url refs ("#75":https://github.com/textile/python-textile/pull/75) 15 | ** pytest-runner is deprecated ("#77":https://github.com/textile/python-textile/issues/77) 16 | *** other changes related to CI infrastructure 17 | 18 | h2. 
Version 4.0.1 19 | * Bugfixes: 20 | ** SyntaxWarnings with Python 3.8 i("#71":https://github.com/textile/python-textile/issues/71) 21 | ** testsuite: internal error with coverage 5.0.X ("#72":https://github.com/textile/python-textile/issues/72) 22 | ** DeprecationWarnings about invalid escape sequences ("#73":https://github.com/textile/python-textile/issues/73) 23 | 24 | h2. Version 4.0.0 25 | * Drop support for Python 2, hence the version bump. Update list of PY3K versions to currently-supported versions. If you need to use textile on Python 2.7 or Python 3.3 or 3.4, please use textile Version 3.0.4. 26 | * For use in PyPy environments, textile used to work well with the regex package. Lately, it's running into trouble. Please uninstall regex if this is the case for you. 27 | 28 | h2. Version 3.0.4 29 | * BUGFIX: Restricted mode strips out CSS attributes again. 30 | * Update travis to more current versions and test against current Pillow version. 31 | 32 | h2. Version 3.0.3 33 | * BUGFIX: Improve handling code block following extended p block ("#63":https://github.com/textile/python-textile/pull/63) 34 | 35 | h2. Version 3.0.2 36 | * BUGFIX: Fix for multiple multi-line paragraphs. ("#62":https://github.com/textile/python-textile/pull/62) 37 | 38 | h2. Version 3.0.1 39 | * BUGFIX: Fix improper handling of extended code blocks. ("#61":https://github.com/textile/python-textile/pull/61) 40 | 41 | h2. Version 3.0.0 42 | * Drop support for Python 2.6 and 3.2. 43 | * Update to the current version of html5lib 44 | * Bugfixes: 45 | ** Fix handling of HTML entities in extended pre blocks. ("#55":https://github.com/textile/python-textile/issues/55) 46 | ** Empty definitions in definition lists raised an exception ("#56":https://github.com/textile/python-textile/issues/56) 47 | ** Fix handling of unicode in img attributes ("#58":https://github.com/textile/python-textile/issues/58) 48 | 49 | h2. 
Version 2.3.16 50 | * Bugfixes: 51 | ** Fix processing of extended code blocks ("#50":https://github.com/textile/python-textile/issues/50) 52 | ** Don't break when links fail to include "http:" ("#51":https://github.com/textile/python-textile/issues/51) 53 | ** Better handling of poorly-formatted tables ("#52":https://github.com/textile/python-textile/issues/52) 54 | 55 | h2. Version 2.3.15 56 | * Bugfix: Don't break on unicode characters in the fragment of a url. 57 | 58 | h2. Version 2.3.14 59 | * Bugfix: Fix textile on Python 2.6 ("#48":https://github.com/textile/python-textile/issues/48) 60 | 61 | h2. Version 2.3.13 62 | * Remove extraneous arguments from textile method. These were originally added long ago to work with django, but markup languages are long gone from django. 63 | * Bugfix: Don't mangle percent-encoded URLs so much. ("#45":https://github.com/textile/python-textile/issues/45) 64 | * Bugfix: More fixes for poorly-formatted lists. ("#46":https://github.com/textile/python-textile/issues/46) 65 | * Bugfix: Improve handling of whitespace in pre-formatted blocks. This now matches php-textile's handling of pre blocks much more closely. ("#47":https://github.com/textile/python-textile/issues/47) 66 | 67 | h2. Version 2.3.12 68 | * Bugfix: Don't die on pre blocks with unicode characters. ("#43":https://github.com/textile/python-textile/issues/43) 69 | * Bugfix: Fix regressions introduced into the code between 2.2.2 and 2.3.11. (Special thanks to "@adam-iris":https://github.com/adam-iris for providing pull request "#44":https://github.com/textile/python-textile/pull/44) 70 | * Bugfix: Don't just die when processing poorly-formatted textile lists. ("#37":https://github.com/textile/python-textile/issues/37) 71 | * Add Python 3.6 to testing. 72 | * Add a "print the version string and exit" argument to the cli tool: @pytextile -v@ 73 | 74 | h2. 
Version 2.3.11 75 | * Bugfix: Don't strip leading dot from image URIs ("#42":https://github.com/textile/python-textile/issues/42) 76 | 77 | h2. Version 2.3.10 78 | * Packaging: cleanup in MANIFEST.IN leads to better linux packaging, and smaller wheel size. 79 | 80 | h2. Version 2.3.9 81 | * Packaging: remove extraneous files from the source distribution upload. 82 | * Remove a lingering file from a feature branch for overhauling list handling. This brings coverage back up to 100% 83 | 84 | h2. Version 2.3.8 85 | * Bugfix: Fix process of string containing only whitespaces ("#40":https://github.com/textile/python-textile/issues/40) 86 | * Bugfix: Fix process of formatted text after lists ("#37":https://github.com/textile/python-textile/issues/37) 87 | * Test: Use sys.executable instead of 'python' to test the CLI ("#38":https://github.com/textile/python-textile/issues/38) 88 | 89 | h2. Version 2.3.7 90 | * Bugfix: Don't assume pytest is available to be imported in setup.py ("#39":https://github.com/textile/python-textile/issues/39) 91 | 92 | h2. Version 2.3.6 93 | * Packaging: @tests@ directory is correctly included in source-tarball. ("#33":https://github.com/textile/python-textile/issues/33) 94 | 95 | h2. Version 2.3.5 96 | * Bugfix: Correctly handle unicode text in url query-strings. ("#36":https://github.com/textile/python-textile/issues/36) 97 | 98 | h2. Version 2.3.4 99 | * Bugfix: fix an issue with extended block code 100 | * Remove misplaced shebang on non-callable files. 101 | * Packaging: Add test-command to setup.py directly. 102 | * Packaging: Included the tests/ directory for source-tarballs, useful for packaging checks. ("#33":https://github.com/textile/python-textile/issues/33) 103 | * Add a cli tool @pytextile@ which takes textile input and prints html output. See @pytextile -h@ for details. 104 | 105 | h2. 
Version 2.3.3 106 | * Bugfix: Unicode in URL titles no longer break everything ("#30":https://github.com/textile/python-textile/issues/30) 107 | * Display DeprecationWarning when using textile on Python 2.6. 108 | 109 | h2. Version 2.3.2 110 | * Bugfix: properly handle @":"@ as text, not a link. 111 | 112 | h2. Version 2.3.1 113 | * Regression bugfix: empty string input returns empty string again. 114 | 115 | h2. Version 2.3.0 116 | 117 | * Bugfixes: 118 | ** Support data URIs in img tags 119 | ** Fix autolink urls with image references ("#17":https://github.com/textile/python-textile/issues/17) 120 | ** Fix textile links containing parentheses ("#20":https://github.com/textile/python-textile/issues/20) 121 | ** Fix double-encoding of code blocks ("#21":https://github.com/textile/python-textile/issues/21) 122 | ** Fix handling of scheme in self-linked URLs ("#16":https://github.com/textile/python-textile/issues/16) 123 | ** Fix Markup not parsed if followed by certain characters ("#22":Markup not parsed if followed by certain characters) 124 | * Convert testing over to "py.test":http://pytest.org/, improving unicode testing 125 | * Update functionality for tables, notelists, and footnotes. This involved a major reworking of parts of the code, but it should now match php-textile and txstyle.org precisely. Please file an issue for any bugs you come across. 126 | * Remove @head_offset@ option from parse. I'm not sure it ever existed in php-textile. 127 | 128 | h2. Version 2.2.2 129 | 130 | * bugfix: "regex":https://pypi.python.org/pypi/regex is now an optional dependency 131 | 132 | h2. Version 2.2.1 133 | 134 | * drop textilefactory support for html. 135 | * Various development-related bugfixes. 136 | * Added this changelog. 137 | 138 | h2. Version 2.2.0 139 | 140 | * Started refactoring the code to be less repetitive. 
@textile.Textile().parse()@ is a little more friendly than @textile.Textile().textile()@ There may be more work to be done on this front to make the flow a little smoother. 141 | * We now support versions 2.6 - 3.4 (including 3.2) using the same codebase. Many thanks to Radek Czajka for this. 142 | * Drop support for html4. We now only output xhtml or html5. 143 | * Various development-related bugfixes. 144 | 145 | h2. Version 2.1.8 146 | 147 | * Add support for html5 output. 148 | * Lots of new functionality added bringing us in line with the official Textile 2.4 149 | -------------------------------------------------------------------------------- /CONTRIBUTORS.txt: -------------------------------------------------------------------------------- 1 | Dennis Burke 2 | Radek Czajka 3 | Roberto A. F. De Almeida 4 | Matt Layman 5 | Mark Pilgrim 6 | Alex Shiels 7 | Jason Samsa 8 | Kurt Raschke 9 | Dave Brondsema 10 | Dmitry Shachnev 11 | Kirill Mavreshko 12 | Brad Schoening -------------------------------------------------------------------------------- /LICENSE.txt: -------------------------------------------------------------------------------- 1 | L I C E N S E 2 | ============= 3 | Redistribution and use in source and binary forms, with or without 4 | modification, are permitted provided that the following conditions are met: 5 | 6 | * Redistributions of source code must retain the above copyright notice, 7 | this list of conditions and the following disclaimer. 8 | 9 | * Redistributions in binary form must reproduce the above copyright notice, 10 | this list of conditions and the following disclaimer in the documentation 11 | and/or other materials provided with the distribution. 12 | 13 | * Neither the name Textile nor the names of its contributors may be used to 14 | endorse or promote products derived from this software without specific 15 | prior written permission. 
16 | 17 | THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" 18 | AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE 19 | IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE 20 | ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE 21 | LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR 22 | CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF 23 | SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS 24 | INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN 25 | CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) 26 | ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE 27 | POSSIBILITY OF SUCH DAMAGE. -------------------------------------------------------------------------------- /Makefile: -------------------------------------------------------------------------------- 1 | clean: 2 | $(RM) README.txt 3 | $(RM) -r ./dist ./build 4 | 5 | generate_pypi_README: 6 | ${VIRTUAL_ENV}/bin/pytextile README.textile | sed -e 's/^\t//' > README.txt 7 | 8 | build: generate_pypi_README 9 | python -m build 10 | 11 | upload_to_test: build 12 | twine check ./dist/* 13 | twine upload --repository test_textile ./dist/* 14 | 15 | upload_to_prod: build 16 | twine check ./dist/* 17 | # for now, don't actually upload to prod PyPI, just output the command to do so. 
18 | @echo "twine upload --repository textile ./dist/*" 19 | -------------------------------------------------------------------------------- /README.textile: -------------------------------------------------------------------------------- 1 | !https://github.com/textile/python-textile/actions/workflows/lint_and_test.yml/badge.svg(python-textile)!:https://github.com/textile/python-textile/actions/workflows/lint_and_test.yml !https://codecov.io/github/textile/python-textile/coverage.svg!:https://codecov.io/github/textile/python-textile !https://img.shields.io/pypi/pyversions/textile! !https://img.shields.io/pypi/wheel/textile! 2 | 3 | h1. python-textile 4 | 5 | python-textile is a Python port of "Textile":https://textile-lang.com/, Dean Allen's humane web text generator. 6 | 7 | h2. Installation 8 | 9 | @pip install textile@ 10 | 11 | Dependencies: 12 | * "nh3":https://pypi.org/project/nh3/ 13 | * "regex":https://pypi.org/project/regex/ (The regex package causes problems with PyPy, and is not installed as a dependency in such environments. If you are upgrading a textile install on PyPy which had regex previously included, you may need to uninstall it.) 14 | 15 | Optional dependencies include: 16 | * "PIL/Pillow":http://python-pillow.github.io/ (for checking image sizes). If needed, install via @pip install 'textile[imagesize]'@ 17 | 18 | h2. Usage 19 | 20 | bc.. import textile 21 | >>> s = """ 22 | ... _This_ is a *test.* 23 | ... 24 | ... * One 25 | ... * Two 26 | ... * Three 27 | ... 28 | ... Link to "Slashdot":http://slashdot.org/ 29 | ... """ 30 | >>> html = textile.textile(s) 31 | >>> print html 32 |

This is a test.

33 | 34 | 39 | 40 |

Link to Slashdot

41 | >>> 42 | 43 | h3. Notes: 44 | 45 | * Active development supports Python 3.8 or later. 46 | 47 | h3. Running Tests 48 | 49 | To run the test suite, use pytest. `pytest-cov` is required as well. 50 | 51 | When textile is installed locally: 52 | 53 | bc. pytest 54 | 55 | When textile is not installed locally: 56 | 57 | bc. PYTHONPATH=. pytest 58 | -------------------------------------------------------------------------------- /TODO.textile: -------------------------------------------------------------------------------- 1 | TODO 2 | 3 | * Improve documentation, both of the code and Textile syntax. 4 | ** Not all functions have docstrings or adequate docstrings. 5 | ** Because the Textile syntax implemented by PyTextile has deviated from the syntax implemented by other implementations of Textile, PyTextile-specific documentation needs to be produced for end-users. 6 | * Update to comply with Textile 2.5 7 | -------------------------------------------------------------------------------- /pyproject.toml: -------------------------------------------------------------------------------- 1 | [build-system] 2 | requires = ["setuptools", "setuptools-scm", "nh3"] 3 | build-backend = "setuptools.build_meta" 4 | 5 | [project] 6 | name = "textile" 7 | authors = [ 8 | { name = "Dennis Burke", email = "ikirudennis@gmail.com"} 9 | ] 10 | description = 'Textile processing for python.' 
11 | classifiers = [ 12 | 'Development Status :: 5 - Production/Stable', 13 | 'Environment :: Web Environment', 14 | 'Intended Audience :: Developers', 15 | 'License :: OSI Approved :: BSD License', 16 | 'Operating System :: OS Independent', 17 | 'Programming Language :: Python', 18 | 'Programming Language :: Python :: 3', 19 | 'Programming Language :: Python :: 3 :: Only', 20 | 'Programming Language :: Python :: 3.8', 21 | 'Programming Language :: Python :: 3.9', 22 | 'Programming Language :: Python :: 3.10', 23 | 'Programming Language :: Python :: 3.11', 24 | 'Programming Language :: Python :: 3.12', 25 | 'Topic :: Software Development :: Libraries :: Python Modules', 26 | ] 27 | dynamic = ["version",] 28 | dependencies = [ 29 | 'nh3', 30 | 'regex>1.0; implementation_name != "pypy"', 31 | ] 32 | requires-python = '>=3.8' 33 | keywords = ['textile', 'text', 'html markup'] 34 | # Use the following command to generate a README.txt which is compatible with 35 | # pypi's readme rendering: 36 | # pytextile README.textile | sed -e 's/^\t//' > README.txt 37 | readme = {file = 'README.txt', content-type = 'text/markdown'} 38 | 39 | [project.optional-dependencies] 40 | develop = ['pytest', 'pytest-cov'] 41 | imagesize = ['Pillow>=3.0.0',] 42 | 43 | [project.urls] 44 | Homepage = "https://github.com/textile/python-textile" 45 | Repository = "https://github.com/textile/python-textile.git" 46 | Issues = "https://github.com/textile/python-textile/issues" 47 | 48 | [project.scripts] 49 | pytextile = "textile.__main__:main" 50 | 51 | [tool.setuptools.dynamic] 52 | version = {attr = "textile.__version__"} 53 | -------------------------------------------------------------------------------- /pytest.ini: -------------------------------------------------------------------------------- 1 | [pytest] 2 | testpaths = tests 3 | addopts = --cov=textile --cov-report=html --cov-append --cov-report=term-missing 4 | 
-------------------------------------------------------------------------------- /tests/__init__.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/textile/python-textile/a1c31e525dfdb1745ae8d09d5a08323ef579f414/tests/__init__.py -------------------------------------------------------------------------------- /tests/fixtures/README.txt: -------------------------------------------------------------------------------- 1 |

python-textile

2 | 3 |

python-textile

4 | 5 |

python-textile is a Python port of Textile, Dean Allen’s humane web text generator.

6 | 7 |

Installation

8 | 9 |

pip install textile

10 | 11 |

Dependencies: 12 |

16 | 17 |

Optional dependencies include: 18 |

21 | 22 |

Usage

23 | 24 |
import textile
25 | >>> s = """
26 | ... _This_ is a *test.*
27 | ...
28 | ... * One
29 | ... * Two
30 | ... * Three
31 | ...
32 | ... Link to "Slashdot":http://slashdot.org/
33 | ... """
34 | >>> html = textile.textile(s)
35 | >>> print html
36 | 	<p><em>This</em> is a <strong>test.</strong></p>
37 | 
38 | 	<ul>
39 | 		<li>One</li>
40 | 		<li>Two</li>
41 | 		<li>Three</li>
42 | 	</ul>
43 | 
44 | 	<p>Link to <a href="http://slashdot.org/">Slashdot</a></p>
45 | >>>
46 | 47 |

Notes:

48 | 49 | 52 | 53 |

Running Tests

54 | 55 |

To run the test suite, use pytest. `pytest-cov` is required as well.

56 | 57 |

When textile is installed locally:

58 | 59 |
pytest
60 | 61 |

When textile is not installed locally:

62 | 63 |
PYTHONPATH=. pytest
-------------------------------------------------------------------------------- /tests/test_attributes.py: -------------------------------------------------------------------------------- 1 | from typing import OrderedDict 2 | from textile.utils import parse_attributes 3 | 4 | 5 | def test_parse_attributes(): 6 | assert parse_attributes('\\1', element='td') == {'colspan': '1'} 7 | assert parse_attributes('/1', element='td') == {'rowspan': '1'} 8 | assert parse_attributes('^', element='td') == {'style': 'vertical-align:top;'} 9 | assert parse_attributes('{color: blue}') == {'style': 'color: blue;'} 10 | assert parse_attributes('[en]') == {'lang': 'en'} 11 | assert parse_attributes('(cssclass)') == {'class': 'cssclass'} 12 | assert parse_attributes('(') == {'style': 'padding-left:1em;'} 13 | assert parse_attributes(')') == {'style': 'padding-right:1em;'} 14 | assert parse_attributes('<') == {'style': 'text-align:left;'} 15 | assert parse_attributes('(c#i)') == {'class': 'c', 'id': 'i'} 16 | assert parse_attributes('\\2 100', element='col') == {'span': '2', 'width': '100'} 17 | 18 | 19 | def test_parse_attributes_edge_cases(): 20 | result = parse_attributes('(:c#i)') 21 | expect = OrderedDict({'id': 'i'}) 22 | assert result == expect 23 | 24 | assert parse_attributes('(<)') == OrderedDict() 25 | -------------------------------------------------------------------------------- /tests/test_block.py: -------------------------------------------------------------------------------- 1 | import textile 2 | from textile.objects import Block 3 | 4 | try: 5 | from collections import OrderedDict 6 | except ImportError: 7 | from ordereddict import OrderedDict 8 | 9 | 10 | def test_block(): 11 | t = textile.Textile() 12 | result = t.block('h1. foobar baby') 13 | expect = '\t

foobar baby

' 14 | assert result == expect 15 | 16 | b = Block(t, "bq", "", None, "", "Hello BlockQuote") 17 | expect = ('blockquote', OrderedDict(), 'p', OrderedDict(), 18 | 'Hello BlockQuote') 19 | result = (b.outer_tag, b.outer_atts, b.inner_tag, b.inner_atts, b.content) 20 | assert result == expect 21 | 22 | b = Block(t, "bq", "", None, "http://google.com", "Hello BlockQuote") 23 | expect = ('blockquote', OrderedDict([('cite', 24 | '{0.uid}{0.refIndex}:url'.format(t))]), 'p', OrderedDict(), 25 | 'Hello BlockQuote') 26 | result = (b.outer_tag, b.outer_atts, b.inner_tag, b.inner_atts, b.content) 27 | assert result == expect 28 | 29 | b = Block(t, "bc", "", None, "", 'printf "Hello, World";') 30 | # the content of text will be turned shelved, so we'll asert only the 31 | # deterministic portions of the expected values, below 32 | expect = ('pre', OrderedDict(), 'code', OrderedDict()) 33 | result = (b.outer_tag, b.outer_atts, b.inner_tag, b.inner_atts) 34 | assert result == expect 35 | 36 | b = Block(t, "h1", "", None, "", "foobar") 37 | expect = ('h1', OrderedDict(), '', OrderedDict(), 'foobar') 38 | result = (b.outer_tag, b.outer_atts, b.inner_tag, b.inner_atts, b.content) 39 | assert result == expect 40 | 41 | 42 | def test_block_tags_false(): 43 | t = textile.Textile(block_tags=False) 44 | assert t.block_tags is False 45 | 46 | result = t.parse('test') 47 | expect = 'test' 48 | assert result == expect 49 | 50 | 51 | def test_blockcode_extended(): 52 | input = 'bc.. text\nmoretext\n\nevenmoretext\n\nmoremoretext\n\np. test' 53 | expect = '
text\nmoretext\n\nevenmoretext\n\nmoremoretext
\n\n\t

test

' 54 | t = textile.Textile() 55 | result = t.parse(input) 56 | assert result == expect 57 | 58 | 59 | def test_blockcode_in_README(): 60 | with open('README.textile') as f: 61 | readme = ''.join(f.readlines()) 62 | result = textile.textile(readme) 63 | with open('tests/fixtures/README.txt') as f: 64 | expect = ''.join(f.readlines()) 65 | assert result == expect 66 | 67 | 68 | def test_blockcode_comment(): 69 | input = '###.. block comment\nanother line\n\np. New line' 70 | expect = '\t

New line

' 71 | t = textile.Textile() 72 | result = t.parse(input) 73 | assert result == expect 74 | 75 | 76 | def test_extended_pre_block_with_many_newlines(): 77 | """Extra newlines in an extended pre block should not get cut down to only 78 | two.""" 79 | text = '''pre.. word 80 | 81 | another 82 | 83 | word 84 | 85 | 86 | yet anothe word''' 87 | expect = '''
word
 88 | 
 89 | another
 90 | 
 91 | word
 92 | 
 93 | 
 94 | yet anothe word
''' 95 | result = textile.textile(text) 96 | assert result == expect 97 | 98 | text = 'p. text text\n\n\nh1. Hello\n' 99 | expect = '\t

text text

\n\n\n\t

Hello

' 100 | result = textile.textile(text) 101 | assert result == expect 102 | -------------------------------------------------------------------------------- /tests/test_cli.py: -------------------------------------------------------------------------------- 1 | import subprocess 2 | import sys 3 | 4 | import textile 5 | 6 | 7 | def test_console_script(): 8 | command = [sys.executable, '-m', 'textile', 'README.textile'] 9 | try: 10 | result = subprocess.check_output(command) 11 | except AttributeError: 12 | command[2] = 'textile.__main__' 13 | result = subprocess.Popen( 14 | command, stdout=subprocess.PIPE).communicate()[0] 15 | with open('tests/fixtures/README.txt') as f: 16 | expect = ''.join(f.readlines()) 17 | if isinstance(result, bytes): 18 | result = result.decode('utf-8') 19 | assert result == expect 20 | 21 | 22 | def test_version_string(): 23 | command = [sys.executable, '-m', 'textile', '-v'] 24 | try: 25 | result = subprocess.check_output(command) 26 | except AttributeError: 27 | command[2] = 'textile.__main__' 28 | result = subprocess.Popen( 29 | command, stdout=subprocess.PIPE).communicate()[0] 30 | if isinstance(result, bytes): 31 | result = result.decode('utf-8') 32 | assert result.strip() == textile.__version__ 33 | -------------------------------------------------------------------------------- /tests/test_footnoteRef.py: -------------------------------------------------------------------------------- 1 | from textile import Textile 2 | 3 | 4 | def test_footnoteRef(): 5 | t = Textile() 6 | result = t.footnoteRef('foo[1]') 7 | expect = 'foo1'.format(t.linkPrefix) 8 | assert expect == result 9 | -------------------------------------------------------------------------------- /tests/test_getRefs.py: -------------------------------------------------------------------------------- 1 | from textile import Textile 2 | 3 | 4 | def test_getRefs(): 5 | t = Textile() 6 | result = t.getRefs("some text [Google]http://www.google.com") 7 | expect = 'some text ' 8 
| assert result == expect 9 | 10 | result = t.urlrefs 11 | expect = {'Google': 'http://www.google.com'} 12 | assert result == expect 13 | 14 | t2 = Textile() 15 | 16 | result = t2.getRefs("my ftp [ftp]ftp://example.com") 17 | expect = 'my ftp ' 18 | assert result == expect 19 | 20 | result = t2.urlrefs 21 | expect = {'ftp': 'ftp://example.com'} 22 | assert result == expect 23 | -------------------------------------------------------------------------------- /tests/test_getimagesize.py: -------------------------------------------------------------------------------- 1 | from textile.utils import getimagesize 2 | import pytest 3 | 4 | PIL = pytest.importorskip('PIL') 5 | 6 | 7 | def test_imagesize(): 8 | assert getimagesize("http://www.google.com/intl/en_ALL/images/logo.gif") == (276, 110) 9 | assert getimagesize("http://bad.domain/") == '' 10 | assert getimagesize("http://www.google.com/robots.txt") is None 11 | -------------------------------------------------------------------------------- /tests/test_github_issues.py: -------------------------------------------------------------------------------- 1 | # -*- coding: utf-8 -*- 2 | import textile 3 | 4 | 5 | def test_github_issue_16(): 6 | result = textile.textile('"$":http://google.com "$":https://google.com "$":mailto:blackhole@sun.comet') 7 | expect = '\t

google.com google.com blackhole@sun.comet

' 8 | assert result == expect 9 | 10 | 11 | def test_github_issue_17(): 12 | result = textile.textile('!http://www.ox.ac.uk/favicon.ico!') 13 | expect = '\t

' 14 | assert result == expect 15 | 16 | 17 | def test_github_issue_20(): 18 | text = 'This is a link to a ["Wikipedia article about Textile":http://en.wikipedia.org/wiki/Textile_(markup_language)].' 19 | result = textile.textile(text) 20 | expect = '\t

This is a link to a Wikipedia article about Textile.

' 21 | assert result == expect 22 | 23 | 24 | def test_github_issue_21(): 25 | text = ('''h1. xml example 26 | 27 | bc. ''' 28 | ''' 29 | 30 | bar 31 | ''') 32 | result = textile.textile(text) 33 | expect = '\t

xml example

\n\n
\n<foo>\n  bar\n</foo>
' 34 | assert result == expect 35 | 36 | 37 | def test_github_issue_22(): 38 | text = '''_(artist-name)Ty Segall_’s''' 39 | result = textile.textile(text) 40 | expect = '\t

Ty Segall’s

' 41 | assert result == expect 42 | 43 | 44 | def test_github_issue_26(): 45 | text = '' 46 | result = textile.textile(text) 47 | expect = '' 48 | assert result == expect 49 | 50 | 51 | def test_github_issue_27(): 52 | test = """* Folders with ":" in their names are displayed with a forward slash "/" instead. (Filed as "#4581709":/test/link, which was considered "normal behaviour" - quote: "Please note that Finder presents the 'Carbon filesystem' view, regardless of the underlying filesystem.")""" 53 | result = textile.textile(test) 54 | expect = """\t""" 55 | assert result == expect 56 | 57 | 58 | def test_github_issue_28(): 59 | test = """So here I am porting my ancient "newspipe":newspipe "front-end":blog/2006/09/30/0950 to "Snakelets":Snakelets and "Python":Python, and I've just trimmed down over 20 lines of "PHP":PHP down to essentially one line of "BeautifulSoup":BeautifulSoup retrieval: 60 | 61 |
 62 | def parseWapProfile(self, url):
 63 |   result = fetch.fetchURL(url)
 64 |   soup = BeautifulStoneSoup(result['data'], convertEntities=BeautifulStoneSoup.HTML_ENTITIES)
 65 |   try:
 66 |     width, height = soup('prf:screensize')[0].contents[0].split('x')
 67 |   except:
 68 |     width = height = None
 69 |   return {"width": width, "height": height}
 70 | 
71 | 72 | Of course there's a lot more error handling to do (and useful data to glean off the "XML":XML), but being able to cut through all the usual parsing crap is immensely gratifying.""" 73 | result = textile.textile(test) 74 | expect = ("""\t

So here I am porting my ancient newspipe front-end to Snakelets and Python, and I’ve just trimmed down over 20 lines of PHP down to essentially one line of BeautifulSoup retrieval:

75 | 76 |
 77 | def parseWapProfile(self, url):
 78 |   result = fetch.fetchURL(url)
 79 |   soup = BeautifulStoneSoup(result['data'], convertEntities=BeautifulStoneSoup.HTML_ENTITIES)
 80 |   try:
 81 |     width, height = soup('prf:screensize')[0].contents[0].split('x')
 82 |   except:
 83 |     width = height = None
 84 |   return {"width": width, "height": height}
 85 | 
86 | 87 | \t

Of course there’s a lot more error handling to do (and useful data to glean off the XML), but being able to cut through all the usual parsing crap is immensely gratifying.

""") 88 | assert result == expect 89 | 90 | 91 | def test_github_issue_30(): 92 | text = '"Tëxtíle (Tëxtíle)":http://lala.com' 93 | result = textile.textile(text) 94 | expect = '\t

Tëxtíle

' 95 | assert result == expect 96 | 97 | text = '!http://lala.com/lol.gif(♡ imáges)!' 98 | result = textile.textile(text) 99 | expect = '\t

♡ imáges

' 100 | assert result == expect 101 | 102 | 103 | def test_github_issue_36(): 104 | text = '"Chögyam Trungpa":https://www.google.com/search?q=Chögyam+Trungpa' 105 | result = textile.textile(text) 106 | expect = '\t

Chögyam Trungpa

' 107 | assert result == expect 108 | 109 | 110 | def test_github_issue_37(): 111 | text = '# xxx\n# yyy\n*blah*' 112 | result = textile.textile(text) 113 | expect = '\t

\t

    \n\t\t
  1. xxx
  2. \n\t\t
  3. yyy
  4. \n\t

\nblah

' 114 | assert result == expect 115 | 116 | text = '*Highlights*\n\n* UNITEK Y-3705A Type-C Universal DockingStation Pro\n* USB3.0/RJ45/EARPHONE/MICROPHONE/HDMI 6 PORT HUB 1.2m Data Cable 5V 4A Power Adaptor\n*\n* Dimensions: 25cm x 13cm x 9cm\n* Weight: 0.7kg' 117 | result = textile.textile(text) 118 | expect = '''\t

Highlights

119 | 120 | \t 124 | * 125 | \t''' 129 | assert result == expect 130 | 131 | 132 | def test_github_issue_40(): 133 | text = '\r\n' 134 | result = textile.textile(text) 135 | expect = '\r\n' 136 | assert result == expect 137 | 138 | 139 | def test_github_issue_42(): 140 | text = '!./image.png!' 141 | result = textile.textile(text) 142 | expect = '\t

' 143 | assert result == expect 144 | 145 | 146 | def test_github_issue_43(): 147 | text = 'pre. smart ‘quotes’ are not smart!' 148 | result = textile.textile(text) 149 | expect = '
smart ‘quotes’ are not smart!
' 150 | assert result == expect 151 | 152 | 153 | def test_github_issue_45(): 154 | """Incorrect transform unicode url""" 155 | text = '"test":https://myabstractwiki.ru/index.php/%D0%97%D0%B0%D0%B3%D0%BB%D0%B0%D0%B2%D0%BD%D0%B0%D1%8F_%D1%81%D1%82%D1%80%D0%B0%D0%BD%D0%B8%D1%86%D0%B0' 156 | result = textile.textile(text) 157 | expect = '\t

test

' 158 | assert result == expect 159 | 160 | 161 | def test_github_issue_46(): 162 | """Key error on mal-formed numbered lists. CAUTION: both the input and the 163 | ouput are ugly.""" 164 | text = '# test\n### test\n## test' 165 | expect = ('\t
    \n\t\t
  1. test\n\t\t\t
      \n\t\t\t\t
    1. test
    2. ' 166 | '\n\t\t\t
  2. \n\t\t
      \n\t\t\t
    1. test
    2. ' 167 | '\n\t\t
    \n\t\t
') 168 | result = textile.textile(text) 169 | assert result == expect 170 | 171 | 172 | def test_github_issue_47(): 173 | """Incorrect wrap pre-formatted value""" 174 | text = '''pre.. word 175 | 176 | another 177 | 178 | word 179 | 180 | yet anothe word''' 181 | result = textile.textile(text) 182 | expect = '''
word
183 | 
184 | another
185 | 
186 | word
187 | 
188 | yet anothe word
''' 189 | assert result == expect 190 | 191 | 192 | def test_github_issue_49(): 193 | """Key error on russian hash-route link""" 194 | s = '"link":https://ru.vuejs.org/v2/guide/components.html#Входные-параметры' 195 | result = textile.textile(s) 196 | expect = '\t

link

' 197 | assert result == expect 198 | 199 | 200 | def test_github_issue_50(): 201 | """Incorrect wrap code with Java generics in pre""" 202 | test = ('pre.. public class Tynopet {}\n\nfinal ' 203 | 'List> multipleList = new ArrayList<>();') 204 | result = textile.textile(test) 205 | expect = ('
public class Tynopet<T extends Framework> {}\n\n'
206 |               'final List<List<String>> multipleList = new '
207 |               'ArrayList<>();
') 208 | assert result == expect 209 | 210 | 211 | def test_github_issue_51(): 212 | """Link build with $ sign without "http" prefix broken.""" 213 | test = '"$":www.google.com.br' 214 | result = textile.textile(test) 215 | expect = '\t

www.google.com.br

' 216 | assert result == expect 217 | 218 | 219 | def test_github_issue_52(): 220 | """Table build without space after aligment raise a AttributeError.""" 221 | test = '|=.First Header |=. Second Header |' 222 | result = textile.textile(test) 223 | expect = ('\t\n\t\t\n\t\t\t\n\t\t\t' 225 | '\n\t\t\n\t
=.First Header ' 224 | 'Second Header
') 226 | assert result == expect 227 | 228 | 229 | def test_github_issue_55(): 230 | """Incorrect handling of quote entities in extended pre block""" 231 | test = ('pre.. this is the first line\n\nbut "quotes" in an extended pre ' 232 | 'block need to be handled properly.') 233 | result = textile.textile(test) 234 | expect = ('
this is the first line\n\nbut "quotes" in an '
235 |               'extended pre block need to be handled properly.
') 236 | assert result == expect 237 | 238 | # supplied input 239 | test = ('pre.. import org.slf4j.Logger;\nimport org.slf4j.LoggerFactory;' 240 | '\nimport ru.onyma.job.Context;\nimport ru.onyma.job.' 241 | 'RescheduleTask;\n\nimport java.util.concurrent.' 242 | 'ScheduledExecutorService;\nimport java.util.concurrent.TimeUnit;' 243 | '\n\n/**\n* @author ustits\n*/\npublic abstract class ' 244 | 'MainService extends RescheduleTask implements Context {\n\n' 245 | 'private static final Logger log = LoggerFactory.getLogger(' 246 | 'MainService.class);\nprivate final ScheduledExecutorService ' 247 | 'scheduler;\n\nprivate boolean isFirstRun = true;\nprivate T ' 248 | 'configs;\n\npublic MainService(final ScheduledExecutorService ' 249 | 'scheduler) {\nsuper(scheduler);\nthis.scheduler = scheduler;\n}\n' 250 | '\n@Override\npublic void setConfig(final T configs) {\nthis.' 251 | 'configs = configs;\nif (isFirstRun) {\nscheduler.schedule(this, ' 252 | '0, TimeUnit.SECONDS);\nisFirstRun = false;\n}\n}\n\n@Override\n' 253 | 'public void stop() {\nsuper.stop();\nscheduler.shutdown();\ntry {' 254 | '\nscheduler.awaitTermination(Long.MAX_VALUE, TimeUnit.DAYS);\n} ' 255 | 'catch (InterruptedException ie) {\nlog.warn("Unable to wait for ' 256 | 'syncs termination", ie);\nThread.currentThread().interrupt();\n}' 257 | '\n}\n\nprotected final T getConfigs() {\nreturn configs;\n}\n}') 258 | result = textile.textile(test) 259 | expect = ('
import org.slf4j.Logger;\nimport org.slf4j.LoggerFactory;'
260 |               '\nimport ru.onyma.job.Context;\nimport ru.onyma.job.'
261 |               'RescheduleTask;\n\nimport java.util.concurrent.'
262 |               'ScheduledExecutorService;\nimport java.util.concurrent.'
263 |               'TimeUnit;\n\n/**\n* @author ustits\n*/\npublic abstract class '
264 |               'MainService<T> extends RescheduleTask implements '
265 |               'Context<T> {\n\nprivate static final Logger log = '
266 |               'LoggerFactory.getLogger(MainService.class);\nprivate final '
267 |               'ScheduledExecutorService scheduler;\n\nprivate boolean '
268 |               'isFirstRun = true;\nprivate T configs;\n\npublic MainService('
269 |               'final ScheduledExecutorService scheduler) {\nsuper(scheduler);'
270 |               '\nthis.scheduler = scheduler;\n}\n\n@Override\npublic void '
271 |               'setConfig(final T configs) {\nthis.configs = configs;\nif ('
272 |               'isFirstRun) {\nscheduler.schedule(this, 0, TimeUnit.SECONDS);'
273 |               '\nisFirstRun = false;\n}\n}\n\n@Override\npublic void stop() {'
274 |               '\nsuper.stop();\nscheduler.shutdown();\ntry {\nscheduler.'
275 |               'awaitTermination(Long.MAX_VALUE, TimeUnit.DAYS);\n} catch '
276 |               '(InterruptedException ie) {\nlog.warn("Unable to wait '
277 |               'for syncs termination", ie);\nThread.currentThread().'
278 |               'interrupt();\n}\n}\n\nprotected final T getConfigs() {\n'
279 |               'return configs;\n}\n}
') 280 | assert result == expect 281 | 282 | 283 | def test_github_issue_56(): 284 | """Empty description lists throw error""" 285 | result = textile.textile("- :=\n-") 286 | expect = '
\n
' 287 | assert result == expect 288 | 289 | 290 | def test_github_pull_61(): 291 | """Fixed code block multiline encoding on quotes/span""" 292 | test = ('''bc.. This is some TEXT inside a "Code BLOCK" 293 | 294 | { 295 | if (JSON) { 296 | 297 | return {"JSON":"value"} 298 | } 299 | } 300 | 301 | Back to 10-4 CAPS ''' 302 | ''' 303 | 304 | p.. Some multiline Paragragh 305 | 306 | Here is some output!!! "Some" CAPS''') 307 | 308 | expect = '''
This is some TEXT inside a "Code BLOCK"
309 | 
310 | {
311 |   if (JSON) {
312 | 
313 |     return {"JSON":"value"}
314 |   }
315 | }
316 | 
317 | Back to 10-4 CAPS 
318 | 319 |

Some multiline Paragragh

320 | 321 |

Here is some output!!! “Some” CAPS

''' 322 | t = textile.Textile() 323 | result = t.parse(test) 324 | assert result == expect 325 | 326 | 327 | def test_github_pull_62(): 328 | """Fix for paragraph multiline, only last paragraph is rendered 329 | correctly""" 330 | test = '''p.. First one 'is' 331 | 332 | ESCAPED "bad" 333 | 334 | p.. Second one 'is' 335 | 336 | 337 | 338 | ESCAPED "bad" 339 | 340 | p.. Third one 'is' 341 | 342 | ESCAPED "bad" 343 | 344 | p.. Last one 'is' 345 | 346 | ESCAPED "good" test''' 347 | 348 | expect = '''

First one ‘is’

349 | 350 |

ESCAPED “bad”

351 | 352 |

Second one ‘is’

353 | 354 | 355 | 356 |

ESCAPED “bad”

357 | 358 |

Third one ‘is’

359 | 360 |

ESCAPED “bad”

361 | 362 |

Last one ‘is’

363 | 364 |

ESCAPED “good” test

''' 365 | t = textile.Textile() 366 | result = t.parse(test) 367 | assert result == expect 368 | 369 | 370 | def test_github_pull_63(): 371 | """Forgot to set multiline_para to False""" 372 | test = '''p.. First one 'is' 373 | 374 | ESCAPED "bad" 375 | 376 | bc.. { 377 | First code BLOCK 378 | 379 | {"JSON":'value'} 380 | } 381 | 382 | p.. Second one 'is' 383 | 384 | 385 | 386 | ESCAPED "bad" 387 | 388 | p.. Third one 'is' 389 | 390 | ESCAPED "bad" 391 | 392 | bc.. { 393 | Last code BLOCK 394 | 395 | {"JSON":'value'} 396 | } 397 | 398 | p.. Last one 'is' 399 | 400 | ESCAPED "good" test''' 401 | 402 | expect = '''

First one ‘is’

403 | 404 |

ESCAPED “bad”

405 | 406 |
{
407 |  First code BLOCK
408 | 
409 |  {"JSON":'value'}
410 | }
411 | 412 |

Second one ‘is’

413 | 414 | 415 | 416 |

ESCAPED “bad”

417 | 418 |

Third one ‘is’

419 | 420 |

ESCAPED “bad”

421 | 422 |
{
423 |  Last code BLOCK
424 | 
425 |  {"JSON":'value'}
426 | }
427 | 428 |

Last one ‘is’

429 | 430 |

ESCAPED “good” test

''' 431 | t = textile.Textile() 432 | result = t.parse(test) 433 | assert result == expect 434 | -------------------------------------------------------------------------------- /tests/test_glyphs.py: -------------------------------------------------------------------------------- 1 | from textile import Textile 2 | 3 | 4 | def test_glyphs(): 5 | t = Textile() 6 | 7 | result = t.glyphs("apostrophe's") 8 | expect = 'apostrophe’s' 9 | assert result == expect 10 | 11 | result = t.glyphs("back in '88") 12 | expect = 'back in ’88' 13 | assert result == expect 14 | 15 | result = t.glyphs('foo ...') 16 | expect = 'foo …' 17 | assert result == expect 18 | 19 | result = t.glyphs('--') 20 | expect = '—' 21 | assert result == expect 22 | 23 | result = t.glyphs('FooBar[tm]') 24 | expect = 'FooBar™' 25 | assert result == expect 26 | 27 | result = t.glyphs("

Cat's Cradle by Vonnegut

") 28 | expect = '

Cat’s Cradle by Vonnegut

' 29 | assert result == expect 30 | -------------------------------------------------------------------------------- /tests/test_image.py: -------------------------------------------------------------------------------- 1 | from textile import Textile 2 | 3 | 4 | def test_image(): 5 | t = Textile() 6 | result = t.image('!/imgs/myphoto.jpg!:http://jsamsa.com') 7 | expect = (''.format( 8 | t.uid)) 9 | assert result == expect 10 | assert t.refCache[1] == 'http://jsamsa.com' 11 | assert t.refCache[2] == '/imgs/myphoto.jpg' 12 | 13 | result = t.image('!'.format(t.uid)) 22 | assert result == expect 23 | -------------------------------------------------------------------------------- /tests/test_imagesize.py: -------------------------------------------------------------------------------- 1 | import textile 2 | 3 | 4 | def test_imagesize(): 5 | imgurl = 'http://www.google.com/intl/en_ALL/images/srpr/logo1w.png' 6 | result = textile.utils.getimagesize(imgurl) 7 | try: 8 | import PIL # noqa: F401 9 | 10 | expect = (275, 95) 11 | assert result == expect 12 | except ImportError: 13 | expect = '' 14 | assert result == expect 15 | -------------------------------------------------------------------------------- /tests/test_lists.py: -------------------------------------------------------------------------------- 1 | from textile import Textile 2 | 3 | 4 | def test_lists(): 5 | t = Textile() 6 | result = t.textileLists("* one\n* two\n* three") 7 | expect = '\t
    \n\t\t
  • one
  • \n\t\t
  • two
  • \n\t\t
  • three
  • \n\t
' 8 | assert result == expect 9 | -------------------------------------------------------------------------------- /tests/test_retrieve.py: -------------------------------------------------------------------------------- 1 | from textile import Textile 2 | 3 | 4 | def test_retrieve(): 5 | t = Textile() 6 | id = t.shelve("foobar") 7 | assert t.retrieve(id) == 'foobar' 8 | -------------------------------------------------------------------------------- /tests/test_span.py: -------------------------------------------------------------------------------- 1 | from textile import Textile 2 | 3 | 4 | def test_span(): 5 | t = Textile() 6 | result = t.retrieveTags(t.span("hello %(bob)span *strong* and **bold**% goodbye")) 7 | expect = ('hello span strong and ' 8 | 'bold goodbye') 9 | assert result == expect 10 | 11 | result = t.retrieveTags(t.span('%:http://domain.tld test%')) 12 | expect = 'test' 13 | assert result == expect 14 | 15 | t = Textile() 16 | # cover the partial branch where we exceed the max_span_depth. 17 | t.max_span_depth = 2 18 | result = t.retrieveTags(t.span('_-*test*-_')) 19 | expect = '*test*' 20 | assert result == expect 21 | -------------------------------------------------------------------------------- /tests/test_subclassing.py: -------------------------------------------------------------------------------- 1 | import textile 2 | 3 | 4 | def test_change_glyphs(): 5 | class TextilePL(textile.Textile): 6 | glyph_definitions = dict(textile.Textile.glyph_definitions, 7 | quote_double_open='„') 8 | 9 | test = 'Test "quotes".' 10 | expect = '\t

Test „quotes”.

' 11 | result = TextilePL().parse(test) 12 | assert expect == result 13 | 14 | # Base Textile is unchanged. 15 | expect = '\t

Test “quotes”.

' 16 | result = textile.textile(test) 17 | assert expect == result 18 | -------------------------------------------------------------------------------- /tests/test_table.py: -------------------------------------------------------------------------------- 1 | from textile import Textile 2 | 3 | 4 | def test_table(): 5 | t = Textile() 6 | result = t.table('(rowclass). |one|two|three|\n|a|b|c|') 7 | expect = '\t\n\t\t\n\t\t\t\n\t\t\t\n\t\t\t\n\t\t\n\t\t\n\t\t\t\n\t\t\t\n\t\t\t\n\t\t\n\t
onetwothree
abc
\n\n' 8 | assert result == expect 9 | 10 | t = Textile(lite=True) 11 | result = t.table('(lite). |one|two|three|\n|a|b|c|\n| * test\n* test|1|2|') 12 | expect = '\t\n\t\t\n\t\t\t\n\t\t\t\n\t\t\t\n\t\t\n\t\t\n\t\t\t\n\t\t\t\n\t\t\t\n\t\t\n\t\t\n\t\t\t\n\t\t\t\n\t\t\t\n\t\t\n\t
onetwothree
abc
* test\n* test12
\n\n' 13 | assert result == expect 14 | -------------------------------------------------------------------------------- /tests/test_textile.py: -------------------------------------------------------------------------------- 1 | # -*- coding: utf-8 -*- 2 | import pytest 3 | import re 4 | import textile 5 | 6 | 7 | def test_FootnoteReference(): 8 | html = textile.textile('YACC[1]') 9 | assert re.search(r'^\t

YACC1

', html) is not None 10 | 11 | 12 | def test_Footnote(): 13 | html = textile.textile('This is covered elsewhere[1].\n\nfn1. Down here, in fact.\n\nfn2. Here is another footnote.') 14 | assert re.search(r'^\t

This is covered elsewhere1.

\n\n\t

1 Down here, in fact.

\n\n\t

2 Here is another footnote.

$', html) is not None 15 | 16 | html = textile.textile('''See[1] for details -- or perhaps[100] at a push.\n\nfn1. Here are the details.\n\nfn100(footy#otherid). A totally unrelated footnote.''') 17 | assert re.search(r'^\t

See1 for details — or perhaps100 at a push.

\n\n\t

1 Here are the details.

\n\n\t

100 A totally unrelated footnote.

$', html) is not None 18 | 19 | html = textile.textile('''See[2] for details, and later, reference it again[2].\n\nfn2^(footy#otherid)[en]. Here are the details.''') 20 | assert re.search(r'^\t

See2 for details, and later, reference it again2.

\n\n\t

2 Here are the details.

$', html) is not None 21 | 22 | html = textile.textile('''See[3!] for details.\n\nfn3. Here are the details.''') 23 | assert re.search(r'^\t

See3 for details.

\n\n\t

3 Here are the details.

$', html) is not None 24 | 25 | html = textile.textile('''See[4!] for details.\n\nfn4^. Here are the details.''') 26 | assert re.search(r'^\t

See4 for details.

\n\n\t

4 Here are the details.

$', html) is not None 27 | 28 | 29 | def test_issue_35(): 30 | result = textile.textile('"z"') 31 | expect = '\t

“z”

' 32 | assert result == expect 33 | 34 | result = textile.textile('" z"') 35 | expect = '\t

“ z”

' 36 | assert result == expect 37 | 38 | 39 | def test_restricted(): 40 | # Note that the HTML is escaped, thus rendering the " 42 | result = textile.textile_restricted(test) 43 | expect = "\t

Here is some text.
\n<script>alert(‘hello world’)</script>

" 44 | 45 | assert result == expect 46 | 47 | test = "Here's some text." 48 | result = textile.textile_restricted(test) 49 | expect = "\t

Here’s some <!— commented out —> text.

" 50 | 51 | assert result == expect 52 | 53 | test = "p[fr]. Partir, c'est toujours mourir un peu." 54 | result = textile.textile_restricted(test) 55 | expect = '\t

Partir, c’est toujours mourir un peu.

' 56 | 57 | assert result == expect 58 | 59 | test = "p{color:blue}. is this blue?" 60 | result = textile.textile_restricted(test) 61 | expect = '\t

is this blue?

' 62 | 63 | assert result == expect 64 | 65 | test = """\ 66 | table{border:1px solid black}. 67 | |={color:gray}. Your caption goes here 68 | |~. 69 | |{position:absolute}. A footer | foo | 70 | |-. 71 | |_{font-size:xxlarge}. header|_=. centered header| 72 | |~. bottom aligned|{background:red;width:200px}. asfd|""" 73 | result = textile.textile_restricted(test, lite=False) 74 | # styles from alignment hints like =. and ~. are ok 75 | expect = '''\ 76 | \t 77 | \t 78 | \t 79 | \t\t 80 | \t\t\t 81 | \t\t\t 82 | \t\t 83 | \t 84 | \t 85 | \t\t 86 | \t\t\t 87 | \t\t\t 88 | \t\t 89 | \t\t 90 | \t\t\t 91 | \t\t\t 92 | \t\t 93 | \t 94 | \t
Your caption goes here
A footer foo
headercentered header
bottom alignedasfd
''' 95 | 96 | assert result == expect 97 | 98 | 99 | def test_unicode_footnote(): 100 | html = textile.textile('текст[1]') 101 | assert re.compile(r'^\t

текст1

$', re.U).search(html) is not None 102 | 103 | 104 | def test_autolinking(): 105 | test = """some text "test":http://www.google.com http://www.google.com "$":http://www.google.com""" 106 | result = """\t

some text test http://www.google.com www.google.com

""" 107 | expect = textile.textile(test) 108 | 109 | assert result == expect 110 | 111 | 112 | def test_sanitize(): 113 | test = "a paragraph of benign text" 114 | result = "\t

a paragraph of benign text

" 115 | expect = textile.Textile().parse(test, sanitize=True) 116 | assert result == expect 117 | 118 | test = """

a paragraph of evil text

""" 119 | result = '

a paragraph of evil text

' 120 | expect = textile.Textile().parse(test, sanitize=True) 121 | assert result == expect 122 | 123 | test = """

a paragraph of benign text
and more text

""" 124 | result = '

a paragraph of benign text
\nand more text

' 125 | expect = textile.Textile(html_type='html5').parse(test, sanitize=True) 126 | assert result == expect 127 | 128 | 129 | def test_imagesize(): 130 | PIL = pytest.importorskip('PIL') # noqa: F841 131 | 132 | test = "!http://www.google.com/intl/en_ALL/images/srpr/logo1w.png!" 133 | result = '\t

' 134 | expect = textile.Textile(get_sizes=True).parse(test) 135 | assert result == expect 136 | 137 | 138 | def test_endnotes_simple(): 139 | test = """Scientists say the moon is slowly shrinking[#my_first_label].\n\nnotelist!.\n\nnote#my_first_label Over the past billion years, about a quarter of the moon's 4.5 billion-year lifespan, it has shrunk about 200 meters (700 feet) in diameter.""" 140 | html = textile.textile(test) 141 | result_pattern = r"""\t

Scientists say the moon is slowly shrinking1.

\n\n\t
    \n\t\t
  1. Over the past billion years, about a quarter of the moon’s 4.5 billion-year lifespan, it has shrunk about 200 meters \(700 feet\) in diameter.
  2. \n\t
$""" 142 | result_re = re.compile(result_pattern) 143 | assert result_re.search(html) is not None 144 | 145 | 146 | def test_endnotes_complex(): 147 | test = """Tim Berners-Lee is one of the pioneer voices in favour of Net Neutrality[#netneutral] and has expressed the view that ISPs should supply "connectivity with no strings attached"[#netneutral!] [#tbl_quote]\n\nBerners-Lee admitted that the forward slashes ("//") in a web address were actually unnecessary. He told the newspaper that he could easily have designed URLs not to have the forward slashes. "... it seemed like a good idea at the time,"[#slashes]\n\nnote#netneutral. "Web creator rejects net tracking":http://news.bbc.co.uk/2/hi/technology/7613201.stm. BBC. 15 September 2008\n\nnote#tbl_quote. "Web inventor's warning on spy software":http://www.telegraph.co.uk/news/uknews/1581938/Web-inventor%27s-warning-on-spy-software.html. The Daily Telegraph (London). 25 May 2008\n\nnote#slashes. "Berners-Lee 'sorry' for slashes":http://news.bbc.co.uk/1/hi/technology/8306631.stm. BBC. 14 October 2009\n\nnotelist.""" 148 | html = textile.textile(test) 149 | result_pattern = r"""\t

Tim Berners-Lee is one of the pioneer voices in favour of Net Neutrality1 and has expressed the view that ISPs should supply “connectivity with no strings attached”1 2

\n\n\t

Berners-Lee admitted that the forward slashes \(“//”\) in a web address were actually unnecessary. He told the newspaper that he could easily have designed URLs not to have the forward slashes. “… it seemed like a good idea at the time,”3

\n\n\t
    \n\t\t
  1. a b Web creator rejects net tracking. BBC. 15 September 2008
  2. \n\t\t
  3. a Web inventor’s warning on spy software. The Daily Telegraph \(London\). 25 May 2008
  4. \n\t\t
  5. a Berners-Lee ‘sorry’ for slashes. BBC. 14 October 2009
  6. \n\t
$""" 150 | result_re = re.compile(result_pattern) 151 | assert result_re.search(html) is not None 152 | 153 | 154 | def test_endnotes_unreferenced_note(): 155 | test = """Scientists say[#lavader] the moon is quite small. But I, for one, don't believe them. Others claim it to be made of cheese[#aardman]. If this proves true I suspect we are in for troubled times[#apollo13] as people argue over their "share" of the moon's cheese. In the end, its limited size[#lavader] may prove problematic.\n\nnote#lavader(noteclass). "Proof of the small moon hypothesis":http://antwrp.gsfc.nasa.gov/apod/ap080801.html. Copyright(c) Laurent Laveder\n\nnote#aardman(#noteid). "Proof of a cheese moon":http://www.imdb.com/title/tt0104361\n\nnote#apollo13. After all, things do go "wrong":http://en.wikipedia.org/wiki/Apollo_13#The_oxygen_tank_incident.\n\nnotelist{padding:1em; margin:1em; border-bottom:1px solid gray}.\n\nnotelist{padding:1em; margin:1em; border-bottom:1px solid gray}:§^.\n\nnotelist{padding:1em; margin:1em; border-bottom:1px solid gray}:‡""" 156 | html = textile.textile(test) 157 | result_pattern = r"""\t

Scientists say1 the moon is quite small. But I, for one, don’t believe them. Others claim it to be made of cheese2. If this proves true I suspect we are in for troubled times3 as people argue over their “share” of the moon’s cheese. In the end, its limited size1 may prove problematic.

\n\n\t
    \n\t\t
  1. a b Proof of the small moon hypothesis. Copyright© Laurent Laveder
  2. \n\t\t
  3. a Proof of a cheese moon
  4. \n\t\t
  5. a After all, things do go wrong.
  6. \n\t
\n\n\t
    \n\t\t
  1. § Proof of the small moon hypothesis. Copyright© Laurent Laveder
  2. \n\t\t
  3. § Proof of a cheese moon
  4. \n\t\t
  5. § After all, things do go wrong.
  6. \n\t
\n\n\t
    \n\t\t
  1. Proof of the small moon hypothesis. Copyright© Laurent Laveder
  2. \n\t\t
  3. Proof of a cheese moon
  4. \n\t\t
  5. After all, things do go wrong.
  6. \n\t
""" 158 | result_re = re.compile(result_pattern, re.U) 159 | assert result_re.search(html) is not None 160 | 161 | 162 | def test_endnotes_malformed(): 163 | test = """Scientists say[#lavader] the moon is quite small. But I, for one, don't believe them. Others claim it to be made of cheese[#aardman]. If this proves true I suspect we are in for troubled times[#apollo13!] as people argue over their "share" of the moon's cheese. In the end, its limited size[#lavader] may prove problematic.\n\nnote#unused An unreferenced note.\n\nnote#lavader^ "Proof of the small moon hypothesis":http://antwrp.gsfc.nasa.gov/apod/ap080801.html. Copyright(c) Laurent Laveder\n\nnote#aardman^ "Proof of a cheese moon":http://www.imdb.com/title/tt0104361\n\nnote#apollo13^ After all, things do go "wrong":http://en.wikipedia.org/wiki/Apollo_13#The_oxygen_tank_incident.\n\nnotelist{padding:1em; margin:1em; border-bottom:1px solid gray}:α!+""" 164 | html = textile.textile(test) 165 | result_pattern = r"""^\t

Scientists say1 the moon is quite small. But I, for one, don’t believe them. Others claim it to be made of cheese2. If this proves true I suspect we are in for troubled times3 as people argue over their “share” of the moon’s cheese. In the end, its limited size1 may prove problematic.

\n\n\t
    \n\t\t
  1. α Proof of the small moon hypothesis. Copyright© Laurent Laveder
  2. \n\t\t
  3. α Proof of a cheese moon
  4. \n\t\t
  5. α After all, things do go wrong.
  6. \n\t\t
  7. An unreferenced note.
  8. \n\t
$""" 166 | result_re = re.compile(result_pattern, re.U) 167 | assert result_re.search(html) is not None 168 | 169 | 170 | def test_endnotes_undefined_note(): 171 | test = """Scientists say the moon is slowly shrinking[#my_first_label].\n\nnotelist!.""" 172 | html = textile.textile(test) 173 | result_pattern = r"""\t

Scientists say the moon is slowly shrinking1.

\n\n\t
    \n\t\t
  1. Undefined Note \[#my_first_label\].
  2. \n\t
$""" 174 | result_re = re.compile(result_pattern) 175 | assert result_re.search(html) is not None 176 | 177 | 178 | def test_encode_url(): 179 | # I tried adding these as doctests, but the unicode tests weren't 180 | # returning the correct results. 181 | t = textile.Textile() 182 | 183 | url = 'http://www.example.local' 184 | result = 'http://www.example.local' 185 | eurl = t.encode_url(url) 186 | assert eurl == result 187 | 188 | url = 'http://user@www.example.local' 189 | result = 'http://user@www.example.local' 190 | eurl = t.encode_url(url) 191 | assert eurl == result 192 | 193 | url = 'http://user:password@www.example.local' 194 | result = 'http://user:password@www.example.local' 195 | eurl = t.encode_url(url) 196 | assert eurl == result 197 | 198 | url = 'http://user:password@www.example.local/Ubermensch' 199 | result = 'http://user:password@www.example.local/Ubermensch' 200 | eurl = t.encode_url(url) 201 | assert eurl == result 202 | 203 | url = "http://user:password@www.example.local/Übermensch" 204 | result = "http://user:password@www.example.local/%C3%9Cbermensch" 205 | eurl = t.encode_url(url) 206 | assert eurl == result 207 | 208 | url = 'http://user:password@www.example.local:8080/Übermensch' 209 | result = 'http://user:password@www.example.local:8080/%C3%9Cbermensch' 210 | eurl = t.encode_url(url) 211 | assert eurl == result 212 | 213 | 214 | def test_footnote_crosslink(): 215 | html = textile.textile('''See[2] for details, and later, reference it again[2].\n\nfn2^(footy#otherid)[en]. Here are the details.''') 216 | searchstring = r'\t

See2 for details, and later, reference it again2.

\n\n\t

2 Here are the details.

$' 217 | assert re.compile(searchstring).search(html) is not None 218 | 219 | 220 | def test_footnote_without_reflink(): 221 | html = textile.textile('''See[3!] for details.\n\nfn3. Here are the details.''') 222 | searchstring = r'^\t

See3 for details.

\n\n\t

3 Here are the details.

$' 223 | assert re.compile(searchstring).search(html) is not None 224 | 225 | 226 | def testSquareBrackets(): 227 | html = textile.textile("""1[^st^], 2[^nd^], 3[^rd^]. 2 log[~n~]\n\nA close[!http://textpattern.com/favicon.ico!]image.\nA tight["text":http://textpattern.com/]link.\nA ["footnoted link":http://textpattern.com/][182].""") 228 | searchstring = r'^\t

1st, 2nd, 3rd. 2 logn

\n\n\t

A closeimage.
\nA tighttextlink.
\nA footnoted link182.

' 229 | assert re.compile(searchstring).search(html) is not None 230 | 231 | 232 | def test_html5(): 233 | """docstring for testHTML5""" 234 | 235 | test = 'We use CSS(Cascading Style Sheets).' 236 | result = '\t

We use CSS.

' 237 | expect = textile.textile(test, html_type="html5") 238 | assert result == expect 239 | 240 | 241 | def test_relURL(): 242 | t = textile.Textile() 243 | t.restricted = True 244 | assert t.relURL("gopher://gopher.com/") == '#' 245 | -------------------------------------------------------------------------------- /tests/test_textilefactory.py: -------------------------------------------------------------------------------- 1 | from textile import textilefactory 2 | import pytest 3 | 4 | 5 | def test_TextileFactory(): 6 | f = textilefactory.TextileFactory() 7 | result = f.process("some text here") 8 | expect = '\t

some text here

' 9 | assert result == expect 10 | 11 | f = textilefactory.TextileFactory(restricted=True) 12 | result = f.process("more text here") 13 | expect = '\t

more text here

' 14 | assert result == expect 15 | 16 | f = textilefactory.TextileFactory(noimage=True) 17 | result = f.process("this covers a partial branch.") 18 | expect = '\t

this covers a partial branch.

' 19 | assert result == expect 20 | 21 | # Certain parameter values are not permitted because they are illogical: 22 | 23 | with pytest.raises(ValueError) as ve: 24 | f = textilefactory.TextileFactory(lite=True) 25 | assert 'lite can only be enabled in restricted mode' in str(ve.value) 26 | 27 | with pytest.raises(ValueError) as ve: 28 | f = textilefactory.TextileFactory(html_type='invalid') 29 | assert "html_type must be 'xhtml' or 'html5'" in str(ve.value) 30 | -------------------------------------------------------------------------------- /tests/test_urls.py: -------------------------------------------------------------------------------- 1 | # -*- coding: utf-8 -*- 2 | from textile import Textile 3 | 4 | 5 | def test_urls(): 6 | t = Textile() 7 | assert t.relURL("http://www.google.com/") == 'http://www.google.com/' 8 | 9 | result = t.links('fooobar "Google":http://google.com/foobar/ and hello world "flickr":http://flickr.com/photos/jsamsa/ ') 10 | expect = 'fooobar {0}2:shelve and hello world {0}4:shelve '.format(t.uid) 11 | assert result == expect 12 | 13 | result = t.links('""Open the door, HAL!"":https://xkcd.com/375/') 14 | expect = '{0}6:shelve'.format(t.uid) 15 | assert result == expect 16 | 17 | result = t.links('"$":http://domain.tld/test_[brackets]') 18 | expect = '{0}8:shelve'.format(t.uid) 19 | assert result == expect 20 | 21 | result = t.links('"$":http://domain.tld/test_') 22 | expect = '{0}10:shelve'.format(t.uid) 23 | assert result == expect 24 | 25 | expect = '"":test' 26 | result = t.links(expect) 27 | assert result == expect 28 | 29 | expect = '"$":htt://domain.tld' 30 | result = t.links(expect) 31 | assert result == expect 32 | 33 | result = t.shelveURL('') 34 | expect = '' 35 | assert result == expect 36 | 37 | result = t.retrieveURLs('{0}2:url'.format(t.uid)) 38 | expect = '' 39 | assert result == expect 40 | 41 | result = t.encode_url('http://domain.tld/übermensch') 42 | expect = 'http://domain.tld/%C3%BCbermensch' 43 | assert result == 
expect 44 | 45 | result = t.parse('A link that starts with an h is "handled":/test/ incorrectly.') 46 | expect = '\t

A link that starts with an h is handled incorrectly.

' 47 | assert result == expect 48 | 49 | result = t.parse('A link that starts with a space" raises":/test/ an exception.') 50 | expect = '\t

A link that starts with a space” raises an exception.

' 51 | assert result == expect 52 | 53 | result = t.parse('A link that "contains a\nnewline":/test/ raises an exception.') 54 | expect = '\t

A link that contains a\nnewline raises an exception.

' 55 | assert result == expect 56 | 57 | 58 | def test_rel_attribute(): 59 | t = Textile(rel='nofollow') 60 | result = t.parse('"$":http://domain.tld') 61 | expect = '\t

domain.tld

' 62 | assert result == expect 63 | 64 | 65 | def test_quotes_in_link_text(): 66 | """quotes in link text are tricky.""" 67 | test = '""this is a quote in link text"":url' 68 | t = Textile() 69 | result = t.parse(test) 70 | expect = '\t

“this is a quote in link text”

' 71 | assert result == expect 72 | -------------------------------------------------------------------------------- /tests/test_utils.py: -------------------------------------------------------------------------------- 1 | # -*- coding: utf-8 -*- 2 | from textile import utils 3 | 4 | 5 | def test_encode_html(): 6 | result = utils.encode_html('''this is a "test" of text that's safe to ''' 7 | 'put in an attribute.') 8 | expect = ('this is a "test" of text that's safe to put in ' 9 | 'an <html> attribute.') 10 | assert result == expect 11 | 12 | 13 | def test_has_raw_text(): 14 | assert utils.has_raw_text('

foo bar biz baz

') is False 15 | assert utils.has_raw_text(' why yes, yes it does') is True 16 | 17 | 18 | def test_is_rel_url(): 19 | assert utils.is_rel_url("http://www.google.com/") is False 20 | assert utils.is_rel_url("/foo") is True 21 | 22 | 23 | def test_generate_tag(): 24 | result = utils.generate_tag('span', 'inner text', {'class': 'test'}) 25 | expect = 'inner text' 26 | assert result == expect 27 | 28 | text = 'Übermensch' 29 | attributes = {'href': 'http://de.wikipedia.org/wiki/%C3%C9bermensch'} 30 | expect = 'Übermensch' 31 | result = utils.generate_tag('a', text, attributes) 32 | assert result == expect 33 | 34 | 35 | def test_human_readable_url_edge_case(): 36 | assert utils.human_readable_url('google.com') == 'google.com' 37 | assert utils.human_readable_url('tel:1-800-555-1212') == '1-800-555-1212' 38 | -------------------------------------------------------------------------------- /tests/test_values.py: -------------------------------------------------------------------------------- 1 | # -*- coding: utf-8 -*- 2 | import textile 3 | import pytest 4 | 5 | xhtml_known_values = ( 6 | ('hello, world', '\t

hello, world

'), 7 | 8 | ('A single paragraph.\n\nFollowed by another.', 9 | '\t

A single paragraph.

\n\n\t

Followed by another.

'), 10 | 11 | ('I am very serious.\n\n
\nI am very serious.\n
', 12 | '\t

I am very serious.

\n\n
\nI am <b>very</b> serious.\n
'), 13 | 14 | ('I spoke.\nAnd none replied.', '\t

I spoke.
\nAnd none replied.

'), 15 | 16 | ('"Observe!"', '\t

“Observe!”

'), 17 | 18 | ('Observe -- very nice!', '\t

Observe — very nice!

'), 19 | 20 | ('Observe - tiny and brief.', '\t

Observe – tiny and brief.

'), 21 | 22 | ('Observe...', '\t

Observe…

'), 23 | 24 | ('Observe ...', '\t

Observe …

'), 25 | 26 | ('Observe: 2 x 2.', '\t

Observe: 2 × 2.

'), 27 | 28 | ('one(TM), two(R), three(C).', '\t

one™, two®, three©.

'), 29 | 30 | ('h1. Header 1', '\t

Header 1

'), 31 | 32 | ('h2. Header 2', '\t

Header 2

'), 33 | 34 | ('h3. Header 3', '\t

Header 3

'), 35 | 36 | ('An old text\n\nbq. A block quotation.\n\nAny old text''', 37 | '\t

An old text

\n\n\t
\n\t\t

A block quotation.

\n\t
\n\n\t

Any old text

'), 38 | 39 | ('I _believe_ every word.', '\t

I believe every word.

'), 40 | 41 | ('And then? She *fell*!', '\t

And then? She fell!

'), 42 | 43 | ('I __know__.\nI **really** __know__.', '\t

I know.
\nI really know.

'), 44 | 45 | ("??Cat's Cradle?? by Vonnegut", '\t

Cat’s Cradle by Vonnegut

'), 46 | 47 | ('Convert with @str(foo)@', '\t

Convert with str(foo)

'), 48 | 49 | ('I\'m -sure- not sure.', '\t

I’m sure not sure.

'), 50 | 51 | ('You are a +pleasant+ child.', '\t

You are a pleasant child.

'), 52 | 53 | ('a ^2^ + b ^2^ = c ^2^', '\t

a 2 + b 2 = c 2

'), 54 | 55 | ('log ~2~ x', '\t

log 2 x

'), 56 | 57 | ('I\'m %unaware% of most soft drinks.', '\t

I’m unaware of most soft drinks.

'), 58 | 59 | ("I'm %{color:red}unaware%\nof most soft drinks.", '\t

I’m unaware
\nof most soft drinks.

'), 60 | 61 | ('p(example1). An example', '\t

An example

'), 62 | 63 | ('p(#big-red). Red here', '\t

Red here

'), 64 | 65 | ('p(example1#big-red2). Red here', '\t

Red here

'), 66 | 67 | ('p{color:blue;margin:30px}. Spacey blue', '\t

Spacey blue

'), 68 | 69 | ('p[fr]. rouge', '\t

rouge

'), 70 | 71 | ('I seriously *{color:red}blushed*\nwhen I _(big)sprouted_ that\ncorn stalk from my\n%[es]cabeza%.', 72 | '\t

I seriously blushed
\nwhen I sprouted' 73 | ' that
\ncorn stalk from my
\ncabeza.

'), 74 | 75 | ('p<. align left', '\t

align left

'), 76 | 77 | ('p>. align right', '\t

align right

'), 78 | 79 | ('p=. centered', '\t

centered

'), 80 | 81 | ('p<>. justified', '\t

justified

'), 82 | 83 | ('p(. left ident 1em', '\t

left ident 1em

'), 84 | 85 | ('p((. left ident 2em', '\t

left ident 2em

'), 86 | 87 | ('p))). right ident 3em', '\t

right ident 3em

'), 88 | 89 | ('h2()>. Bingo.', '\t

Bingo.

'), 90 | 91 | ('h3()>[no]{color:red}. Bingo', '\t

Bingo

'), 92 | 93 | ('
\n\na.gsub!( /\n
', 94 | '
\n\na.gsub!( /</, "" )\n\n
'), 95 | 96 | ('
\n\nh3. Sidebar\n\n"Hobix":http://hobix.com/\n"Ruby":http://ruby-lang.org/\n\n
\n\n' 97 | 'The main text of the\npage goes here and will\nstay to the left of the\nsidebar.', 98 | '\t

\n\n\t

Sidebar

\n\n\t

Hobix
\n' 99 | 'Ruby

\n\n\t

\n\n\t

The main text of the
\n' 100 | 'page goes here and will
\nstay to the left of the
\nsidebar.

'), 101 | 102 | ('# A first item\n# A second item\n# A third', 103 | '\t
    \n\t\t
  1. A first item
  2. \n\t\t
  3. A second item
  4. \n\t\t
  5. A third
  6. \n\t
'), 104 | 105 | ('# Fuel could be:\n## Coal\n## Gasoline\n## Electricity\n# Humans need only:\n## Water\n## Protein', 106 | '\t
    \n\t\t
  1. Fuel could be:\n\t\t
      \n\t\t\t
    1. Coal
    2. \n\t\t\t
    3. Gasoline
    4. \n\t\t\t
    5. Electricity
    6. \n\t\t
  2. \n\t\t
  3. Humans need only:\n\t\t
      \n\t\t\t
    1. Water
    2. \n\t\t\t
    3. Protein
    4. \n\t\t
  4. \n\t\t
'), 107 | 108 | ('* A first item\n* A second item\n* A third', 109 | '\t
    \n\t\t
  • A first item
  • \n\t\t
  • A second item
  • \n\t\t
  • A third
  • \n\t
'), 110 | 111 | ('* Fuel could be:\n** Coal\n** Gasoline\n** Electricity\n* Humans need only:\n** Water\n** Protein', 112 | '\t
    \n\t\t
  • Fuel could be:\n\t\t
      \n\t\t\t
    • Coal
    • \n\t\t\t
    • Gasoline
    • \n\t\t\t
    • Electricity
    • \n\t\t
  • \n\t\t
  • Humans need only:\n\t\t
      \n\t\t\t
    • Water
    • \n\t\t\t
    • Protein
    • \n\t\t
  • \n\t\t
'), 113 | 114 | ('I searched "Google":http://google.com.', '\t

I searched Google.

'), 115 | 116 | ('I searched "a search engine (Google)":http://google.com.', '\t

I searched a search engine.

'), 117 | 118 | ('I am crazy about "Hobix":hobix\nand "it\'s":hobix "all":hobix I ever\n"link to":hobix!\n\n[hobix]http://hobix.com', 119 | '\t

I am crazy about Hobix
\nand it’s ' 120 | 'all I ever
\nlink to!

'), 121 | 122 | ('!http://hobix.com/sample.jpg!', '\t

'), 123 | 124 | ('!openwindow1.gif(Bunny.)!', '\t

Bunny.

'), 125 | 126 | ('!openwindow1.gif!:http://hobix.com/', '\t

'), 127 | 128 | ('!>obake.gif!\n\nAnd others sat all round the small\nmachine and paid it to sing to them.', 129 | '\t

\n\n\t' 130 | '

And others sat all round the small
\nmachine and paid it to sing to them.

'), 131 | 132 | ('We use CSS(Cascading Style Sheets).', '\t

We use CSS.

'), 133 | 134 | ('|one|two|three|\n|a|b|c|', 135 | '\t\n\t\t\n\t\t\t\n\t\t\t\n\t\t\t\n\t\t' 136 | '\n\t\t\n\t\t\t\n\t\t\t\n\t\t\t\n\t\t\n\t
onetwothree
abc
'), 137 | 138 | ('| name | age | sex |\n| joan | 24 | f |\n| archie | 29 | m |\n| bella | 45 | f |', 139 | '\t\n\t\t\n\t\t\t\n\t\t\t\n\t\t\t\n\t\t' 140 | '\n\t\t\n\t\t\t\n\t\t\t\n\t\t\t\n\t\t' 141 | '\n\t\t\n\t\t\t\n\t\t\t\n\t\t\t\n\t\t' 142 | '\n\t\t\n\t\t\t\n\t\t\t\n\t\t\t\n\t\t\n\t
name age sex
joan 24 f
archie 29 m
bella 45 f
'), 143 | 144 | ('|_. name |_. age |_. sex |\n| joan | 24 | f |\n| archie | 29 | m |\n| bella | 45 | f |', 145 | '\t\n\t\t\n\t\t\t\n\t\t\t\n\t\t\t\n\t\t' 146 | '\n\t\t\n\t\t\t\n\t\t\t\n\t\t\t\n\t\t' 147 | '\n\t\t\n\t\t\t\n\t\t\t\n\t\t\t\n\t\t' 148 | '\n\t\t\n\t\t\t\n\t\t\t\n\t\t\t\n\t\t\n\t
name age sex
joan 24 f
archie 29 m
bella 45 f
'), 149 | 150 | ('', '\t

'), 151 | 152 | ('pre.. Hello\n\nHello Again\n\np. normal text', '
Hello\n\nHello Again
\n\n\t

normal text

'), 153 | 154 | ('
this is in a pre tag
', '
this is in a pre tag
'), 155 | 156 | ('"test1":http://foo.com/bar--baz\n\n"test2":http://foo.com/bar---baz\n\n"test3":http://foo.com/bar-17-18-baz', 157 | '\t

test1

\n\n\t' 158 | '

test2

\n\n\t' 159 | '

test3

'), 160 | 161 | ('"foo ==(bar)==":#foobar', '\t

foo (bar)

'), 162 | 163 | ('!http://render.mathim.com/A%5EtAx%20%3D%20A%5Et%28Ax%29.!', 164 | '\t

'), 165 | 166 | ('* Point one\n* Point two\n## Step 1\n## Step 2\n## Step 3\n* Point three\n** Sub point 1\n** Sub point 2', 167 | '\t
    \n\t\t
  • Point one
  • \n\t\t
  • Point two\n\t\t
      \n\t\t\t
    1. Step 1
    2. \n\t\t\t
    3. Step 2
    4. \n\t\t\t
    5. Step 3
    6. \n\t\t
  • \n\t\t
  • Point three\n\t\t
      \n\t\t\t
    • Sub point 1
    • \n\t\t\t
    • Sub point 2
    • \n\t\t
  • \n\t\t
'), 168 | 169 | ('@array[4] = 8@', '\t

array[4] = 8

'), 170 | 171 | ('#{color:blue} one\n# two\n# three', 172 | '\t
    \n\t\t
  1. one
  2. \n\t\t
  3. two
  4. \n\t\t
  5. three
  6. \n\t
'), 173 | 174 | ('Links (like "this":http://foo.com), are now mangled in 2.1.0, whereas 2.0 parsed them correctly.', 175 | '\t

Links (like this), are now mangled in 2.1.0, whereas 2.0 parsed them correctly.

'), 176 | 177 | ('@monospaced text@, followed by text', 178 | '\t

monospaced text, followed by text

'), 179 | 180 | ('h2. A header\n\n\n\n\n\nsome text', '\t

A header

\n\n\n\n\n\n\t

some text

'), 181 | 182 | ('pre.. foo bar baz\nquux', '
foo bar baz\nquux
'), 183 | 184 | ('line of text\n\n leading spaces', 185 | '\t

line of text

\n\n leading spaces'), 186 | 187 | ('"some text":http://www.example.com/?q=foo%20bar and more text', 188 | '\t

some text and more text

'), 189 | 190 | ('(??some text??)', '\t

(some text)

'), 191 | 192 | ('(*bold text*)', '\t

(bold text)

'), 193 | 194 | ('H[~2~]O', '\t

H2O

'), 195 | 196 | ("p=. Où est l'école, l'église s'il vous plaît?", 197 | """\t

Où est l’école, l’église s’il vous plaît?

"""), 198 | 199 | ("p=. *_The_* _*Prisoner*_", 200 | """\t

The Prisoner

"""), 201 | 202 | ("""p=. "An emphasised _word._" & "*A spanned phrase.*" """, 203 | """\t

“An emphasised word.” & “A spanned phrase.

"""), 204 | 205 | ("""p=. "*Here*'s a word!" """, 206 | """\t

Here’s a word!”

"""), 207 | 208 | ("""p=. "Please visit our "Textile Test Page":http://textile.sitemonks.com" """, 209 | """\t

“Please visit our Textile Test Page

"""), 210 | ("""| Foreign EXPÓŅÉNTIAL |""", 211 | """\t\n\t\t\n\t\t\t\n\t\t\n\t
Foreign EXPÓŅÉNTIAL
"""), 212 | ("""Piękne ŹDŹBŁO""", 213 | """\t

Piękne ŹDŹBŁO

"""), 214 | 215 | ("""p=. Tell me, what is AJAX(Asynchronous Javascript and XML), please?""", 216 | """\t

Tell me, what is AJAX, please?

"""), 217 | ('p{font-size:0.8em}. *TxStyle* is a documentation project of Textile 2.4 for "Textpattern CMS":http://texpattern.com.', 218 | '\t

TxStyle is a documentation project of Textile 2.4 for Textpattern CMS.

'), 219 | (""""Übermensch":http://de.wikipedia.org/wiki/Übermensch""", """\t

Übermensch

"""), 220 | ("""Here is some text with a block.\n\n\n\n\n\nbc. """, 221 | """\t

Here is some text with a block.

\n\n\n\n\n\n
<!-- Here is a comment block in a code block. -->
"""), 222 | (""""Textile(c)" is a registered(r) 'trademark' of Textpattern(tm) -- or TXP(That's textpattern!) -- at least it was - back in '88 when 2x4 was (+/-)5(o)C ... QED!\n\np{font-size: 200%;}. 2(1/4) 3(1/2) 4(3/4)""", 223 | """\t

“Textile©” is a registered® ‘trademark’ of Textpattern™ — or TXP — at least it was – back in ’88 when 2×4 was ±5°C … QED!

\n\n\t

2¼ 3½ 4¾

"""), 224 | ("""|=. Testing colgroup and col syntax\n|:\\5. 80\n|a|b|c|d|e|\n\n|=. Testing colgroup and col syntax|\n|:\\5. 80|\n|a|b|c|d|e|""", """\t\n\t\n\t\n\t\n\t\t\n\t\t\t\n\t\t\t\n\t\t\t\n\t\t\t\n\t\t\t\n\t\t\n\t
Testing colgroup and col syntax
abcde
\n\n\t\n\t\n\t\n\t\n\t\t\n\t\t\t\n\t\t\t\n\t\t\t\n\t\t\t\n\t\t\t\n\t\t\n\t
Testing colgroup and col syntax
abcde
"""), 225 | ("""table(#dvds){border-collapse:collapse}. Great films on DVD employing Textile summary, caption, thead, tfoot, two tbody elements and colgroups\n|={font-size:140%;margin-bottom:15px}. DVDs with two Textiled tbody elements\n|:\\3. 100 |{background:#ddd}|250||50|300|\n|^(header).\n|_. Title |_. Starring |_. Director |_. Writer |_. Notes |\n|~(footer).\n|\\5=. This is the tfoot, centred |\n|-(toplist){background:#c5f7f6}.\n| _The Usual Suspects_ | Benicio Del Toro, Gabriel Byrne, Stephen Baldwin, Kevin Spacey | Bryan Singer | Chris McQaurrie | One of the finest films ever made |\n| _Se7en_ | Morgan Freeman, Brad Pitt, Kevin Spacey | David Fincher | Andrew Kevin Walker | Great psychological thriller |\n| _Primer_ | David Sullivan, Shane Carruth | Shane Carruth | Shane Carruth | Amazing insight into trust and human psychology
rather than science fiction. Terrific! |\n| _District 9_ | Sharlto Copley, Jason Cope | Neill Blomkamp | Neill Blomkamp, Terri Tatchell | Social commentary layered on thick,\nbut boy is it done well |\n|-(medlist){background:#e7e895;}.\n| _Arlington Road_ | Tim Robbins, Jeff Bridges | Mark Pellington | Ehren Kruger | Awesome study in neighbourly relations |\n| _Phone Booth_ | Colin Farrell, Kiefer Sutherland, Forest Whitaker | Joel Schumacher | Larry Cohen | Edge-of-the-seat stuff in this\nshort but brilliantly executed thriller |""", 226 | """\t\n\t\n\t\n\t\n\t\n\t\n\t\n\t\n\t\n\t\n\t\t\n\t\t\t\n\t\t\t\n\t\t\t\n\t\t\t\n\t\t\t\n\t\t\n\t\n\t\n\t\t\n\t\t\t\n\t\t\n\t\n\t\n\t\t\n\t\t\t\n\t\t\t\n\t\t\t\n\t\t\t\n\t\t\t\n\t\t\n\t\t\n\t\t\t\n\t\t\t\n\t\t\t\n\t\t\t\n\t\t\t\n\t\t\n\t\t\n\t\t\t\n\t\t\t\n\t\t\t\n\t\t\t\n\t\t\t\n\t\t\n\t\t\n\t\t\t\n\t\t\t\n\t\t\t\n\t\t\t\n\t\t\t\n\t\t\n\t\n\t\n\t\t\n\t\t\t\n\t\t\t\n\t\t\t\n\t\t\t\n\t\t\t\n\t\t\n\t\t\n\t\t\t\n\t\t\t\n\t\t\t\n\t\t\t\n\t\t\t\n\t\t\n\t\n\t
DVDs with two Textiled tbody elements
Title Starring Director Writer Notes
This is the tfoot, centred
The Usual Suspects Benicio Del Toro, Gabriel Byrne, Stephen Baldwin, Kevin Spacey Bryan Singer Chris McQaurrie One of the finest films ever made
Se7en Morgan Freeman, Brad Pitt, Kevin Spacey David Fincher Andrew Kevin Walker Great psychological thriller
Primer David Sullivan, Shane Carruth Shane Carruth Shane Carruth Amazing insight into trust and human psychology
\nrather than science fiction. Terrific!
District 9 Sharlto Copley, Jason Cope Neill Blomkamp Neill Blomkamp, Terri Tatchell Social commentary layered on thick,
\nbut boy is it done well
Arlington Road Tim Robbins, Jeff Bridges Mark Pellington Ehren Kruger Awesome study in neighbourly relations
Phone Booth Colin Farrell, Kiefer Sutherland, Forest Whitaker Joel Schumacher Larry Cohen Edge-of-the-seat stuff in this
\nshort but brilliantly executed thriller
"""), 227 | ("""-(hot) *coffee* := Hot _and_ black\n-(hot#tea) tea := Also hot, but a little less black\n-(cold) milk := Nourishing beverage for baby cows.\nCold drink that goes great with cookies. =:\n\n-(hot) coffee := Hot and black\n-(hot#tea) tea := Also hot, but a little less black\n-(cold) milk :=\nNourishing beverage for baby cows.\nCold drink that goes great with cookies. =:""", 228 | """
\n\t
coffee
\n\t
Hot and black
\n\t
tea
\n\t
Also hot, but a little less black
\n\t
milk
\n\t
Nourishing beverage for baby cows.
\nCold drink that goes great with cookies.
\n
\n\n
\n\t
coffee
\n\t
Hot and black
\n\t
tea
\n\t
Also hot, but a little less black
\n\t
milk
\n\t

Nourishing beverage for baby cows.
\nCold drink that goes great with cookies.

\n
"""), 229 | (""";(class#id) Term 1\n: Def 1\n: Def 2\n: Def 3""", 230 | """\t
\n\t\t
Term 1
\n\t\t
Def 1
\n\t\t
Def 2
\n\t\t
Def 3
\n\t
"""), 231 | ("""*Here is a comment*\n\nHere is *(class)a comment*\n\n*(class)Here is a class* that is a little extended and is\n*followed* by a strong word!\n\nbc. ; Content-type: text/javascript\n; Cache-Control: no-store, no-cache, must-revalidate, pre-check=0, post-check=0, max-age=0\n; Expires: Sat, 24 Jul 2003 05:00:00 GMT\n; Last-Modified: Wed, 1 Jan 2025 05:00:00 GMT\n; Pragma: no-cache\n\n*123 test*\n\n*test 123*\n\n**123 test**\n\n**test 123**""", 232 | """\t

Here is a comment

\n\n\t

Here is a comment

\n\n\t

Here is a class that is a little extended and is
\nfollowed by a strong word!

\n\n
; Content-type: text/javascript\n; Cache-Control: no-store, no-cache, must-revalidate, pre-check=0, post-check=0, max-age=0\n; Expires: Sat, 24 Jul 2003 05:00:00 GMT\n; Last-Modified: Wed, 1 Jan 2025 05:00:00 GMT\n; Pragma: no-cache
\n\n\t

123 test

\n\n\t

test 123

\n\n\t

123 test

\n\n\t

test 123

"""), 233 | ("""#_(first#list) one\n# two\n# three\n\ntest\n\n#(ordered#list2).\n# one\n# two\n# three\n\ntest\n\n#_(class_4).\n# four\n# five\n# six\n\ntest\n\n#_ seven\n# eight\n# nine\n\ntest\n\n# one\n# two\n# three\n\ntest\n\n#22 22\n# 23\n# 24""", 234 | """\t
    \n\t\t
  1. one
  2. \n\t\t
  3. two
  4. \n\t\t
  5. three
  6. \n\t
\n\n\t

test

\n\n\t
    \n\t\t
  1. one
  2. \n\t\t
  3. two
  4. \n\t\t
  5. three
  6. \n\t
\n\n\t

test

\n\n\t
    \n\t\t
  1. four
  2. \n\t\t
  3. five
  4. \n\t\t
  5. six
  6. \n\t
\n\n\t

test

\n\n\t
    \n\t\t
  1. seven
  2. \n\t\t
  3. eight
  4. \n\t\t
  5. nine
  6. \n\t
\n\n\t

test

\n\n\t
    \n\t\t
  1. one
  2. \n\t\t
  3. two
  4. \n\t\t
  5. three
  6. \n\t
\n\n\t

test

\n\n\t
    \n\t\t
  1. 22
  2. \n\t\t
  3. 23
  4. \n\t\t
  5. 24
  6. \n\t
"""), 235 | ("""# one\n##3 one.three\n## one.four\n## one.five\n# two\n\ntest\n\n#_(continuation#section2).\n# three\n# four\n##_ four.six\n## four.seven\n# five\n\ntest\n\n#21 twenty-one\n# twenty-two""", 236 | """\t
    \n\t\t
  1. one\n\t\t
      \n\t\t\t
    1. one.three
    2. \n\t\t\t
    3. one.four
    4. \n\t\t\t
    5. one.five
    6. \n\t\t
  2. \n\t\t
  3. two
  4. \n\t
\n\n\t

test

\n\n\t
    \n\t\t
  1. three
  2. \n\t\t
  3. four\n\t\t
      \n\t\t\t
    1. four.six
    2. \n\t\t\t
    3. four.seven
    4. \n\t\t
  4. \n\t\t
  5. five
  6. \n\t
\n\n\t

test

\n\n\t
    \n\t\t
  1. twenty-one
  2. \n\t\t
  3. twenty-two
  4. \n\t
"""), 237 | ("""|* Foo[^2^]\n* _bar_\n* ~baz~ |\n|#4 *Four*\n# __Five__ |\n|-(hot) coffee := Hot and black\n-(hot#tea) tea := Also hot, but a little less black\n-(cold) milk :=\nNourishing beverage for baby cows.\nCold drink that goes great with cookies. =:\n|""", 238 | """\t\n\t\t\n\t\t\t\n\t\t\n\t\t\n\t\t\t\n\t\t\n\t\t\n\t\t\t\n\t\t\n\t
\t
    \n\t\t
  • Foo2
  • \n\t\t
  • bar
  • \n\t\t
  • baz
  • \n\t
\t
    \n\t\t
  1. Four
  2. \n\t\t
  3. Five
  4. \n\t
\n\t
coffee
\n\t
Hot and black
\n\t
tea
\n\t
Also hot, but a little less black
\n\t
milk
\n\t

Nourishing beverage for baby cows.
\nCold drink that goes great with cookies.

\n
"""), 239 | ("""h4. A more complicated table\n\ntable(tableclass#tableid){color:blue}.\n|_. table |_. more |_. badass |\n|\\3. Horizontal span of 3|\n(firstrow). |first|HAL(open the pod bay doors)|1|\n|some|{color:green}. styled|content|\n|/2. spans 2 rows|this is|quite a|\n| deep test | don't you think?|\n(lastrow). |fifth|I'm a lumberjack|5|\n|sixth| _*bold italics*_ |6|""", 240 | """\t

A more complicated table

\n\n\t\n\t\t\n\t\t\t\n\t\t\t\n\t\t\t\n\t\t\n\t\t\n\t\t\t\n\t\t\n\t\t\n\t\t\t\n\t\t\t\n\t\t\t\n\t\t\n\t\t\n\t\t\t\n\t\t\t\n\t\t\t\n\t\t\n\t\t\n\t\t\t\n\t\t\t\n\t\t\t\n\t\t\n\t\t\n\t\t\t\n\t\t\t\n\t\t\n\t\t\n\t\t\t\n\t\t\t\n\t\t\t\n\t\t\n\t\t\n\t\t\t\n\t\t\t\n\t\t\t\n\t\t\n\t
table more badass
Horizontal span of 3
firstHAL1
somestyledcontent
spans 2 rowsthis isquite a
deep test don’t you think?
fifthI’m a lumberjack5
sixth bold italics 6
"""), 241 | ("""| *strong* |\n\n| _em_ |\n\n| Inter-word -dashes- | ZIP-codes are 5- or 9-digit codes |""", 242 | """\t\n\t\t\n\t\t\t\n\t\t\n\t
strong
\n\n\t\n\t\t\n\t\t\t\n\t\t\n\t
em
\n\n\t\n\t\t\n\t\t\t\n\t\t\t\n\t\t\n\t
Inter-word dashes ZIP-codes are 5- or 9-digit codes
"""), 243 | ("""|_. attribute list |\n|<. align left |\n|>. align right|\n|=. center |\n|<>. justify me|\n|^. valign top |\n|~. bottom |""", 244 | """\t\n\t\t\n\t\t\t\n\t\t\n\t\t\n\t\t\t\n\t\t\n\t\t\n\t\t\t\n\t\t\n\t\t\n\t\t\t\n\t\t\n\t\t\n\t\t\t\n\t\t\n\t\t\n\t\t\t\n\t\t\n\t\t\n\t\t\t\n\t\t\n\t
attribute list
align left
align right
center
justify me
valign top
bottom
"""), 245 | ("""h2. A definition list\n\n;(class#id) Term 1\n: Def 1\n: Def 2\n: Def 3\n;; Center\n;; NATO(Why Em Cee Ayy)\n:: Subdef 1\n:: Subdef 2\n;;; SubSub Term\n::: SubSub Def 1\n::: SubSub Def 2\n::: Subsub Def 3\nWith newline\n::: Subsub Def 4\n:: Subdef 3\n: DEF 4\n; Term 2\n: Another def\n: And another\n: One more\n:: A def without a term\n:: More defness\n; Third term for good measure\n: My definition of a boombastic jazz""", 246 | """\t

A definition list

\n\n\t
\n\t\t
Term 1
\n\t\t
Def 1
\n\t\t
Def 2
\n\t\t
Def 3\n\t\t
\n\t\t\t
Center
\n\t\t\t
NATO
\n\t\t\t
Subdef 1
\n\t\t\t
Subdef 2\n\t\t\t
\n\t\t\t\t
SubSub Term
\n\t\t\t\t
SubSub Def 1
\n\t\t\t\t
SubSub Def 2
\n\t\t\t\t
Subsub Def 3
\nWith newline
\n\t\t\t\t
Subsub Def 4
\n\t\t\t
\n\t\t\t
Subdef 3
\n\t\t
\n\t\t
DEF 4
\n\t\t
Term 2
\n\t\t
Another def
\n\t\t
And another
\n\t\t
One more\n\t\t
\n\t\t\t
A def without a term
\n\t\t\t
More defness
\n\t\t
\n\t\t
Third term for good measure
\n\t\t
My definition of a boombastic jazz
\n\t
"""), 247 | ("""###. Here's a comment.\n\nh3. Hello\n\n###. And\nanother\none.\n\nGoodbye.""", """\t

Hello

\n\n\t

Goodbye.

"""), 248 | ("""h2. A Definition list which covers the instance where a new definition list is created with a term without a definition\n\n- term :=\n- term2 := def""", """\t

A Definition list which covers the instance where a new definition list is created with a term without a definition

\n\n
\n\t
term
\n\t
term2
\n\t
def
\n
"""), 249 | ('!{height:20px;width:20px;}https://1.gravatar.com/avatar/!', 250 | '\t

'), 251 | ('& test', '\t

& test

'), 252 | ) 253 | 254 | # A few extra cases for HTML4 255 | html_known_values = ( 256 | ("pre.. The beginning\n\nbc.. This code\n\nis the last\n\nblock in the document\n", 257 | "
The beginning
\n\n
This code\n\nis the last\n\nblock in the document
"), 258 | ("bc.. This code\n\nis not\n\nsurrounded by anything\n", 259 | "
This code\n\nis not\n\nsurrounded by anything
"), 260 | ("bc.. Paragraph 1\n\nParagraph 2\n\nParagraph 3\n\np.. post-code paragraph", 261 | "
Paragraph 1\n\nParagraph 2\n\nParagraph 3
\n\n

post-code paragraph

"), 262 | ("bc.. Paragraph 1\n\nParagraph 2\n\nParagraph 3\n\npre.. post-code non-p block", 263 | "
Paragraph 1\n\nParagraph 2\n\nParagraph 3
\n\n
post-code non-p block
"), 264 | ('I spoke.\nAnd none replied.', '\t

I spoke.
\nAnd none replied.

'), 265 | ('I __know__.\nI **really** __know__.', '\t

I know.
\nI really know.

'), 266 | ("I'm %{color:red}unaware%\nof most soft drinks.", '\t

I’m unaware
\nof most soft drinks.

'), 267 | ('I seriously *{color:red}blushed*\nwhen I _(big)sprouted_ that\ncorn stalk from my\n%[es]cabeza%.', 268 | '\t

I seriously blushed
\nwhen I sprouted' 269 | ' that
\ncorn stalk from my
\ncabeza.

'), 270 | ('
\n\na.gsub!( /\n
', 271 | '
\n\na.gsub!( /</, "" )\n\n
'), 272 | ('
\n\nh3. Sidebar\n\n"Hobix":http://hobix.com/\n"Ruby":http://ruby-lang.org/\n\n
\n\n' 273 | 'The main text of the\npage goes here and will\nstay to the left of the\nsidebar.', 274 | '\t

\n\n\t

Sidebar

\n\n\t

Hobix
\n' 275 | 'Ruby

\n\n\t

\n\n\t

The main text of the
\n' 276 | 'page goes here and will
\nstay to the left of the
\nsidebar.

'), 277 | ('I am crazy about "Hobix":hobix\nand "it\'s":hobix "all":hobix I ever\n"link to":hobix!\n\n[hobix]http://hobix.com', 278 | '\t

I am crazy about Hobix
\nand it’s ' 279 | 'all I ever
\nlink to!

'), 280 | ('!http://hobix.com/sample.jpg!', '\t

'), 281 | ('!openwindow1.gif(Bunny.)!', '\t

Bunny.

'), 282 | ('!openwindow1.gif!:http://hobix.com/', '\t

'), 283 | ('!>obake.gif!\n\nAnd others sat all round the small\nmachine and paid it to sing to them.', 284 | '\t

\n\n\t' 285 | '

And others sat all round the small
\nmachine and paid it to sing to them.

'), 286 | ('!http://render.mathim.com/A%5EtAx%20%3D%20A%5Et%28Ax%29.!', 287 | '\t

'), 288 | ('notextile. foo bar baz\n\np. quux\n', 289 | ' foo bar baz\n\n\t

quux

'), 290 | ('"foo":http://google.com/one--two', '\t

foo

'), 291 | # issue 24 colspan 292 | ('|\\2. spans two cols |\n| col 1 | col 2 |', '\t\n\t\t\n\t\t\t\n\t\t\n\t\t\n\t\t\t\n\t\t\t\n\t\t\n\t
spans two cols
col 1 col 2
'), 293 | # issue 2 escaping 294 | ('"foo ==(bar)==":#foobar', '\t

foo (bar)

'), 295 | # issue 14 newlines in extended pre blocks 296 | ("pre.. Hello\n\nAgain\n\np. normal text", '
Hello\n\nAgain
\n\n\t

normal text

'), 297 | # url with parentheses 298 | ('"python":http://en.wikipedia.org/wiki/Python_(programming_language)', '\t

python

'), 299 | # table with hyphen styles 300 | ('table(linkblog-thumbnail).\n|(linkblog-thumbnail-cell). apple|bear|', '\t\n\t\t\n\t\t\t\n\t\t\t\n\t\t\n\t
applebear
'), 301 | # issue 32 empty table cells 302 | ("|thing|||otherthing|", "\t\n\t\t\n\t\t\t\n\t\t\t\n\t\t\t\n\t\t\t\n\t\t\n\t
thingotherthing
"), 303 | # issue 36 link reference names http and https 304 | ('"signup":signup\n[signup]http://myservice.com/signup', '\t

signup

'), 305 | ('"signup":signup\n[signup]https://myservice.com/signup', '\t

signup

'), 306 | # nested formatting 307 | ("*_test text_*", "\t

test text

"), 308 | ("_*test text*_", "\t

test text

"), 309 | # quotes in code block 310 | ("'quoted string'", "\t

'quoted string'

"), 311 | ("
some preformatted text
other text", "\t

some preformatted text
other text

"), 312 | # at sign and notextile in table 313 | ("|@@|@@ @@|\n|*B1*|*B2* *B3*|", "\t\n\t\t\n\t\t\t\n\t\t\t\n\t\t\n\t\t\n\t\t\t\n\t\t\t\n\t\t\n\t
<A1><A2> <A3>
*B1**B2* *B3*
"), 314 | # cite attribute 315 | ('bq.:http://textism.com/ Text...', '\t
\n\t\t

Text…

\n\t
'), 316 | ('Hello ["(Mum) & dad"]', '\t

Hello [“(Mum) & dad”]

'), 317 | # Dimensions 318 | ( 319 | ('[1/2] x [1/4] and (1/2)" x [1/4]" and (1/2)\' x (1/4)\'\n\n' 320 | '(2 x 10) X (3 / 4) x (200 + 64)\n\n' 321 | '1 x 1 = 1\n\n' 322 | '1 x1 = 1\n\n' 323 | '1x 1 = 1\n\n' 324 | '1x1 = 1\n\n' 325 | '1 X 1 = 1\n\n' 326 | '1 X1 = 1\n\n' 327 | '1X 1 = 1\n\n' 328 | '1X1 = 1\n\n' 329 | 'What is 1 x 1?\n\n' 330 | 'What is 1x1?\n\n' 331 | 'What is 1 X 1?\n\n' 332 | 'What is 1X1?\n\n' 333 | '1 x 2 x 3 = 6\n\n' 334 | '1x2x3=6\n\n' 335 | '1x2 x 1x3 = 6\n\n' 336 | '2\' x 2\' = 4 sqft.\n\n' 337 | '2\'x 2\' = 4 sqft.\n\n' 338 | '2\' x2\' = 4 sqft.\n\n' 339 | '2\'x2\' = 4 sqft.\n\n' 340 | '2\' X 2\' = 4 sqft.\n\n' 341 | '2\'X 2\' = 4 sqft.\n\n' 342 | '2\' X2\' = 4 sqft.\n\n' 343 | '2\'X2\' = 4 sqft.\n\n' 344 | '2" x 2" = 4 sqin.\n\n' 345 | '2"x 2" = 4 sqin.\n\n' 346 | '2" x2" = 4 sqin.\n\n' 347 | '2"x2" = 4 sqin.\n\n' 348 | '2" X 2" = 4 sqin.\n\n' 349 | '2"X 2" = 4 sqin.\n\n' 350 | '2" X2" = 4 sqin.\n\n' 351 | '2"X2" = 4in[^2^].\n\n' 352 | 'What is 1.2 x 3.5?\n\n' 353 | 'What is .2 x .5?\n\n' 354 | 'What is 1.2x3.5?\n\n' 355 | 'What is .2x.5?\n\n' 356 | 'What is 1.2\' x3.5\'?\n\n' 357 | 'What is .2"x .5"?\n\n' 358 | '1 x $10.00 x -£ 1.23 x ¥20,000 x -¤120.00 x ฿1,000,000 x -€110,00\n\n'), 359 | 360 | ('\t

½ × ¼ and ½” × ¼” and ½’ × ¼’

\n\n' 361 | '\t

(2 × 10) × (3 / 4) × (200 + 64)

\n\n' 362 | '\t

1 × 1 = 1

\n\n' 363 | '\t

1 ×1 = 1

\n\n' 364 | '\t

1× 1 = 1

\n\n' 365 | '\t

1×1 = 1

\n\n' 366 | '\t

1 × 1 = 1

\n\n' 367 | '\t

1 ×1 = 1

\n\n' 368 | '\t

1× 1 = 1

\n\n' 369 | '\t

1×1 = 1

\n\n' 370 | '\t

What is 1 × 1?

\n\n' 371 | '\t

What is 1×1?

\n\n' 372 | '\t

What is 1 × 1?

\n\n' 373 | '\t

What is 1×1?

\n\n' 374 | '\t

1 × 2 × 3 = 6

\n\n' 375 | '\t

1×2×3=6

\n\n' 376 | '\t

1×2 × 1×3 = 6

\n\n' 377 | '\t

2’ × 2’ = 4 sqft.

\n\n' 378 | '\t

2’× 2’ = 4 sqft.

\n\n' 379 | '\t

2’ ×2’ = 4 sqft.

\n\n' 380 | '\t

2’×2’ = 4 sqft.

\n\n' 381 | '\t

2’ × 2’ = 4 sqft.

\n\n' 382 | '\t

2’× 2’ = 4 sqft.

\n\n' 383 | '\t

2’ ×2’ = 4 sqft.

\n\n' 384 | '\t

2’×2’ = 4 sqft.

\n\n' 385 | '\t

2” × 2” = 4 sqin.

\n\n' 386 | '\t

2”× 2” = 4 sqin.

\n\n' 387 | '\t

2” ×2” = 4 sqin.

\n\n' 388 | '\t

2”×2” = 4 sqin.

\n\n' 389 | '\t

2” × 2” = 4 sqin.

\n\n' 390 | '\t

2”× 2” = 4 sqin.

\n\n' 391 | '\t

2” ×2” = 4 sqin.

\n\n' 392 | '\t

2”×2” = 4in2.

\n\n' 393 | '\t

What is 1.2 × 3.5?

\n\n' 394 | '\t

What is .2 × .5?

\n\n' 395 | '\t

What is 1.2×3.5?

\n\n' 396 | '\t

What is .2×.5?

\n\n' 397 | '\t

What is 1.2’ ×3.5’?

\n\n' 398 | '\t

What is .2”× .5”?

\n\n' 399 | '\t

1 × $10.00 × -£ 1.23 × ¥20,000 × -¤120.00 × ฿1,000,000 × -€110,00

') 400 | ), 401 | # Empty note lists 402 | ('There should be nothing below.\n\nnotelist.', '\t

There should be nothing below.

\n\n\t'), 403 | # Empty things 404 | (('\'\'\n\n""\n\n%%\n\n^^\n\n&&\n\n**\n\n__\n\n--\n\n++\n\n~~\n\n{}\n\n' 405 | '[]\n\n()\n\n<>\n\n\\\\\n\n//\n\n??\n\n==\n\n@@\n\n##\n\n$$\n\n!!\n\n' 406 | '::\n\n;;\n\n..\n\n,,\n\n||\n\n` `\n\n\' \'\n\n" "\n\n% %\n\n^ ^\n\n' 407 | '& &\n\n* *\n\n_ _\n\n- -\n\n+ +\n\n~ ~\n\n{ }\n\n[ ]\n\n( )\n\n< >\n\n' 408 | '\\ \\\n\n/ /\n\n? ?\n\n= =\n\n@ @\n\n# #\n\n$ $\n\n! !\n\n: :\n\n; ;\n\n' 409 | '. .\n\n, ,'), 410 | ("\t

‘’

\n\n\t

“”

\n\n\t

%%

\n\n\t

^^

\n\n\t" 411 | "

&&

\n\n\t

**

\n\n\t

__

\n\n\t

\n\n\t

++

\n\n\t" 412 | "

~~

\n\n\t

{}

\n\n\t

[]

\n\n\t

()

\n\n\t

<>

\n\n\t

\\\\

\n\n\t" 413 | "

//

\n\n\t

??

\n\n\t

==

\n\n\t

\n\n\t

##

\n\n\t

$$

\n\n\t" 414 | "

!!

\n\n\t

::

\n\n\t

;;

\n\n\t

..

\n\n\t

,,

\n\n\t" 415 | "\n\t\t\n\t\t\t\n\t\t\n\t
\n\n\t

` `

\n\n\t

‘ ‘

\n\n\t" 416 | "

“ “

\n\n\t

% %

\n\n\t

^ ^

\n\n\t

& &

\n\n\t" 417 | "
    \n\t\t
  • *
  • \n\t
\n\n\t

_ _

\n\n\t

- -

\n\n\t

+ +

\n\n\t

~ ~

\n\n\t" 418 | "

{ }

\n\n\t

[ ]

\n\n\t

( )

\n\n\t

< >

\n\n\t

\\ \\

\n\n\t" 419 | "

/ /

\n\n\t

? ?

\n\n\t

= =

\n\n\t

\n\n\t
    \n\t\t
  1. #
  2. \n\t
\n\n\t" 420 | "

$ $

\n\n\t

! !

\n\n\t
\n\t\t
:
\n\t
\n\n\t
\n\t\t
;
\n\t
\n\n\t" 421 | "

. .

\n\n\t

, ,

")), 422 | # A lone standing comment must be preserved as is: 423 | # withouth wrapping it into a paragraph 424 | (('An ordinary block.\n\n' 425 | '\n'), 426 | '\t

An ordinary block.

\n\n'), 427 | # Headers must be "breakable", just like paragraphs. 428 | ('h1. Two line with *strong*\nheading\n', 429 | '\t

Two line with strong
\nheading

'), 430 | # Non-standalone ampersands should not be escaped 431 | (("“test”\n\n" 432 | "“test”\n\n" 433 | " test \n"), 434 | ("\t

test

\n\n" 435 | "\t

test

\n\n" 436 | "\t

 test 

")), 437 | # Nested and mixed multi-level ordered and unordered lists 438 | (("* bullet\n" 439 | "*# number\n" 440 | "*# number\n" 441 | "*#* bullet\n" 442 | "*# number\n" 443 | "*# number with\n" 444 | "a break\n" 445 | "* bullet\n" 446 | "** okay"), 447 | ("\t
    \n" 448 | "\t\t
  • bullet\n" 449 | "\t\t
      \n" 450 | "\t\t\t
    1. number
    2. \n" 451 | "\t\t\t
    3. number\n" 452 | "\t\t\t
        \n" 453 | "\t\t\t\t
      • bullet
      • \n" 454 | "\t\t\t
    4. \n" 455 | "\t\t\t
    5. number
    6. \n" 456 | "\t\t\t
    7. number with
      \n" 457 | "a break
    8. \n" 458 | "\t\t
  • \n" 459 | "\t\t
  • bullet\n" 460 | "\t\t
      \n" 461 | "\t\t\t
    • okay
    • \n" 462 | "\t\t
  • \n" 463 | "\t\t
")), 464 | # Checks proper insertion of
within table cells 465 | (("|-(cold) milk :=\n" 466 | "Nourishing beverage for baby cows. =:\n" 467 | "|"), 468 | ("\t\n" 469 | "\t\t\n" 470 | "\t\t\t\n" 474 | "\t\t\n\t
\n" 471 | "\t
milk
\n" 472 | "\t

Nourishing beverage for baby cows.

\n" 473 | "
")), 475 | # Long non-textile blocks 476 | ("notextile.. *a very*\n\n*long*\n\n*block*\n", "*a very*\n\n*long*\n\n*block*"), 477 | # Correct use of ‘ and ’ 478 | ("Here is a %(example)'spanned'% word.", 479 | '\t

Here is a ‘spanned’ word.

'), 480 | # Using $-links with link aliases 481 | ("\"$\":test\n[test]https://textpattern.com/start\n", 482 | "\t

textpattern.com/start

"), 483 | ('Please check on "$":test for any updates.\n[test]https://de.wikipedia.org/wiki/Übermensch', 484 | '\t

Please check on de.wikipedia.org/wiki/Übermensch for any updates.

'), 485 | # Make sure smileys don't get recognised as a definition list. 486 | (":(\n\n:)\n\n:( \n:( \n:( \n:) \n\nPinocchio!\n:^)\n\nBaboon!\n:=)\n\nWink!\n;)\n\n:[ \n:]\n\n;(\nsomething\ndark side\n:) \n\n;(c)[de] Item", 487 | '\t

:(

\n\n\t

:)

\n\n\t

:(
\n:(
\n:(
\n:)

\n\n\t

Pinocchio!
\n:^)

\n\n\t

Baboon!
\n:=)

\n\n\t

Wink!
\n;)

\n\n\t

:[
\n:]

\n\n\t

;(
\nsomething
\ndark side
\n:)

\n\n\t
\n\t\t
Item
\n\t
'), 488 | # Checking proper parsing of classes and IDs 489 | ("_(class1 class2#id1)text1_ -(foobarbaz#boom bang)text2-\n", 490 | '\t

text1 text2

'), 491 | # Tables with nested textile elements 492 | ("|!http://tester.local/logo.png!| !http://tester.local/logo.png! |", 493 | '\t\n\t\t\n\t\t\t\n\t\t\t\n\t\t\n\t
'), 494 | # Tables with colgroups 495 | (("|=. Testing colgroup and col syntax | \n" 496 | "|:\\5. 80 |\x20\n" 497 | "|a|b|c|d|e|\x20\n"), 498 | ('\t\n\t\n' 499 | '\t\n\t\n' 500 | '\t\t\n\t\t\t\n\t\t\t\n\t\t\t\n\t\t\t\n\t\t\t\n\t\t\n' 501 | '\t
Testing colgroup and col syntax
abcde
')), 502 | # Table column with an emphasis should not be confused with a heading 503 | ('|_touch_ this!| _touch_ this! |', 504 | '\t\n\t\t\n\t\t\t\n\t\t\t\n\t\t\n\t
touch this! touch this!
'), 505 | # Table with colgroup but no caption 506 | (("|:\\5. 80 |\x20\n" 507 | "|a|b|c|d|e|\x20\n"), 508 | ('\t\n' 509 | '\t\n\t\n' 510 | '\t\t\n\t\t\t\n\t\t\t\n\t\t\t\n\t\t\t\n\t\t\t\n\t\t\n' 511 | '\t
abcde
')), 512 | ) 513 | 514 | 515 | @pytest.mark.parametrize("input, expected_output", xhtml_known_values) 516 | def test_KnownValuesXHTML(input, expected_output): 517 | # XHTML 518 | output = textile.textile(input, html_type='xhtml') 519 | assert output == expected_output 520 | 521 | 522 | @pytest.mark.parametrize("input, expected_output", html_known_values) 523 | def test_KnownValuesHTML(input, expected_output): 524 | # HTML5 525 | output = textile.textile(input, html_type='html5') 526 | assert output == expected_output 527 | -------------------------------------------------------------------------------- /textile/__init__.py: -------------------------------------------------------------------------------- 1 | from .core import textile, textile_restricted, Textile # noqa: F401 2 | from .version import VERSION 3 | 4 | __all__ = ['textile', 'textile_restricted'] 5 | 6 | __version__ = VERSION 7 | -------------------------------------------------------------------------------- /textile/__main__.py: -------------------------------------------------------------------------------- 1 | import argparse 2 | import sys 3 | import textile 4 | 5 | 6 | def main(): 7 | """A CLI tool in the style of python's json.tool. In fact, this is mostly 8 | copied directly from that module. This allows us to create a stand-alone 9 | tool as well as invoking it via `python -m textile`.""" 10 | prog = 'textile' 11 | description = ('A simple command line interface for textile module ' 12 | 'to convert textile input to HTML output. 
This script ' 13 | 'accepts input as a file or stdin and can write out to ' 14 | 'a file or stdout.') 15 | parser = argparse.ArgumentParser(prog=prog, description=description) 16 | parser.add_argument('-v', '--version', action='store_true', 17 | help='show the version number and exit') 18 | parser.add_argument('infile', nargs='?', type=argparse.FileType(), 19 | help='a textile file to be converted') 20 | parser.add_argument('outfile', nargs='?', type=argparse.FileType('w'), 21 | help='write the output of infile to outfile') 22 | options = parser.parse_args() 23 | 24 | if options.version: 25 | print(textile.VERSION) 26 | sys.exit() 27 | 28 | infile = options.infile or sys.stdin 29 | outfile = options.outfile or sys.stdout 30 | with infile: 31 | output = textile.textile(''.join(infile.readlines())) 32 | with outfile: 33 | outfile.write(output) 34 | 35 | 36 | if __name__ == '__main__': # pragma: no cover 37 | main() 38 | -------------------------------------------------------------------------------- /textile/core.py: -------------------------------------------------------------------------------- 1 | # -*- coding: utf-8 -*- 2 | __copyright__ = """ 3 | Copyright (c) 2009, Jason Samsa, http://jsamsa.com/ 4 | Copyright (c) 2010, Kurt Raschke 5 | Copyright (c) 2004, Roberto A. F. De Almeida, http://dealmeida.net/ 6 | Copyright (c) 2003, Mark Pilgrim, http://diveintomark.org/ 7 | 8 | Original PHP Version: 9 | Copyright (c) 2003-2004, Dean Allen 10 | All rights reserved. 
11 | 12 | Thanks to Carlo Zottmann for refactoring 13 | Textile's procedural code into a class framework 14 | 15 | Additions and fixes Copyright (c) 2006 Alex Shiels http://thresholdstate.com/ 16 | 17 | """ 18 | import uuid 19 | from urllib.parse import urlparse, urlsplit, urlunsplit, quote, unquote 20 | from collections import OrderedDict 21 | from nh3 import clean 22 | 23 | from textile.regex_strings import (align_re_s, cls_re_s, pnct_re_s, 24 | regex_snippets, syms_re_s, table_span_re_s) 25 | from textile.utils import (decode_high, encode_high, encode_html, generate_tag, 26 | getimagesize, has_raw_text, human_readable_url, 27 | is_rel_url, is_valid_url, list_type, 28 | normalize_newlines, parse_attributes, pba) 29 | from textile.objects import Block, Table 30 | 31 | try: 32 | import regex as re 33 | except ImportError: 34 | import re 35 | 36 | 37 | def make_glyph_replacers(html_type, uid, glyph_defs): 38 | """ 39 | Generates a list of "replacers" (each is a pair consiting of 40 | a regular expression and a replacing pattern) that, 41 | when applied sequentially, replace some characters of the original 42 | text with their HTML codes to produce valid HTML. 43 | """ 44 | cur = ( 45 | r'(?:[{0}]{1}*)?'.format(regex_snippets['cur'], regex_snippets['space']) 46 | if regex_snippets['cur'] 47 | else r'') 48 | pre_result = [ 49 | # dimension sign (before apostrophes/quotes are replaced) 50 | (re.compile( 51 | r'([0-9]+[\])]?[\'"]? ?)[x]( ?[\[(]?)' 52 | r'(?=[+-]?{0}[0-9]*\.?[0-9]+)'.format(cur), 53 | flags=re.I | re.U), 54 | r'\1{dimension}\2'), 55 | # apostrophe's 56 | (re.compile( 57 | r"({0}|\))'({0})" 58 | .format(regex_snippets['wrd']), 59 | flags=re.U), 60 | r'\1{apostrophe}\2'), 61 | # back in '88 62 | (re.compile( 63 | r"({0})'(\d+{1}?)\b(?![.]?[{1}]*?')".format( 64 | regex_snippets['space'], regex_snippets['wrd']), 65 | flags=re.U), 66 | r'\1{apostrophe}\2'), 67 | # single opening following an open bracket. 
68 | (re.compile(r"([([{])'(?=\S)", flags=re.U), 69 | r'\1{quote_single_open}'), 70 | # single closing 71 | (re.compile( 72 | r"(\S)'(?={0}|{1}|<|$)".format(regex_snippets['space'], pnct_re_s), 73 | flags=re.U), 74 | r'\1{quote_single_close}'), 75 | # single opening 76 | (re.compile(r"'", re.U), r'{quote_single_open}'), 77 | # double opening following an open bracket. Allows things like 78 | # Hello ["(Mum) & dad"] 79 | (re.compile(r'([([{])"(?=\S)', flags=re.U), 80 | r'\1{quote_double_open}'), 81 | # double closing 82 | (re.compile( 83 | r'(\S)"(?={0}|{1}|<|$)'.format(regex_snippets['space'], pnct_re_s), 84 | flags=re.U), 85 | r'\1{quote_double_close}'), 86 | # double opening 87 | (re.compile(r'"'), r'{quote_double_open}'), 88 | # ellipsis 89 | (re.compile(r'([^.]?)\.{3}'), r'\1{ellipsis}'), 90 | # em dash 91 | (re.compile(r'(\s?)--(\s?)'), r'\1{emdash}\2'), 92 | # en dash 93 | (re.compile(r' - '), r' {endash} '), 94 | # trademark 95 | (re.compile( 96 | r'(\b ?|{0}|^)[([]TM[])]'.format(regex_snippets['space']), 97 | flags=re.I | re.U), 98 | r'\1{trademark}'), 99 | # registered 100 | (re.compile( 101 | r'(\b ?|{0}|^)[([]R[])]'.format(regex_snippets['space']), 102 | flags=re.I | re.U), 103 | r'\1{registered}'), 104 | # copyright 105 | (re.compile( 106 | r'(\b ?|{0}|^)[([]C[])]'.format(regex_snippets['space']), 107 | flags=re.I | re.U), 108 | r'\1{copyright}'), 109 | # 1/2 110 | (re.compile(r'[([]1\/2[])]'), r'{half}'), 111 | # 1/4 112 | (re.compile(r'[([]1\/4[])]'), r'{quarter}'), 113 | # 3/4 114 | (re.compile(r'[([]3\/4[])]'), r'{threequarters}'), 115 | # degrees 116 | (re.compile(r'[([]o[])]'), r'{degrees}'), 117 | # plus/minus 118 | (re.compile(r'[([]\+\/-[])]'), r'{plusminus}'), 119 | # 3+ uppercase acronym 120 | (re.compile( 121 | r'\b([{0}][{1}]{{2,}})\b(?:[(]([^)]*)[)])' 122 | .format(regex_snippets['abr'], regex_snippets['acr']), 123 | flags=re.U), 124 | (r'\1' if html_type == 'html5' 125 | else r'\1')), 126 | # 3+ uppercase 127 | (re.compile( 128 | 
r'({space}|^|[>(;-])([{abr}]{{3,}})([{nab}]*)' 129 | '(?={space}|{pnct}|<|$)(?=[^">]*?(<|$))' 130 | .format(space=regex_snippets['space'], 131 | abr=regex_snippets['abr'], 132 | nab=regex_snippets['nab'], 133 | pnct=pnct_re_s), 134 | re.U), 135 | r'\1{0}:glyph:\2\3'.format(uid)), 136 | ] 137 | return [(regex_obj, replacement.format(**glyph_defs)) 138 | for (regex_obj, replacement) in pre_result] 139 | 140 | 141 | class Textile(object): 142 | restricted_url_schemes = ('http', 'https', 'ftp', 'mailto') 143 | unrestricted_url_schemes = restricted_url_schemes + ( 144 | 'file', 'tel', 'callto', 'sftp', 'data') 145 | 146 | btag = ('bq', 'bc', 'notextile', 'pre', 'h[1-6]', r'fn\d+', 'p', '###') 147 | btag_lite = ('bq', 'bc', 'p') 148 | 149 | note_index = 1 150 | 151 | glyph_definitions = { 152 | 'quote_single_open': '‘', # noqa: E241 153 | 'quote_single_close': '’', # noqa: E241 154 | 'quote_double_open': '“', # noqa: E241 155 | 'quote_double_close': '”', # noqa: E241 156 | 'apostrophe': '’', # noqa: E241 157 | 'prime': '′', # noqa: E241 158 | 'prime_double': '″', # noqa: E241 159 | 'ellipsis': '…', # noqa: E241 160 | 'ampersand': '&', # noqa: E241 161 | 'emdash': '—', # noqa: E241 162 | 'endash': '–', # noqa: E241 163 | 'dimension': '×', # noqa: E241 164 | 'trademark': '™', # noqa: E241 165 | 'registered': '®', # noqa: E241 166 | 'copyright': '©', # noqa: E241 167 | 'half': '½', # noqa: E241 168 | 'quarter': '¼', # noqa: E241 169 | 'threequarters': '¾', # noqa: E241 170 | 'degrees': '°', # noqa: E241 171 | 'plusminus': '±', # noqa: E241 172 | } 173 | 174 | spanWrappers = ( 175 | ('[', ']'), 176 | ) 177 | 178 | def __init__(self, restricted=False, lite=False, noimage=False, 179 | get_sizes=False, html_type='xhtml', rel='', block_tags=True): 180 | """Textile properties that are common to regular textile and 181 | textile_restricted""" 182 | self.restricted = restricted 183 | self.lite = lite 184 | self.noimage = noimage 185 | self.get_sizes = get_sizes 186 | self.fn = {} 
187 | self.urlrefs = {} 188 | self.shelf = {} 189 | self.rel = rel 190 | self.html_type = html_type 191 | self.max_span_depth = 5 192 | self.span_depth = 0 193 | uid = uuid.uuid4().hex 194 | self.uid = 'textileRef:{0}:'.format(uid) 195 | self.linkPrefix = '{0}-'.format(uid) 196 | self.linkIndex = 0 197 | self.refCache = {} 198 | self.refIndex = 0 199 | self.block_tags = block_tags 200 | 201 | self.glyph_replacers = make_glyph_replacers( 202 | html_type, self.uid, self.glyph_definitions) 203 | 204 | if self.restricted is True: 205 | self.url_schemes = self.restricted_url_schemes 206 | else: 207 | self.url_schemes = self.unrestricted_url_schemes 208 | 209 | all_schemes_re_s = '|'.join([ 210 | '(?:{0})'.format(scheme) 211 | for scheme in self.url_schemes 212 | ]) 213 | self.url_ref_regex = re.compile( 214 | r'(?:(?<=^)|(?<=\s))\[(.+)\]\s?((?:{0}:\/\/|\/)\S+)(?=\s|$)'.format(all_schemes_re_s), 215 | re.U 216 | ) 217 | 218 | def parse(self, text, rel=None, sanitize=False): 219 | """Parse the input text as textile and return html output.""" 220 | self.notes = OrderedDict() 221 | self.unreferencedNotes = OrderedDict() 222 | self.notelist_cache = OrderedDict() 223 | 224 | if text.strip() == '': 225 | return text 226 | 227 | if self.restricted: 228 | text = encode_html(text, quotes=False) 229 | 230 | text = normalize_newlines(text) 231 | text = text.replace(self.uid, '') 232 | 233 | if self.block_tags: 234 | if self.lite: 235 | self.blocktag_allowlist = set(['bq', 'p', 'br']) 236 | text = self.block(text) 237 | else: 238 | self.blocktag_allowlist = set(['bq', 'p', 'br', 'bc', 'notextile', 239 | 'pre', 'h[1-6]', 240 | f"fn{regex_snippets['digit']}+", '###']) 241 | text = self.block(text) 242 | text = self.placeNoteLists(text) 243 | else: 244 | # Inline markup (em, strong, sup, sub, del etc). 245 | text = self.span(text) 246 | 247 | # Glyph level substitutions (mainly typographic -- " & ' => curly 248 | # quotes, -- => em-dash etc. 
249 | text = self.glyphs(text) 250 | 251 | if rel: 252 | self.rel = ' rel="{0}"'.format(rel) 253 | 254 | text = self.getRefs(text) 255 | 256 | if not self.lite: 257 | text = self.placeNoteLists(text) 258 | text = self.retrieve(text) 259 | text = text.replace('{0}:glyph:'.format(self.uid), '') 260 | 261 | if sanitize: 262 | text = clean(text, tags=self.blocktag_allowlist) 263 | 264 | text = self.retrieveTags(text) 265 | text = self.retrieveURLs(text) 266 | 267 | # if the text contains a break tag (
or
) not followed by 268 | # a newline, replace it with a new style break tag and a newline. 269 | text = re.sub(r'(?!\n)', '
\n', text) 270 | 271 | text = text.rstrip('\n') 272 | 273 | return text 274 | 275 | def table(self, text): 276 | text = "{0}\n\n".format(text) 277 | pattern = re.compile(r'^(?:table(?P_?{s}{a}{c})\.' 278 | r'(?P.*?)\n)?^(?P{a}{c}\.? ?\|.*\|)' 279 | r'[\s]*\n\n'.format( 280 | **{'s': table_span_re_s, 'a': align_re_s, 281 | 'c': cls_re_s}), flags=re.S | re.M | re.U) 282 | match = pattern.search(text) 283 | if match: 284 | table = Table(self, **match.groupdict()) 285 | return table.process() 286 | return text 287 | 288 | def textileLists(self, text): 289 | pattern = re.compile(r'^((?:[*;:]+|[*;:#]*#(?:_|\d+)?){0}[ .].*)$' 290 | r'(?![^#*;:])'.format(cls_re_s), re.U | re.M | re.S) 291 | return pattern.sub(self.fTextileList, text) 292 | 293 | def fTextileList(self, match): 294 | text = re.split(r'\n(?=[*#;:])', match.group(), flags=re.M) 295 | pt = '' 296 | result = [] 297 | ls = OrderedDict() 298 | for i, line in enumerate(text): 299 | try: 300 | nextline = text[i + 1] 301 | except IndexError: 302 | nextline = '' 303 | 304 | m = re.search(r"^(?P[#*;:]+)(?P_|\d+)?(?P{0})[ .]" 305 | "(?P.*)$".format(cls_re_s), line, re.S) 306 | if m: 307 | tl, start, atts, content = m.groups() 308 | content = content.strip() 309 | else: 310 | result.append(line) 311 | continue 312 | 313 | nl = '' 314 | ltype = list_type(tl) 315 | tl_tags = {';': 'dt', ':': 'dd'} 316 | litem = tl_tags.get(tl[0], 'li') 317 | 318 | showitem = len(content) > 0 319 | 320 | # handle list continuation/start attribute on ordered lists 321 | if ltype == 'o': 322 | if not hasattr(self, 'olstarts'): 323 | self.olstarts = {tl: 1} 324 | 325 | # does the first line of this ol have a start attribute 326 | if len(tl) > len(pt): 327 | # no, set it to 1. 
328 | if start is None: 329 | self.olstarts[tl] = 1 330 | # yes, set it to the given number 331 | elif start != '_': 332 | self.olstarts[tl] = int(start) 333 | # we won't need to handle the '_' case, we'll just 334 | # print out the number when it's needed 335 | 336 | # put together the start attribute if needed 337 | if len(tl) > len(pt) and start is not None: 338 | start = ' start="{0}"'.format(self.olstarts[tl]) 339 | 340 | # This will only increment the count for list items, not 341 | # definition items 342 | if showitem: 343 | # Assume properly formatted input 344 | try: 345 | self.olstarts[tl] = self.olstarts[tl] + 1 346 | # if we get here, we've got some poor textile formatting. 347 | # add this type of list to olstarts and assume we'll start 348 | # it at 1. expect screwy output. 349 | except KeyError: 350 | self.olstarts[tl] = 1 351 | 352 | nm = re.match(r"^(?P[#\*;:]+)(_|[\d]+)?{0}" 353 | r"[ .].*".format(cls_re_s), nextline) 354 | if nm: 355 | nl = nm.group('nextlistitem') 356 | 357 | # We need to handle nested definition lists differently. If 358 | # the next tag is a dt (';') of a lower nested level than the 359 | # current dd (':'), 360 | if ';' in pt and ':' in tl: 361 | ls[tl] = 2 362 | 363 | atts = pba(atts, restricted=self.restricted) 364 | tabs = '\t' * len(tl) 365 | # If start is still None, set it to '', else leave the value that 366 | # we've already formatted. 
367 | start = start or '' 368 | # if this item tag isn't in the list, create a new list and 369 | # item, else just create the item 370 | if tl not in ls: 371 | ls[tl] = 1 372 | itemtag = ("\n{0}\t<{1}>{2}".format(tabs, litem, content) if 373 | showitem else '') 374 | line = "<{0}l{1}{2}>{3}".format(ltype, atts, start, itemtag) 375 | else: 376 | line = ("\t<{0}{1}>{2}".format(litem, atts, content) if 377 | showitem else '') 378 | line = '{0}{1}'.format(tabs, line) 379 | if len(nl) <= len(tl): 380 | if showitem: 381 | line = "{0}".format(line, litem) 382 | # work backward through the list closing nested lists/items 383 | for k, v in reversed(list(ls.items())): 384 | if len(k) > len(nl): 385 | if v != 2: 386 | line = "{0}\n{1}".format( 387 | line, tabs, list_type(k)) 388 | if len(k) > 1 and v != 2: 389 | line = "{0}".format(line, litem) 390 | del ls[k] 391 | # Remember the current Textile tag: 392 | pt = tl 393 | result.append(line) 394 | return self.doTagBr(litem, "\n".join(result)) 395 | 396 | def doTagBr(self, tag, input): 397 | return re.compile(r'<({0})([^>]*?)>(.*)()'.format(re.escape(tag)), 398 | re.S).sub(self.doBr, input) 399 | 400 | def doPBr(self, in_): 401 | return (re 402 | .compile(r'<(p|h[1-6])([^>]*?)>(.*)()', re.S) 403 | .sub(self.fPBr, in_)) 404 | 405 | def fPBr(self, m): 406 | content = m.group(3) 407 | content = ( 408 | re.compile(r"{0}*\n(?![{0}|])".format(regex_snippets['space']), 409 | re.I) 410 | .sub("\n", content)) 411 | content = re.compile(r"\n(?![\s|])").sub('
', content) 412 | return '<{0}{1}>{2}{3}'.format(m.group(1), m.group(2), content, m.group(4)) 413 | 414 | def doBr(self, match): 415 | content = ( 416 | re.compile( 417 | r'(.+)(?!(?<=|||
)' 418 | r'|(?<=
)|(?<=
))\n(?![#*;:\s|])', 419 | re.I) 420 | .sub(r'\1
', match.group(3))) 421 | return '<{0}{1}>{2}{3}'.format(match.group(1), match.group(2), content, 422 | match.group(4)) 423 | 424 | def block(self, text): 425 | if not self.lite: 426 | tre = '|'.join(self.btag) 427 | else: 428 | tre = '|'.join(self.btag_lite) 429 | # split the text by two or more newlines, retaining the newlines in the 430 | # split list 431 | text = re.split(r'(\n{2,})', text) 432 | 433 | # some blocks, when processed, will ask us to output nothing, if that's 434 | # the case, we'd want to drop the whitespace which follows it. 435 | eat_whitespace = False 436 | 437 | # check to see if previous block has already been escaped 438 | escaped = False 439 | 440 | # check if multiline paragraph (p..) tags

..

are added to line 441 | multiline_para = False 442 | 443 | tag = 'p' 444 | atts = cite = ext = '' 445 | 446 | out = [] 447 | 448 | for line in text: 449 | # the line is just whitespace, add it to the output, and move on 450 | if not line.strip(): 451 | if not eat_whitespace: 452 | out.append(line) 453 | continue 454 | 455 | eat_whitespace = False 456 | 457 | pattern = (r'^(?P{0})(?P{1}{2})\.(?P\.?)' 458 | r'(?::(?P\S+))? (?P.*)$'.format( 459 | tre, align_re_s, cls_re_s)) 460 | match = re.search(pattern, line, flags=re.S | re.U) 461 | # tag specified on this line. 462 | if match: 463 | # if we had a previous extended tag but not this time, close up 464 | # the tag 465 | if ext and out: 466 | # it's out[-2] because the last element in out is the 467 | # whitespace that preceded this line 468 | if not escaped: 469 | content = encode_html(out[-2], quotes=True) 470 | escaped = True 471 | else: 472 | content = out[-2] 473 | 474 | if not multiline_para: 475 | # block will have been defined in a previous run of the 476 | # loop 477 | content = generate_tag(block.inner_tag, content, # noqa: F821 478 | block.inner_atts) # noqa: F821 479 | content = generate_tag(block.outer_tag, content, # noqa: F821 480 | block.outer_atts) # noqa: F821 481 | out[-2] = content 482 | tag, atts, ext, cite, content = match.groups() 483 | block = Block(self, **match.groupdict()) 484 | inner_block = generate_tag(block.inner_tag, block.content, 485 | block.inner_atts) 486 | # code tags and raw text won't be indented inside outer_tag. 487 | if block.inner_tag != 'code' and not has_raw_text(inner_block): 488 | inner_block = "\n\t\t{0}\n\t".format(inner_block) 489 | if ext: 490 | line = block.content 491 | else: 492 | line = generate_tag(block.outer_tag, inner_block, 493 | block.outer_atts) 494 | # pre tags and raw text won't be indented. 
495 | if block.outer_tag != 'pre' and not has_raw_text(line): 496 | line = "\t{0}".format(line) 497 | 498 | # set having paragraph tags to false 499 | if block.tag == 'p' and ext: 500 | multiline_para = False 501 | # no tag specified 502 | else: 503 | # if we're inside an extended block, add the text from the 504 | # previous line to the front. 505 | if ext and out: 506 | if block.tag == 'p': 507 | line = generate_tag(block.tag, line, block.outer_atts) 508 | multiline_para = True 509 | line = '{0}{1}'.format(out.pop(), line) 510 | # the logic in the if statement below is a bit confusing in 511 | # php-textile. I'm still not sure I understand what the php 512 | # code is doing. Something tells me it's a phpsadness. Anyway, 513 | # this works, and is much easier to understand: if we're not in 514 | # an extension, and the line doesn't begin with a space, treat 515 | # it like a block to insert. Lines that begin with a space are 516 | # not processed as a block. 517 | if not ext and not line[0] == ' ': 518 | block = Block(self, tag, atts, ext, cite, line) 519 | # if the block contains html tags, generate_tag would 520 | # mangle it, so process as is. 521 | if block.tag == 'p' and not has_raw_text(block.content): 522 | line = block.content 523 | else: 524 | line = generate_tag(block.outer_tag, block.content, 525 | block.outer_atts) 526 | line = "\t{0}".format(line) 527 | else: 528 | if block.tag in ('pre', 'notextile') or block.inner_tag == 'code': 529 | line = self.shelve(encode_html(line, quotes=True)) 530 | else: 531 | line = self.graf(line) 532 | 533 | if block.tag == 'p': 534 | escaped = True 535 | 536 | if block.tag == 'p' and ext and not multiline_para: 537 | line = generate_tag(block.tag, line, block.outer_atts) 538 | multiline_para = True 539 | else: 540 | line = self.doPBr(line) 541 | if not block.tag == 'p': 542 | multiline_para = False 543 | 544 | line = line.replace('
', '
') 545 | 546 | # if we're in an extended block, and we haven't specified a new 547 | # tag, join this line to the last item of the output 548 | if ext and not match: 549 | last_item = out.pop() 550 | out.append('{0}{1}'.format(last_item, line)) 551 | elif not block.eat: 552 | # or if it's a type of block which indicates we shouldn't drop 553 | # it, add it to the output. 554 | out.append(line) 555 | 556 | if not ext: 557 | tag = 'p' 558 | atts = '' 559 | cite = '' 560 | 561 | # if it's a block we should drop, don't keep the whitespace which 562 | # will come after it. 563 | if block.eat: 564 | eat_whitespace = True 565 | 566 | # at this point, we've gone through all the lines. if there's still an 567 | # extension in effect, we close it here 568 | if ext and out and not block.tag == 'p': 569 | content = out.pop() 570 | content = generate_tag(block.inner_tag, content, block.inner_atts) 571 | content = generate_tag(block.outer_tag, content, block.outer_atts) 572 | out.append(content) 573 | return ''.join(out) 574 | 575 | def footnoteRef(self, text): 576 | # somehow php-textile gets away with not capturing the space. 577 | return re.compile(r'(?<=\S)\[(?P{0}+)(?P!?)\]' 578 | r'(?P{1}?)'.format( 579 | regex_snippets['digit'], regex_snippets['space']), 580 | re.U).sub(self.footnoteID, text) 581 | 582 | def footnoteID(self, m): 583 | fn_att = OrderedDict({'class': 'footnote'}) 584 | if m.group('id') not in self.fn: 585 | self.fn[m.group('id')] = '{0}{1}'.format( 586 | self.linkPrefix, self._increment_link_index()) 587 | fnid = self.fn[m.group('id')] 588 | fn_att['id'] = 'fnrev{0}'.format(fnid) 589 | fnid = self.fn[m.group('id')] 590 | footref = generate_tag('a', m.group('id'), {'href': '#fn{0}'.format( 591 | fnid)}) 592 | if '!' 
== m.group('nolink'): 593 | footref = m.group('id') 594 | footref = generate_tag('sup', footref, fn_att) 595 | return '{0}{1}'.format(footref, m.group('space')) 596 | 597 | def glyphs(self, text): 598 | """ 599 | Because of the split command, the regular expressions are different for 600 | when the text at the beginning and the rest of the text. 601 | for example: 602 | let's say the raw text provided is "*Here*'s some textile" 603 | before it gets to this glyphs method, the text has been converted to 604 | "Here's some textile" 605 | When run through the split, we end up with ["", "Here", 606 | "", "'s some textile"]. The re.search that follows tells it 607 | not to ignore html tags. 608 | If the single quote is the first character on the line, it's an open 609 | single quote. If it's the first character of one of those splits, it's 610 | an apostrophe or closed single quote, but the regex will bear that out. 611 | A similar situation occurs for double quotes as well. 612 | So, for the first pass, we use a set of regexes from 613 | the initial_glyph_replacers. 
For all remaining passes, 614 | we use glyph_replacers 615 | """ 616 | text = text.rstrip('\n') 617 | result = [] 618 | standalone_amp_re = re.compile( 619 | r"&(?!#[0-9]+;|#x[a-f0-9]+;|[a-z][a-z0-9]*;)", 620 | flags=re.I) 621 | html_amp_symbol = self.glyph_definitions['ampersand'] 622 | # split the text by any angle-bracketed tags 623 | lines = re.compile(r'(<[\w/!?].*?>)', re.U | re.S).split(text) 624 | for i, line in enumerate(lines): 625 | if i % 2 == 0: 626 | if not self.restricted: 627 | # Raw < > & chars have already been encoded 628 | # when in restricted mode 629 | line = ( 630 | standalone_amp_re 631 | .sub(html_amp_symbol, line) 632 | .replace('<', '<') 633 | .replace('>', '>')) 634 | for s, r in self.glyph_replacers: 635 | line = s.sub(r, line) 636 | result.append(line) 637 | return ''.join(result) 638 | 639 | def getRefs(self, text): 640 | """Capture and store URL references in self.urlrefs.""" 641 | return self.url_ref_regex.sub(self.refs, text) 642 | 643 | def refs(self, match): 644 | flag, url = match.groups() 645 | self.urlrefs[flag] = url 646 | return '' 647 | 648 | def relURL(self, url): 649 | scheme = urlparse(url)[0] 650 | if scheme and scheme not in self.url_schemes: 651 | return '#' 652 | return url 653 | 654 | def shelve(self, text): 655 | self.refIndex = self.refIndex + 1 656 | itemID = '{0}{1}:shelve'.format(self.uid, self.refIndex) 657 | self.shelf[itemID] = text 658 | return itemID 659 | 660 | def retrieve(self, text): 661 | while True: 662 | old = text 663 | for k, v in self.shelf.items(): 664 | text = text.replace(k, v) 665 | if text == old: 666 | break 667 | return text 668 | 669 | def graf(self, text): 670 | if not self.lite: 671 | text = self.noTextile(text) 672 | text = self.code(text) 673 | 674 | text = self.getHTMLComments(text) 675 | 676 | text = self.getRefs(text) 677 | text = self.links(text) 678 | 679 | if not self.noimage: 680 | text = self.image(text) 681 | 682 | if not self.lite: 683 | text = self.table(text) 684 | text = 
self.redcloth_list(text) 685 | text = self.textileLists(text) 686 | 687 | text = self.span(text) 688 | text = self.footnoteRef(text) 689 | text = self.noteRef(text) 690 | text = self.glyphs(text) 691 | 692 | return text.rstrip('\n') 693 | 694 | def links(self, text): 695 | """For some reason, the part of the regex below that matches the url 696 | does not match a trailing parenthesis. It gets caught by tail, and 697 | we check later to see if it should be included as part of the url.""" 698 | text = self.markStartOfLinks(text) 699 | 700 | return self.replaceLinks(text) 701 | 702 | def markStartOfLinks(self, text): 703 | """Finds and marks the start of well formed links in the input text.""" 704 | # Slice text on '":' boundaries. These always occur in 705 | # inline links between the link text and the url part and are much more 706 | # infrequent than '"' characters so we have less possible links to 707 | # process. 708 | slice_re = re.compile(r'":(?={0})'.format(regex_snippets['char'])) 709 | slices = slice_re.split(text) 710 | output = [] 711 | 712 | if len(slices) > 1: 713 | # There are never any start of links in the last slice, so pop it 714 | # off (we'll glue it back later). 715 | last_slice = slices.pop() 716 | 717 | for s in slices: 718 | # If there is no possible start quote then this slice is not 719 | # a link 720 | if '"' not in s: 721 | output.append(s) 722 | continue 723 | # Cut this slice into possible starting points wherever we find 724 | # a '"' character. Any of these parts could represent the start 725 | # of the link text - we have to find which one. 726 | possible_start_quotes = s.split('"') 727 | 728 | # Start our search for the start of the link with the closest 729 | # prior quote mark. 730 | possibility = possible_start_quotes.pop() 731 | 732 | # Init the balanced count. If this is still zero at the end of 733 | # our do loop we'll mark the " that caused it to balance as the 734 | # start of the link and move on to the next slice. 
735 | balanced = 0 736 | linkparts = [] 737 | i = 0 738 | 739 | while balanced != 0 or i == 0: # pragma: no branch 740 | # Starting at the end, pop off the previous part of the 741 | # slice's fragments. 742 | 743 | # Add this part to those parts that make up the link text. 744 | linkparts.append(possibility) 745 | 746 | if len(possibility) > 0: 747 | # did this part inc or dec the balanced count? 748 | if re.search(r'^\S|=$', possibility, flags=re.U): # pragma: no branch 749 | balanced = balanced - 1 750 | if re.search(r'\S$', possibility, flags=re.U): # pragma: no branch 751 | balanced = balanced + 1 752 | try: 753 | possibility = possible_start_quotes.pop() 754 | except IndexError: 755 | break 756 | else: 757 | # If quotes occur next to each other, we get zero 758 | # length strings. eg. ...""Open the door, 759 | # HAL!"":url... In this case we count a zero length in 760 | # the last position as a closing quote and others as 761 | # opening quotes. 762 | if i == 0: 763 | balanced = balanced + 1 764 | else: 765 | balanced = balanced - 1 766 | i = i + 1 767 | 768 | try: 769 | possibility = possible_start_quotes.pop() 770 | except IndexError: # pragma: no cover 771 | # If out of possible starting segments we back the 772 | # last one from the linkparts array 773 | linkparts.pop() 774 | break 775 | # If the next possibility is empty or ends in a space 776 | # we have a closing ". 777 | if (possibility == '' or possibility.endswith(' ')): 778 | # force search exit 779 | balanced = 0 780 | 781 | if balanced <= 0: 782 | possible_start_quotes.append(possibility) 783 | break 784 | 785 | # Rebuild the link's text by reversing the parts and sticking 786 | # them back together with quotes. 787 | linkparts.reverse() 788 | link_content = '"'.join(linkparts) 789 | # Rebuild the remaining stuff that goes before the link but 790 | # that's already in order. 
791 | pre_link = '"'.join(possible_start_quotes) 792 | # Re-assemble the link starts with a specific marker for the 793 | # next regex. 794 | o = '{0}{1}linkStartMarker:"{2}'.format(pre_link, self.uid, 795 | link_content) 796 | output.append(o) 797 | 798 | # Add the last part back 799 | output.append(last_slice) 800 | # Re-assemble the full text with the start and end markers 801 | text = '":'.join(output) 802 | 803 | return text 804 | 805 | def replaceLinks(self, text): 806 | """Replaces links with tokens and stores them on the shelf.""" 807 | stopchars = r"\s|^'\"*" 808 | pattern = r""" 809 | (?P
\[)?           # Optionally open with a square bracket eg. Look ["here":url]
 810 |             {0}linkStartMarker:"   # marks start of the link
 811 |             (?P(?:.|\n)*?)  # grab the content of the inner "..." part of the link, can be anything but
 812 |                                    # do not worry about matching class, id, lang or title yet
 813 |             ":                     # literal ": marks end of atts + text + title block
 814 |             (?P[^{1}]*)      # url upto a stopchar
 815 |         """.format(self.uid, stopchars)
 816 |         text = re.compile(pattern, flags=re.X | re.U).sub(self.fLink, text)
 817 |         return text
 818 | 
 819 |     def fLink(self, m):
 820 |         in_ = m.group()
 821 |         pre, inner, url = m.groups()
 822 |         pre = pre or ''
 823 | 
 824 |         if inner == '':
 825 |             return '{0}"{1}":{2}'.format(pre, inner, url)
 826 | 
 827 |         m = re.search(r'''^
 828 |             (?P{0})                # $atts (if any)
 829 |             {1}*                         # any optional spaces
 830 |             (?P                    # $text is...
 831 |                 (!.+!)                   #     an image
 832 |             |                            #   else...
 833 |                 .+?                      #     link text
 834 |             )                            # end of $text
 835 |             (?:\((?P[^)]+?)\))?   # $title (if any)
 836 |             $'''.format(cls_re_s, regex_snippets['space']), inner,
 837 |                       flags=re.X | re.U)
 838 | 
 839 |         atts = (m and m.group('atts')) or ''
 840 |         text = (m and m.group('text')) or inner
 841 |         title = (m and m.group('title')) or ''
 842 | 
 843 |         pop, tight = '', ''
 844 |         counts = {'[': None, ']': url.count(']'), '(': None, ')': None}
 845 | 
 846 |         # Look for footnotes or other square-bracket delimited stuff at the end
 847 |         # of the url...
 848 |         #
 849 |         # eg. "text":url][otherstuff... will have "[otherstuff" popped back
 850 |         # out.
 851 |         #
 852 |         # "text":url?q[]=x][123]    will have "[123]" popped off the back, the
 853 |         # remaining closing square brackets will later be tested for balance
 854 |         if (counts[']']):
 855 |             m = re.search(r'(?P<url>^.*\])(?P<tight>\[.*?)$', url, flags=re.U)
 856 |             if m:
 857 |                 url, tight = m.groups()
 858 | 
 859 |         # Split off any trailing text that isn't part of an array assignment.
 860 |         # eg. "text":...?q[]=value1&q[]=value2 ... is ok
 861 |         # "text":...?q[]=value1]following  ... would have "following" popped
 862 |         # back out and the remaining square bracket will later be tested for
 863 |         # balance
 864 |         if (counts[']']):
 865 |             m = re.search(r'(?P<url>^.*\])(?!=)(?P<end>.*?)$', url, flags=re.U)
 866 |             url = m.group('url')
 867 |             tight = '{0}{1}'.format(m.group('end'), tight)
 868 | 
 869 |         # Now we have the array of all the multi-byte chars in the url we will
 870 |         # parse the  uri backwards and pop off  any chars that don't belong
 871 |         # there (like . or , or unmatched brackets of various kinds).
 872 |         first = True
 873 |         popped = True
 874 | 
 875 |         counts[']'] = url.count(']')
 876 |         url_chars = list(url)
 877 | 
 878 |         def _endchar(c, pop, popped, url_chars, counts, pre):
 879 |             """Textile URL shouldn't end in these characters, we pop them off
 880 |             the end and push them out the back of the url again."""
 881 |             pop = '{0}{1}'.format(c, pop)
 882 |             url_chars.pop()
 883 |             popped = True
 884 |             return pop, popped, url_chars, counts, pre
 885 | 
 886 |         def _rightanglebracket(c, pop, popped, url_chars, counts, pre):
 887 |             url_chars.pop()
 888 |             urlLeft = ''.join(url_chars)
 889 | 
 890 |             m = re.search(r'(?P<url_chars>.*)(?P<tag><\/[a-z]+)$', urlLeft)
 891 |             url_chars = m.group('url_chars')
 892 |             pop = '{0}{1}{2}'.format(m.group('tag'), c, pop)
 893 |             popped = True
 894 |             return pop, popped, url_chars, counts, pre
 895 | 
 896 |         def _closingsquarebracket(c, pop, popped, url_chars, counts, pre):
 897 |             """If we find a closing square bracket we are going to see if it is
 898 |             balanced.  If it is balanced with matching opening bracket then it
 899 |             is part of the URL else we spit it back out of the URL."""
 900 |             # If counts['['] is None, count the occurrences of '['
 901 |             counts['['] = counts['['] or url.count('[')
 902 | 
 903 |             if counts['['] == counts[']']:
 904 |                 # It is balanced, so keep it
 905 |                 url_chars.append(c)
 906 |             else:
 907 |                 # In the case of un-matched closing square brackets we just eat
 908 |                 # it
 909 |                 popped = True
 910 |                 url_chars.pop()
 911 |                 counts[']'] = counts[']'] - 1
 912 |                 if first:  # pragma: no branch
 913 |                     pre = ''
 914 |             return pop, popped, url_chars, counts, pre
 915 | 
 916 |         def _closingparenthesis(c, pop, popped, url_chars, counts, pre):
 917 |             if counts[')'] is None:  # pragma: no branch
 918 |                 counts['('] = url.count('(')
 919 |                 counts[')'] = url.count(')')
 920 | 
 921 |             if counts['('] != counts[')']:
 922 |                 # Unbalanced so spit it out the back end
 923 |                 popped = True
 924 |                 pop = '{0}{1}'.format(url_chars.pop(), pop)
 925 |                 counts[')'] = counts[')'] - 1
 926 |             return pop, popped, url_chars, counts, pre
 927 | 
 928 |         def _casesdefault(c, pop, popped, url_chars, counts, pre):
 929 |             return pop, popped, url_chars, counts, pre
 930 | 
 931 |         cases = {
 932 |             '!': _endchar,
 933 |             '?': _endchar,
 934 |             ':': _endchar,
 935 |             ';': _endchar,
 936 |             '.': _endchar,
 937 |             ',': _endchar,
 938 |             '>': _rightanglebracket,
 939 |             ']': _closingsquarebracket,
 940 |             ')': _closingparenthesis,
 941 |         }
 942 |         for c in url_chars[-1::-1]:  # pragma: no branch
 943 |             popped = False
 944 |             pop, popped, url_chars, counts, pre = cases.get(
 945 |                 c, _casesdefault)(c, pop, popped, url_chars, counts, pre)
 946 |             first = False
 947 |             if popped is False:
 948 |                 break
 949 | 
 950 |         url = ''.join(url_chars)
 951 |         uri_parts = urlsplit(url)
 952 | 
 953 |         scheme_in_list = uri_parts.scheme in self.url_schemes
 954 |         valid_scheme = (uri_parts.scheme and scheme_in_list)
 955 |         if not is_valid_url(url) and not valid_scheme:
 956 |             return in_.replace('{0}linkStartMarker:'.format(self.uid), '')
 957 | 
 958 |         if text == '$':
 959 |             if valid_scheme:
 960 |                 text = human_readable_url(url)
 961 |             else:
 962 |                 ref_url = self.urlrefs.get(url)
 963 |                 if ref_url is not None:
 964 |                     text = human_readable_url(ref_url)
 965 |                 else:
 966 |                     text = url
 967 | 
 968 |         text = text.strip()
 969 |         title = encode_html(title)
 970 | 
 971 |         if not self.noimage:  # pragma: no branch
 972 |             text = self.image(text)
 973 |         text = self.span(text)
 974 |         text = self.glyphs(text)
 975 |         url = self.shelveURL(self.encode_url(urlunsplit(uri_parts)))
 976 |         attributes = parse_attributes(atts, restricted=self.restricted)
 977 |         attributes['href'] = url
 978 |         if title:
 979 |             # if the title contains unicode data, it is annoying to get Python
 980 |             # 2.6 and all the latter versions working properly.  But shelving
 981 |             # the title is a quick and dirty solution.
 982 |             attributes['title'] = self.shelve(title)
 983 |         if self.rel:
 984 |             attributes['rel'] = self.rel
 985 |         a_text = generate_tag('a', text, attributes)
 986 |         a_shelf_id = self.shelve(a_text)
 987 | 
 988 |         out = '{0}{1}{2}{3}'.format(pre, a_shelf_id, pop, tight)
 989 | 
 990 |         return out
 991 | 
 992 |     def encode_url(self, url):
 993 |         """
 994 |         Converts a (unicode) URL to an ASCII URL, with the domain part
 995 |         IDNA-encoded and the path part %-encoded (as per RFC 3986).
 996 | 
 997 |         Fixed version of the following code fragment from Stack Overflow:
 998 |             http://stackoverflow.com/a/804380/72656
 999 |         """
1000 |         # parse it
1001 |         parsed = urlsplit(url)
1002 | 
1003 |         if parsed.netloc:
1004 |             # divide the netloc further
1005 |             netloc_pattern = re.compile(r"""
1006 |                 (?:(?P<user>[^:@]+)(?::(?P<password>[^:@]+))?@)?
1007 |                 (?P<host>[^:]+)
1008 |                 (?::(?P<port>[0-9]+))?
1009 |             """, re.X | re.U)
1010 |             netloc_parsed = netloc_pattern.match(parsed.netloc).groupdict()
1011 |         else:
1012 |             netloc_parsed = {'user': '', 'password': '', 'host': '', 'port': ''}
1013 | 
1014 |         # encode each component
1015 |         scheme = parsed.scheme
1016 |         user = netloc_parsed['user'] and quote(netloc_parsed['user'])
1017 |         password = (
1018 |             netloc_parsed['password'] and quote(netloc_parsed['password'])
1019 |         )
1020 |         host = netloc_parsed['host']
1021 |         port = netloc_parsed['port'] and netloc_parsed['port']
1022 |         # the below splits the path portion of the url by slashes, translates
1023 |         # percent-encoded characters back into strings, then re-percent-encodes
1024 |         # what's necessary. Sounds screwy, but the url could include encoded
1025 |         # slashes, and this is a way to clean that up. It branches for PY2/3
1026 |         # because the quote and unquote functions expects different input
1027 |         # types: unicode strings for PY2 and str for PY3.
1028 |         path_parts = (quote(unquote(pce), b'') for pce in
1029 |                       parsed.path.split('/'))
1030 |         path = '/'.join(path_parts)
1031 | 
1032 |         # put it back together
1033 |         netloc = ''
1034 |         if user:
1035 |             netloc = '{0}{1}'.format(netloc, user)
1036 |             if password:
1037 |                 netloc = '{0}:{1}'.format(netloc, password)
1038 |             netloc = '{0}@'.format(netloc)
1039 |         netloc = '{0}{1}'.format(netloc, host)
1040 |         if port:
1041 |             netloc = '{0}:{1}'.format(netloc, port)
1042 |         return urlunsplit((scheme, netloc, path, parsed.query, parsed.fragment))
1043 | 
    def span(self, text):
        """Parse inline span markup (e.g. *strong*, _em_, ??cite??) in
        *text*, handing each match to fSpan for conversion to HTML.

        Spans can nest, so this is re-entered via fSpan; span_depth guards
        against runaway recursion by refusing to parse deeper than
        max_span_depth.
        """
        # Escaped span delimiters.  Double-char forms (**, ??, __) come
        # before their single-char counterparts so they match first.
        qtags = (r'\*\*', r'\*', r'\?\?', r'\-', r'__',
                 r'_', r'%', r'\+', r'~', r'\^')
        # Punctuation allowed immediately around a span.
        pnct = r""".,"'?!;:‹›«»„“”‚‘’"""
        self.span_depth = self.span_depth + 1

        if self.span_depth <= self.max_span_depth:
            for tag in qtags:
                # One pass per delimiter; named groups feed fSpan.
                pattern = re.compile(r"""
                    (?P<pre>^|(?<=[\s>{pnct}\(])|[{{[])
                    (?P<tag>{tag})(?!{tag})
                    (?P<atts>{cls})
                    (?!{tag})
                    (?::(?P<cite>\S+[^{tag}]{space}))?
                    (?P<content>[^{space}{tag}]+|\S.*?[^\s{tag}\n])
                    (?P<end>[{pnct}]*)
                    {tag}
                    (?P<tail>$|[\[\]}}<]|(?=[{pnct}]{{1,2}}[^0-9]|\s|\)))
                """.format(
                    **{'tag': tag, 'cls': cls_re_s, 'pnct': pnct, 'space':
                        regex_snippets['space']}
                ), flags=re.X | re.U)
                text = pattern.sub(self.fSpan, text)
        self.span_depth = self.span_depth - 1
        return text
1069 | 
1070 |     def getSpecialOptions(self, pre, tail):
1071 |         for before, after in self.spanWrappers:
1072 |             if pre == before and tail == after:
1073 |                 pre = tail = ''
1074 |                 break
1075 |         return (pre, tail)
1076 | 
1077 |     def fSpan(self, match):
1078 |         pre, tag, atts, cite, content, end, tail = match.groups()
1079 |         pre, tail = self.getSpecialOptions(pre, tail)
1080 | 
1081 |         qtags = {
1082 |             '*':  'strong',  # noqa: E241
1083 |             '**': 'b',       # noqa: E241
1084 |             '??': 'cite',    # noqa: E241
1085 |             '_':  'em',      # noqa: E241
1086 |             '__': 'i',       # noqa: E241
1087 |             '-':  'del',     # noqa: E241
1088 |             '%':  'span',    # noqa: E241
1089 |             '+':  'ins',     # noqa: E241
1090 |             '~':  'sub',     # noqa: E241
1091 |             '^':  'sup'      # noqa: E241
1092 |         }
1093 | 
1094 |         tag = qtags[tag]
1095 |         atts = pba(atts, restricted=self.restricted)
1096 |         if cite:
1097 |             atts = '{0} cite="{1}"'.format(atts, cite.rstrip())
1098 | 
1099 |         content = self.span(content)
1100 |         opentag = '<{0}{1}>'.format(tag, atts)
1101 |         closetag = '</{0}>'.format(tag)
1102 |         tags = self.storeTags(opentag, closetag)
1103 |         return pre + tags['open'] + content + end + tags['close'] + tail
1104 | 
1105 |     def storeTags(self, opentag, closetag=''):
1106 |         tags = {}
1107 |         self.refIndex += 1
1108 |         self.refCache[self.refIndex] = opentag
1109 |         tags['open'] = self.uid + str(self.refIndex) + ':ospan '
1110 | 
1111 |         self.refIndex += 1
1112 |         self.refCache[self.refIndex] = closetag
1113 |         tags['close'] = ' ' + self.uid + str(self.refIndex) + ':cspan'
1114 |         return tags
1115 | 
1116 |     def retrieveTags(self, text):
1117 |         text = (re.compile('{0}(?P<token>[0-9]+):ospan '.format(self.uid), re.U)
1118 |                 .sub(self.fRetrieveTags, text))
1119 |         text = (re.compile(' {0}(?P<token>[0-9]+):cspan'.format(self.uid), re.U)
1120 |                 .sub(self.fRetrieveTags, text))
1121 |         return text
1122 | 
1123 |     def fRetrieveTags(self, match):
1124 |         return self.refCache[int(match.group('token'))]
1125 | 
    def image(self, text):
        """Search *text* for inline image markup — !src(title)!:href with
        optional alignment and attribute modifiers — and replace each
        occurrence via fImage."""
        pattern = re.compile(r"""
            (?:[\[{{])?                # pre
            \!                         # opening !
            (\<|\=|\>)?                # optional alignment atts
            ({0})                      # optional style,class atts
            (?:\.\s)?                  # optional dot-space
            ([^\s(!]+)                 # presume this is the src
            \s?                        # optional space
            (?:\(([^\)]+)\))?          # optional title
            \!                         # closing
            (?::(\S+)(?<![\]).,]))?    # optional href sans final punct
            (?:[\]}}]|(?=[.,\s)|]|$))  # lookahead: space or end of string
        """.format(cls_re_s), re.U | re.X)
        return pattern.sub(self.fImage, text)
1141 | 
1142 |     def fImage(self, match):
1143 |         # (None, '', '/imgs/myphoto.jpg', None, None)
1144 |         align, attributes, url, title, href = match.groups()
1145 |         atts = OrderedDict()
1146 |         size = None
1147 | 
1148 |         alignments = {'<': 'left', '=': 'center', '>': 'right'}
1149 | 
1150 |         if not title:
1151 |             title = ''
1152 | 
1153 |         if not is_rel_url(url) and self.get_sizes:
1154 |             size = getimagesize(url)
1155 | 
1156 |         if href:
1157 |             href = self.shelveURL(href)
1158 | 
1159 |         url = self.shelveURL(url)
1160 | 
1161 |         if align:
1162 |             atts.update(align=alignments[align])
1163 |         atts.update(alt=title)
1164 |         if size:
1165 |             atts.update(height="{0}".format(size[1]))
1166 |         atts.update(src=url)
1167 |         if attributes:
1168 |             atts.update(parse_attributes(attributes, restricted=self.restricted))
1169 |         if title:
1170 |             atts.update(title=title)
1171 |         if size:
1172 |             atts.update(width="{0}".format(size[0]))
1173 |         img = generate_tag('img', ' /', atts)
1174 |         if href:
1175 |             a_atts = OrderedDict(href=href)
1176 |             if self.rel:
1177 |                 a_atts.update(rel=self.rel)
1178 |             img = generate_tag('a', img, a_atts)
1179 |         return img
1180 | 
1181 |     def code(self, text):
1182 |         text = self.doSpecial(text, '<code>', '</code>', self.fCode)
1183 |         text = self.doSpecial(text, '@', '@', self.fCode)
1184 |         text = self.doSpecial(text, '<pre>', '</pre>', self.fPre)
1185 |         return text
1186 | 
1187 |     def fCode(self, match):
1188 |         before, text, after = match.groups()
1189 |         after = after or ''
1190 |         before, after = self.getSpecialOptions(before, after)
1191 |         # text needs to be escaped
1192 |         text = encode_html(text, quotes=False)
1193 |         return ''.join([before, self.shelve('<code>{0}</code>'.format(text)), after])
1194 | 
1195 |     def fPre(self, match):
1196 |         before, text, after = match.groups()
1197 |         if after is None:
1198 |             after = ''
1199 |         before, after = self.getSpecialOptions(before, after)
1200 |         # text needs to be escaped
1201 |         text = encode_html(text)
1202 |         return ''.join([before, '<pre>', self.shelve(text), '</pre>', after])
1203 | 
1204 |     def doSpecial(self, text, start, end, method):
1205 |         pattern = re.compile(r'(^|\s|[\[({{>|]){0}(.*?){1}($|[\])}}])?'.format(
1206 |             re.escape(start), re.escape(end)), re.M | re.S)
1207 |         return pattern.sub(method, text)
1208 | 
1209 |     def noTextile(self, text):
1210 |         text = self.doSpecial(text, '<notextile>', '</notextile>',
1211 |                               self.fTextile)
1212 |         return self.doSpecial(text, '==', '==', self.fTextile)
1213 | 
1214 |     def fTextile(self, match):
1215 |         before, notextile, after = match.groups()
1216 |         if after is None:  # pragma: no branch
1217 |             after = ''
1218 |         before, after = self.getSpecialOptions(before, after)
1219 |         return ''.join([before, self.shelve(notextile), after])
1220 | 
1221 |     def getHTMLComments(self, text):
1222 |         """Search the string for HTML comments, e.g. <!-- comment text -->.  We
1223 |         send the text that matches this to fParseHTMLComments."""
1224 |         return self.doSpecial(text, '<!--', '-->', self.fParseHTMLComments)
1225 | 
1226 |     def fParseHTMLComments(self, match):
1227 |         """If self.restricted is True, clean the matched contents of the HTML
1228 |         comment.  Otherwise, return the comments unchanged.
1229 |         The original php had an if statement in here regarding restricted mode.
1230 |         nose reported that this line wasn't covered.  It's correct.  In
1231 |         restricted mode, the html comment tags have already been converted to
1232 |         <!*#8212; and —> so they don't match in getHTMLComments,
1233 |         and never arrive here.
1234 |         """
1235 |         before, commenttext, after = match.groups()
1236 |         commenttext = self.shelve(commenttext)
1237 |         return '{0}<!--{1}-->'.format(before, commenttext)
1238 | 
    def redcloth_list(self, text):
        """Parse the text for definition lists and send them to be
        formatted.

        A RedCloth-style definition list is a run of lines that start with
        one or more hyphens and contain a ':=' term/definition separator.
        """
        pattern = re.compile(r"^([-]+{0}[ .].*:=.*)$(?![^-])".format(cls_re_s),
                             re.M | re.U | re.S)
        return pattern.sub(self.fRCList, text)
1245 | 
    def fRCList(self, match):
        """Format a RedCloth-style definition list into <dl>/<dt>/<dd>
        markup.  Each hyphen-started line yields one term/definition
        pair; embedded newlines become <br />."""
        out = []
        # One list item per hyphen-started line.
        text = re.split(r'\n(?=[-])', match.group(), flags=re.M)
        for line in text:
            # parse the attributes and content
            m = re.match(r'^[-]+({0})[ .](.*)$'.format(cls_re_s), line,
                         flags=re.M | re.S)
            if not m:
                continue

            atts, content = m.groups()
            # cleanup
            content = content.strip()
            atts = pba(atts, restricted=self.restricted)

            # split the content into the term and definition
            xm = re.match(
                r'^(.*?){0}*:=(.*?){0}*(=:|:=)?{0}*$'
                .format(regex_snippets['space']),
                content,
                re.S)
            term, definition, _ = xm.groups()
            # cleanup
            term = term.strip()
            definition = definition.strip(' ')

            # if this is the first time through, out as a bool is False
            if not out:
                # NOTE(review): attributes land on the <dl> itself only
                # when the first item has an empty definition — presumably
                # intentional; confirm against reference implementation.
                if definition == '':
                    dltag = "<dl{0}>".format(atts)
                else:
                    dltag = "<dl>"
                out.append(dltag)

            if term != '':
                # A definition that begins on its own line is wrapped in
                # <p>; all internal newlines become <br />.
                is_newline_started_def = definition.startswith('\n')
                definition = (
                    definition
                    .strip()
                    .replace('\n', '<br />'))

                if is_newline_started_def:
                    definition = '<p>{0}</p>'.format(definition)
                term = term.replace('\n', '<br />')

                # Run both halves through the paragraph-level parser.
                term = self.graf(term)
                definition = self.graf(definition)

                out.append('\t<dt{0}>{1}</dt>'.format(atts, term))
                if definition:
                    out.append('\t<dd>{0}</dd>'.format(definition))

        out.append('</dl>')
        out = '\n'.join(out)
        return out
1302 | 
    def placeNoteLists(self, text):
        """Parse the text for endnotes.

        Referenced notes are re-keyed by their sequence number and sorted;
        notes that were defined but never referenced are moved to
        unreferencedNotes so an extended notelist ('+') can still render
        them.  Then any <p>notelist…</p> markers are replaced via
        fNoteLists.
        """
        if self.notes:
            o = OrderedDict()
            for label, info in self.notes.items():
                if 'seq' in info:
                    # Referenced note: index by sequence, keep the label.
                    i = info['seq']
                    info['seq'] = label
                    o[i] = info
                else:
                    self.unreferencedNotes[label] = info

            if o:  # pragma: no branch
                # sort o by key
                o = OrderedDict(sorted(o.items(), key=lambda t: t[0]))
            self.notes = o
        text_re = re.compile(r'<p>notelist({0})(?:\:([\w|{1}]))?([\^!]?)(\+?)'
                             r'\.?[\s]*</p>'.format(cls_re_s, syms_re_s), re.U)
        text = text_re.sub(self.fNoteLists, text)
        return text
1323 | 
    def fNoteLists(self, match):
        """Given the text that matches as a note list marker, format the
        collected notes into an HTML ordered list."""
        att, start_char, g_links, extras = match.groups()
        start_char = start_char or 'a'
        # One rendered list is cached per (links, extras, start) combo.
        index = '{0}{1}{2}'.format(g_links, extras, start_char)
        result = ''

        if index not in self.notelist_cache:  # pragma: no branch
            o = []
            if self.notes:  # pragma: no branch
                for seq, info in self.notes.items():
                    links = self.makeBackrefLink(info, g_links, start_char)
                    atts = ''
                    if 'def' in info:
                        # Defined note: emit its content with an anchor.
                        infoid = info['id']
                        atts = info['def']['atts']
                        content = info['def']['content']
                        li = ('\t\t<li{0}>{1}<span id="note{2}"> '
                              '</span>{3}</li>').format(atts, links, infoid,
                                                        content)
                    else:
                        # Referenced but never defined.
                        li = ('\t\t<li{0}>{1} Undefined Note [#{2}].</li>'
                              ).format(atts, links, info['seq'])
                    o.append(li)
            if '+' == extras and self.unreferencedNotes:
                # 'notelist+' also renders defined-but-unreferenced notes.
                for seq, info in self.unreferencedNotes.items():
                    atts = info['def']['atts']
                    content = info['def']['content']
                    li = '\t\t<li{0}>{1}</li>'.format(atts, content)
                    o.append(li)
            self.notelist_cache[index] = "\n".join(o)
            result = self.notelist_cache[index]
        if result:
            list_atts = pba(att, restricted=self.restricted)
            result = '<ol{0}>\n{1}\n\t</ol>'.format(list_atts, result)
        return result
1360 | 
    def makeBackrefLink(self, info, g_links, i):
        """Given the pieces of a back reference link, create an <a> tag.

        g_links is the default backlink style from the notelist marker; a
        per-note link character stored in the definition overrides it.
        """
        link = ''
        if 'def' in info:
            link = info['def']['link']
        backlink_type = link or g_links
        i_ = encode_high(i)
        # Only step through backref characters when the start char isn't
        # one of the special symbols.
        allow_inc = i not in syms_re_s
        i_ = int(i_)

        if backlink_type == "!":
            # '!' suppresses back reference links entirely.
            return ''
        elif backlink_type == '^':
            # '^' links back to the first reference only.
            return """<sup><a href="#noteref{0}">{1}</a></sup>""".format(
                info['refids'][0], i)
        else:
            # Default: one backlink per reference, with an incrementing
            # character label (a, b, c, …).
            result = []
            for refid in info['refids']:
                i_entity = decode_high(i_)
                sup = """<sup><a href="#noteref{0}">{1}</a></sup>""".format(
                    refid, i_entity)
                if allow_inc:
                    i_ = i_ + 1
                result.append(sup)
            result = ' '.join(result)
            return result
1387 | 
    def fParseNoteDefs(self, m):
        """Parse one note definition, store its formatted pieces on
        self.notes, and return '' so the definition is removed from the
        output text."""
        label = m.group('label')
        link = m.group('link')
        att = m.group('att')
        content = m.group('content')

        # Assign an id if the note reference parse hasn't found the label yet.
        if label not in self.notes:
            self.notes[label] = {'id': '{0}{1}'.format(
                self.linkPrefix, self._increment_link_index())}

        # Ignores subsequent defs using the same label
        if 'def' not in self.notes[label]:  # pragma: no branch
            self.notes[label]['def'] = {
                'atts': pba(att, restricted=self.restricted), 'content':
                self.graf(content), 'link': link}
        return ''
1406 | 
    def noteRef(self, text):
        """Search the text for note references — [#label] with optional
        attributes and a '!' no-link marker — and format each via
        fParseNoteRefs."""
        text_re = re.compile(r"""
        \[          # start
        ({0})       # !atts
        \#
        ([^\]!]+)   # !label
        ([!]?)      # !nolink
        \]""".format(cls_re_s), re.X)
        text = text_re.sub(self.fParseNoteRefs, text)
        return text
1418 | 
    def fParseNoteRefs(self, match):
        """Parse and format the matched text into note references.
        By the time this function is called, all the defs will have been
        processed into the notes array. So now we can resolve the link numbers
        in the order we process the refs..."""
        atts, label, nolink = match.groups()
        atts = pba(atts, restricted=self.restricted)
        # A trailing '!' suppresses the link to the note definition.
        nolink = nolink == '!'

        # Assign a sequence number to this reference if there isn't one already
        if label in self.notes:
            num = self.notes[label]['seq']
        else:
            self.notes[label] = {
                'seq': self.note_index, 'refids': [], 'id': ''
            }
            num = self.note_index
            self.note_index = self.note_index + 1

        # Make our anchor point and stash it for possible use in backlinks when
        # the note list is generated later...
        refid = '{0}{1}'.format(self.linkPrefix, self._increment_link_index())
        self.notes[label]['refids'].append(refid)

        # If we are referencing a note that hasn't had the definition parsed
        # yet, then assign it an ID...
        if not self.notes[label]['id']:
            self.notes[label]['id'] = '{0}{1}'.format(
                self.linkPrefix, self._increment_link_index())
        labelid = self.notes[label]['id']

        # Build the link (if any)...
        result = '<span id="noteref{0}">{1}</span>'.format(refid, num)
        if not nolink:
            result = '<a href="#note{0}">{1}</a>'.format(labelid, result)

        # Build the reference...
        result = '<sup{0}>{1}</sup>'.format(atts, result)
        return result
1458 | 
1459 |     def shelveURL(self, text):
1460 |         if text == '':
1461 |             return ''
1462 |         self.refIndex = self.refIndex + 1
1463 |         self.refCache[self.refIndex] = text
1464 |         output = '{0}{1}{2}'.format(self.uid, self.refIndex, ':url')
1465 |         return output
1466 | 
1467 |     def retrieveURLs(self, text):
1468 |         return re.sub(r'{0}(?P<token>[0-9]+):url'.format(self.uid), self.retrieveURL, text)
1469 | 
1470 |     def retrieveURL(self, match):
1471 |         url = self.refCache.get(int(match.group('token')), '')
1472 |         if url == '':
1473 |             return url
1474 | 
1475 |         if url in self.urlrefs:
1476 |             url = self.urlrefs[url]
1477 | 
1478 |         return url
1479 | 
1480 |     def _increment_link_index(self):
1481 |         """The self.linkIndex property needs to be incremented in various
1482 |         places.  Don't Repeat Yourself."""
1483 |         self.linkIndex = self.linkIndex + 1
1484 |         return self.linkIndex
1485 | 
1486 | 
def textile(text, html_type='xhtml'):
    """
    Apply Textile to a block of text.

    This function takes the following additional parameters:

    html_type - 'xhtml' or 'html5' style tags (default: 'xhtml')

    """
    parser = Textile(html_type=html_type)
    return parser.parse(text)
1497 | 
1498 | 
def textile_restricted(text, lite=True, noimage=True, html_type='xhtml'):
    """
    Apply Textile to a block of text, with restrictions designed for weblog
    comments and other untrusted input.  Raw HTML is escaped, style attributes
    are disabled, and rel='nofollow' is added to external links.

    This function takes the following additional parameters:

    html_type - 'xhtml' or 'html5' style tags (default: 'xhtml')
    lite - restrict block tags to p, bq, and bc, disable tables (default: True)
    noimage - disable image tags (default: True)

    """
    parser = Textile(restricted=True, lite=lite, noimage=noimage,
                     html_type=html_type, rel='nofollow')
    return parser.parse(text)
1514 | 


--------------------------------------------------------------------------------
/textile/objects/__init__.py:
--------------------------------------------------------------------------------
1 | from .block import Block
2 | from .table import Table
3 | 
4 | __all__ = ['Block', 'Table']
5 | 


--------------------------------------------------------------------------------
/textile/objects/block.py:
--------------------------------------------------------------------------------
  1 | # -*- coding: utf-8 -*-
  2 | from collections import OrderedDict
  3 | try:
  4 |     import regex as re
  5 | except ImportError:
  6 |     import re
  7 | 
  8 | from textile.regex_strings import cls_re_s, regex_snippets
  9 | from textile.utils import encode_html, generate_tag, parse_attributes
 10 | 
 11 | 
 12 | class Block(object):
    def __init__(self, textile, tag, atts, ext, cite, content):
        """Hold one parsed textile block and immediately process it.

        textile - the parent Textile parser instance
        tag     - the block signature (e.g. 'p', 'bq', 'fn1')
        atts    - the raw attribute string from the block signature
        ext     - extended-block marker, presumably from '..' blocks —
                  TODO confirm against caller
        cite    - citation URL for blockquotes, if any
        content - the block's text content
        """
        self.textile = textile
        self.tag = tag
        self.atts = atts
        self.ext = ext
        self.cite = cite
        self.content = content

        # Parsed form of atts; the outer/inner tag and attribute dicts are
        # filled in by process() according to the block type.
        self.attributes = parse_attributes(atts, restricted=self.textile.restricted)
        self.outer_tag = ''
        self.inner_tag = ''
        self.outer_atts = OrderedDict()
        self.inner_atts = OrderedDict()
        self.eat = False
        self.process()
 28 | 
 29 |     def process(self):
 30 |         if self.tag == 'p':
 31 |             # is this an anonymous block with a note definition?
 32 |             notedef_re = re.compile(r"""
 33 |             ^note\#                               # start of note def marker
 34 |             (?P<label>[^%<*!@\#^([{{ {space}.]+)  # label
 35 |             (?P<link>[*!^]?)                      # link
 36 |             (?P<att>{cls})                        # att
 37 |             \.?                                   # optional period.
 38 |             [{space}]+                            # whitespace ends def marker
 39 |             (?P<content>.*)$                      # content""".format(
 40 |                 space=regex_snippets['space'], cls=cls_re_s),
 41 |                 flags=re.X | re.U)
 42 |             notedef = notedef_re.sub(self.textile.fParseNoteDefs, self.content)
 43 | 
 44 |             # It will be empty if the regex matched and ate it.
 45 |             if '' == notedef:
 46 |                 self.content = notedef
 47 |                 self.eat = True
 48 | 
 49 |         fns = re.search(r'fn(?P<fnid>{0}+)'.format(regex_snippets['digit']),
 50 |                         self.tag, flags=re.U)
 51 |         if fns:
 52 |             self.tag = 'p'
 53 |             fnid = self.textile.fn.get(fns.group('fnid'), None)
 54 |             if fnid is None:
 55 |                 fnid = '{0}{1}'.format(self.textile.linkPrefix,
 56 |                                        self.textile._increment_link_index())
 57 | 
 58 |             # If there is an author-specified ID goes on the wrapper & the
 59 |             # auto-id gets pushed to the <sup>
 60 |             supp_id = OrderedDict()
 61 | 
 62 |             # if class has not been previously specified, set it to "footnote"
 63 |             if 'class' not in self.attributes:
 64 |                 self.attributes.update({'class': 'footnote'})
 65 | 
 66 |             # if there's no specified id, use the generated one.
 67 |             if 'id' not in self.attributes:
 68 |                 self.attributes.update({'id': 'fn{0}'.format(fnid)})
 69 |             else:
 70 |                 supp_id = parse_attributes('(#fn{0})'.format(fnid), restricted=self.textile.restricted)
 71 | 
 72 |             if '^' not in self.atts:
 73 |                 sup = generate_tag('sup', fns.group('fnid'), supp_id)
 74 |             else:
 75 |                 fnrev = generate_tag('a', fns.group('fnid'), {'href':
 76 |                                      '#fnrev{0}'.format(fnid)})
 77 |                 sup = generate_tag('sup', fnrev, supp_id)
 78 | 
 79 |             self.content = '{0} {1}'.format(sup, self.content)
 80 | 
 81 |         if self.tag == 'bq':
 82 |             if self.cite:
 83 |                 self.cite = self.textile.shelveURL(self.cite)
 84 |                 cite_att = OrderedDict(cite=self.cite)
 85 |                 self.cite = ' cite="{0}"'.format(self.cite)
 86 |             else:
 87 |                 self.cite = ''
 88 |                 cite_att = OrderedDict()
 89 |             cite_att.update(self.attributes)
 90 |             self.outer_tag = 'blockquote'
 91 |             self.outer_atts = cite_att
 92 |             self.inner_tag = 'p'
 93 |             self.inner_atts = self.attributes
 94 |             self.eat = False
 95 | 
 96 |         elif self.tag == 'bc' or self.tag == 'pre':
 97 |             i_tag = ''
 98 |             if self.tag == 'bc':
 99 |                 i_tag = 'code'
100 |             content = encode_html(self.content)
101 |             self.content = self.textile.shelve(content)
102 |             self.outer_tag = 'pre'
103 |             self.outer_atts = self.attributes
104 |             self.inner_tag = i_tag
105 |             self.inner_atts = self.attributes
106 |             self.eat = False
107 | 
108 |         elif self.tag == 'notextile':
109 |             self.content = self.textile.shelve(self.content)
110 | 
111 |         elif self.tag == '###':
112 |             self.eat = True
113 | 
114 |         else:
115 |             self.outer_tag = self.tag
116 |             self.outer_atts = self.attributes
117 | 
118 |         if not self.eat:
119 |             self.content = self.textile.graf(self.content)
120 |         else:
121 |             self.content = ''
122 | 


--------------------------------------------------------------------------------
/textile/objects/table.py:
--------------------------------------------------------------------------------
  1 | # -*- coding: utf-8 -*-
  2 | from xml.etree import ElementTree
  3 | 
  4 | from textile.regex_strings import (align_re_s, cls_re_s, regex_snippets,
  5 |                                    table_span_re_s, valign_re_s, pnct_re_s)
  6 | from textile.utils import generate_tag, parse_attributes
  7 | 
  8 | try:
  9 |     import regex as re
 10 | except ImportError:
 11 |     import re
 12 | 
 13 | 
 14 | class Table(object):
 15 |     caption_re = re.compile(
 16 |         (r'^\|\=(?P<capts>{s}{a}{c})\. '
 17 |          r'(?P<cap>[^\n]*)(?P<row>.*)'
 18 |          .format(**{'s': table_span_re_s, 'a': align_re_s, 'c': cls_re_s})),
 19 |         re.S)
 20 |     colgroup_re = re.compile(
 21 |         r'^\|:(?P<cols>{s}{a}{c}\. .*)'
 22 |         .format(**{'s': table_span_re_s, 'a': align_re_s, 'c': cls_re_s}),
 23 |         re.M)
 24 |     heading_re = re.compile(
 25 |         r'^_(?={0}|{1})'.format(regex_snippets['space'], pnct_re_s))
 26 | 
 27 |     def __init__(self, textile, tatts, rows, summary):
 28 |         self.textile = textile
 29 |         self.attributes = parse_attributes(tatts, 'table', restricted=self.textile.restricted)
 30 |         if summary:
 31 |             self.attributes.update(summary=summary.strip())
 32 |         self.input = rows
 33 |         self.caption = ''
 34 |         self.colgroup = ''
 35 |         self.content = []
 36 | 
 37 |     def process(self):
 38 |         rgrp = None
 39 |         groups = []
 40 |         split = (
 41 |             re.compile(r'\|{0}*?$'.format(regex_snippets['space']), re.M)
 42 |             .split(self.input))
 43 |         for i, row in enumerate([x for x in split if x]):
 44 |             row = row.lstrip()
 45 | 
 46 |             # Caption -- only occurs on row 1, otherwise treat '|=. foo |...'
 47 |             # as a normal center-aligned cell.
 48 |             cmtch = self.caption_re.match(row)
 49 |             if i == 0 and cmtch:
 50 |                 caption = Caption(restricted=self.textile.restricted, **cmtch.groupdict())
 51 |                 self.caption = '\n{0}'.format(caption.caption)
 52 |                 row = cmtch.group('row').lstrip()
 53 |                 if row == '':
 54 |                     continue
 55 | 
 56 |             # Colgroup -- A colgroup row will not necessarily end with a |.
 57 |             # Hence it may include the next row of actual table data.
 58 |             if row[:2] == '|:':
 59 |                 if '\n' in row:
 60 |                     colgroup_data, row = row[2:].split('\n')
 61 |                 else:
 62 |                     colgroup_data, row = row[2:], ''
 63 |                 colgroup_atts, cols = colgroup_data, None
 64 |                 if '|' in colgroup_data:
 65 |                     colgroup_atts, cols = colgroup_data.split('|', 1)
 66 |                 colgrp = Colgroup(cols, colgroup_atts, restricted=self.textile.restricted)
 67 |                 self.colgroup = colgrp.process()
 68 |                 if row == '':
 69 |                     continue
 70 | 
 71 |             # search the row for a table group - thead, tfoot, or tbody
 72 |             grpmatchpattern = (r"(:?^\|(?P<part>{v})(?P<rgrpatts>{s}{a}{c})"
 73 |                                r"\.\s*$\n)?^(?P<row>.*)").format(
 74 |                                    **{'v': valign_re_s, 's': table_span_re_s,
 75 |                                       'a': align_re_s, 'c': cls_re_s})
 76 |             grpmatch_re = re.compile(grpmatchpattern, re.S | re.M)
 77 |             grpmatch = grpmatch_re.match(row.lstrip())
 78 | 
 79 |             grptypes = {'^': Thead, '~': Tfoot, '-': Tbody}
 80 |             if grpmatch.group('part'):
 81 |                 # we're about to start a new group, so process the current one
 82 |                 # and add it to the output
 83 |                 if rgrp:
 84 |                     groups.append('\n\t{0}'.format(rgrp.process()))
 85 |                 rgrp = grptypes[grpmatch.group('part')](grpmatch.group(
 86 |                     'rgrpatts'), restricted=self.textile.restricted)
 87 |             row = grpmatch.group('row')
 88 | 
 89 |             rmtch = re.search(r'^(?P<ratts>{0}{1}\. )(?P<row>.*)'.format(
 90 |                 align_re_s, cls_re_s), row.lstrip())
 91 |             if rmtch:
 92 |                 row_atts = parse_attributes(rmtch.group('ratts'), 'tr', restricted=self.textile.restricted)
 93 |                 row = rmtch.group('row')
 94 |             else:
 95 |                 row_atts = {}
 96 | 
 97 |             # create a row to hold the cells.
 98 |             r = Row(row_atts, row)
 99 |             for cellctr, cell in enumerate(row.split('|')[1:]):
100 |                 ctag = 'td'
101 |                 if self.heading_re.match(cell):
102 |                     ctag = 'th'
103 | 
104 |                 cmtch = re.search(r'^(?P<catts>_?{0}{1}{2}\. )'
105 |                                   '(?P<cell>.*)'.format(
106 |                                       table_span_re_s, align_re_s, cls_re_s),
107 |                                   cell, flags=re.S)
108 |                 if cmtch:
109 |                     catts = cmtch.group('catts')
110 |                     cell_atts = parse_attributes(catts, 'td', restricted=self.textile.restricted)
111 |                     cell = cmtch.group('cell')
112 |                 else:
113 |                     cell_atts = {}
114 | 
115 |                 if not self.textile.lite:
116 |                     a_pattern = r'(?P<space>{0}*)(?P<cell>.*)'.format(
117 |                         regex_snippets['space'])
118 |                     a = re.search(a_pattern, cell, flags=re.S)
119 |                     cell = self.textile.redcloth_list(a.group('cell'))
120 |                     cell = self.textile.textileLists(cell)
121 |                     cell = '{0}{1}'.format(a.group('space'), cell)
122 | 
123 |                 # create a cell
124 |                 c = Cell(ctag, cell, cell_atts)
125 |                 cline_tag = '\n\t\t\t{0}'.format(c.process())
126 |                 # add the cell to the row
127 |                 r.cells.append(self.textile.doTagBr(ctag, cline_tag))
128 | 
129 |             # if we're in a group, add it to the group's rows, else add it
130 |             # directly to the content
131 |             if rgrp:
132 |                 rgrp.rows.append(r.process())
133 |             else:
134 |                 self.content.append(r.process())
135 | 
136 |         # if there's still an rgrp, process it and add it to the output
137 |         if rgrp:
138 |             groups.append('\n\t{0}'.format(rgrp.process()))
139 | 
140 |         content = '{0}{1}{2}{3}\n\t'.format(
141 |             self.caption, self.colgroup, ''.join(groups), ''.join(self.content))
142 |         tbl = generate_tag('table', content, self.attributes)
143 |         return '\t{0}\n\n'.format(tbl)
144 | 
145 | 
class Caption(object):
    """Renders a textile caption row ("|=. ...") into a <caption> tag."""

    def __init__(self, capts, cap, row, restricted):
        # `row` carries the remainder of the table source; it is parsed
        # by the caller, not here.
        self.attributes = parse_attributes(capts, restricted=restricted)
        self.caption = self.process(cap)

    def process(self, cap):
        """Wrap the stripped caption text in a tab-indented <caption>."""
        caption_tag = generate_tag('caption', cap.strip(), self.attributes)
        return '\t{0}'.format(caption_tag)
154 | 
155 | 
class Colgroup(object):
    """Renders a textile colgroup row ("|:...") into <colgroup>/<col>
    markup."""

    def __init__(self, cols, atts, restricted):
        self.row = ''
        self.attributes = atts
        self.cols = cols
        self.restricted = restricted

    def process(self):
        """Return the formatted <colgroup> markup for this row."""
        group_atts = parse_attributes(self.attributes, 'col', restricted=self.restricted)
        colgroup = ElementTree.Element('colgroup', attrib=group_atts)
        colgroup.text = '\n\t'
        if self.cols is not None:
            # The first item in match_cols belongs to the colgroup itself;
            # the remaining items are the individual cols.
            match_cols = self.cols.replace('.', '').split('|')
            for col in match_cols:
                col_atts = parse_attributes(col.strip(), 'col', restricted=self.restricted)
                ElementTree.SubElement(colgroup, 'col', col_atts)
        colgrp = ElementTree.tostring(colgroup, encoding='unicode')
        # cleanup the extra xml declaration if it exists, (python versions
        # differ) and then format the resulting string accordingly: newline and
        # tab between cols and a newline at the end
        xml_declaration = "<?xml version='1.0' encoding='UTF-8'?>\n"
        colgrp = colgrp.replace(xml_declaration, '')
        colgrp = colgrp.replace('><', '>\n\t<')
        return f"\n\t{colgrp}"
184 | 
185 | 
class Row(object):
    """A single table row.  The caller appends already-rendered cell
    strings to ``cells`` before invoking process()."""

    def __init__(self, attributes, row):
        # `row` (the raw textile source) is accepted for interface
        # compatibility but unused; the caller renders the cells itself.
        self.tag = 'tr'
        self.attributes = attributes
        self.cells = []

    def process(self):
        """Return the <tr> element wrapping the accumulated cells."""
        # Use self.tag (previously set but ignored in favor of a
        # hard-coded 'tr') and join the cells directly instead of
        # copying them through an intermediate list.
        cell_data = '{0}\n\t\t'.format(''.join(self.cells))
        tag = generate_tag(self.tag, cell_data, self.attributes)
        return '\n\t\t{0}'.format(tag)
199 | 
200 | 
class Cell(object):
    """One table cell: a tag name ('td' or 'th'), its content string and
    an attribute dict."""

    def __init__(self, tag, content, attributes):
        self.tag = tag
        self.content = content
        self.attributes = attributes

    def process(self):
        """Render the cell as a complete html element string."""
        rendered = generate_tag(self.tag, self.content, self.attributes)
        return rendered
209 | 
210 | 
class _TableSection(object):
    """Common base for the thead/tbody/tfoot row groups: collects
    rendered rows and wraps them in the section tag."""

    def __init__(self, tag, attributes, restricted):
        self.tag = tag
        self.attributes = parse_attributes(attributes, restricted=restricted)
        self.rows = []

    def process(self):
        """Wrap the accumulated rows in this section's element."""
        section_body = '{0}\n\t'.format(''.join(self.rows))
        return generate_tag(self.tag, section_body, self.attributes)
219 | 
220 | 
class Thead(_TableSection):
    """Table header row group (<thead>)."""
    def __init__(self, attributes, restricted):
        # zero-argument super(): the file already targets Python 3.
        super().__init__('thead', attributes, restricted)
224 | 
225 | 
class Tbody(_TableSection):
    """Table body row group (<tbody>)."""
    def __init__(self, attributes, restricted):
        # zero-argument super(): the file already targets Python 3.
        super().__init__('tbody', attributes, restricted)
229 | 
230 | 
class Tfoot(_TableSection):
    """Table footer row group (<tfoot>)."""
    def __init__(self, attributes, restricted):
        # zero-argument super(): the file already targets Python 3.
        super().__init__('tfoot', attributes, restricted)
234 | 


--------------------------------------------------------------------------------
/textile/regex_strings.py:
--------------------------------------------------------------------------------
# -*- coding: utf-8 -*-
try:
    # Use regex module for matching uppercase characters if installed,
    # otherwise fall back to finding all the uppercase chars in a loop.
    import regex as re  # noqa: F401
    upper_re_s = r'\p{Lu}'
    # Named character-class fragments interpolated into the parser's
    # regexes.  The third-party regex module understands unicode
    # property classes (\p{...}), so these cover the full unicode ranges.
    regex_snippets = {
        'acr': r'\p{Lu}\p{Nd}',
        'abr': r'\p{Lu}',
        'nab': r'\p{Ll}',
        'wrd': r'(?:\p{L}|\p{M}|\p{N}|\p{Pc})',
        'cur': r'\p{Sc}',
        'digit': r'\p{N}',
        'space': r'(?:\p{Zs}|\v)',
        'char': r'(?:[^\p{Zs}\v])',
    }
except ImportError:
    from sys import maxunicode
    # Plain re has no \p{Lu}; enumerate every uppercase codepoint into
    # one (large) character-class string instead.
    upper_re_s = "".join(
        [chr(c) for c in range(maxunicode) if chr(c).isupper()]
    )
    regex_snippets = {
        'acr': r'{0}0-9'.format(upper_re_s),
        'abr': r'{0}'.format(upper_re_s),
        'nab': r'a-z',
        'wrd': r'\w',
        # All codepoints identified as currency symbols
        # by the [mrab-regex library](https://pypi.org/project/regex/)
        # and the UNICODE standard.
        'cur': r'$¢-¥֏؋৲৳৻૱௹฿៛\u20a0-\u20cf\ua838﷼﹩$¢£¥₩',
        'digit': r'\d',
        'space': r'(?:\s|\v)',
        'char': r'\S',
    }

# Horizontal alignment modifiers: <, >, <>, =, or runs of parens.
halign_re_s = r'(?:\<(?!>)|(?<!<)\>|\<\>|\=|[()]+(?! ))'
# Vertical alignment modifiers: -, ^, ~.
valign_re_s = r'[\-^~]'
class_re_s = r'(?:\([^)\n]+\))'       # Don't allow classes/ids,
language_re_s = r'(?:\[[^\]\n]+\])'   # languages,
style_re_s = r'(?:\{[^}\n]+\})'       # or styles to span across newlines
colspan_re_s = r'(?:\\\d+)'           # "\N" -- table colspan
rowspan_re_s = r'(?:\/\d+)'           # "/N" -- table rowspan
align_re_s = r'(?:{0}|{1})*'.format(halign_re_s, valign_re_s)
table_span_re_s = r'(?:{0}|{1})*'.format(colspan_re_s, rowspan_re_s)
# regex string to match class, style and language attributes
cls_re_s = (r'(?:'
            r'{c}(?:{l}(?:{s})?|{s}(?:{l})?)?|'
            r'{l}(?:{c}(?:{s})?|{s}(?:{c})?)?|'
            r'{s}(?:{c}(?:{l})?|{l}(?:{c})?)?'
            r')?'
            ).format(c=class_re_s, s=style_re_s, l=language_re_s)
# ascii punctuation characters (as a character class).
pnct_re_s = r'[-!"#$%&()*+,/:;<=>?@\'\[\\\]\.^_`{|}~]'
# standalone symbol characters.
syms_re_s = '¤§µ¶†‡•∗∴◊♠♣♥♦'
54 | 


--------------------------------------------------------------------------------
/textile/textilefactory.py:
--------------------------------------------------------------------------------
 1 | from .core import Textile
 2 | 
 3 | 
 4 | class TextileFactory(object):
 5 |     """ Use TextileFactory to create a Textile object which can be re-used to
 6 |     process multiple strings with the same settings."""
 7 | 
 8 |     def __init__(self, restricted=False, lite=False, sanitize=False,
 9 |                  noimage=None, get_sizes=False, html_type='xhtml'):
10 | 
11 |         self.class_parms = {}
12 |         self.method_parms = {}
13 | 
14 |         if lite and not restricted:
15 |             raise ValueError("lite can only be enabled in restricted mode")
16 | 
17 |         if restricted:
18 |             self.class_parms['restricted'] = True
19 |             self.class_parms['lite'] = lite
20 |             self.method_parms['rel'] = 'nofollow'
21 | 
22 |         if noimage is None:
23 |             noimage = bool(restricted)
24 | 
25 |         self.class_parms['noimage'] = noimage
26 |         self.method_parms['sanitize'] = sanitize
27 |         self.class_parms['get_sizes'] = get_sizes
28 | 
29 |         if html_type not in ['xhtml', 'html5']:
30 |             raise ValueError("html_type must be 'xhtml' or 'html5'")
31 |         else:
32 |             self.class_parms['html_type'] = html_type
33 | 
34 |     def process(self, text):
35 |         return Textile(**self.class_parms).parse(text, **self.method_parms)
36 | 


--------------------------------------------------------------------------------
/textile/utils.py:
--------------------------------------------------------------------------------
  1 | try:
  2 |     import regex as re
  3 | except ImportError:
  4 |     import re
  5 | 
  6 | from urllib.parse import urlparse
  7 | import html
  8 | 
  9 | from collections import OrderedDict
 10 | 
 11 | from xml.etree import ElementTree
 12 | 
 13 | from textile.regex_strings import valign_re_s, halign_re_s
 14 | 
# Regular expressions for stripping chunks of HTML,
# leaving only content not wrapped in a tag or a comment
RAW_TEXT_REVEALERS = (
    # The php version orders the below list of tags differently.  The
    # important thing to note here is that the pre must occur before the p or
    # else the regex module doesn't properly match pre-s. It only matches the
    # p in pre.
    re.compile(r'<(pre|p|blockquote|div|form|table|ul|ol|dl|h[1-6])[^>]*?>.*</\1>',
               re.S),
    re.compile(r'<(hr|br)[^>]*?/>'),
    re.compile(r'<!--.*?-->'),
)
 27 | 
 28 | 
 29 | def decode_high(text):
 30 |     """Decode encoded HTML entities."""
 31 |     text = '&#{0};'.format(text)
 32 |     return html.unescape(text)
 33 | 
 34 | 
 35 | def encode_high(text):
 36 |     """Encode the text so that it is an appropriate HTML entity."""
 37 |     return ord(text)
 38 | 
 39 | 
 40 | def encode_html(text, quotes=True):
 41 |     """Return text that's safe for an HTML attribute."""
 42 |     a = (
 43 |         ('&', '&'),
 44 |         ('<', '<'),
 45 |         ('>', '>'))
 46 | 
 47 |     if quotes:
 48 |         a = a + (("'", '''),
 49 |                  ('"', '"'))
 50 | 
 51 |     for k, v in a:
 52 |         text = text.replace(k, v)
 53 |     return text
 54 | 
 55 | 
 56 | def generate_tag(tag, content, attributes=None):
 57 |     """Generate a complete html tag using the ElementTree module.  tag and
 58 |     content are strings, the attributes argument is a dictionary.  As
 59 |     a convenience, if the content is ' /', a self-closing tag is generated."""
 60 |     enc = 'unicode'
 61 |     if not tag:
 62 |         return content
 63 |     element = ElementTree.Element(tag, attrib=attributes)
 64 |     # Sort attributes for Python 3.8+, as suggested in
 65 |     # https://docs.python.org/3/library/xml.etree.elementtree.html
 66 |     if len(element.attrib) > 1:
 67 |         # adjust attribute order, e.g. by sorting
 68 |         attribs = sorted(element.attrib.items())
 69 |         element.attrib.clear()
 70 |         element.attrib.update(attribs)
 71 |     # FIXME: Kind of an ugly hack.  There *must* be a cleaner way.  I tried
 72 |     # adding text by assigning it to element_tag.text.  That results in
 73 |     # non-ascii text being html-entity encoded.  Not bad, but not entirely
 74 |     # matching php-textile either.
 75 |     element_tag = ElementTree.tostringlist(element, encoding=enc,
 76 |                                            method='html')
 77 |     element_tag.insert(len(element_tag) - 1, content)
 78 |     element_text = ''.join(element_tag)
 79 |     return element_text
 80 | 
 81 | 
 82 | def getimagesize(url):
 83 |     """
 84 |     Attempts to determine an image's width and height, and returns a tuple,
 85 |     (width, height), in pixels or an empty string in case of failure.
 86 |     Requires that PIL is installed.
 87 | 
 88 |     """
 89 | 
 90 |     try:
 91 |         from PIL import ImageFile
 92 |     except ImportError:
 93 |         return ''
 94 | 
 95 |     from urllib.request import urlopen
 96 | 
 97 |     try:
 98 |         p = ImageFile.Parser()
 99 |         f = urlopen(url)
100 |         while True:
101 |             s = f.read(1024)
102 |             if not s:
103 |                 break
104 |             p.feed(s)
105 |             if p.image:
106 |                 return p.image.size
107 |     except (IOError, ValueError):
108 |         return ''
109 | 
110 | 
def has_raw_text(text):
    """checks whether the text has text not already enclosed by a block tag"""
    remainder = text.strip()
    for stripper in RAW_TEXT_REVEALERS:
        remainder = stripper.sub('', remainder).strip()
    return bool(remainder)
117 | 
118 | 
def human_readable_url(url):
    """Strip the scheme from a url for display purposes, e.g.
    'http://example.com/x' -> 'example.com/x' and
    'mailto:a@b' -> 'a@b'."""
    if "://" in url:
        return url.split("://")[1]
    if ":" in url:
        return url.split(":")[1]
    return url
125 | 
126 | 
def is_rel_url(url):
    """Identify relative urls: no scheme and no network location."""
    parsed = urlparse(url)
    return not (parsed.scheme or parsed.netloc)
131 | 
132 | 
def is_valid_url(url):
    """True when the url has no scheme component (i.e. it is a bare or
    relative reference rather than an absolute url)."""
    return urlparse(url).scheme == ''
138 | 
139 | 
def list_type(list_string):
    """Classify a textile list marker string by its last character:
    'u' (unordered) for '*', 'o' (ordered) for '#', 'd' (definition)
    otherwise."""
    if list_string.endswith('*'):
        return 'u'
    if list_string.endswith('#'):
        return 'o'
    return 'd'
148 | 
149 | 
def normalize_newlines(string):
    """Convert CR/CRLF line endings to LF, reduce whitespace-only lines
    to bare newlines, and trim leading/trailing newlines."""
    result = re.sub(r'\r\n?', '\n', string)
    result = re.sub(r'^[ \t]*\n', '\n', result, flags=re.M)
    return result.strip('\n')
155 | 
156 | 
def parse_attributes(block_attributes, element=None, include_id=True, restricted=False):
    """Parse a textile block-attribute string into an OrderedDict of html
    attributes.

    block_attributes -- the textile shorthand, e.g. '(cls#myid){color:red}[en]'.
    element -- optional element context: 'td' and 'tr' enable vertical
        alignment (and, for 'td', col/row spans); 'col' enables span and
        width parsing.
    include_id -- when False, a parsed css id is omitted from the result.
    restricted -- when True, inline styles ({...}) are not honoured.

    Note: the attribute string is consumed destructively -- each matched
    chunk is removed from `matched` before the next pattern runs, so the
    order of the searches below matters.
    """
    vAlign = {'^': 'top', '-': 'middle', '~': 'bottom'}
    hAlign = {'<': 'left', '=': 'center', '>': 'right', '<>': 'justify'}
    style = []
    aclass = ''
    lang = ''
    colspan = ''
    rowspan = ''
    block_id = ''
    span = ''
    width = ''
    result = OrderedDict()

    if not block_attributes:
        return result

    matched = block_attributes
    if element == 'td':
        # "\N" sets a colspan of N...
        m = re.search(r'\\(\d+)', matched)
        if m:
            colspan = m.group(1)

        # ...and "/N" a rowspan of N.
        m = re.search(r'/(\d+)', matched)
        if m:
            rowspan = m.group(1)

    if element == 'td' or element == 'tr':
        # a leading ^ / - / ~ sets the vertical alignment.
        m = re.search(r'(^{0})'.format(valign_re_s), matched)
        if m:
            style.append("vertical-align:{0}".format(vAlign[m.group(1)]))

    if not restricted:
        # inline styles in curly braces, e.g. "{color:red;width:2em}".
        m = re.search(r'\{([^}]*)\}', matched)
        if m:
            style.extend(m.group(1).rstrip(';').split(';'))
            matched = matched.replace(m.group(0), '')

    # language in square brackets, e.g. "[en]".
    m = re.search(r'\[([^\]]+)\]', matched, re.U)
    if m:
        lang = m.group(1)
        matched = matched.replace(m.group(0), '')

    # css class and/or id in parens, e.g. "(myclass#myid)".
    m = re.search(r'\(([^()]+)\)', matched, re.U)
    if m:
        matched = matched.replace(m.group(0), '')
        # Only allow a restricted subset of the CSS standard characters for classes/ids.
        # No encoding markers allowed.
        id_class_match = re.compile(r"^([-a-zA-Z 0-9_\/\[\]\.\:\#]+)$", re.U).match(m.group(1))
        if id_class_match:
            class_regex = re.compile(r"^([-a-zA-Z 0-9_\.\/\[\]]*)$")
            id_class = id_class_match.group(1)
            # If a textile class block attribute was found with a '#' in it
            # split it into the css class and css id...
            hashpos = id_class.find('#')
            if hashpos >= 0:
                id_match = re.match(r"^#([-a-zA-Z0-9_\.\:]*)$", id_class[hashpos:])
                if id_match:
                    block_id = id_match.group(1)

                cls_match = class_regex.match(id_class[:hashpos])
            else:
                cls_match = class_regex.match(id_class)

            if cls_match:
                aclass = cls_match.group(1)

    # each remaining "(" adds 1em of left padding...
    m = re.search(r'([(]+)', matched)
    if m:
        style.append("padding-left:{0}em".format(len(m.group(1))))
        matched = matched.replace(m.group(0), '')

    # ...and each ")" adds 1em of right padding.
    m = re.search(r'([)]+)', matched)
    if m:
        style.append("padding-right:{0}em".format(len(m.group(1))))
        matched = matched.replace(m.group(0), '')

    # horizontal alignment: <, >, =, or <>.
    m = re.search(r'({0})'.format(halign_re_s), matched)
    if m:
        style.append("text-align:{0}".format(hAlign[m.group(1)]))

    if element == 'col':
        # e.g. "\3. 100" -- span 3 columns at width 100.
        pattern = r'(?:\\(\d+)\.?)?\s*(\d+)?'
        csp = re.match(pattern, matched)
        span, width = csp.groups()

    if colspan:
        result['colspan'] = colspan

    if style:
        # Previous splits that created style may have introduced extra
        # whitespace into the list elements.  Clean it up.
        style = [x.strip() for x in style]
        result['style'] = '{0};'.format("; ".join(style))
    if aclass:
        result['class'] = aclass
    if block_id and include_id:
        result['id'] = block_id
    if lang:
        result['lang'] = lang
    if rowspan:
        result['rowspan'] = rowspan
    if span:
        result['span'] = span
    if width:
        result['width'] = width
    return result
263 | 
264 | 
def pba(block_attributes, element=None, include_id=True, restricted=False):
    """Parse block attributes and render them as an html attribute
    string (with a leading space), or '' when nothing was parsed."""
    attrs = parse_attributes(block_attributes, element, include_id, restricted)
    if not attrs:
        return ''
    pairs = ['{0}="{1}"'.format(k, v) for k, v in attrs.items()]
    return ' {0}'.format(' '.join(pairs))
272 | 


--------------------------------------------------------------------------------
/textile/version.py:
--------------------------------------------------------------------------------
# Package version string.
VERSION = '4.0.3'
2 | 


--------------------------------------------------------------------------------