├── .coveragerc ├── .flake8 ├── .github └── workflows │ └── lint_and_test.yml ├── .gitignore ├── CHANGELOG.textile ├── CONTRIBUTORS.txt ├── LICENSE.txt ├── Makefile ├── README.textile ├── TODO.textile ├── pyproject.toml ├── pytest.ini ├── tests ├── __init__.py ├── fixtures │ └── README.txt ├── test_attributes.py ├── test_block.py ├── test_cli.py ├── test_footnoteRef.py ├── test_getRefs.py ├── test_getimagesize.py ├── test_github_issues.py ├── test_glyphs.py ├── test_image.py ├── test_imagesize.py ├── test_lists.py ├── test_retrieve.py ├── test_span.py ├── test_subclassing.py ├── test_table.py ├── test_textile.py ├── test_textilefactory.py ├── test_urls.py ├── test_utils.py └── test_values.py └── textile ├── __init__.py ├── __main__.py ├── core.py ├── objects ├── __init__.py ├── block.py └── table.py ├── regex_strings.py ├── textilefactory.py ├── utils.py └── version.py /.coveragerc: -------------------------------------------------------------------------------- 1 | [run] 2 | branch = True 3 | source = textile 4 | parallel = True 5 | 6 | [report] 7 | show_missing = True 8 | omit = 9 | textile/tests/* -------------------------------------------------------------------------------- /.flake8: -------------------------------------------------------------------------------- 1 | [flake8] 2 | ignore = 3 | # line too long 4 | E501 5 | exclude = 6 | build/ 7 | -------------------------------------------------------------------------------- /.github/workflows/lint_and_test.yml: -------------------------------------------------------------------------------- 1 | --- 2 | name: python-textile 3 | 4 | on: [push] 5 | 6 | jobs: 7 | lint_and_test: 8 | runs-on: ubuntu-latest 9 | strategy: 10 | matrix: 11 | python-version: ["3.8", "3.9", "3.10", "3.11", "3.12", "pypy3.10"] 12 | image_size: ['true', 'false'] 13 | steps: 14 | - uses: actions/checkout@v4 15 | - name: Set up Python ${{ matrix.python-version }} 16 | uses: actions/setup-python@v5 17 | with: 18 | python-version: ${{ 
matrix.python-version }} 19 | - name: Python flake8 Lint 20 | uses: py-actions/flake8@v2.3.0 21 | - name: Install dependencies 22 | run: | 23 | imagesize='' 24 | pip install -U pytest pytest-cov coverage codecov 25 | if [[ ${{ matrix.image_size }} == true ]] ; then imagesize='[imagesize]' ; fi 26 | pip install -e ".${imagesize}" 27 | - name: run tests 28 | run: | 29 | pytest 30 | - name: Codecov 31 | uses: codecov/codecov-action@v4 32 | env: 33 | CODECOV_TOKEN: ${{ secrets.CODECOV_TOKEN }} 34 | -------------------------------------------------------------------------------- /.gitignore: -------------------------------------------------------------------------------- 1 | *.pyc 2 | *.orig 3 | *.rej 4 | *~ 5 | *.pyo 6 | *.egg-info 7 | .cache/ 8 | .coverage 9 | .eggs/ 10 | .noseids* 11 | .pytest_cache 12 | docs/build 13 | docs/coverage 14 | build 15 | bin 16 | dist 17 | eggs 18 | htmlcov 19 | parts 20 | develop-eggs 21 | .DS_Store 22 | *.swp 23 | .tox 24 | README.txt 25 | -------------------------------------------------------------------------------- /CHANGELOG.textile: -------------------------------------------------------------------------------- 1 | h1. Textile Changelog 2 | 3 | h2. Version 4.0.3 4 | * Update supported Python versions to 3.8 - 3.12 ("#83":https://github.com/textile/python-textile/issues/83) 5 | * Replace html5lib with nh3 for html sanitization 6 | * General code cleanup 7 | * Bugfixes: 8 | ** Wrong HTML output when "bc.." is the very last in the document ("#81":https://github.com/textile/python-textile/issues/81) 9 | * Other: 10 | ** Use github actions instead of travis for automated testing 11 | 12 | h2. Version 4.0.2 13 | * Bugfixes: 14 | ** Support non-http schemas in url refs ("#75":https://github.com/textile/python-textile/pull/75) 15 | ** pytest-runner is deprecated ("#77":https://github.com/textile/python-textile/issues/77) 16 | *** other changes related to CI infrastructure 17 | 18 | h2. 
Version 4.0.1 19 | * Bugfixes: 20 | ** SyntaxWarnings with Python 3.8 i("#71":https://github.com/textile/python-textile/issues/71) 21 | ** testsuite: internal error with coverage 5.0.X ("#72":https://github.com/textile/python-textile/issues/72) 22 | ** DeprecationWarnings about invalid escape sequences ("#73":https://github.com/textile/python-textile/issues/73) 23 | 24 | h2. Version 4.0.0 25 | * Drop support for Python 2, hence the version bump. Update list of PY3K versions to currently-supported versions. If you need to use textile on Python 2.7 or Python 3.3 or 3.4, please use textile Version 3.0.4. 26 | * For use in PyPy environments, textile used to work well with the regex package. Lately, it's running into trouble. Please uninstall regex if this is the case for you. 27 | 28 | h2. Version 3.0.4 29 | * BUGFIX: Restricted mode strips out CSS attributes again. 30 | * Update travis to more current versions and test against current Pillow version. 31 | 32 | h2. Version 3.0.3 33 | * BUGFIX: Improve handling code block following extended p block ("#63":https://github.com/textile/python-textile/pull/63) 34 | 35 | h2. Version 3.0.2 36 | * BUGFIX: Fix for multiple multi-line paragraphs. ("#62":https://github.com/textile/python-textile/pull/62) 37 | 38 | h2. Version 3.0.1 39 | * BUGFIX: Fix improper handling of extended code blocks. ("#61":https://github.com/textile/python-textile/pull/61) 40 | 41 | h2. Version 3.0.0 42 | * Drop support for Python 2.6 and 3.2. 43 | * Update to the current version of html5lib 44 | * Bugfixes: 45 | ** Fix handling of HTML entities in extended pre blocks. ("#55":https://github.com/textile/python-textile/issues/55) 46 | ** Empty definitions in definition lists raised an exception ("#56":https://github.com/textile/python-textile/issues/56) 47 | ** Fix handling of unicode in img attributes ("#58":https://github.com/textile/python-textile/issues/58) 48 | 49 | h2. 
Version 2.3.16 50 | * Bugfixes: 51 | ** Fix processing of extended code blocks ("#50":https://github.com/textile/python-textile/issues/50) 52 | ** Don't break when links fail to include "http:" ("#51":https://github.com/textile/python-textile/issues/51) 53 | ** Better handling of poorly-formatted tables ("#52":https://github.com/textile/python-textile/issues/52) 54 | 55 | h2. Version 2.3.15 56 | * Bugfix: Don't break on unicode characters in the fragment of a url. 57 | 58 | h2. Version 2.3.14 59 | * Bugfix: Fix textile on Python 2.6 ("#48":https://github.com/textile/python-textile/issues/48) 60 | 61 | h2. Version 2.3.13 62 | * Remove extraneous arguments from textile method. These were originally added long ago to work with django, but markup languages are long gone from django. 63 | * Bugfix: Don't mangle percent-encoded URLs so much. ("#45":https://github.com/textile/python-textile/issues/45) 64 | * Bugfix: More fixes for poorly-formatted lists. ("#46":https://github.com/textile/python-textile/issues/46) 65 | * Bugfix: Improve handling of whitespace in pre-formatted blocks. This now matches php-textile's handling of pre blocks much more closely. ("#47":https://github.com/textile/python-textile/issues/47) 66 | 67 | h2. Version 2.3.12 68 | * Bugfix: Don't die on pre blocks with unicode characters. ("#43":https://github.com/textile/python-textile/issues/43) 69 | * Bugfix: Fix regressions introduced into the code between 2.2.2 and 2.3.11. (Special thanks to "@adam-iris":https://github.com/adam-iris for providing pull request "#44":https://github.com/textile/python-textile/pull/44) 70 | * Bugfix: Don't just die when processing poorly-formatted textile lists. ("#37":https://github.com/textile/python-textile/issues/37) 71 | * Add Python 3.6 to testing. 72 | * Add a "print the version string and exit" argument to the cli tool: @pytextile -v@ 73 | 74 | h2. 
Version 2.3.11 75 | * Bugfix: Don't strip leading dot from image URIs ("#42":https://github.com/textile/python-textile/issues/42) 76 | 77 | h2. Version 2.3.10 78 | * Packaging: cleanup in MANIFEST.IN leads to better linux packaging, and smaller wheel size. 79 | 80 | h2. Version 2.3.9 81 | * Packaging: remove extraneous files from the source distribution upload. 82 | * Remove a lingering file from a feature branch for overhauling list handling. This brings coverage back up to 100% 83 | 84 | h2. Version 2.3.8 85 | * Bugfix: Fix process of string containing only whitespaces ("#40":https://github.com/textile/python-textile/issues/40) 86 | * Bugfix: Fix process of formatted text after lists ("#37":https://github.com/textile/python-textile/issues/37) 87 | * Test: Use sys.executable instead of 'python' to test the CLI ("#38":https://github.com/textile/python-textile/issues/38) 88 | 89 | h2. Version 2.3.7 90 | * Bugfix: Don't assume pytest is available to be imported in setup.py ("#39":https://github.com/textile/python-textile/issues/39) 91 | 92 | h2. Version 2.3.6 93 | * Packaging: @tests@ directory is correctly included in source-tarball. ("#33":https://github.com/textile/python-textile/issues/33) 94 | 95 | h2. Version 2.3.5 96 | * Bugfix: Correctly handle unicode text in url query-strings. ("#36":https://github.com/textile/python-textile/issues/36) 97 | 98 | h2. Version 2.3.4 99 | * Bugfix: fix an issue with extended block code 100 | * Remove misplaced shebang on non-callable files. 101 | * Packaging: Add test-command to setup.py directly. 102 | * Packaging: Included the tests/ directory for source-tarballs, useful for packaging checks. ("#33":https://github.com/textile/python-textile/issues/33) 103 | * Add a cli tool @pytextile@ which takes textile input and prints html output. See @pytextile -h@ for details. 104 | 105 | h2. 
Version 2.3.3 106 | * Bugfix: Unicode in URL titles no longer break everything ("#30":https://github.com/textile/python-textile/issues/30) 107 | * Display DeprecationWarning when using textile on Python 2.6. 108 | 109 | h2. Version 2.3.2 110 | * Bugfix: properly handle @":"@ as text, not a link. 111 | 112 | h2. Version 2.3.1 113 | * Regression bugfix: empty string input returns empty string again. 114 | 115 | h2. Version 2.3.0 116 | 117 | * Bugfixes: 118 | ** Support data URIs in img tags 119 | ** Fix autolink urls with image references ("#17":https://github.com/textile/python-textile/issues/17) 120 | ** Fix textile links containing parentheses ("#20":https://github.com/textile/python-textile/issues/20) 121 | ** Fix double-encoding of code blocks ("#21":https://github.com/textile/python-textile/issues/21) 122 | ** Fix handling of scheme in self-linked URLs ("#16":https://github.com/textile/python-textile/issues/16) 123 | ** Fix Markup not parsed if followed by certain characters ("#22":Markup not parsed if followed by certain characters) 124 | * Convert testing over to "py.test":http://pytest.org/, improving unicode testing 125 | * Update functionality for tables, notelists, and footnotes. This involved a major reworking of parts of the code, but it should now match php-textile and txstyle.org precisely. Please file an issue for any bugs you come across. 126 | * Remove @head_offset@ option from parse. I'm not sure it ever existed in php-textile. 127 | 128 | h2. Version 2.2.2 129 | 130 | * bugfix: "regex":https://pypi.python.org/pypi/regex is now an optional dependency 131 | 132 | h2. Version 2.2.1 133 | 134 | * drop textilefactory support for html. 135 | * Various development-related bugfixes. 136 | * Added this changelog. 137 | 138 | h2. Version 2.2.0 139 | 140 | * Started refactoring the code to be less repetitive. 
@textile.Textile().parse()@ is a little more friendly than @textile.Textile().textile()@ There may be more work to be done on this front to make the flow a little smoother. 141 | * We now support versions 2.6 - 3.4 (including 3.2) using the same codebase. Many thanks to Radek Czajka for this. 142 | * Drop support for html4. We now only output xhtml or html5. 143 | * Various development-related bugfixes. 144 | 145 | h2. Version 2.1.8 146 | 147 | * Add support for html5 output. 148 | * Lots of new functionality added bringing us in line with the official Textile 2.4 149 | -------------------------------------------------------------------------------- /CONTRIBUTORS.txt: -------------------------------------------------------------------------------- 1 | Dennis Burke 2 | Radek Czajka 3 | Roberto A. F. De Almeida 4 | Matt Layman 5 | Mark Pilgrim 6 | Alex Shiels 7 | Jason Samsa 8 | Kurt Raschke 9 | Dave Brondsema 10 | Dmitry Shachnev 11 | Kirill Mavreshko 12 | Brad Schoening -------------------------------------------------------------------------------- /LICENSE.txt: -------------------------------------------------------------------------------- 1 | L I C E N S E 2 | ============= 3 | Redistribution and use in source and binary forms, with or without 4 | modification, are permitted provided that the following conditions are met: 5 | 6 | * Redistributions of source code must retain the above copyright notice, 7 | this list of conditions and the following disclaimer. 8 | 9 | * Redistributions in binary form must reproduce the above copyright notice, 10 | this list of conditions and the following disclaimer in the documentation 11 | and/or other materials provided with the distribution. 12 | 13 | * Neither the name Textile nor the names of its contributors may be used to 14 | endorse or promote products derived from this software without specific 15 | prior written permission. 
16 | 17 | THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" 18 | AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE 19 | IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE 20 | ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE 21 | LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR 22 | CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF 23 | SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS 24 | INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN 25 | CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) 26 | ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE 27 | POSSIBILITY OF SUCH DAMAGE. -------------------------------------------------------------------------------- /Makefile: -------------------------------------------------------------------------------- 1 | clean: 2 | $(RM) README.txt 3 | $(RM) -r ./dist ./build 4 | 5 | generate_pypi_README: 6 | ${VIRTUAL_ENV}/bin/pytextile README.textile | sed -e 's/^\t//' > README.txt 7 | 8 | build: generate_pypi_README 9 | python -m build 10 | 11 | upload_to_test: build 12 | twine check ./dist/* 13 | twine upload --repository test_textile ./dist/* 14 | 15 | upload_to_prod: build 16 | twine check ./dist/* 17 | # for now, don't actually upload to prod PyPI, just output the command to do so. 
18 | @echo "twine upload --repository textile ./dist/*" 19 | -------------------------------------------------------------------------------- /README.textile: -------------------------------------------------------------------------------- 1 | !https://github.com/textile/python-textile/actions/workflows/lint_and_test.yml/badge.svg(python-textile)!:https://github.com/textile/python-textile/actions/workflows/lint_and_test.yml !https://codecov.io/github/textile/python-textile/coverage.svg!:https://codecov.io/github/textile/python-textile !https://img.shields.io/pypi/pyversions/textile! !https://img.shields.io/pypi/wheel/textile! 2 | 3 | h1. python-textile 4 | 5 | python-textile is a Python port of "Textile":https://textile-lang.com/, Dean Allen's humane web text generator. 6 | 7 | h2. Installation 8 | 9 | @pip install textile@ 10 | 11 | Dependencies: 12 | * "nh3":https://pypi.org/project/nh3/ 13 | * "regex":https://pypi.org/project/regex/ (The regex package causes problems with PyPy, and is not installed as a dependency in such environments. If you are upgrading a textile install on PyPy which had regex previously included, you may need to uninstall it.) 14 | 15 | Optional dependencies include: 16 | * "PIL/Pillow":http://python-pillow.github.io/ (for checking image sizes). If needed, install via @pip install 'textile[imagesize]'@ 17 | 18 | h2. Usage 19 | 20 | bc.. import textile 21 | >>> s = """ 22 | ... _This_ is a *test.* 23 | ... 24 | ... * One 25 | ... * Two 26 | ... * Three 27 | ... 28 | ... Link to "Slashdot":http://slashdot.org/ 29 | ... """ 30 | >>> html = textile.textile(s) 31 | >>> print html 32 |

This is a test.

33 | 34 | 39 | 40 |

Link to Slashdot

41 | >>> 42 | 43 | h3. Notes: 44 | 45 | * Active development supports Python 3.8 or later. 46 | 47 | h3. Running Tests 48 | 49 | To run the test suite, use pytest. `pytest-cov` is required as well. 50 | 51 | When textile is installed locally: 52 | 53 | bc. pytest 54 | 55 | When textile is not installed locally: 56 | 57 | bc. PYTHONPATH=. pytest 58 | -------------------------------------------------------------------------------- /TODO.textile: -------------------------------------------------------------------------------- 1 | TODO 2 | 3 | * Improve documentation, both of the code and Textile syntax. 4 | ** Not all functions have docstrings or adequate docstrings. 5 | ** Because the Textile syntax implemented by PyTextile has deviated from the syntax implemented by other implementations of Textile, PyTextile-specific documentation needs to be produced for end-users. 6 | * Update to comply with Textile 2.5 7 | -------------------------------------------------------------------------------- /pyproject.toml: -------------------------------------------------------------------------------- 1 | [build-system] 2 | requires = ["setuptools", "setuptools-scm", "nh3"] 3 | build-backend = "setuptools.build_meta" 4 | 5 | [project] 6 | name = "textile" 7 | authors = [ 8 | { name = "Dennis Burke", email = "ikirudennis@gmail.com"} 9 | ] 10 | description = 'Textile processing for python.' 
11 | classifiers = [ 12 | 'Development Status :: 5 - Production/Stable', 13 | 'Environment :: Web Environment', 14 | 'Intended Audience :: Developers', 15 | 'License :: OSI Approved :: BSD License', 16 | 'Operating System :: OS Independent', 17 | 'Programming Language :: Python', 18 | 'Programming Language :: Python :: 3', 19 | 'Programming Language :: Python :: 3 :: Only', 20 | 'Programming Language :: Python :: 3.8', 21 | 'Programming Language :: Python :: 3.9', 22 | 'Programming Language :: Python :: 3.10', 23 | 'Programming Language :: Python :: 3.11', 24 | 'Programming Language :: Python :: 3.12', 25 | 'Topic :: Software Development :: Libraries :: Python Modules', 26 | ] 27 | dynamic = ["version",] 28 | dependencies = [ 29 | 'nh3', 30 | 'regex>1.0; implementation_name != "pypy"', 31 | ] 32 | requires-python = '>=3.8' 33 | keywords = ['textile', 'text', 'html markup'] 34 | # Use the following command to generate a README.txt which is compatible with 35 | # pypi's readme rendering: 36 | # pytextile README.textile | sed -e 's/^\t//' > README.txt 37 | readme = {file = 'README.txt', content-type = 'text/markdown'} 38 | 39 | [project.optional-dependencies] 40 | develop = ['pytest', 'pytest-cov'] 41 | imagesize = ['Pillow>=3.0.0',] 42 | 43 | [project.urls] 44 | Homepage = "https://github.com/textile/python-textile" 45 | Repository = "https://github.com/textile/python-textile.git" 46 | Issues = "https://github.com/textile/python-textile/issues" 47 | 48 | [project.scripts] 49 | pytextile = "textile.__main__:main" 50 | 51 | [tool.setuptools.dynamic] 52 | version = {attr = "textile.__version__"} 53 | -------------------------------------------------------------------------------- /pytest.ini: -------------------------------------------------------------------------------- 1 | [pytest] 2 | testpaths = tests 3 | addopts = --cov=textile --cov-report=html --cov-append --cov-report=term-missing 4 | 
-------------------------------------------------------------------------------- /tests/__init__.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/textile/python-textile/a1c31e525dfdb1745ae8d09d5a08323ef579f414/tests/__init__.py -------------------------------------------------------------------------------- /tests/fixtures/README.txt: -------------------------------------------------------------------------------- 1 |

python-textile

2 | 3 |

python-textile

4 | 5 |

python-textile is a Python port of Textile, Dean Allen’s humane web text generator.

6 | 7 |

Installation

8 | 9 |

pip install textile

10 | 11 |

Dependencies: 12 |

16 | 17 |

Optional dependencies include: 18 |

21 | 22 |

Usage

23 | 24 |
import textile
25 | >>> s = """
26 | ... _This_ is a *test.*
27 | ...
28 | ... * One
29 | ... * Two
30 | ... * Three
31 | ...
32 | ... Link to "Slashdot":http://slashdot.org/
33 | ... """
34 | >>> html = textile.textile(s)
35 | >>> print html
36 | 	<p><em>This</em> is a <strong>test.</strong></p>
37 | 
38 | 	<ul>
39 | 		<li>One</li>
40 | 		<li>Two</li>
41 | 		<li>Three</li>
42 | 	</ul>
43 | 
44 | 	<p>Link to <a href="http://slashdot.org/">Slashdot</a></p>
45 | >>>
46 | 47 |

Notes:

48 | 49 | 52 | 53 |

Running Tests

54 | 55 |

To run the test suite, use pytest. `pytest-cov` is required as well.

56 | 57 |

When textile is installed locally:

58 | 59 |
pytest
60 | 61 |

When textile is not installed locally:

62 | 63 |
PYTHONPATH=. pytest
-------------------------------------------------------------------------------- /tests/test_attributes.py: -------------------------------------------------------------------------------- 1 | from typing import OrderedDict 2 | from textile.utils import parse_attributes 3 | 4 | 5 | def test_parse_attributes(): 6 | assert parse_attributes('\\1', element='td') == {'colspan': '1'} 7 | assert parse_attributes('/1', element='td') == {'rowspan': '1'} 8 | assert parse_attributes('^', element='td') == {'style': 'vertical-align:top;'} 9 | assert parse_attributes('{color: blue}') == {'style': 'color: blue;'} 10 | assert parse_attributes('[en]') == {'lang': 'en'} 11 | assert parse_attributes('(cssclass)') == {'class': 'cssclass'} 12 | assert parse_attributes('(') == {'style': 'padding-left:1em;'} 13 | assert parse_attributes(')') == {'style': 'padding-right:1em;'} 14 | assert parse_attributes('<') == {'style': 'text-align:left;'} 15 | assert parse_attributes('(c#i)') == {'class': 'c', 'id': 'i'} 16 | assert parse_attributes('\\2 100', element='col') == {'span': '2', 'width': '100'} 17 | 18 | 19 | def test_parse_attributes_edge_cases(): 20 | result = parse_attributes('(:c#i)') 21 | expect = OrderedDict({'id': 'i'}) 22 | assert result == expect 23 | 24 | assert parse_attributes('(<)') == OrderedDict() 25 | -------------------------------------------------------------------------------- /tests/test_block.py: -------------------------------------------------------------------------------- 1 | import textile 2 | from textile.objects import Block 3 | 4 | try: 5 | from collections import OrderedDict 6 | except ImportError: 7 | from ordereddict import OrderedDict 8 | 9 | 10 | def test_block(): 11 | t = textile.Textile() 12 | result = t.block('h1. foobar baby') 13 | expect = '\t

foobar baby

' 14 | assert result == expect 15 | 16 | b = Block(t, "bq", "", None, "", "Hello BlockQuote") 17 | expect = ('blockquote', OrderedDict(), 'p', OrderedDict(), 18 | 'Hello BlockQuote') 19 | result = (b.outer_tag, b.outer_atts, b.inner_tag, b.inner_atts, b.content) 20 | assert result == expect 21 | 22 | b = Block(t, "bq", "", None, "http://google.com", "Hello BlockQuote") 23 | expect = ('blockquote', OrderedDict([('cite', 24 | '{0.uid}{0.refIndex}:url'.format(t))]), 'p', OrderedDict(), 25 | 'Hello BlockQuote') 26 | result = (b.outer_tag, b.outer_atts, b.inner_tag, b.inner_atts, b.content) 27 | assert result == expect 28 | 29 | b = Block(t, "bc", "", None, "", 'printf "Hello, World";') 30 | # the content of text will be turned shelved, so we'll asert only the 31 | # deterministic portions of the expected values, below 32 | expect = ('pre', OrderedDict(), 'code', OrderedDict()) 33 | result = (b.outer_tag, b.outer_atts, b.inner_tag, b.inner_atts) 34 | assert result == expect 35 | 36 | b = Block(t, "h1", "", None, "", "foobar") 37 | expect = ('h1', OrderedDict(), '', OrderedDict(), 'foobar') 38 | result = (b.outer_tag, b.outer_atts, b.inner_tag, b.inner_atts, b.content) 39 | assert result == expect 40 | 41 | 42 | def test_block_tags_false(): 43 | t = textile.Textile(block_tags=False) 44 | assert t.block_tags is False 45 | 46 | result = t.parse('test') 47 | expect = 'test' 48 | assert result == expect 49 | 50 | 51 | def test_blockcode_extended(): 52 | input = 'bc.. text\nmoretext\n\nevenmoretext\n\nmoremoretext\n\np. test' 53 | expect = '
text\nmoretext\n\nevenmoretext\n\nmoremoretext
\n\n\t

test

' 54 | t = textile.Textile() 55 | result = t.parse(input) 56 | assert result == expect 57 | 58 | 59 | def test_blockcode_in_README(): 60 | with open('README.textile') as f: 61 | readme = ''.join(f.readlines()) 62 | result = textile.textile(readme) 63 | with open('tests/fixtures/README.txt') as f: 64 | expect = ''.join(f.readlines()) 65 | assert result == expect 66 | 67 | 68 | def test_blockcode_comment(): 69 | input = '###.. block comment\nanother line\n\np. New line' 70 | expect = '\t

New line

' 71 | t = textile.Textile() 72 | result = t.parse(input) 73 | assert result == expect 74 | 75 | 76 | def test_extended_pre_block_with_many_newlines(): 77 | """Extra newlines in an extended pre block should not get cut down to only 78 | two.""" 79 | text = '''pre.. word 80 | 81 | another 82 | 83 | word 84 | 85 | 86 | yet anothe word''' 87 | expect = '''
word
 88 | 
 89 | another
 90 | 
 91 | word
 92 | 
 93 | 
 94 | yet anothe word
''' 95 | result = textile.textile(text) 96 | assert result == expect 97 | 98 | text = 'p. text text\n\n\nh1. Hello\n' 99 | expect = '\t

text text

\n\n\n\t

Hello

' 100 | result = textile.textile(text) 101 | assert result == expect 102 | -------------------------------------------------------------------------------- /tests/test_cli.py: -------------------------------------------------------------------------------- 1 | import subprocess 2 | import sys 3 | 4 | import textile 5 | 6 | 7 | def test_console_script(): 8 | command = [sys.executable, '-m', 'textile', 'README.textile'] 9 | try: 10 | result = subprocess.check_output(command) 11 | except AttributeError: 12 | command[2] = 'textile.__main__' 13 | result = subprocess.Popen( 14 | command, stdout=subprocess.PIPE).communicate()[0] 15 | with open('tests/fixtures/README.txt') as f: 16 | expect = ''.join(f.readlines()) 17 | if isinstance(result, bytes): 18 | result = result.decode('utf-8') 19 | assert result == expect 20 | 21 | 22 | def test_version_string(): 23 | command = [sys.executable, '-m', 'textile', '-v'] 24 | try: 25 | result = subprocess.check_output(command) 26 | except AttributeError: 27 | command[2] = 'textile.__main__' 28 | result = subprocess.Popen( 29 | command, stdout=subprocess.PIPE).communicate()[0] 30 | if isinstance(result, bytes): 31 | result = result.decode('utf-8') 32 | assert result.strip() == textile.__version__ 33 | -------------------------------------------------------------------------------- /tests/test_footnoteRef.py: -------------------------------------------------------------------------------- 1 | from textile import Textile 2 | 3 | 4 | def test_footnoteRef(): 5 | t = Textile() 6 | result = t.footnoteRef('foo[1]') 7 | expect = 'foo1'.format(t.linkPrefix) 8 | assert expect == result 9 | -------------------------------------------------------------------------------- /tests/test_getRefs.py: -------------------------------------------------------------------------------- 1 | from textile import Textile 2 | 3 | 4 | def test_getRefs(): 5 | t = Textile() 6 | result = t.getRefs("some text [Google]http://www.google.com") 7 | expect = 'some text ' 8 
| assert result == expect 9 | 10 | result = t.urlrefs 11 | expect = {'Google': 'http://www.google.com'} 12 | assert result == expect 13 | 14 | t2 = Textile() 15 | 16 | result = t2.getRefs("my ftp [ftp]ftp://example.com") 17 | expect = 'my ftp ' 18 | assert result == expect 19 | 20 | result = t2.urlrefs 21 | expect = {'ftp': 'ftp://example.com'} 22 | assert result == expect 23 | -------------------------------------------------------------------------------- /tests/test_getimagesize.py: -------------------------------------------------------------------------------- 1 | from textile.utils import getimagesize 2 | import pytest 3 | 4 | PIL = pytest.importorskip('PIL') 5 | 6 | 7 | def test_imagesize(): 8 | assert getimagesize("http://www.google.com/intl/en_ALL/images/logo.gif") == (276, 110) 9 | assert getimagesize("http://bad.domain/") == '' 10 | assert getimagesize("http://www.google.com/robots.txt") is None 11 | -------------------------------------------------------------------------------- /tests/test_github_issues.py: -------------------------------------------------------------------------------- 1 | # -*- coding: utf-8 -*- 2 | import textile 3 | 4 | 5 | def test_github_issue_16(): 6 | result = textile.textile('"$":http://google.com "$":https://google.com "$":mailto:blackhole@sun.comet') 7 | expect = '\t

google.com google.com blackhole@sun.comet

' 8 | assert result == expect 9 | 10 | 11 | def test_github_issue_17(): 12 | result = textile.textile('!http://www.ox.ac.uk/favicon.ico!') 13 | expect = '\t

' 14 | assert result == expect 15 | 16 | 17 | def test_github_issue_20(): 18 | text = 'This is a link to a ["Wikipedia article about Textile":http://en.wikipedia.org/wiki/Textile_(markup_language)].' 19 | result = textile.textile(text) 20 | expect = '\t

This is a link to a Wikipedia article about Textile.

' 21 | assert result == expect 22 | 23 | 24 | def test_github_issue_21(): 25 | text = ('''h1. xml example 26 | 27 | bc. ''' 28 | ''' 29 | 30 | bar 31 | ''') 32 | result = textile.textile(text) 33 | expect = '\t

xml example

\n\n
\n<foo>\n  bar\n</foo>
' 34 | assert result == expect 35 | 36 | 37 | def test_github_issue_22(): 38 | text = '''_(artist-name)Ty Segall_’s''' 39 | result = textile.textile(text) 40 | expect = '\t

Ty Segall’s

' 41 | assert result == expect 42 | 43 | 44 | def test_github_issue_26(): 45 | text = '' 46 | result = textile.textile(text) 47 | expect = '' 48 | assert result == expect 49 | 50 | 51 | def test_github_issue_27(): 52 | test = """* Folders with ":" in their names are displayed with a forward slash "/" instead. (Filed as "#4581709":/test/link, which was considered "normal behaviour" - quote: "Please note that Finder presents the 'Carbon filesystem' view, regardless of the underlying filesystem.")""" 53 | result = textile.textile(test) 54 | expect = """\t""" 55 | assert result == expect 56 | 57 | 58 | def test_github_issue_28(): 59 | test = """So here I am porting my ancient "newspipe":newspipe "front-end":blog/2006/09/30/0950 to "Snakelets":Snakelets and "Python":Python, and I've just trimmed down over 20 lines of "PHP":PHP down to essentially one line of "BeautifulSoup":BeautifulSoup retrieval: 60 | 61 |
 62 | def parseWapProfile(self, url):
 63 |   result = fetch.fetchURL(url)
 64 |   soup = BeautifulStoneSoup(result['data'], convertEntities=BeautifulStoneSoup.HTML_ENTITIES)
 65 |   try:
 66 |     width, height = soup('prf:screensize')[0].contents[0].split('x')
 67 |   except:
 68 |     width = height = None
 69 |   return {"width": width, "height": height}
 70 | 
71 | 72 | Of course there's a lot more error handling to do (and useful data to glean off the "XML":XML), but being able to cut through all the usual parsing crap is immensely gratifying.""" 73 | result = textile.textile(test) 74 | expect = ("""\t

So here I am porting my ancient newspipe front-end to Snakelets and Python, and I’ve just trimmed down over 20 lines of PHP down to essentially one line of BeautifulSoup retrieval:

75 | 76 |
 77 | def parseWapProfile(self, url):
 78 |   result = fetch.fetchURL(url)
 79 |   soup = BeautifulStoneSoup(result['data'], convertEntities=BeautifulStoneSoup.HTML_ENTITIES)
 80 |   try:
 81 |     width, height = soup('prf:screensize')[0].contents[0].split('x')
 82 |   except:
 83 |     width = height = None
 84 |   return {"width": width, "height": height}
 85 | 
86 | 87 | \t

Of course there’s a lot more error handling to do (and useful data to glean off the XML), but being able to cut through all the usual parsing crap is immensely gratifying.

""") 88 | assert result == expect 89 | 90 | 91 | def test_github_issue_30(): 92 | text = '"Tëxtíle (Tëxtíle)":http://lala.com' 93 | result = textile.textile(text) 94 | expect = '\t

Tëxtíle

' 95 | assert result == expect 96 | 97 | text = '!http://lala.com/lol.gif(♡ imáges)!' 98 | result = textile.textile(text) 99 | expect = '\t

♡ imáges

' 100 | assert result == expect 101 | 102 | 103 | def test_github_issue_36(): 104 | text = '"Chögyam Trungpa":https://www.google.com/search?q=Chögyam+Trungpa' 105 | result = textile.textile(text) 106 | expect = '\t

Chögyam Trungpa

' 107 | assert result == expect 108 | 109 | 110 | def test_github_issue_37(): 111 | text = '# xxx\n# yyy\n*blah*' 112 | result = textile.textile(text) 113 | expect = '\t

\t

    \n\t\t
  1. xxx
  2. \n\t\t
  3. yyy
  4. \n\t

\nblah

' 114 | assert result == expect 115 | 116 | text = '*Highlights*\n\n* UNITEK Y-3705A Type-C Universal DockingStation Pro\n* USB3.0/RJ45/EARPHONE/MICROPHONE/HDMI 6 PORT HUB 1.2m Data Cable 5V 4A Power Adaptor\n*\n* Dimensions: 25cm x 13cm x 9cm\n* Weight: 0.7kg' 117 | result = textile.textile(text) 118 | expect = '''\t

Highlights

119 | 120 | \t 124 | * 125 | \t''' 129 | assert result == expect 130 | 131 | 132 | def test_github_issue_40(): 133 | text = '\r\n' 134 | result = textile.textile(text) 135 | expect = '\r\n' 136 | assert result == expect 137 | 138 | 139 | def test_github_issue_42(): 140 | text = '!./image.png!' 141 | result = textile.textile(text) 142 | expect = '\t

' 143 | assert result == expect 144 | 145 | 146 | def test_github_issue_43(): 147 | text = 'pre. smart ‘quotes’ are not smart!' 148 | result = textile.textile(text) 149 | expect = '
smart ‘quotes’ are not smart!
' 150 | assert result == expect 151 | 152 | 153 | def test_github_issue_45(): 154 | """Incorrect transform unicode url""" 155 | text = '"test":https://myabstractwiki.ru/index.php/%D0%97%D0%B0%D0%B3%D0%BB%D0%B0%D0%B2%D0%BD%D0%B0%D1%8F_%D1%81%D1%82%D1%80%D0%B0%D0%BD%D0%B8%D1%86%D0%B0' 156 | result = textile.textile(text) 157 | expect = '\t

test

' 158 | assert result == expect 159 | 160 | 161 | def test_github_issue_46(): 162 | """Key error on mal-formed numbered lists. CAUTION: both the input and the 163 | ouput are ugly.""" 164 | text = '# test\n### test\n## test' 165 | expect = ('\t
    \n\t\t
  1. test\n\t\t\t
      \n\t\t\t\t
    1. test
    2. ' 166 | '\n\t\t\t
  2. \n\t\t
      \n\t\t\t
    1. test
    2. ' 167 | '\n\t\t
    \n\t\t
') 168 | result = textile.textile(text) 169 | assert result == expect 170 | 171 | 172 | def test_github_issue_47(): 173 | """Incorrect wrap pre-formatted value""" 174 | text = '''pre.. word 175 | 176 | another 177 | 178 | word 179 | 180 | yet anothe word''' 181 | result = textile.textile(text) 182 | expect = '''
word
183 | 
184 | another
185 | 
186 | word
187 | 
188 | yet anothe word
''' 189 | assert result == expect 190 | 191 | 192 | def test_github_issue_49(): 193 | """Key error on russian hash-route link""" 194 | s = '"link":https://ru.vuejs.org/v2/guide/components.html#Входные-параметры' 195 | result = textile.textile(s) 196 | expect = '\t

link

' 197 | assert result == expect 198 | 199 | 200 | def test_github_issue_50(): 201 | """Incorrect wrap code with Java generics in pre""" 202 | test = ('pre.. public class Tynopet {}\n\nfinal ' 203 | 'List> multipleList = new ArrayList<>();') 204 | result = textile.textile(test) 205 | expect = ('
public class Tynopet<T extends Framework> {}\n\n'
206 |               'final List<List<String>> multipleList = new '
207 |               'ArrayList<>();
') 208 | assert result == expect 209 | 210 | 211 | def test_github_issue_51(): 212 | """Link build with $ sign without "http" prefix broken.""" 213 | test = '"$":www.google.com.br' 214 | result = textile.textile(test) 215 | expect = '\t

www.google.com.br

' 216 | assert result == expect 217 | 218 | 219 | def test_github_issue_52(): 220 | """Table build without space after aligment raise a AttributeError.""" 221 | test = '|=.First Header |=. Second Header |' 222 | result = textile.textile(test) 223 | expect = ('\t\n\t\t\n\t\t\t\n\t\t\t' 225 | '\n\t\t\n\t
=.First Header ' 224 | 'Second Header
') 226 | assert result == expect 227 | 228 | 229 | def test_github_issue_55(): 230 | """Incorrect handling of quote entities in extended pre block""" 231 | test = ('pre.. this is the first line\n\nbut "quotes" in an extended pre ' 232 | 'block need to be handled properly.') 233 | result = textile.textile(test) 234 | expect = ('
this is the first line\n\nbut "quotes" in an '
235 |               'extended pre block need to be handled properly.
') 236 | assert result == expect 237 | 238 | # supplied input 239 | test = ('pre.. import org.slf4j.Logger;\nimport org.slf4j.LoggerFactory;' 240 | '\nimport ru.onyma.job.Context;\nimport ru.onyma.job.' 241 | 'RescheduleTask;\n\nimport java.util.concurrent.' 242 | 'ScheduledExecutorService;\nimport java.util.concurrent.TimeUnit;' 243 | '\n\n/**\n* @author ustits\n*/\npublic abstract class ' 244 | 'MainService extends RescheduleTask implements Context {\n\n' 245 | 'private static final Logger log = LoggerFactory.getLogger(' 246 | 'MainService.class);\nprivate final ScheduledExecutorService ' 247 | 'scheduler;\n\nprivate boolean isFirstRun = true;\nprivate T ' 248 | 'configs;\n\npublic MainService(final ScheduledExecutorService ' 249 | 'scheduler) {\nsuper(scheduler);\nthis.scheduler = scheduler;\n}\n' 250 | '\n@Override\npublic void setConfig(final T configs) {\nthis.' 251 | 'configs = configs;\nif (isFirstRun) {\nscheduler.schedule(this, ' 252 | '0, TimeUnit.SECONDS);\nisFirstRun = false;\n}\n}\n\n@Override\n' 253 | 'public void stop() {\nsuper.stop();\nscheduler.shutdown();\ntry {' 254 | '\nscheduler.awaitTermination(Long.MAX_VALUE, TimeUnit.DAYS);\n} ' 255 | 'catch (InterruptedException ie) {\nlog.warn("Unable to wait for ' 256 | 'syncs termination", ie);\nThread.currentThread().interrupt();\n}' 257 | '\n}\n\nprotected final T getConfigs() {\nreturn configs;\n}\n}') 258 | result = textile.textile(test) 259 | expect = ('
import org.slf4j.Logger;\nimport org.slf4j.LoggerFactory;'
260 |               '\nimport ru.onyma.job.Context;\nimport ru.onyma.job.'
261 |               'RescheduleTask;\n\nimport java.util.concurrent.'
262 |               'ScheduledExecutorService;\nimport java.util.concurrent.'
263 |               'TimeUnit;\n\n/**\n* @author ustits\n*/\npublic abstract class '
264 |               'MainService<T> extends RescheduleTask implements '
265 |               'Context<T> {\n\nprivate static final Logger log = '
266 |               'LoggerFactory.getLogger(MainService.class);\nprivate final '
267 |               'ScheduledExecutorService scheduler;\n\nprivate boolean '
268 |               'isFirstRun = true;\nprivate T configs;\n\npublic MainService('
269 |               'final ScheduledExecutorService scheduler) {\nsuper(scheduler);'
270 |               '\nthis.scheduler = scheduler;\n}\n\n@Override\npublic void '
271 |               'setConfig(final T configs) {\nthis.configs = configs;\nif ('
272 |               'isFirstRun) {\nscheduler.schedule(this, 0, TimeUnit.SECONDS);'
273 |               '\nisFirstRun = false;\n}\n}\n\n@Override\npublic void stop() {'
274 |               '\nsuper.stop();\nscheduler.shutdown();\ntry {\nscheduler.'
275 |               'awaitTermination(Long.MAX_VALUE, TimeUnit.DAYS);\n} catch '
276 |               '(InterruptedException ie) {\nlog.warn("Unable to wait '
277 |               'for syncs termination", ie);\nThread.currentThread().'
278 |               'interrupt();\n}\n}\n\nprotected final T getConfigs() {\n'
279 |               'return configs;\n}\n}
') 280 | assert result == expect 281 | 282 | 283 | def test_github_issue_56(): 284 | """Empty description lists throw error""" 285 | result = textile.textile("- :=\n-") 286 | expect = '
\n
' 287 | assert result == expect 288 | 289 | 290 | def test_github_pull_61(): 291 | """Fixed code block multiline encoding on quotes/span""" 292 | test = ('''bc.. This is some TEXT inside a "Code BLOCK" 293 | 294 | { 295 | if (JSON) { 296 | 297 | return {"JSON":"value"} 298 | } 299 | } 300 | 301 | Back to 10-4 CAPS ''' 302 | ''' 303 | 304 | p.. Some multiline Paragragh 305 | 306 | Here is some output!!! "Some" CAPS''') 307 | 308 | expect = '''
This is some TEXT inside a "Code BLOCK"
309 | 
310 | {
311 |   if (JSON) {
312 | 
313 |     return {"JSON":"value"}
314 |   }
315 | }
316 | 
317 | Back to 10-4 CAPS 
318 | 319 |

Some multiline Paragragh

320 | 321 |

Here is some output!!! “Some” CAPS

''' 322 | t = textile.Textile() 323 | result = t.parse(test) 324 | assert result == expect 325 | 326 | 327 | def test_github_pull_62(): 328 | """Fix for paragraph multiline, only last paragraph is rendered 329 | correctly""" 330 | test = '''p.. First one 'is' 331 | 332 | ESCAPED "bad" 333 | 334 | p.. Second one 'is' 335 | 336 | 337 | 338 | ESCAPED "bad" 339 | 340 | p.. Third one 'is' 341 | 342 | ESCAPED "bad" 343 | 344 | p.. Last one 'is' 345 | 346 | ESCAPED "good" test''' 347 | 348 | expect = '''

First one ‘is’

349 | 350 |

ESCAPED “bad”

351 | 352 |

Second one ‘is’

353 | 354 | 355 | 356 |

ESCAPED “bad”

357 | 358 |

Third one ‘is’

359 | 360 |

ESCAPED “bad”

361 | 362 |

Last one ‘is’

363 | 364 |

ESCAPED “good” test

''' 365 | t = textile.Textile() 366 | result = t.parse(test) 367 | assert result == expect 368 | 369 | 370 | def test_github_pull_63(): 371 | """Forgot to set multiline_para to False""" 372 | test = '''p.. First one 'is' 373 | 374 | ESCAPED "bad" 375 | 376 | bc.. { 377 | First code BLOCK 378 | 379 | {"JSON":'value'} 380 | } 381 | 382 | p.. Second one 'is' 383 | 384 | 385 | 386 | ESCAPED "bad" 387 | 388 | p.. Third one 'is' 389 | 390 | ESCAPED "bad" 391 | 392 | bc.. { 393 | Last code BLOCK 394 | 395 | {"JSON":'value'} 396 | } 397 | 398 | p.. Last one 'is' 399 | 400 | ESCAPED "good" test''' 401 | 402 | expect = '''

First one ‘is’

403 | 404 |

ESCAPED “bad”

405 | 406 |
{
407 |  First code BLOCK
408 | 
409 |  {"JSON":'value'}
410 | }
411 | 412 |

Second one ‘is’

413 | 414 | 415 | 416 |

ESCAPED “bad”

417 | 418 |

Third one ‘is’

419 | 420 |

ESCAPED “bad”

421 | 422 |
{
423 |  Last code BLOCK
424 | 
425 |  {"JSON":'value'}
426 | }
427 | 428 |

Last one ‘is’

429 | 430 |

ESCAPED “good” test

''' 431 | t = textile.Textile() 432 | result = t.parse(test) 433 | assert result == expect 434 | -------------------------------------------------------------------------------- /tests/test_glyphs.py: -------------------------------------------------------------------------------- 1 | from textile import Textile 2 | 3 | 4 | def test_glyphs(): 5 | t = Textile() 6 | 7 | result = t.glyphs("apostrophe's") 8 | expect = 'apostrophe’s' 9 | assert result == expect 10 | 11 | result = t.glyphs("back in '88") 12 | expect = 'back in ’88' 13 | assert result == expect 14 | 15 | result = t.glyphs('foo ...') 16 | expect = 'foo …' 17 | assert result == expect 18 | 19 | result = t.glyphs('--') 20 | expect = '—' 21 | assert result == expect 22 | 23 | result = t.glyphs('FooBar[tm]') 24 | expect = 'FooBar™' 25 | assert result == expect 26 | 27 | result = t.glyphs("

Cat's Cradle by Vonnegut

") 28 | expect = '

Cat’s Cradle by Vonnegut

' 29 | assert result == expect 30 | -------------------------------------------------------------------------------- /tests/test_image.py: -------------------------------------------------------------------------------- 1 | from textile import Textile 2 | 3 | 4 | def test_image(): 5 | t = Textile() 6 | result = t.image('!/imgs/myphoto.jpg!:http://jsamsa.com') 7 | expect = (''.format( 8 | t.uid)) 9 | assert result == expect 10 | assert t.refCache[1] == 'http://jsamsa.com' 11 | assert t.refCache[2] == '/imgs/myphoto.jpg' 12 | 13 | result = t.image('!'.format(t.uid)) 22 | assert result == expect 23 | -------------------------------------------------------------------------------- /tests/test_imagesize.py: -------------------------------------------------------------------------------- 1 | import textile 2 | 3 | 4 | def test_imagesize(): 5 | imgurl = 'http://www.google.com/intl/en_ALL/images/srpr/logo1w.png' 6 | result = textile.utils.getimagesize(imgurl) 7 | try: 8 | import PIL # noqa: F401 9 | 10 | expect = (275, 95) 11 | assert result == expect 12 | except ImportError: 13 | expect = '' 14 | assert result == expect 15 | -------------------------------------------------------------------------------- /tests/test_lists.py: -------------------------------------------------------------------------------- 1 | from textile import Textile 2 | 3 | 4 | def test_lists(): 5 | t = Textile() 6 | result = t.textileLists("* one\n* two\n* three") 7 | expect = '\t
    \n\t\t
  • one
  • \n\t\t
  • two
  • \n\t\t
  • three
  • \n\t
' 8 | assert result == expect 9 | -------------------------------------------------------------------------------- /tests/test_retrieve.py: -------------------------------------------------------------------------------- 1 | from textile import Textile 2 | 3 | 4 | def test_retrieve(): 5 | t = Textile() 6 | id = t.shelve("foobar") 7 | assert t.retrieve(id) == 'foobar' 8 | -------------------------------------------------------------------------------- /tests/test_span.py: -------------------------------------------------------------------------------- 1 | from textile import Textile 2 | 3 | 4 | def test_span(): 5 | t = Textile() 6 | result = t.retrieveTags(t.span("hello %(bob)span *strong* and **bold**% goodbye")) 7 | expect = ('hello span strong and ' 8 | 'bold goodbye') 9 | assert result == expect 10 | 11 | result = t.retrieveTags(t.span('%:http://domain.tld test%')) 12 | expect = 'test' 13 | assert result == expect 14 | 15 | t = Textile() 16 | # cover the partial branch where we exceed the max_span_depth. 17 | t.max_span_depth = 2 18 | result = t.retrieveTags(t.span('_-*test*-_')) 19 | expect = '*test*' 20 | assert result == expect 21 | -------------------------------------------------------------------------------- /tests/test_subclassing.py: -------------------------------------------------------------------------------- 1 | import textile 2 | 3 | 4 | def test_change_glyphs(): 5 | class TextilePL(textile.Textile): 6 | glyph_definitions = dict(textile.Textile.glyph_definitions, 7 | quote_double_open='„') 8 | 9 | test = 'Test "quotes".' 10 | expect = '\t

Test „quotes”.

' 11 | result = TextilePL().parse(test) 12 | assert expect == result 13 | 14 | # Base Textile is unchanged. 15 | expect = '\t

Test “quotes”.

' 16 | result = textile.textile(test) 17 | assert expect == result 18 | -------------------------------------------------------------------------------- /tests/test_table.py: -------------------------------------------------------------------------------- 1 | from textile import Textile 2 | 3 | 4 | def test_table(): 5 | t = Textile() 6 | result = t.table('(rowclass). |one|two|three|\n|a|b|c|') 7 | expect = '\t\n\t\t\n\t\t\t\n\t\t\t\n\t\t\t\n\t\t\n\t\t\n\t\t\t\n\t\t\t\n\t\t\t\n\t\t\n\t
onetwothree
abc
\n\n' 8 | assert result == expect 9 | 10 | t = Textile(lite=True) 11 | result = t.table('(lite). |one|two|three|\n|a|b|c|\n| * test\n* test|1|2|') 12 | expect = '\t\n\t\t\n\t\t\t\n\t\t\t\n\t\t\t\n\t\t\n\t\t\n\t\t\t\n\t\t\t\n\t\t\t\n\t\t\n\t\t\n\t\t\t\n\t\t\t\n\t\t\t\n\t\t\n\t
onetwothree
abc
* test\n* test12
\n\n' 13 | assert result == expect 14 | -------------------------------------------------------------------------------- /tests/test_textile.py: -------------------------------------------------------------------------------- 1 | # -*- coding: utf-8 -*- 2 | import pytest 3 | import re 4 | import textile 5 | 6 | 7 | def test_FootnoteReference(): 8 | html = textile.textile('YACC[1]') 9 | assert re.search(r'^\t

YACC1

', html) is not None 10 | 11 | 12 | def test_Footnote(): 13 | html = textile.textile('This is covered elsewhere[1].\n\nfn1. Down here, in fact.\n\nfn2. Here is another footnote.') 14 | assert re.search(r'^\t

This is covered elsewhere1.

\n\n\t

1 Down here, in fact.

\n\n\t

2 Here is another footnote.

$', html) is not None 15 | 16 | html = textile.textile('''See[1] for details -- or perhaps[100] at a push.\n\nfn1. Here are the details.\n\nfn100(footy#otherid). A totally unrelated footnote.''') 17 | assert re.search(r'^\t

See1 for details — or perhaps100 at a push.

\n\n\t

1 Here are the details.

\n\n\t

100 A totally unrelated footnote.

$', html) is not None 18 | 19 | html = textile.textile('''See[2] for details, and later, reference it again[2].\n\nfn2^(footy#otherid)[en]. Here are the details.''') 20 | assert re.search(r'^\t

See2 for details, and later, reference it again2.

\n\n\t

2 Here are the details.

$', html) is not None 21 | 22 | html = textile.textile('''See[3!] for details.\n\nfn3. Here are the details.''') 23 | assert re.search(r'^\t

See3 for details.

\n\n\t

3 Here are the details.

$', html) is not None 24 | 25 | html = textile.textile('''See[4!] for details.\n\nfn4^. Here are the details.''') 26 | assert re.search(r'^\t

See4 for details.

\n\n\t

4 Here are the details.

$', html) is not None 27 | 28 | 29 | def test_issue_35(): 30 | result = textile.textile('"z"') 31 | expect = '\t

“z”

' 32 | assert result == expect 33 | 34 | result = textile.textile('" z"') 35 | expect = '\t

“ z”

' 36 | assert result == expect 37 | 38 | 39 | def test_restricted(): 40 | # Note that the HTML is escaped, thus rendering the " 42 | result = textile.textile_restricted(test) 43 | expect = "\t

Here is some text.
\n<script>alert(‘hello world’)</script>

" 44 | 45 | assert result == expect 46 | 47 | test = "Here's some text." 48 | result = textile.textile_restricted(test) 49 | expect = "\t

Here’s some <!— commented out —> text.

" 50 | 51 | assert result == expect 52 | 53 | test = "p[fr]. Partir, c'est toujours mourir un peu." 54 | result = textile.textile_restricted(test) 55 | expect = '\t

Partir, c’est toujours mourir un peu.

' 56 | 57 | assert result == expect 58 | 59 | test = "p{color:blue}. is this blue?" 60 | result = textile.textile_restricted(test) 61 | expect = '\t

is this blue?

' 62 | 63 | assert result == expect 64 | 65 | test = """\ 66 | table{border:1px solid black}. 67 | |={color:gray}. Your caption goes here 68 | |~. 69 | |{position:absolute}. A footer | foo | 70 | |-. 71 | |_{font-size:xxlarge}. header|_=. centered header| 72 | |~. bottom aligned|{background:red;width:200px}. asfd|""" 73 | result = textile.textile_restricted(test, lite=False) 74 | # styles from alignment hints like =. and ~. are ok 75 | expect = '''\ 76 | \t 77 | \t 78 | \t 79 | \t\t 80 | \t\t\t 81 | \t\t\t 82 | \t\t 83 | \t 84 | \t 85 | \t\t 86 | \t\t\t 87 | \t\t\t 88 | \t\t 89 | \t\t 90 | \t\t\t 91 | \t\t\t 92 | \t\t 93 | \t 94 | \t
Your caption goes here
A footer foo
headercentered header
bottom alignedasfd
''' 95 | 96 | assert result == expect 97 | 98 | 99 | def test_unicode_footnote(): 100 | html = textile.textile('текст[1]') 101 | assert re.compile(r'^\t

текст1

$', re.U).search(html) is not None 102 | 103 | 104 | def test_autolinking(): 105 | test = """some text "test":http://www.google.com http://www.google.com "$":http://www.google.com""" 106 | result = """\t

some text test http://www.google.com www.google.com

""" 107 | expect = textile.textile(test) 108 | 109 | assert result == expect 110 | 111 | 112 | def test_sanitize(): 113 | test = "a paragraph of benign text" 114 | result = "\t

a paragraph of benign text

" 115 | expect = textile.Textile().parse(test, sanitize=True) 116 | assert result == expect 117 | 118 | test = """

a paragraph of evil text

""" 119 | result = '

a paragraph of evil text

' 120 | expect = textile.Textile().parse(test, sanitize=True) 121 | assert result == expect 122 | 123 | test = """

a paragraph of benign text
and more text

""" 124 | result = '

a paragraph of benign text
\nand more text

' 125 | expect = textile.Textile(html_type='html5').parse(test, sanitize=True) 126 | assert result == expect 127 | 128 | 129 | def test_imagesize(): 130 | PIL = pytest.importorskip('PIL') # noqa: F841 131 | 132 | test = "!http://www.google.com/intl/en_ALL/images/srpr/logo1w.png!" 133 | result = '\t

' 134 | expect = textile.Textile(get_sizes=True).parse(test) 135 | assert result == expect 136 | 137 | 138 | def test_endnotes_simple(): 139 | test = """Scientists say the moon is slowly shrinking[#my_first_label].\n\nnotelist!.\n\nnote#my_first_label Over the past billion years, about a quarter of the moon's 4.5 billion-year lifespan, it has shrunk about 200 meters (700 feet) in diameter.""" 140 | html = textile.textile(test) 141 | result_pattern = r"""\t

Scientists say the moon is slowly shrinking1.

\n\n\t
    \n\t\t
  1. Over the past billion years, about a quarter of the moon’s 4.5 billion-year lifespan, it has shrunk about 200 meters \(700 feet\) in diameter.
  2. \n\t
$""" 142 | result_re = re.compile(result_pattern) 143 | assert result_re.search(html) is not None 144 | 145 | 146 | def test_endnotes_complex(): 147 | test = """Tim Berners-Lee is one of the pioneer voices in favour of Net Neutrality[#netneutral] and has expressed the view that ISPs should supply "connectivity with no strings attached"[#netneutral!] [#tbl_quote]\n\nBerners-Lee admitted that the forward slashes ("//") in a web address were actually unnecessary. He told the newspaper that he could easily have designed URLs not to have the forward slashes. "... it seemed like a good idea at the time,"[#slashes]\n\nnote#netneutral. "Web creator rejects net tracking":http://news.bbc.co.uk/2/hi/technology/7613201.stm. BBC. 15 September 2008\n\nnote#tbl_quote. "Web inventor's warning on spy software":http://www.telegraph.co.uk/news/uknews/1581938/Web-inventor%27s-warning-on-spy-software.html. The Daily Telegraph (London). 25 May 2008\n\nnote#slashes. "Berners-Lee 'sorry' for slashes":http://news.bbc.co.uk/1/hi/technology/8306631.stm. BBC. 14 October 2009\n\nnotelist.""" 148 | html = textile.textile(test) 149 | result_pattern = r"""\t

Tim Berners-Lee is one of the pioneer voices in favour of Net Neutrality1 and has expressed the view that ISPs should supply “connectivity with no strings attached”1 2

\n\n\t

Berners-Lee admitted that the forward slashes \(“//”\) in a web address were actually unnecessary. He told the newspaper that he could easily have designed URLs not to have the forward slashes. “… it seemed like a good idea at the time,”3

\n\n\t
    \n\t\t
  1. a b Web creator rejects net tracking. BBC. 15 September 2008
  2. \n\t\t
  3. a Web inventor’s warning on spy software. The Daily Telegraph \(London\). 25 May 2008
  4. \n\t\t
  5. a Berners-Lee ‘sorry’ for slashes. BBC. 14 October 2009
  6. \n\t
$""" 150 | result_re = re.compile(result_pattern) 151 | assert result_re.search(html) is not None 152 | 153 | 154 | def test_endnotes_unreferenced_note(): 155 | test = """Scientists say[#lavader] the moon is quite small. But I, for one, don't believe them. Others claim it to be made of cheese[#aardman]. If this proves true I suspect we are in for troubled times[#apollo13] as people argue over their "share" of the moon's cheese. In the end, its limited size[#lavader] may prove problematic.\n\nnote#lavader(noteclass). "Proof of the small moon hypothesis":http://antwrp.gsfc.nasa.gov/apod/ap080801.html. Copyright(c) Laurent Laveder\n\nnote#aardman(#noteid). "Proof of a cheese moon":http://www.imdb.com/title/tt0104361\n\nnote#apollo13. After all, things do go "wrong":http://en.wikipedia.org/wiki/Apollo_13#The_oxygen_tank_incident.\n\nnotelist{padding:1em; margin:1em; border-bottom:1px solid gray}.\n\nnotelist{padding:1em; margin:1em; border-bottom:1px solid gray}:§^.\n\nnotelist{padding:1em; margin:1em; border-bottom:1px solid gray}:‡""" 156 | html = textile.textile(test) 157 | result_pattern = r"""\t

Scientists say1 the moon is quite small. But I, for one, don’t believe them. Others claim it to be made of cheese2. If this proves true I suspect we are in for troubled times3 as people argue over their “share” of the moon’s cheese. In the end, its limited size1 may prove problematic.

\n\n\t
    \n\t\t
  1. a b Proof of the small moon hypothesis. Copyright© Laurent Laveder
  2. \n\t\t
  3. a Proof of a cheese moon
  4. \n\t\t
  5. a After all, things do go wrong.
  6. \n\t
\n\n\t
    \n\t\t
  1. § Proof of the small moon hypothesis. Copyright© Laurent Laveder
  2. \n\t\t
  3. § Proof of a cheese moon
  4. \n\t\t
  5. § After all, things do go wrong.
  6. \n\t
\n\n\t
    \n\t\t
  1. Proof of the small moon hypothesis. Copyright© Laurent Laveder
  2. \n\t\t
  3. Proof of a cheese moon
  4. \n\t\t
  5. After all, things do go wrong.
  6. \n\t
""" 158 | result_re = re.compile(result_pattern, re.U) 159 | assert result_re.search(html) is not None 160 | 161 | 162 | def test_endnotes_malformed(): 163 | test = """Scientists say[#lavader] the moon is quite small. But I, for one, don't believe them. Others claim it to be made of cheese[#aardman]. If this proves true I suspect we are in for troubled times[#apollo13!] as people argue over their "share" of the moon's cheese. In the end, its limited size[#lavader] may prove problematic.\n\nnote#unused An unreferenced note.\n\nnote#lavader^ "Proof of the small moon hypothesis":http://antwrp.gsfc.nasa.gov/apod/ap080801.html. Copyright(c) Laurent Laveder\n\nnote#aardman^ "Proof of a cheese moon":http://www.imdb.com/title/tt0104361\n\nnote#apollo13^ After all, things do go "wrong":http://en.wikipedia.org/wiki/Apollo_13#The_oxygen_tank_incident.\n\nnotelist{padding:1em; margin:1em; border-bottom:1px solid gray}:α!+""" 164 | html = textile.textile(test) 165 | result_pattern = r"""^\t

Scientists say1 the moon is quite small. But I, for one, don’t believe them. Others claim it to be made of cheese2. If this proves true I suspect we are in for troubled times3 as people argue over their “share” of the moon’s cheese. In the end, its limited size1 may prove problematic.

\n\n\t
    \n\t\t
  1. α Proof of the small moon hypothesis. Copyright© Laurent Laveder
  2. \n\t\t
  3. α Proof of a cheese moon
  4. \n\t\t
  5. α After all, things do go wrong.
  6. \n\t\t
  7. An unreferenced note.
  8. \n\t
$""" 166 | result_re = re.compile(result_pattern, re.U) 167 | assert result_re.search(html) is not None 168 | 169 | 170 | def test_endnotes_undefined_note(): 171 | test = """Scientists say the moon is slowly shrinking[#my_first_label].\n\nnotelist!.""" 172 | html = textile.textile(test) 173 | result_pattern = r"""\t

Scientists say the moon is slowly shrinking1.

\n\n\t
    \n\t\t
  1. Undefined Note \[#my_first_label\].
  2. \n\t
$""" 174 | result_re = re.compile(result_pattern) 175 | assert result_re.search(html) is not None 176 | 177 | 178 | def test_encode_url(): 179 | # I tried adding these as doctests, but the unicode tests weren't 180 | # returning the correct results. 181 | t = textile.Textile() 182 | 183 | url = 'http://www.example.local' 184 | result = 'http://www.example.local' 185 | eurl = t.encode_url(url) 186 | assert eurl == result 187 | 188 | url = 'http://user@www.example.local' 189 | result = 'http://user@www.example.local' 190 | eurl = t.encode_url(url) 191 | assert eurl == result 192 | 193 | url = 'http://user:password@www.example.local' 194 | result = 'http://user:password@www.example.local' 195 | eurl = t.encode_url(url) 196 | assert eurl == result 197 | 198 | url = 'http://user:password@www.example.local/Ubermensch' 199 | result = 'http://user:password@www.example.local/Ubermensch' 200 | eurl = t.encode_url(url) 201 | assert eurl == result 202 | 203 | url = "http://user:password@www.example.local/Übermensch" 204 | result = "http://user:password@www.example.local/%C3%9Cbermensch" 205 | eurl = t.encode_url(url) 206 | assert eurl == result 207 | 208 | url = 'http://user:password@www.example.local:8080/Übermensch' 209 | result = 'http://user:password@www.example.local:8080/%C3%9Cbermensch' 210 | eurl = t.encode_url(url) 211 | assert eurl == result 212 | 213 | 214 | def test_footnote_crosslink(): 215 | html = textile.textile('''See[2] for details, and later, reference it again[2].\n\nfn2^(footy#otherid)[en]. Here are the details.''') 216 | searchstring = r'\t

See2 for details, and later, reference it again2.

\n\n\t

2 Here are the details.

$' 217 | assert re.compile(searchstring).search(html) is not None 218 | 219 | 220 | def test_footnote_without_reflink(): 221 | html = textile.textile('''See[3!] for details.\n\nfn3. Here are the details.''') 222 | searchstring = r'^\t

See3 for details.

\n\n\t

3 Here are the details.

$' 223 | assert re.compile(searchstring).search(html) is not None 224 | 225 | 226 | def testSquareBrackets(): 227 | html = textile.textile("""1[^st^], 2[^nd^], 3[^rd^]. 2 log[~n~]\n\nA close[!http://textpattern.com/favicon.ico!]image.\nA tight["text":http://textpattern.com/]link.\nA ["footnoted link":http://textpattern.com/][182].""") 228 | searchstring = r'^\t

1st, 2nd, 3rd. 2 logn

\n\n\t

A closeimage.
\nA tighttextlink.
\nA footnoted link182.

' 229 | assert re.compile(searchstring).search(html) is not None 230 | 231 | 232 | def test_html5(): 233 | """docstring for testHTML5""" 234 | 235 | test = 'We use CSS(Cascading Style Sheets).' 236 | result = '\t

We use CSS.

' 237 | expect = textile.textile(test, html_type="html5") 238 | assert result == expect 239 | 240 | 241 | def test_relURL(): 242 | t = textile.Textile() 243 | t.restricted = True 244 | assert t.relURL("gopher://gopher.com/") == '#' 245 | -------------------------------------------------------------------------------- /tests/test_textilefactory.py: -------------------------------------------------------------------------------- 1 | from textile import textilefactory 2 | import pytest 3 | 4 | 5 | def test_TextileFactory(): 6 | f = textilefactory.TextileFactory() 7 | result = f.process("some text here") 8 | expect = '\t

some text here

' 9 | assert result == expect 10 | 11 | f = textilefactory.TextileFactory(restricted=True) 12 | result = f.process("more text here") 13 | expect = '\t

more text here

' 14 | assert result == expect 15 | 16 | f = textilefactory.TextileFactory(noimage=True) 17 | result = f.process("this covers a partial branch.") 18 | expect = '\t

this covers a partial branch.

' 19 | assert result == expect 20 | 21 | # Certain parameter values are not permitted because they are illogical: 22 | 23 | with pytest.raises(ValueError) as ve: 24 | f = textilefactory.TextileFactory(lite=True) 25 | assert 'lite can only be enabled in restricted mode' in str(ve.value) 26 | 27 | with pytest.raises(ValueError) as ve: 28 | f = textilefactory.TextileFactory(html_type='invalid') 29 | assert "html_type must be 'xhtml' or 'html5'" in str(ve.value) 30 | -------------------------------------------------------------------------------- /tests/test_urls.py: -------------------------------------------------------------------------------- 1 | # -*- coding: utf-8 -*- 2 | from textile import Textile 3 | 4 | 5 | def test_urls(): 6 | t = Textile() 7 | assert t.relURL("http://www.google.com/") == 'http://www.google.com/' 8 | 9 | result = t.links('fooobar "Google":http://google.com/foobar/ and hello world "flickr":http://flickr.com/photos/jsamsa/ ') 10 | expect = 'fooobar {0}2:shelve and hello world {0}4:shelve '.format(t.uid) 11 | assert result == expect 12 | 13 | result = t.links('""Open the door, HAL!"":https://xkcd.com/375/') 14 | expect = '{0}6:shelve'.format(t.uid) 15 | assert result == expect 16 | 17 | result = t.links('"$":http://domain.tld/test_[brackets]') 18 | expect = '{0}8:shelve'.format(t.uid) 19 | assert result == expect 20 | 21 | result = t.links('"$":http://domain.tld/test_') 22 | expect = '{0}10:shelve'.format(t.uid) 23 | assert result == expect 24 | 25 | expect = '"":test' 26 | result = t.links(expect) 27 | assert result == expect 28 | 29 | expect = '"$":htt://domain.tld' 30 | result = t.links(expect) 31 | assert result == expect 32 | 33 | result = t.shelveURL('') 34 | expect = '' 35 | assert result == expect 36 | 37 | result = t.retrieveURLs('{0}2:url'.format(t.uid)) 38 | expect = '' 39 | assert result == expect 40 | 41 | result = t.encode_url('http://domain.tld/übermensch') 42 | expect = 'http://domain.tld/%C3%BCbermensch' 43 | assert result == 
expect 44 | 45 | result = t.parse('A link that starts with an h is "handled":/test/ incorrectly.') 46 | expect = '\t

A link that starts with an h is handled incorrectly.

' 47 | assert result == expect 48 | 49 | result = t.parse('A link that starts with a space" raises":/test/ an exception.') 50 | expect = '\t

A link that starts with a space” raises an exception.

' 51 | assert result == expect 52 | 53 | result = t.parse('A link that "contains a\nnewline":/test/ raises an exception.') 54 | expect = '\t

A link that contains a\nnewline raises an exception.

' 55 | assert result == expect 56 | 57 | 58 | def test_rel_attribute(): 59 | t = Textile(rel='nofollow') 60 | result = t.parse('"$":http://domain.tld') 61 | expect = '\t

domain.tld

' 62 | assert result == expect 63 | 64 | 65 | def test_quotes_in_link_text(): 66 | """quotes in link text are tricky.""" 67 | test = '""this is a quote in link text"":url' 68 | t = Textile() 69 | result = t.parse(test) 70 | expect = '\t

“this is a quote in link text”

' 71 | assert result == expect 72 | -------------------------------------------------------------------------------- /tests/test_utils.py: -------------------------------------------------------------------------------- 1 | # -*- coding: utf-8 -*- 2 | from textile import utils 3 | 4 | 5 | def test_encode_html(): 6 | result = utils.encode_html('''this is a "test" of text that's safe to ''' 7 | 'put in an attribute.') 8 | expect = ('this is a "test" of text that's safe to put in ' 9 | 'an <html> attribute.') 10 | assert result == expect 11 | 12 | 13 | def test_has_raw_text(): 14 | assert utils.has_raw_text('

foo bar biz baz

') is False 15 | assert utils.has_raw_text(' why yes, yes it does') is True 16 | 17 | 18 | def test_is_rel_url(): 19 | assert utils.is_rel_url("http://www.google.com/") is False 20 | assert utils.is_rel_url("/foo") is True 21 | 22 | 23 | def test_generate_tag(): 24 | result = utils.generate_tag('span', 'inner text', {'class': 'test'}) 25 | expect = 'inner text' 26 | assert result == expect 27 | 28 | text = 'Übermensch' 29 | attributes = {'href': 'http://de.wikipedia.org/wiki/%C3%C9bermensch'} 30 | expect = 'Übermensch' 31 | result = utils.generate_tag('a', text, attributes) 32 | assert result == expect 33 | 34 | 35 | def test_human_readable_url_edge_case(): 36 | assert utils.human_readable_url('google.com') == 'google.com' 37 | assert utils.human_readable_url('tel:1-800-555-1212') == '1-800-555-1212' 38 | -------------------------------------------------------------------------------- /tests/test_values.py: -------------------------------------------------------------------------------- 1 | # -*- coding: utf-8 -*- 2 | import textile 3 | import pytest 4 | 5 | xhtml_known_values = ( 6 | ('hello, world', '\t

hello, world

'), 7 | 8 | ('A single paragraph.\n\nFollowed by another.', 9 | '\t

A single paragraph.

\n\n\t

Followed by another.

'), 10 | 11 | ('I am very serious.\n\n
\nI am very serious.\n
', 12 | '\t

I am very serious.

\n\n
\nI am <b>very</b> serious.\n
'), 13 | 14 | ('I spoke.\nAnd none replied.', '\t

I spoke.
\nAnd none replied.

'), 15 | 16 | ('"Observe!"', '\t

“Observe!”

'), 17 | 18 | ('Observe -- very nice!', '\t

Observe — very nice!

'), 19 | 20 | ('Observe - tiny and brief.', '\t

Observe – tiny and brief.

'), 21 | 22 | ('Observe...', '\t

Observe…

'), 23 | 24 | ('Observe ...', '\t

Observe …

'), 25 | 26 | ('Observe: 2 x 2.', '\t

Observe: 2 × 2.

'), 27 | 28 | ('one(TM), two(R), three(C).', '\t

one™, two®, three©.

'), 29 | 30 | ('h1. Header 1', '\t

Header 1

'), 31 | 32 | ('h2. Header 2', '\t

Header 2

'), 33 | 34 | ('h3. Header 3', '\t

Header 3

'), 35 | 36 | ('An old text\n\nbq. A block quotation.\n\nAny old text''', 37 | '\t

An old text

\n\n\t
\n\t\t

A block quotation.

\n\t
\n\n\t

Any old text

'), 38 | 39 | ('I _believe_ every word.', '\t

I believe every word.

'), 40 | 41 | ('And then? She *fell*!', '\t

And then? She fell!

'), 42 | 43 | ('I __know__.\nI **really** __know__.', '\t

I know.
\nI really know.

'), 44 | 45 | ("??Cat's Cradle?? by Vonnegut", '\t

Cat’s Cradle by Vonnegut

'), 46 | 47 | ('Convert with @str(foo)@', '\t

Convert with str(foo)

'), 48 | 49 | ('I\'m -sure- not sure.', '\t

I’m sure not sure.

'), 50 | 51 | ('You are a +pleasant+ child.', '\t

You are a pleasant child.

'), 52 | 53 | ('a ^2^ + b ^2^ = c ^2^', '\t

a 2 + b 2 = c 2

'), 54 | 55 | ('log ~2~ x', '\t

log 2 x

'), 56 | 57 | ('I\'m %unaware% of most soft drinks.', '\t

I’m unaware of most soft drinks.

'), 58 | 59 | ("I'm %{color:red}unaware%\nof most soft drinks.", '\t

I’m unaware
\nof most soft drinks.

'), 60 | 61 | ('p(example1). An example', '\t

An example

'), 62 | 63 | ('p(#big-red). Red here', '\t

Red here

'), 64 | 65 | ('p(example1#big-red2). Red here', '\t

Red here

'), 66 | 67 | ('p{color:blue;margin:30px}. Spacey blue', '\t

Spacey blue

'), 68 | 69 | ('p[fr]. rouge', '\t

rouge

'), 70 | 71 | ('I seriously *{color:red}blushed*\nwhen I _(big)sprouted_ that\ncorn stalk from my\n%[es]cabeza%.', 72 | '\t

I seriously blushed
\nwhen I sprouted' 73 | ' that
\ncorn stalk from my
\ncabeza.

'), 74 | 75 | ('p<. align left', '\t

align left

'), 76 | 77 | ('p>. align right', '\t

align right

'), 78 | 79 | ('p=. centered', '\t

centered

'), 80 | 81 | ('p<>. justified', '\t

justified

'), 82 | 83 | ('p(. left ident 1em', '\t

left ident 1em

'), 84 | 85 | ('p((. left ident 2em', '\t

left ident 2em

'), 86 | 87 | ('p))). right ident 3em', '\t

right ident 3em

'), 88 | 89 | ('h2()>. Bingo.', '\t

Bingo.

'), 90 | 91 | ('h3()>[no]{color:red}. Bingo', '\t

Bingo

'), 92 | 93 | ('
\n\na.gsub!( /\n
', 94 | '
\n\na.gsub!( /</, "" )\n\n
'), 95 | 96 | ('
\n\nh3. Sidebar\n\n"Hobix":http://hobix.com/\n"Ruby":http://ruby-lang.org/\n\n
\n\n' 97 | 'The main text of the\npage goes here and will\nstay to the left of the\nsidebar.', 98 | '\t

\n\n\t

Sidebar

\n\n\t

Hobix
\n' 99 | 'Ruby

\n\n\t

\n\n\t

The main text of the
\n' 100 | 'page goes here and will
\nstay to the left of the
\nsidebar.

'), 101 | 102 | ('# A first item\n# A second item\n# A third', 103 | '\t
    \n\t\t
  1. A first item
  2. \n\t\t
  3. A second item
  4. \n\t\t
  5. A third
  6. \n\t
'), 104 | 105 | ('# Fuel could be:\n## Coal\n## Gasoline\n## Electricity\n# Humans need only:\n## Water\n## Protein', 106 | '\t
    \n\t\t
  1. Fuel could be:\n\t\t
      \n\t\t\t
    1. Coal
    2. \n\t\t\t
    3. Gasoline
    4. \n\t\t\t
    5. Electricity
    6. \n\t\t
  2. \n\t\t
  3. Humans need only:\n\t\t
      \n\t\t\t
    1. Water
    2. \n\t\t\t
    3. Protein
    4. \n\t\t
  4. \n\t\t
'), 107 | 108 | ('* A first item\n* A second item\n* A third', 109 | '\t
    \n\t\t
  • A first item
  • \n\t\t
  • A second item
  • \n\t\t
  • A third
  • \n\t
'), 110 | 111 | ('* Fuel could be:\n** Coal\n** Gasoline\n** Electricity\n* Humans need only:\n** Water\n** Protein', 112 | '\t
    \n\t\t
  • Fuel could be:\n\t\t
      \n\t\t\t
    • Coal
    • \n\t\t\t
    • Gasoline
    • \n\t\t\t
    • Electricity
    • \n\t\t
  • \n\t\t
  • Humans need only:\n\t\t
      \n\t\t\t
    • Water
    • \n\t\t\t
    • Protein
    • \n\t\t
  • \n\t\t
'), 113 | 114 | ('I searched "Google":http://google.com.', '\t

I searched Google.

'), 115 | 116 | ('I searched "a search engine (Google)":http://google.com.', '\t

I searched a search engine.

'), 117 | 118 | ('I am crazy about "Hobix":hobix\nand "it\'s":hobix "all":hobix I ever\n"link to":hobix!\n\n[hobix]http://hobix.com', 119 | '\t

I am crazy about Hobix
\nand it’s ' 120 | 'all I ever
\nlink to!

'), 121 | 122 | ('!http://hobix.com/sample.jpg!', '\t

'), 123 | 124 | ('!openwindow1.gif(Bunny.)!', '\t

Bunny.

'), 125 | 126 | ('!openwindow1.gif!:http://hobix.com/', '\t

'), 127 | 128 | ('!>obake.gif!\n\nAnd others sat all round the small\nmachine and paid it to sing to them.', 129 | '\t

\n\n\t' 130 | '

And others sat all round the small
\nmachine and paid it to sing to them.

'), 131 | 132 | ('We use CSS(Cascading Style Sheets).', '\t

We use CSS.

'), 133 | 134 | ('|one|two|three|\n|a|b|c|', 135 | '\t\n\t\t\n\t\t\t\n\t\t\t\n\t\t\t\n\t\t' 136 | '\n\t\t\n\t\t\t\n\t\t\t\n\t\t\t\n\t\t\n\t
onetwothree
abc
'), 137 | 138 | ('| name | age | sex |\n| joan | 24 | f |\n| archie | 29 | m |\n| bella | 45 | f |', 139 | '\t\n\t\t\n\t\t\t\n\t\t\t\n\t\t\t\n\t\t' 140 | '\n\t\t\n\t\t\t\n\t\t\t\n\t\t\t\n\t\t' 141 | '\n\t\t\n\t\t\t\n\t\t\t\n\t\t\t\n\t\t' 142 | '\n\t\t\n\t\t\t\n\t\t\t\n\t\t\t\n\t\t\n\t
name age sex
joan 24 f
archie 29 m
bella 45 f
'), 143 | 144 | ('|_. name |_. age |_. sex |\n| joan | 24 | f |\n| archie | 29 | m |\n| bella | 45 | f |', 145 | '\t\n\t\t\n\t\t\t\n\t\t\t\n\t\t\t\n\t\t' 146 | '\n\t\t\n\t\t\t\n\t\t\t\n\t\t\t\n\t\t' 147 | '\n\t\t\n\t\t\t\n\t\t\t\n\t\t\t\n\t\t' 148 | '\n\t\t\n\t\t\t\n\t\t\t\n\t\t\t\n\t\t\n\t
name age sex
joan 24 f
archie 29 m
bella 45 f
'), 149 | 150 | ('', '\t

'), 151 | 152 | ('pre.. Hello\n\nHello Again\n\np. normal text', '
Hello\n\nHello Again
\n\n\t

normal text

'), 153 | 154 | ('
this is in a pre tag
', '
this is in a pre tag
'), 155 | 156 | ('"test1":http://foo.com/bar--baz\n\n"test2":http://foo.com/bar---baz\n\n"test3":http://foo.com/bar-17-18-baz', 157 | '\t

test1

\n\n\t' 158 | '

test2

\n\n\t' 159 | '

test3

'), 160 | 161 | ('"foo ==(bar)==":#foobar', '\t

foo (bar)

'), 162 | 163 | ('!http://render.mathim.com/A%5EtAx%20%3D%20A%5Et%28Ax%29.!', 164 | '\t

'), 165 | 166 | ('* Point one\n* Point two\n## Step 1\n## Step 2\n## Step 3\n* Point three\n** Sub point 1\n** Sub point 2', 167 | '\t
    \n\t\t
  • Point one
  • \n\t\t
  • Point two\n\t\t
      \n\t\t\t
    1. Step 1
    2. \n\t\t\t
    3. Step 2
    4. \n\t\t\t
    5. Step 3
    6. \n\t\t
  • \n\t\t
  • Point three\n\t\t
      \n\t\t\t
    • Sub point 1
    • \n\t\t\t
    • Sub point 2
    • \n\t\t
  • \n\t\t
'), 168 | 169 | ('@array[4] = 8@', '\t

array[4] = 8

'), 170 | 171 | ('#{color:blue} one\n# two\n# three', 172 | '\t
    \n\t\t
  1. one
  2. \n\t\t
  3. two
  4. \n\t\t
  5. three
  6. \n\t
'), 173 | 174 | ('Links (like "this":http://foo.com), are now mangled in 2.1.0, whereas 2.0 parsed them correctly.', 175 | '\t

Links (like this), are now mangled in 2.1.0, whereas 2.0 parsed them correctly.

'), 176 | 177 | ('@monospaced text@, followed by text', 178 | '\t

monospaced text, followed by text

'), 179 | 180 | ('h2. A header\n\n\n\n\n\nsome text', '\t

A header

\n\n\n\n\n\n\t

some text

'), 181 | 182 | ('pre.. foo bar baz\nquux', '
foo bar baz\nquux
'), 183 | 184 | ('line of text\n\n leading spaces', 185 | '\t

line of text

\n\n leading spaces'), 186 | 187 | ('"some text":http://www.example.com/?q=foo%20bar and more text', 188 | '\t

some text and more text

'), 189 | 190 | ('(??some text??)', '\t

(some text)

'), 191 | 192 | ('(*bold text*)', '\t

(bold text)

'), 193 | 194 | ('H[~2~]O', '\t

H2O

'), 195 | 196 | ("p=. Où est l'école, l'église s'il vous plaît?", 197 | """\t

Où est l’école, l’église s’il vous plaît?

"""), 198 | 199 | ("p=. *_The_* _*Prisoner*_", 200 | """\t

The Prisoner

"""), 201 | 202 | ("""p=. "An emphasised _word._" & "*A spanned phrase.*" """, 203 | """\t

“An emphasised word.” & “A spanned phrase.

"""), 204 | 205 | ("""p=. "*Here*'s a word!" """, 206 | """\t

Here’s a word!”

"""), 207 | 208 | ("""p=. "Please visit our "Textile Test Page":http://textile.sitemonks.com" """, 209 | """\t

“Please visit our Textile Test Page

"""), 210 | ("""| Foreign EXPÓŅÉNTIAL |""", 211 | """\t\n\t\t\n\t\t\t\n\t\t\n\t
Foreign EXPÓŅÉNTIAL
"""), 212 | ("""Piękne ŹDŹBŁO""", 213 | """\t

Piękne ŹDŹBŁO

"""), 214 | 215 | ("""p=. Tell me, what is AJAX(Asynchronous Javascript and XML), please?""", 216 | """\t

Tell me, what is AJAX, please?

"""), 217 | ('p{font-size:0.8em}. *TxStyle* is a documentation project of Textile 2.4 for "Textpattern CMS":http://texpattern.com.', 218 | '\t

TxStyle is a documentation project of Textile 2.4 for Textpattern CMS.

'), 219 | (""""Übermensch":http://de.wikipedia.org/wiki/Übermensch""", """\t

Übermensch

"""), 220 | ("""Here is some text with a block.\n\n\n\n\n\nbc. """, 221 | """\t

Here is some text with a block.

\n\n\n\n\n\n
<!-- Here is a comment block in a code block. -->
"""), 222 | (""""Textile(c)" is a registered(r) 'trademark' of Textpattern(tm) -- or TXP(That's textpattern!) -- at least it was - back in '88 when 2x4 was (+/-)5(o)C ... QED!\n\np{font-size: 200%;}. 2(1/4) 3(1/2) 4(3/4)""", 223 | """\t

“Textile©” is a registered® ‘trademark’ of Textpattern™ — or TXP — at least it was – back in ’88 when 2×4 was ±5°C … QED!

\n\n\t

2¼ 3½ 4¾

"""), 224 | ("""|=. Testing colgroup and col syntax\n|:\\5. 80\n|a|b|c|d|e|\n\n|=. Testing colgroup and col syntax|\n|:\\5. 80|\n|a|b|c|d|e|""", """\t\n\t\n\t\n\t\n\t\t\n\t\t\t\n\t\t\t\n\t\t\t\n\t\t\t\n\t\t\t\n\t\t\n\t
Testing colgroup and col syntax
abcde
\n\n\t\n\t\n\t\n\t\n\t\t\n\t\t\t\n\t\t\t\n\t\t\t\n\t\t\t\n\t\t\t\n\t\t\n\t
Testing colgroup and col syntax
abcde
"""), 225 | ("""table(#dvds){border-collapse:collapse}. Great films on DVD employing Textile summary, caption, thead, tfoot, two tbody elements and colgroups\n|={font-size:140%;margin-bottom:15px}. DVDs with two Textiled tbody elements\n|:\\3. 100 |{background:#ddd}|250||50|300|\n|^(header).\n|_. Title |_. Starring |_. Director |_. Writer |_. Notes |\n|~(footer).\n|\\5=. This is the tfoot, centred |\n|-(toplist){background:#c5f7f6}.\n| _The Usual Suspects_ | Benicio Del Toro, Gabriel Byrne, Stephen Baldwin, Kevin Spacey | Bryan Singer | Chris McQaurrie | One of the finest films ever made |\n| _Se7en_ | Morgan Freeman, Brad Pitt, Kevin Spacey | David Fincher | Andrew Kevin Walker | Great psychological thriller |\n| _Primer_ | David Sullivan, Shane Carruth | Shane Carruth | Shane Carruth | Amazing insight into trust and human psychology
rather than science fiction. Terrific! |\n| _District 9_ | Sharlto Copley, Jason Cope | Neill Blomkamp | Neill Blomkamp, Terri Tatchell | Social commentary layered on thick,\nbut boy is it done well |\n|-(medlist){background:#e7e895;}.\n| _Arlington Road_ | Tim Robbins, Jeff Bridges | Mark Pellington | Ehren Kruger | Awesome study in neighbourly relations |\n| _Phone Booth_ | Colin Farrell, Kiefer Sutherland, Forest Whitaker | Joel Schumacher | Larry Cohen | Edge-of-the-seat stuff in this\nshort but brilliantly executed thriller |""", 226 | """\t\n\t\n\t\n\t\n\t\n\t\n\t\n\t\n\t\n\t\n\t\t\n\t\t\t\n\t\t\t\n\t\t\t\n\t\t\t\n\t\t\t\n\t\t\n\t\n\t\n\t\t\n\t\t\t\n\t\t\n\t\n\t\n\t\t\n\t\t\t\n\t\t\t\n\t\t\t\n\t\t\t\n\t\t\t\n\t\t\n\t\t\n\t\t\t\n\t\t\t\n\t\t\t\n\t\t\t\n\t\t\t\n\t\t\n\t\t\n\t\t\t\n\t\t\t\n\t\t\t\n\t\t\t\n\t\t\t\n\t\t\n\t\t\n\t\t\t\n\t\t\t\n\t\t\t\n\t\t\t\n\t\t\t\n\t\t\n\t\n\t\n\t\t\n\t\t\t\n\t\t\t\n\t\t\t\n\t\t\t\n\t\t\t\n\t\t\n\t\t\n\t\t\t\n\t\t\t\n\t\t\t\n\t\t\t\n\t\t\t\n\t\t\n\t\n\t
DVDs with two Textiled tbody elements
Title Starring Director Writer Notes
This is the tfoot, centred
The Usual Suspects Benicio Del Toro, Gabriel Byrne, Stephen Baldwin, Kevin Spacey Bryan Singer Chris McQaurrie One of the finest films ever made
Se7en Morgan Freeman, Brad Pitt, Kevin Spacey David Fincher Andrew Kevin Walker Great psychological thriller
Primer David Sullivan, Shane Carruth Shane Carruth Shane Carruth Amazing insight into trust and human psychology
\nrather than science fiction. Terrific!
District 9 Sharlto Copley, Jason Cope Neill Blomkamp Neill Blomkamp, Terri Tatchell Social commentary layered on thick,
\nbut boy is it done well
Arlington Road Tim Robbins, Jeff Bridges Mark Pellington Ehren Kruger Awesome study in neighbourly relations
Phone Booth Colin Farrell, Kiefer Sutherland, Forest Whitaker Joel Schumacher Larry Cohen Edge-of-the-seat stuff in this
\nshort but brilliantly executed thriller
"""), 227 | ("""-(hot) *coffee* := Hot _and_ black\n-(hot#tea) tea := Also hot, but a little less black\n-(cold) milk := Nourishing beverage for baby cows.\nCold drink that goes great with cookies. =:\n\n-(hot) coffee := Hot and black\n-(hot#tea) tea := Also hot, but a little less black\n-(cold) milk :=\nNourishing beverage for baby cows.\nCold drink that goes great with cookies. =:""", 228 | """
\n\t
coffee
\n\t
Hot and black
\n\t
tea
\n\t
Also hot, but a little less black
\n\t
milk
\n\t
Nourishing beverage for baby cows.
\nCold drink that goes great with cookies.
\n
\n\n
\n\t
coffee
\n\t
Hot and black
\n\t
tea
\n\t
Also hot, but a little less black
\n\t
milk
\n\t

Nourishing beverage for baby cows.
\nCold drink that goes great with cookies.

\n
"""), 229 | (""";(class#id) Term 1\n: Def 1\n: Def 2\n: Def 3""", 230 | """\t
\n\t\t
Term 1
\n\t\t
Def 1
\n\t\t
Def 2
\n\t\t
Def 3
\n\t
"""), 231 | ("""*Here is a comment*\n\nHere is *(class)a comment*\n\n*(class)Here is a class* that is a little extended and is\n*followed* by a strong word!\n\nbc. ; Content-type: text/javascript\n; Cache-Control: no-store, no-cache, must-revalidate, pre-check=0, post-check=0, max-age=0\n; Expires: Sat, 24 Jul 2003 05:00:00 GMT\n; Last-Modified: Wed, 1 Jan 2025 05:00:00 GMT\n; Pragma: no-cache\n\n*123 test*\n\n*test 123*\n\n**123 test**\n\n**test 123**""", 232 | """\t

Here is a comment

\n\n\t

Here is a comment

\n\n\t

Here is a class that is a little extended and is
\nfollowed by a strong word!

\n\n
; Content-type: text/javascript\n; Cache-Control: no-store, no-cache, must-revalidate, pre-check=0, post-check=0, max-age=0\n; Expires: Sat, 24 Jul 2003 05:00:00 GMT\n; Last-Modified: Wed, 1 Jan 2025 05:00:00 GMT\n; Pragma: no-cache
\n\n\t

123 test

\n\n\t

test 123

\n\n\t

123 test

\n\n\t

test 123

"""), 233 | ("""#_(first#list) one\n# two\n# three\n\ntest\n\n#(ordered#list2).\n# one\n# two\n# three\n\ntest\n\n#_(class_4).\n# four\n# five\n# six\n\ntest\n\n#_ seven\n# eight\n# nine\n\ntest\n\n# one\n# two\n# three\n\ntest\n\n#22 22\n# 23\n# 24""", 234 | """\t
    \n\t\t
  1. one
  2. \n\t\t
  3. two
  4. \n\t\t
  5. three
  6. \n\t
\n\n\t

test

\n\n\t
    \n\t\t
  1. one
  2. \n\t\t
  3. two
  4. \n\t\t
  5. three
  6. \n\t
\n\n\t

test

\n\n\t
    \n\t\t
  1. four
  2. \n\t\t
  3. five
  4. \n\t\t
  5. six
  6. \n\t
\n\n\t

test

\n\n\t
    \n\t\t
  1. seven
  2. \n\t\t
  3. eight
  4. \n\t\t
  5. nine
  6. \n\t
\n\n\t

test

\n\n\t
    \n\t\t
  1. one
  2. \n\t\t
  3. two
  4. \n\t\t
  5. three
  6. \n\t
\n\n\t

test

\n\n\t
    \n\t\t
  1. 22
  2. \n\t\t
  3. 23
  4. \n\t\t
  5. 24
  6. \n\t
"""), 235 | ("""# one\n##3 one.three\n## one.four\n## one.five\n# two\n\ntest\n\n#_(continuation#section2).\n# three\n# four\n##_ four.six\n## four.seven\n# five\n\ntest\n\n#21 twenty-one\n# twenty-two""", 236 | """\t
    \n\t\t
  1. one\n\t\t
      \n\t\t\t
    1. one.three
    2. \n\t\t\t
    3. one.four
    4. \n\t\t\t
    5. one.five
    6. \n\t\t
  2. \n\t\t
  3. two
  4. \n\t
\n\n\t

test

\n\n\t
    \n\t\t
  1. three
  2. \n\t\t
  3. four\n\t\t
      \n\t\t\t
    1. four.six
    2. \n\t\t\t
    3. four.seven
    4. \n\t\t
  4. \n\t\t
  5. five
  6. \n\t
\n\n\t

test

\n\n\t
    \n\t\t
  1. twenty-one
  2. \n\t\t
  3. twenty-two
  4. \n\t
"""), 237 | ("""|* Foo[^2^]\n* _bar_\n* ~baz~ |\n|#4 *Four*\n# __Five__ |\n|-(hot) coffee := Hot and black\n-(hot#tea) tea := Also hot, but a little less black\n-(cold) milk :=\nNourishing beverage for baby cows.\nCold drink that goes great with cookies. =:\n|""", 238 | """\t\n\t\t\n\t\t\t\n\t\t\n\t\t\n\t\t\t\n\t\t\n\t\t\n\t\t\t\n\t\t\n\t
\t
    \n\t\t
  • Foo2
  • \n\t\t
  • bar
  • \n\t\t
  • baz
  • \n\t
\t
    \n\t\t
  1. Four
  2. \n\t\t
  3. Five
  4. \n\t
\n\t
coffee
\n\t
Hot and black
\n\t
tea
\n\t
Also hot, but a little less black
\n\t
milk
\n\t

Nourishing beverage for baby cows.
\nCold drink that goes great with cookies.

\n
"""), 239 | ("""h4. A more complicated table\n\ntable(tableclass#tableid){color:blue}.\n|_. table |_. more |_. badass |\n|\\3. Horizontal span of 3|\n(firstrow). |first|HAL(open the pod bay doors)|1|\n|some|{color:green}. styled|content|\n|/2. spans 2 rows|this is|quite a|\n| deep test | don't you think?|\n(lastrow). |fifth|I'm a lumberjack|5|\n|sixth| _*bold italics*_ |6|""", 240 | """\t

A more complicated table

\n\n\t\n\t\t\n\t\t\t\n\t\t\t\n\t\t\t\n\t\t\n\t\t\n\t\t\t\n\t\t\n\t\t\n\t\t\t\n\t\t\t\n\t\t\t\n\t\t\n\t\t\n\t\t\t\n\t\t\t\n\t\t\t\n\t\t\n\t\t\n\t\t\t\n\t\t\t\n\t\t\t\n\t\t\n\t\t\n\t\t\t\n\t\t\t\n\t\t\n\t\t\n\t\t\t\n\t\t\t\n\t\t\t\n\t\t\n\t\t\n\t\t\t\n\t\t\t\n\t\t\t\n\t\t\n\t
table more badass
Horizontal span of 3
firstHAL1
somestyledcontent
spans 2 rowsthis isquite a
deep test don’t you think?
fifthI’m a lumberjack5
sixth bold italics 6
"""), 241 | ("""| *strong* |\n\n| _em_ |\n\n| Inter-word -dashes- | ZIP-codes are 5- or 9-digit codes |""", 242 | """\t\n\t\t\n\t\t\t\n\t\t\n\t
strong
\n\n\t\n\t\t\n\t\t\t\n\t\t\n\t
em
\n\n\t\n\t\t\n\t\t\t\n\t\t\t\n\t\t\n\t
Inter-word dashes ZIP-codes are 5- or 9-digit codes
"""), 243 | ("""|_. attribute list |\n|<. align left |\n|>. align right|\n|=. center |\n|<>. justify me|\n|^. valign top |\n|~. bottom |""", 244 | """\t\n\t\t\n\t\t\t\n\t\t\n\t\t\n\t\t\t\n\t\t\n\t\t\n\t\t\t\n\t\t\n\t\t\n\t\t\t\n\t\t\n\t\t\n\t\t\t\n\t\t\n\t\t\n\t\t\t\n\t\t\n\t\t\n\t\t\t\n\t\t\n\t
attribute list
align left
align right
center
justify me
valign top
bottom
"""), 245 | ("""h2. A definition list\n\n;(class#id) Term 1\n: Def 1\n: Def 2\n: Def 3\n;; Center\n;; NATO(Why Em Cee Ayy)\n:: Subdef 1\n:: Subdef 2\n;;; SubSub Term\n::: SubSub Def 1\n::: SubSub Def 2\n::: Subsub Def 3\nWith newline\n::: Subsub Def 4\n:: Subdef 3\n: DEF 4\n; Term 2\n: Another def\n: And another\n: One more\n:: A def without a term\n:: More defness\n; Third term for good measure\n: My definition of a boombastic jazz""", 246 | """\t

A definition list

\n\n\t
\n\t\t
Term 1
\n\t\t
Def 1
\n\t\t
Def 2
\n\t\t
Def 3\n\t\t
\n\t\t\t
Center
\n\t\t\t
NATO
\n\t\t\t
Subdef 1
\n\t\t\t
Subdef 2\n\t\t\t
\n\t\t\t\t
SubSub Term
\n\t\t\t\t
SubSub Def 1
\n\t\t\t\t
SubSub Def 2
\n\t\t\t\t
Subsub Def 3
\nWith newline
\n\t\t\t\t
Subsub Def 4
\n\t\t\t
\n\t\t\t
Subdef 3
\n\t\t
\n\t\t
DEF 4
\n\t\t
Term 2
\n\t\t
Another def
\n\t\t
And another
\n\t\t
One more\n\t\t
\n\t\t\t
A def without a term
\n\t\t\t
More defness
\n\t\t
\n\t\t
Third term for good measure
\n\t\t
My definition of a boombastic jazz
\n\t
"""), 247 | ("""###. Here's a comment.\n\nh3. Hello\n\n###. And\nanother\none.\n\nGoodbye.""", """\t

Hello

\n\n\t

Goodbye.

"""), 248 | ("""h2. A Definition list which covers the instance where a new definition list is created with a term without a definition\n\n- term :=\n- term2 := def""", """\t

A Definition list which covers the instance where a new definition list is created with a term without a definition

\n\n
\n\t
term
\n\t
term2
\n\t
def
\n
"""), 249 | ('!{height:20px;width:20px;}https://1.gravatar.com/avatar/!', 250 | '\t

'), 251 | ('& test', '\t

& test

'), 252 | ) 253 | 254 | # A few extra cases for HTML4 255 | html_known_values = ( 256 | ("pre.. The beginning\n\nbc.. This code\n\nis the last\n\nblock in the document\n", 257 | "
The beginning
\n\n
This code\n\nis the last\n\nblock in the document
"), 258 | ("bc.. This code\n\nis not\n\nsurrounded by anything\n", 259 | "
This code\n\nis not\n\nsurrounded by anything
"), 260 | ("bc.. Paragraph 1\n\nParagraph 2\n\nParagraph 3\n\np.. post-code paragraph", 261 | "
Paragraph 1\n\nParagraph 2\n\nParagraph 3
\n\n

post-code paragraph

"), 262 | ("bc.. Paragraph 1\n\nParagraph 2\n\nParagraph 3\n\npre.. post-code non-p block", 263 | "
Paragraph 1\n\nParagraph 2\n\nParagraph 3
\n\n
post-code non-p block
"), 264 | ('I spoke.\nAnd none replied.', '\t

I spoke.
\nAnd none replied.

'), 265 | ('I __know__.\nI **really** __know__.', '\t

I know.
\nI really know.

'), 266 | ("I'm %{color:red}unaware%\nof most soft drinks.", '\t

I’m unaware
\nof most soft drinks.

'), 267 | ('I seriously *{color:red}blushed*\nwhen I _(big)sprouted_ that\ncorn stalk from my\n%[es]cabeza%.', 268 | '\t

I seriously blushed
\nwhen I sprouted' 269 | ' that
\ncorn stalk from my
\ncabeza.

'), 270 | ('
\n\na.gsub!( /\n
', 271 | '
\n\na.gsub!( /</, "" )\n\n
'), 272 | ('
\n\nh3. Sidebar\n\n"Hobix":http://hobix.com/\n"Ruby":http://ruby-lang.org/\n\n
\n\n' 273 | 'The main text of the\npage goes here and will\nstay to the left of the\nsidebar.', 274 | '\t

\n\n\t

Sidebar

\n\n\t

Hobix
\n' 275 | 'Ruby

\n\n\t

\n\n\t

The main text of the
\n' 276 | 'page goes here and will
\nstay to the left of the
\nsidebar.

'), 277 | ('I am crazy about "Hobix":hobix\nand "it\'s":hobix "all":hobix I ever\n"link to":hobix!\n\n[hobix]http://hobix.com', 278 | '\t

I am crazy about Hobix
\nand it’s ' 279 | 'all I ever
\nlink to!

'), 280 | ('!http://hobix.com/sample.jpg!', '\t

'), 281 | ('!openwindow1.gif(Bunny.)!', '\t

Bunny.

'), 282 | ('!openwindow1.gif!:http://hobix.com/', '\t

'), 283 | ('!>obake.gif!\n\nAnd others sat all round the small\nmachine and paid it to sing to them.', 284 | '\t

\n\n\t' 285 | '

And others sat all round the small
\nmachine and paid it to sing to them.

'), 286 | ('!http://render.mathim.com/A%5EtAx%20%3D%20A%5Et%28Ax%29.!', 287 | '\t

'), 288 | ('notextile. foo bar baz\n\np. quux\n', 289 | ' foo bar baz\n\n\t

quux

'), 290 | ('"foo":http://google.com/one--two', '\t

foo

'), 291 | # issue 24 colspan 292 | ('|\\2. spans two cols |\n| col 1 | col 2 |', '\t\n\t\t\n\t\t\t\n\t\t\n\t\t\n\t\t\t\n\t\t\t\n\t\t\n\t
spans two cols
col 1 col 2
'), 293 | # issue 2 escaping 294 | ('"foo ==(bar)==":#foobar', '\t

foo (bar)

'), 295 | # issue 14 newlines in extended pre blocks 296 | ("pre.. Hello\n\nAgain\n\np. normal text", '
Hello\n\nAgain
\n\n\t

normal text

'), 297 | # url with parentheses 298 | ('"python":http://en.wikipedia.org/wiki/Python_(programming_language)', '\t

python

'), 299 | # table with hyphen styles 300 | ('table(linkblog-thumbnail).\n|(linkblog-thumbnail-cell). apple|bear|', '\t\n\t\t\n\t\t\t\n\t\t\t\n\t\t\n\t
applebear
'), 301 | # issue 32 empty table cells 302 | ("|thing|||otherthing|", "\t\n\t\t\n\t\t\t\n\t\t\t\n\t\t\t\n\t\t\t\n\t\t\n\t
thingotherthing
"), 303 | # issue 36 link reference names http and https 304 | ('"signup":signup\n[signup]http://myservice.com/signup', '\t

signup

'), 305 | ('"signup":signup\n[signup]https://myservice.com/signup', '\t

signup

'), 306 | # nested formatting 307 | ("*_test text_*", "\t

test text

"), 308 | ("_*test text*_", "\t

test text

"), 309 | # quotes in code block 310 | ("'quoted string'", "\t

'quoted string'

"), 311 | ("
some preformatted text
other text", "\t

some preformatted text
other text

"), 312 | # at sign and notextile in table 313 | ("|@@|@@ @@|\n|*B1*|*B2* *B3*|", "\t\n\t\t\n\t\t\t\n\t\t\t\n\t\t\n\t\t\n\t\t\t\n\t\t\t\n\t\t\n\t
<A1><A2> <A3>
*B1**B2* *B3*
"), 314 | # cite attribute 315 | ('bq.:http://textism.com/ Text...', '\t
\n\t\t

Text…

\n\t
'), 316 | ('Hello ["(Mum) & dad"]', '\t

Hello [“(Mum) & dad”]

'), 317 | # Dimensions 318 | ( 319 | ('[1/2] x [1/4] and (1/2)" x [1/4]" and (1/2)\' x (1/4)\'\n\n' 320 | '(2 x 10) X (3 / 4) x (200 + 64)\n\n' 321 | '1 x 1 = 1\n\n' 322 | '1 x1 = 1\n\n' 323 | '1x 1 = 1\n\n' 324 | '1x1 = 1\n\n' 325 | '1 X 1 = 1\n\n' 326 | '1 X1 = 1\n\n' 327 | '1X 1 = 1\n\n' 328 | '1X1 = 1\n\n' 329 | 'What is 1 x 1?\n\n' 330 | 'What is 1x1?\n\n' 331 | 'What is 1 X 1?\n\n' 332 | 'What is 1X1?\n\n' 333 | '1 x 2 x 3 = 6\n\n' 334 | '1x2x3=6\n\n' 335 | '1x2 x 1x3 = 6\n\n' 336 | '2\' x 2\' = 4 sqft.\n\n' 337 | '2\'x 2\' = 4 sqft.\n\n' 338 | '2\' x2\' = 4 sqft.\n\n' 339 | '2\'x2\' = 4 sqft.\n\n' 340 | '2\' X 2\' = 4 sqft.\n\n' 341 | '2\'X 2\' = 4 sqft.\n\n' 342 | '2\' X2\' = 4 sqft.\n\n' 343 | '2\'X2\' = 4 sqft.\n\n' 344 | '2" x 2" = 4 sqin.\n\n' 345 | '2"x 2" = 4 sqin.\n\n' 346 | '2" x2" = 4 sqin.\n\n' 347 | '2"x2" = 4 sqin.\n\n' 348 | '2" X 2" = 4 sqin.\n\n' 349 | '2"X 2" = 4 sqin.\n\n' 350 | '2" X2" = 4 sqin.\n\n' 351 | '2"X2" = 4in[^2^].\n\n' 352 | 'What is 1.2 x 3.5?\n\n' 353 | 'What is .2 x .5?\n\n' 354 | 'What is 1.2x3.5?\n\n' 355 | 'What is .2x.5?\n\n' 356 | 'What is 1.2\' x3.5\'?\n\n' 357 | 'What is .2"x .5"?\n\n' 358 | '1 x $10.00 x -£ 1.23 x ¥20,000 x -¤120.00 x ฿1,000,000 x -€110,00\n\n'), 359 | 360 | ('\t

½ × ¼ and ½” × ¼” and ½’ × ¼’

\n\n' 361 | '\t

(2 × 10) × (3 / 4) × (200 + 64)

\n\n' 362 | '\t

1 × 1 = 1

\n\n' 363 | '\t

1 ×1 = 1

\n\n' 364 | '\t

1× 1 = 1

\n\n' 365 | '\t

1×1 = 1

\n\n' 366 | '\t

1 × 1 = 1

\n\n' 367 | '\t

1 ×1 = 1

\n\n' 368 | '\t

1× 1 = 1

\n\n' 369 | '\t

1×1 = 1

\n\n' 370 | '\t

What is 1 × 1?

\n\n' 371 | '\t

What is 1×1?

\n\n' 372 | '\t

What is 1 × 1?

\n\n' 373 | '\t

What is 1×1?

\n\n' 374 | '\t

1 × 2 × 3 = 6

\n\n' 375 | '\t

1×2×3=6

\n\n' 376 | '\t

1×2 × 1×3 = 6

\n\n' 377 | '\t

2’ × 2’ = 4 sqft.

\n\n' 378 | '\t

2’× 2’ = 4 sqft.

\n\n' 379 | '\t

2’ ×2’ = 4 sqft.

\n\n' 380 | '\t

2’×2’ = 4 sqft.

\n\n' 381 | '\t

2’ × 2’ = 4 sqft.

\n\n' 382 | '\t

2’× 2’ = 4 sqft.

\n\n' 383 | '\t

2’ ×2’ = 4 sqft.

\n\n' 384 | '\t

2’×2’ = 4 sqft.

\n\n' 385 | '\t

2” × 2” = 4 sqin.

\n\n' 386 | '\t

2”× 2” = 4 sqin.

\n\n' 387 | '\t

2” ×2” = 4 sqin.

\n\n' 388 | '\t

2”×2” = 4 sqin.

\n\n' 389 | '\t

2” × 2” = 4 sqin.

\n\n' 390 | '\t

2”× 2” = 4 sqin.

\n\n' 391 | '\t

2” ×2” = 4 sqin.

\n\n' 392 | '\t

2”×2” = 4in2.

\n\n' 393 | '\t

What is 1.2 × 3.5?

\n\n' 394 | '\t

What is .2 × .5?

\n\n' 395 | '\t

What is 1.2×3.5?

\n\n' 396 | '\t

What is .2×.5?

\n\n' 397 | '\t

What is 1.2’ ×3.5’?

\n\n' 398 | '\t

What is .2”× .5”?

\n\n' 399 | '\t

1 × $10.00 × -£ 1.23 × ¥20,000 × -¤120.00 × ฿1,000,000 × -€110,00

') 400 | ), 401 | # Empty note lists 402 | ('There should be nothing below.\n\nnotelist.', '\t

There should be nothing below.

\n\n\t'), 403 | # Empty things 404 | (('\'\'\n\n""\n\n%%\n\n^^\n\n&&\n\n**\n\n__\n\n--\n\n++\n\n~~\n\n{}\n\n' 405 | '[]\n\n()\n\n<>\n\n\\\\\n\n//\n\n??\n\n==\n\n@@\n\n##\n\n$$\n\n!!\n\n' 406 | '::\n\n;;\n\n..\n\n,,\n\n||\n\n` `\n\n\' \'\n\n" "\n\n% %\n\n^ ^\n\n' 407 | '& &\n\n* *\n\n_ _\n\n- -\n\n+ +\n\n~ ~\n\n{ }\n\n[ ]\n\n( )\n\n< >\n\n' 408 | '\\ \\\n\n/ /\n\n? ?\n\n= =\n\n@ @\n\n# #\n\n$ $\n\n! !\n\n: :\n\n; ;\n\n' 409 | '. .\n\n, ,'), 410 | ("\t

‘’

\n\n\t

“”

\n\n\t

%%

\n\n\t

^^

\n\n\t" 411 | "

&&

\n\n\t

**

\n\n\t

__

\n\n\t

\n\n\t

++

\n\n\t" 412 | "

~~

\n\n\t

{}

\n\n\t

[]

\n\n\t

()

\n\n\t

<>

\n\n\t

\\\\

\n\n\t" 413 | "

//

\n\n\t

??

\n\n\t

==

\n\n\t

\n\n\t

##

\n\n\t

$$

\n\n\t" 414 | "

!!

\n\n\t

::

\n\n\t

;;

\n\n\t

..

\n\n\t

,,

\n\n\t" 415 | "\n\t\t\n\t\t\t\n\t\t\n\t
\n\n\t

` `

\n\n\t

‘ ‘

\n\n\t" 416 | "

“ “

\n\n\t

% %

\n\n\t

^ ^

\n\n\t

& &

\n\n\t" 417 | "
    \n\t\t
  • *
  • \n\t
\n\n\t

_ _

\n\n\t

- -

\n\n\t

+ +

\n\n\t

~ ~

\n\n\t" 418 | "

{ }

\n\n\t

[ ]

\n\n\t

( )

\n\n\t

< >

\n\n\t

\\ \\

\n\n\t" 419 | "

/ /

\n\n\t

? ?

\n\n\t

= =

\n\n\t

\n\n\t
    \n\t\t
  1. #
  2. \n\t
\n\n\t" 420 | "

$ $

\n\n\t

! !

\n\n\t
\n\t\t
:
\n\t
\n\n\t
\n\t\t
;
\n\t
\n\n\t" 421 | "

. .

\n\n\t

, ,

")), 422 | # A lone standing comment must be preserved as is: 423 | # withouth wrapping it into a paragraph 424 | (('An ordinary block.\n\n' 425 | '\n'), 426 | '\t

An ordinary block.

\n\n'), 427 | # Headers must be "breakable", just like paragraphs. 428 | ('h1. Two line with *strong*\nheading\n', 429 | '\t

Two line with strong
\nheading

'), 430 | # Non-standalone ampersands should not be escaped 431 | (("“test”\n\n" 432 | "“test”\n\n" 433 | " test \n"), 434 | ("\t

test

\n\n" 435 | "\t

test

\n\n" 436 | "\t

 test 

")), 437 | # Nested and mixed multi-level ordered and unordered lists 438 | (("* bullet\n" 439 | "*# number\n" 440 | "*# number\n" 441 | "*#* bullet\n" 442 | "*# number\n" 443 | "*# number with\n" 444 | "a break\n" 445 | "* bullet\n" 446 | "** okay"), 447 | ("\t
    \n" 448 | "\t\t
  • bullet\n" 449 | "\t\t
      \n" 450 | "\t\t\t
    1. number
    2. \n" 451 | "\t\t\t
    3. number\n" 452 | "\t\t\t
        \n" 453 | "\t\t\t\t
      • bullet
      • \n" 454 | "\t\t\t
    4. \n" 455 | "\t\t\t
    5. number
    6. \n" 456 | "\t\t\t
    7. number with
      \n" 457 | "a break
    8. \n" 458 | "\t\t
  • \n" 459 | "\t\t
  • bullet\n" 460 | "\t\t
      \n" 461 | "\t\t\t
    • okay
    • \n" 462 | "\t\t
  • \n" 463 | "\t\t
")), 464 | # Checks proper insertion of
within table cells 465 | (("|-(cold) milk :=\n" 466 | "Nourishing beverage for baby cows. =:\n" 467 | "|"), 468 | ("\t\n" 469 | "\t\t\n" 470 | "\t\t\t\n" 474 | "\t\t\n\t
\n" 471 | "\t
milk
\n" 472 | "\t

Nourishing beverage for baby cows.

\n" 473 | "
")), 475 | # Long non-textile blocks 476 | ("notextile.. *a very*\n\n*long*\n\n*block*\n", "*a very*\n\n*long*\n\n*block*"), 477 | # Correct use of ‘ and ’ 478 | ("Here is a %(example)'spanned'% word.", 479 | '\t

Here is a ‘spanned’ word.

'), 480 | # Using $-links with link aliases 481 | ("\"$\":test\n[test]https://textpattern.com/start\n", 482 | "\t

textpattern.com/start

"), 483 | ('Please check on "$":test for any updates.\n[test]https://de.wikipedia.org/wiki/Übermensch', 484 | '\t

Please check on de.wikipedia.org/wiki/Übermensch for any updates.

'), 485 | # Make sure smileys don't get recognised as a definition list. 486 | (":(\n\n:)\n\n:( \n:( \n:( \n:) \n\nPinocchio!\n:^)\n\nBaboon!\n:=)\n\nWink!\n;)\n\n:[ \n:]\n\n;(\nsomething\ndark side\n:) \n\n;(c)[de] Item", 487 | '\t

:(

\n\n\t

:)

\n\n\t

:(
\n:(
\n:(
\n:)

\n\n\t

Pinocchio!
\n:^)

\n\n\t

Baboon!
\n:=)

\n\n\t

Wink!
\n;)

\n\n\t

:[
\n:]

\n\n\t

;(
\nsomething
\ndark side
\n:)

\n\n\t
\n\t\t
Item
\n\t
'), 488 | # Checking proper parsing of classes and IDs 489 | ("_(class1 class2#id1)text1_ -(foobarbaz#boom bang)text2-\n", 490 | '\t

text1 text2

'), 491 | # Tables with nested textile elements 492 | ("|!http://tester.local/logo.png!| !http://tester.local/logo.png! |", 493 | '\t\n\t\t\n\t\t\t\n\t\t\t\n\t\t\n\t
'), 494 | # Tables with colgroups 495 | (("|=. Testing colgroup and col syntax | \n" 496 | "|:\\5. 80 |\x20\n" 497 | "|a|b|c|d|e|\x20\n"), 498 | ('\t\n\t\n' 499 | '\t\n\t\n' 500 | '\t\t\n\t\t\t\n\t\t\t\n\t\t\t\n\t\t\t\n\t\t\t\n\t\t\n' 501 | '\t
Testing colgroup and col syntax
abcde
')), 502 | # Table column with an emphasis should not be confused with a heading 503 | ('|_touch_ this!| _touch_ this! |', 504 | '\t\n\t\t\n\t\t\t\n\t\t\t\n\t\t\n\t
touch this! touch this!
'), 505 | # Table with colgroup but no caption 506 | (("|:\\5. 80 |\x20\n" 507 | "|a|b|c|d|e|\x20\n"), 508 | ('\t\n' 509 | '\t\n\t\n' 510 | '\t\t\n\t\t\t\n\t\t\t\n\t\t\t\n\t\t\t\n\t\t\t\n\t\t\n' 511 | '\t
abcde
')), 512 | ) 513 | 514 | 515 | @pytest.mark.parametrize("input, expected_output", xhtml_known_values) 516 | def test_KnownValuesXHTML(input, expected_output): 517 | # XHTML 518 | output = textile.textile(input, html_type='xhtml') 519 | assert output == expected_output 520 | 521 | 522 | @pytest.mark.parametrize("input, expected_output", html_known_values) 523 | def test_KnownValuesHTML(input, expected_output): 524 | # HTML5 525 | output = textile.textile(input, html_type='html5') 526 | assert output == expected_output 527 | -------------------------------------------------------------------------------- /textile/__init__.py: -------------------------------------------------------------------------------- 1 | from .core import textile, textile_restricted, Textile # noqa: F401 2 | from .version import VERSION 3 | 4 | __all__ = ['textile', 'textile_restricted'] 5 | 6 | __version__ = VERSION 7 | -------------------------------------------------------------------------------- /textile/__main__.py: -------------------------------------------------------------------------------- 1 | import argparse 2 | import sys 3 | import textile 4 | 5 | 6 | def main(): 7 | """A CLI tool in the style of python's json.tool. In fact, this is mostly 8 | copied directly from that module. This allows us to create a stand-alone 9 | tool as well as invoking it via `python -m textile`.""" 10 | prog = 'textile' 11 | description = ('A simple command line interface for textile module ' 12 | 'to convert textile input to HTML output. 
This script ' 13 | 'accepts input as a file or stdin and can write out to ' 14 | 'a file or stdout.') 15 | parser = argparse.ArgumentParser(prog=prog, description=description) 16 | parser.add_argument('-v', '--version', action='store_true', 17 | help='show the version number and exit') 18 | parser.add_argument('infile', nargs='?', type=argparse.FileType(), 19 | help='a textile file to be converted') 20 | parser.add_argument('outfile', nargs='?', type=argparse.FileType('w'), 21 | help='write the output of infile to outfile') 22 | options = parser.parse_args() 23 | 24 | if options.version: 25 | print(textile.VERSION) 26 | sys.exit() 27 | 28 | infile = options.infile or sys.stdin 29 | outfile = options.outfile or sys.stdout 30 | with infile: 31 | output = textile.textile(''.join(infile.readlines())) 32 | with outfile: 33 | outfile.write(output) 34 | 35 | 36 | if __name__ == '__main__': # pragma: no cover 37 | main() 38 | -------------------------------------------------------------------------------- /textile/core.py: -------------------------------------------------------------------------------- 1 | # -*- coding: utf-8 -*- 2 | __copyright__ = """ 3 | Copyright (c) 2009, Jason Samsa, http://jsamsa.com/ 4 | Copyright (c) 2010, Kurt Raschke 5 | Copyright (c) 2004, Roberto A. F. De Almeida, http://dealmeida.net/ 6 | Copyright (c) 2003, Mark Pilgrim, http://diveintomark.org/ 7 | 8 | Original PHP Version: 9 | Copyright (c) 2003-2004, Dean Allen 10 | All rights reserved. 
11 | 12 | Thanks to Carlo Zottmann for refactoring 13 | Textile's procedural code into a class framework 14 | 15 | Additions and fixes Copyright (c) 2006 Alex Shiels http://thresholdstate.com/ 16 | 17 | """ 18 | import uuid 19 | from urllib.parse import urlparse, urlsplit, urlunsplit, quote, unquote 20 | from collections import OrderedDict 21 | from nh3 import clean 22 | 23 | from textile.regex_strings import (align_re_s, cls_re_s, pnct_re_s, 24 | regex_snippets, syms_re_s, table_span_re_s) 25 | from textile.utils import (decode_high, encode_high, encode_html, generate_tag, 26 | getimagesize, has_raw_text, human_readable_url, 27 | is_rel_url, is_valid_url, list_type, 28 | normalize_newlines, parse_attributes, pba) 29 | from textile.objects import Block, Table 30 | 31 | try: 32 | import regex as re 33 | except ImportError: 34 | import re 35 | 36 | 37 | def make_glyph_replacers(html_type, uid, glyph_defs): 38 | """ 39 | Generates a list of "replacers" (each is a pair consiting of 40 | a regular expression and a replacing pattern) that, 41 | when applied sequentially, replace some characters of the original 42 | text with their HTML codes to produce valid HTML. 43 | """ 44 | cur = ( 45 | r'(?:[{0}]{1}*)?'.format(regex_snippets['cur'], regex_snippets['space']) 46 | if regex_snippets['cur'] 47 | else r'') 48 | pre_result = [ 49 | # dimension sign (before apostrophes/quotes are replaced) 50 | (re.compile( 51 | r'([0-9]+[\])]?[\'"]? ?)[x]( ?[\[(]?)' 52 | r'(?=[+-]?{0}[0-9]*\.?[0-9]+)'.format(cur), 53 | flags=re.I | re.U), 54 | r'\1{dimension}\2'), 55 | # apostrophe's 56 | (re.compile( 57 | r"({0}|\))'({0})" 58 | .format(regex_snippets['wrd']), 59 | flags=re.U), 60 | r'\1{apostrophe}\2'), 61 | # back in '88 62 | (re.compile( 63 | r"({0})'(\d+{1}?)\b(?![.]?[{1}]*?')".format( 64 | regex_snippets['space'], regex_snippets['wrd']), 65 | flags=re.U), 66 | r'\1{apostrophe}\2'), 67 | # single opening following an open bracket. 
68 | (re.compile(r"([([{])'(?=\S)", flags=re.U), 69 | r'\1{quote_single_open}'), 70 | # single closing 71 | (re.compile( 72 | r"(\S)'(?={0}|{1}|<|$)".format(regex_snippets['space'], pnct_re_s), 73 | flags=re.U), 74 | r'\1{quote_single_close}'), 75 | # single opening 76 | (re.compile(r"'", re.U), r'{quote_single_open}'), 77 | # double opening following an open bracket. Allows things like 78 | # Hello ["(Mum) & dad"] 79 | (re.compile(r'([([{])"(?=\S)', flags=re.U), 80 | r'\1{quote_double_open}'), 81 | # double closing 82 | (re.compile( 83 | r'(\S)"(?={0}|{1}|<|$)'.format(regex_snippets['space'], pnct_re_s), 84 | flags=re.U), 85 | r'\1{quote_double_close}'), 86 | # double opening 87 | (re.compile(r'"'), r'{quote_double_open}'), 88 | # ellipsis 89 | (re.compile(r'([^.]?)\.{3}'), r'\1{ellipsis}'), 90 | # em dash 91 | (re.compile(r'(\s?)--(\s?)'), r'\1{emdash}\2'), 92 | # en dash 93 | (re.compile(r' - '), r' {endash} '), 94 | # trademark 95 | (re.compile( 96 | r'(\b ?|{0}|^)[([]TM[])]'.format(regex_snippets['space']), 97 | flags=re.I | re.U), 98 | r'\1{trademark}'), 99 | # registered 100 | (re.compile( 101 | r'(\b ?|{0}|^)[([]R[])]'.format(regex_snippets['space']), 102 | flags=re.I | re.U), 103 | r'\1{registered}'), 104 | # copyright 105 | (re.compile( 106 | r'(\b ?|{0}|^)[([]C[])]'.format(regex_snippets['space']), 107 | flags=re.I | re.U), 108 | r'\1{copyright}'), 109 | # 1/2 110 | (re.compile(r'[([]1\/2[])]'), r'{half}'), 111 | # 1/4 112 | (re.compile(r'[([]1\/4[])]'), r'{quarter}'), 113 | # 3/4 114 | (re.compile(r'[([]3\/4[])]'), r'{threequarters}'), 115 | # degrees 116 | (re.compile(r'[([]o[])]'), r'{degrees}'), 117 | # plus/minus 118 | (re.compile(r'[([]\+\/-[])]'), r'{plusminus}'), 119 | # 3+ uppercase acronym 120 | (re.compile( 121 | r'\b([{0}][{1}]{{2,}})\b(?:[(]([^)]*)[)])' 122 | .format(regex_snippets['abr'], regex_snippets['acr']), 123 | flags=re.U), 124 | (r'\1' if html_type == 'html5' 125 | else r'\1')), 126 | # 3+ uppercase 127 | (re.compile( 128 | 
r'({space}|^|[>(;-])([{abr}]{{3,}})([{nab}]*)' 129 | '(?={space}|{pnct}|<|$)(?=[^">]*?(<|$))' 130 | .format(space=regex_snippets['space'], 131 | abr=regex_snippets['abr'], 132 | nab=regex_snippets['nab'], 133 | pnct=pnct_re_s), 134 | re.U), 135 | r'\1{0}:glyph:\2\3'.format(uid)), 136 | ] 137 | return [(regex_obj, replacement.format(**glyph_defs)) 138 | for (regex_obj, replacement) in pre_result] 139 | 140 | 141 | class Textile(object): 142 | restricted_url_schemes = ('http', 'https', 'ftp', 'mailto') 143 | unrestricted_url_schemes = restricted_url_schemes + ( 144 | 'file', 'tel', 'callto', 'sftp', 'data') 145 | 146 | btag = ('bq', 'bc', 'notextile', 'pre', 'h[1-6]', r'fn\d+', 'p', '###') 147 | btag_lite = ('bq', 'bc', 'p') 148 | 149 | note_index = 1 150 | 151 | glyph_definitions = { 152 | 'quote_single_open': '‘', # noqa: E241 153 | 'quote_single_close': '’', # noqa: E241 154 | 'quote_double_open': '“', # noqa: E241 155 | 'quote_double_close': '”', # noqa: E241 156 | 'apostrophe': '’', # noqa: E241 157 | 'prime': '′', # noqa: E241 158 | 'prime_double': '″', # noqa: E241 159 | 'ellipsis': '…', # noqa: E241 160 | 'ampersand': '&', # noqa: E241 161 | 'emdash': '—', # noqa: E241 162 | 'endash': '–', # noqa: E241 163 | 'dimension': '×', # noqa: E241 164 | 'trademark': '™', # noqa: E241 165 | 'registered': '®', # noqa: E241 166 | 'copyright': '©', # noqa: E241 167 | 'half': '½', # noqa: E241 168 | 'quarter': '¼', # noqa: E241 169 | 'threequarters': '¾', # noqa: E241 170 | 'degrees': '°', # noqa: E241 171 | 'plusminus': '±', # noqa: E241 172 | } 173 | 174 | spanWrappers = ( 175 | ('[', ']'), 176 | ) 177 | 178 | def __init__(self, restricted=False, lite=False, noimage=False, 179 | get_sizes=False, html_type='xhtml', rel='', block_tags=True): 180 | """Textile properties that are common to regular textile and 181 | textile_restricted""" 182 | self.restricted = restricted 183 | self.lite = lite 184 | self.noimage = noimage 185 | self.get_sizes = get_sizes 186 | self.fn = {} 
187 | self.urlrefs = {} 188 | self.shelf = {} 189 | self.rel = rel 190 | self.html_type = html_type 191 | self.max_span_depth = 5 192 | self.span_depth = 0 193 | uid = uuid.uuid4().hex 194 | self.uid = 'textileRef:{0}:'.format(uid) 195 | self.linkPrefix = '{0}-'.format(uid) 196 | self.linkIndex = 0 197 | self.refCache = {} 198 | self.refIndex = 0 199 | self.block_tags = block_tags 200 | 201 | self.glyph_replacers = make_glyph_replacers( 202 | html_type, self.uid, self.glyph_definitions) 203 | 204 | if self.restricted is True: 205 | self.url_schemes = self.restricted_url_schemes 206 | else: 207 | self.url_schemes = self.unrestricted_url_schemes 208 | 209 | all_schemes_re_s = '|'.join([ 210 | '(?:{0})'.format(scheme) 211 | for scheme in self.url_schemes 212 | ]) 213 | self.url_ref_regex = re.compile( 214 | r'(?:(?<=^)|(?<=\s))\[(.+)\]\s?((?:{0}:\/\/|\/)\S+)(?=\s|$)'.format(all_schemes_re_s), 215 | re.U 216 | ) 217 | 218 | def parse(self, text, rel=None, sanitize=False): 219 | """Parse the input text as textile and return html output.""" 220 | self.notes = OrderedDict() 221 | self.unreferencedNotes = OrderedDict() 222 | self.notelist_cache = OrderedDict() 223 | 224 | if text.strip() == '': 225 | return text 226 | 227 | if self.restricted: 228 | text = encode_html(text, quotes=False) 229 | 230 | text = normalize_newlines(text) 231 | text = text.replace(self.uid, '') 232 | 233 | if self.block_tags: 234 | if self.lite: 235 | self.blocktag_allowlist = set(['bq', 'p', 'br']) 236 | text = self.block(text) 237 | else: 238 | self.blocktag_allowlist = set(['bq', 'p', 'br', 'bc', 'notextile', 239 | 'pre', 'h[1-6]', 240 | f"fn{regex_snippets['digit']}+", '###']) 241 | text = self.block(text) 242 | text = self.placeNoteLists(text) 243 | else: 244 | # Inline markup (em, strong, sup, sub, del etc). 245 | text = self.span(text) 246 | 247 | # Glyph level substitutions (mainly typographic -- " & ' => curly 248 | # quotes, -- => em-dash etc. 
249 | text = self.glyphs(text) 250 | 251 | if rel: 252 | self.rel = ' rel="{0}"'.format(rel) 253 | 254 | text = self.getRefs(text) 255 | 256 | if not self.lite: 257 | text = self.placeNoteLists(text) 258 | text = self.retrieve(text) 259 | text = text.replace('{0}:glyph:'.format(self.uid), '') 260 | 261 | if sanitize: 262 | text = clean(text, tags=self.blocktag_allowlist) 263 | 264 | text = self.retrieveTags(text) 265 | text = self.retrieveURLs(text) 266 | 267 | # if the text contains a break tag (
or
) not followed by 268 | # a newline, replace it with a new style break tag and a newline. 269 | text = re.sub(r'(?!\n)', '
\n', text) 270 | 271 | text = text.rstrip('\n') 272 | 273 | return text 274 | 275 | def table(self, text): 276 | text = "{0}\n\n".format(text) 277 | pattern = re.compile(r'^(?:table(?P_?{s}{a}{c})\.' 278 | r'(?P.*?)\n)?^(?P{a}{c}\.? ?\|.*\|)' 279 | r'[\s]*\n\n'.format( 280 | **{'s': table_span_re_s, 'a': align_re_s, 281 | 'c': cls_re_s}), flags=re.S | re.M | re.U) 282 | match = pattern.search(text) 283 | if match: 284 | table = Table(self, **match.groupdict()) 285 | return table.process() 286 | return text 287 | 288 | def textileLists(self, text): 289 | pattern = re.compile(r'^((?:[*;:]+|[*;:#]*#(?:_|\d+)?){0}[ .].*)$' 290 | r'(?![^#*;:])'.format(cls_re_s), re.U | re.M | re.S) 291 | return pattern.sub(self.fTextileList, text) 292 | 293 | def fTextileList(self, match): 294 | text = re.split(r'\n(?=[*#;:])', match.group(), flags=re.M) 295 | pt = '' 296 | result = [] 297 | ls = OrderedDict() 298 | for i, line in enumerate(text): 299 | try: 300 | nextline = text[i + 1] 301 | except IndexError: 302 | nextline = '' 303 | 304 | m = re.search(r"^(?P[#*;:]+)(?P_|\d+)?(?P{0})[ .]" 305 | "(?P.*)$".format(cls_re_s), line, re.S) 306 | if m: 307 | tl, start, atts, content = m.groups() 308 | content = content.strip() 309 | else: 310 | result.append(line) 311 | continue 312 | 313 | nl = '' 314 | ltype = list_type(tl) 315 | tl_tags = {';': 'dt', ':': 'dd'} 316 | litem = tl_tags.get(tl[0], 'li') 317 | 318 | showitem = len(content) > 0 319 | 320 | # handle list continuation/start attribute on ordered lists 321 | if ltype == 'o': 322 | if not hasattr(self, 'olstarts'): 323 | self.olstarts = {tl: 1} 324 | 325 | # does the first line of this ol have a start attribute 326 | if len(tl) > len(pt): 327 | # no, set it to 1. 
328 | if start is None: 329 | self.olstarts[tl] = 1 330 | # yes, set it to the given number 331 | elif start != '_': 332 | self.olstarts[tl] = int(start) 333 | # we won't need to handle the '_' case, we'll just 334 | # print out the number when it's needed 335 | 336 | # put together the start attribute if needed 337 | if len(tl) > len(pt) and start is not None: 338 | start = ' start="{0}"'.format(self.olstarts[tl]) 339 | 340 | # This will only increment the count for list items, not 341 | # definition items 342 | if showitem: 343 | # Assume properly formatted input 344 | try: 345 | self.olstarts[tl] = self.olstarts[tl] + 1 346 | # if we get here, we've got some poor textile formatting. 347 | # add this type of list to olstarts and assume we'll start 348 | # it at 1. expect screwy output. 349 | except KeyError: 350 | self.olstarts[tl] = 1 351 | 352 | nm = re.match(r"^(?P[#\*;:]+)(_|[\d]+)?{0}" 353 | r"[ .].*".format(cls_re_s), nextline) 354 | if nm: 355 | nl = nm.group('nextlistitem') 356 | 357 | # We need to handle nested definition lists differently. If 358 | # the next tag is a dt (';') of a lower nested level than the 359 | # current dd (':'), 360 | if ';' in pt and ':' in tl: 361 | ls[tl] = 2 362 | 363 | atts = pba(atts, restricted=self.restricted) 364 | tabs = '\t' * len(tl) 365 | # If start is still None, set it to '', else leave the value that 366 | # we've already formatted. 
367 | start = start or '' 368 | # if this item tag isn't in the list, create a new list and 369 | # item, else just create the item 370 | if tl not in ls: 371 | ls[tl] = 1 372 | itemtag = ("\n{0}\t<{1}>{2}".format(tabs, litem, content) if 373 | showitem else '') 374 | line = "<{0}l{1}{2}>{3}".format(ltype, atts, start, itemtag) 375 | else: 376 | line = ("\t<{0}{1}>{2}".format(litem, atts, content) if 377 | showitem else '') 378 | line = '{0}{1}'.format(tabs, line) 379 | if len(nl) <= len(tl): 380 | if showitem: 381 | line = "{0}".format(line, litem) 382 | # work backward through the list closing nested lists/items 383 | for k, v in reversed(list(ls.items())): 384 | if len(k) > len(nl): 385 | if v != 2: 386 | line = "{0}\n{1}".format( 387 | line, tabs, list_type(k)) 388 | if len(k) > 1 and v != 2: 389 | line = "{0}".format(line, litem) 390 | del ls[k] 391 | # Remember the current Textile tag: 392 | pt = tl 393 | result.append(line) 394 | return self.doTagBr(litem, "\n".join(result)) 395 | 396 | def doTagBr(self, tag, input): 397 | return re.compile(r'<({0})([^>]*?)>(.*)()'.format(re.escape(tag)), 398 | re.S).sub(self.doBr, input) 399 | 400 | def doPBr(self, in_): 401 | return (re 402 | .compile(r'<(p|h[1-6])([^>]*?)>(.*)()', re.S) 403 | .sub(self.fPBr, in_)) 404 | 405 | def fPBr(self, m): 406 | content = m.group(3) 407 | content = ( 408 | re.compile(r"{0}*\n(?![{0}|])".format(regex_snippets['space']), 409 | re.I) 410 | .sub("\n", content)) 411 | content = re.compile(r"\n(?![\s|])").sub('
', content) 412 | return '<{0}{1}>{2}{3}'.format(m.group(1), m.group(2), content, m.group(4)) 413 | 414 | def doBr(self, match): 415 | content = ( 416 | re.compile( 417 | r'(.+)(?!(?<=|||
)' 418 | r'|(?<=
)|(?<=
))\n(?![#*;:\s|])', 419 | re.I) 420 | .sub(r'\1
', match.group(3))) 421 | return '<{0}{1}>{2}{3}'.format(match.group(1), match.group(2), content, 422 | match.group(4)) 423 | 424 | def block(self, text): 425 | if not self.lite: 426 | tre = '|'.join(self.btag) 427 | else: 428 | tre = '|'.join(self.btag_lite) 429 | # split the text by two or more newlines, retaining the newlines in the 430 | # split list 431 | text = re.split(r'(\n{2,})', text) 432 | 433 | # some blocks, when processed, will ask us to output nothing, if that's 434 | # the case, we'd want to drop the whitespace which follows it. 435 | eat_whitespace = False 436 | 437 | # check to see if previous block has already been escaped 438 | escaped = False 439 | 440 | # check if multiline paragraph (p..) tags

..

are added to line 441 | multiline_para = False 442 | 443 | tag = 'p' 444 | atts = cite = ext = '' 445 | 446 | out = [] 447 | 448 | for line in text: 449 | # the line is just whitespace, add it to the output, and move on 450 | if not line.strip(): 451 | if not eat_whitespace: 452 | out.append(line) 453 | continue 454 | 455 | eat_whitespace = False 456 | 457 | pattern = (r'^(?P{0})(?P{1}{2})\.(?P\.?)' 458 | r'(?::(?P\S+))? (?P.*)$'.format( 459 | tre, align_re_s, cls_re_s)) 460 | match = re.search(pattern, line, flags=re.S | re.U) 461 | # tag specified on this line. 462 | if match: 463 | # if we had a previous extended tag but not this time, close up 464 | # the tag 465 | if ext and out: 466 | # it's out[-2] because the last element in out is the 467 | # whitespace that preceded this line 468 | if not escaped: 469 | content = encode_html(out[-2], quotes=True) 470 | escaped = True 471 | else: 472 | content = out[-2] 473 | 474 | if not multiline_para: 475 | # block will have been defined in a previous run of the 476 | # loop 477 | content = generate_tag(block.inner_tag, content, # noqa: F821 478 | block.inner_atts) # noqa: F821 479 | content = generate_tag(block.outer_tag, content, # noqa: F821 480 | block.outer_atts) # noqa: F821 481 | out[-2] = content 482 | tag, atts, ext, cite, content = match.groups() 483 | block = Block(self, **match.groupdict()) 484 | inner_block = generate_tag(block.inner_tag, block.content, 485 | block.inner_atts) 486 | # code tags and raw text won't be indented inside outer_tag. 487 | if block.inner_tag != 'code' and not has_raw_text(inner_block): 488 | inner_block = "\n\t\t{0}\n\t".format(inner_block) 489 | if ext: 490 | line = block.content 491 | else: 492 | line = generate_tag(block.outer_tag, inner_block, 493 | block.outer_atts) 494 | # pre tags and raw text won't be indented. 
495 | if block.outer_tag != 'pre' and not has_raw_text(line): 496 | line = "\t{0}".format(line) 497 | 498 | # set having paragraph tags to false 499 | if block.tag == 'p' and ext: 500 | multiline_para = False 501 | # no tag specified 502 | else: 503 | # if we're inside an extended block, add the text from the 504 | # previous line to the front. 505 | if ext and out: 506 | if block.tag == 'p': 507 | line = generate_tag(block.tag, line, block.outer_atts) 508 | multiline_para = True 509 | line = '{0}{1}'.format(out.pop(), line) 510 | # the logic in the if statement below is a bit confusing in 511 | # php-textile. I'm still not sure I understand what the php 512 | # code is doing. Something tells me it's a phpsadness. Anyway, 513 | # this works, and is much easier to understand: if we're not in 514 | # an extension, and the line doesn't begin with a space, treat 515 | # it like a block to insert. Lines that begin with a space are 516 | # not processed as a block. 517 | if not ext and not line[0] == ' ': 518 | block = Block(self, tag, atts, ext, cite, line) 519 | # if the block contains html tags, generate_tag would 520 | # mangle it, so process as is. 521 | if block.tag == 'p' and not has_raw_text(block.content): 522 | line = block.content 523 | else: 524 | line = generate_tag(block.outer_tag, block.content, 525 | block.outer_atts) 526 | line = "\t{0}".format(line) 527 | else: 528 | if block.tag in ('pre', 'notextile') or block.inner_tag == 'code': 529 | line = self.shelve(encode_html(line, quotes=True)) 530 | else: 531 | line = self.graf(line) 532 | 533 | if block.tag == 'p': 534 | escaped = True 535 | 536 | if block.tag == 'p' and ext and not multiline_para: 537 | line = generate_tag(block.tag, line, block.outer_atts) 538 | multiline_para = True 539 | else: 540 | line = self.doPBr(line) 541 | if not block.tag == 'p': 542 | multiline_para = False 543 | 544 | line = line.replace('
', '
') 545 | 546 | # if we're in an extended block, and we haven't specified a new 547 | # tag, join this line to the last item of the output 548 | if ext and not match: 549 | last_item = out.pop() 550 | out.append('{0}{1}'.format(last_item, line)) 551 | elif not block.eat: 552 | # or if it's a type of block which indicates we shouldn't drop 553 | # it, add it to the output. 554 | out.append(line) 555 | 556 | if not ext: 557 | tag = 'p' 558 | atts = '' 559 | cite = '' 560 | 561 | # if it's a block we should drop, don't keep the whitespace which 562 | # will come after it. 563 | if block.eat: 564 | eat_whitespace = True 565 | 566 | # at this point, we've gone through all the lines. if there's still an 567 | # extension in effect, we close it here 568 | if ext and out and not block.tag == 'p': 569 | content = out.pop() 570 | content = generate_tag(block.inner_tag, content, block.inner_atts) 571 | content = generate_tag(block.outer_tag, content, block.outer_atts) 572 | out.append(content) 573 | return ''.join(out) 574 | 575 | def footnoteRef(self, text): 576 | # somehow php-textile gets away with not capturing the space. 577 | return re.compile(r'(?<=\S)\[(?P{0}+)(?P!?)\]' 578 | r'(?P{1}?)'.format( 579 | regex_snippets['digit'], regex_snippets['space']), 580 | re.U).sub(self.footnoteID, text) 581 | 582 | def footnoteID(self, m): 583 | fn_att = OrderedDict({'class': 'footnote'}) 584 | if m.group('id') not in self.fn: 585 | self.fn[m.group('id')] = '{0}{1}'.format( 586 | self.linkPrefix, self._increment_link_index()) 587 | fnid = self.fn[m.group('id')] 588 | fn_att['id'] = 'fnrev{0}'.format(fnid) 589 | fnid = self.fn[m.group('id')] 590 | footref = generate_tag('a', m.group('id'), {'href': '#fn{0}'.format( 591 | fnid)}) 592 | if '!' 
== m.group('nolink'): 593 | footref = m.group('id') 594 | footref = generate_tag('sup', footref, fn_att) 595 | return '{0}{1}'.format(footref, m.group('space')) 596 | 597 | def glyphs(self, text): 598 | """ 599 | Because of the split command, the regular expressions are different for 600 | when the text at the beginning and the rest of the text. 601 | for example: 602 | let's say the raw text provided is "*Here*'s some textile" 603 | before it gets to this glyphs method, the text has been converted to 604 | "Here's some textile" 605 | When run through the split, we end up with ["", "Here", 606 | "", "'s some textile"]. The re.search that follows tells it 607 | not to ignore html tags. 608 | If the single quote is the first character on the line, it's an open 609 | single quote. If it's the first character of one of those splits, it's 610 | an apostrophe or closed single quote, but the regex will bear that out. 611 | A similar situation occurs for double quotes as well. 612 | So, for the first pass, we use a set of regexes from 613 | the initial_glyph_replacers. 
For all remaining passes, 614 | we use glyph_replacers 615 | """ 616 | text = text.rstrip('\n') 617 | result = [] 618 | standalone_amp_re = re.compile( 619 | r"&(?!#[0-9]+;|#x[a-f0-9]+;|[a-z][a-z0-9]*;)", 620 | flags=re.I) 621 | html_amp_symbol = self.glyph_definitions['ampersand'] 622 | # split the text by any angle-bracketed tags 623 | lines = re.compile(r'(<[\w/!?].*?>)', re.U | re.S).split(text) 624 | for i, line in enumerate(lines): 625 | if i % 2 == 0: 626 | if not self.restricted: 627 | # Raw < > & chars have already been encoded 628 | # when in restricted mode 629 | line = ( 630 | standalone_amp_re 631 | .sub(html_amp_symbol, line) 632 | .replace('<', '<') 633 | .replace('>', '>')) 634 | for s, r in self.glyph_replacers: 635 | line = s.sub(r, line) 636 | result.append(line) 637 | return ''.join(result) 638 | 639 | def getRefs(self, text): 640 | """Capture and store URL references in self.urlrefs.""" 641 | return self.url_ref_regex.sub(self.refs, text) 642 | 643 | def refs(self, match): 644 | flag, url = match.groups() 645 | self.urlrefs[flag] = url 646 | return '' 647 | 648 | def relURL(self, url): 649 | scheme = urlparse(url)[0] 650 | if scheme and scheme not in self.url_schemes: 651 | return '#' 652 | return url 653 | 654 | def shelve(self, text): 655 | self.refIndex = self.refIndex + 1 656 | itemID = '{0}{1}:shelve'.format(self.uid, self.refIndex) 657 | self.shelf[itemID] = text 658 | return itemID 659 | 660 | def retrieve(self, text): 661 | while True: 662 | old = text 663 | for k, v in self.shelf.items(): 664 | text = text.replace(k, v) 665 | if text == old: 666 | break 667 | return text 668 | 669 | def graf(self, text): 670 | if not self.lite: 671 | text = self.noTextile(text) 672 | text = self.code(text) 673 | 674 | text = self.getHTMLComments(text) 675 | 676 | text = self.getRefs(text) 677 | text = self.links(text) 678 | 679 | if not self.noimage: 680 | text = self.image(text) 681 | 682 | if not self.lite: 683 | text = self.table(text) 684 | text = 
self.redcloth_list(text) 685 | text = self.textileLists(text) 686 | 687 | text = self.span(text) 688 | text = self.footnoteRef(text) 689 | text = self.noteRef(text) 690 | text = self.glyphs(text) 691 | 692 | return text.rstrip('\n') 693 | 694 | def links(self, text): 695 | """For some reason, the part of the regex below that matches the url 696 | does not match a trailing parenthesis. It gets caught by tail, and 697 | we check later to see if it should be included as part of the url.""" 698 | text = self.markStartOfLinks(text) 699 | 700 | return self.replaceLinks(text) 701 | 702 | def markStartOfLinks(self, text): 703 | """Finds and marks the start of well formed links in the input text.""" 704 | # Slice text on '":' boundaries. These always occur in 705 | # inline links between the link text and the url part and are much more 706 | # infrequent than '"' characters so we have less possible links to 707 | # process. 708 | slice_re = re.compile(r'":(?={0})'.format(regex_snippets['char'])) 709 | slices = slice_re.split(text) 710 | output = [] 711 | 712 | if len(slices) > 1: 713 | # There are never any start of links in the last slice, so pop it 714 | # off (we'll glue it back later). 715 | last_slice = slices.pop() 716 | 717 | for s in slices: 718 | # If there is no possible start quote then this slice is not 719 | # a link 720 | if '"' not in s: 721 | output.append(s) 722 | continue 723 | # Cut this slice into possible starting points wherever we find 724 | # a '"' character. Any of these parts could represent the start 725 | # of the link text - we have to find which one. 726 | possible_start_quotes = s.split('"') 727 | 728 | # Start our search for the start of the link with the closest 729 | # prior quote mark. 730 | possibility = possible_start_quotes.pop() 731 | 732 | # Init the balanced count. If this is still zero at the end of 733 | # our do loop we'll mark the " that caused it to balance as the 734 | # start of the link and move on to the next slice. 
735 | balanced = 0 736 | linkparts = [] 737 | i = 0 738 | 739 | while balanced != 0 or i == 0: # pragma: no branch 740 | # Starting at the end, pop off the previous part of the 741 | # slice's fragments. 742 | 743 | # Add this part to those parts that make up the link text. 744 | linkparts.append(possibility) 745 | 746 | if len(possibility) > 0: 747 | # did this part inc or dec the balanced count? 748 | if re.search(r'^\S|=$', possibility, flags=re.U): # pragma: no branch 749 | balanced = balanced - 1 750 | if re.search(r'\S$', possibility, flags=re.U): # pragma: no branch 751 | balanced = balanced + 1 752 | try: 753 | possibility = possible_start_quotes.pop() 754 | except IndexError: 755 | break 756 | else: 757 | # If quotes occur next to each other, we get zero 758 | # length strings. eg. ...""Open the door, 759 | # HAL!"":url... In this case we count a zero length in 760 | # the last position as a closing quote and others as 761 | # opening quotes. 762 | if i == 0: 763 | balanced = balanced + 1 764 | else: 765 | balanced = balanced - 1 766 | i = i + 1 767 | 768 | try: 769 | possibility = possible_start_quotes.pop() 770 | except IndexError: # pragma: no cover 771 | # If out of possible starting segments we back the 772 | # last one from the linkparts array 773 | linkparts.pop() 774 | break 775 | # If the next possibility is empty or ends in a space 776 | # we have a closing ". 777 | if (possibility == '' or possibility.endswith(' ')): 778 | # force search exit 779 | balanced = 0 780 | 781 | if balanced <= 0: 782 | possible_start_quotes.append(possibility) 783 | break 784 | 785 | # Rebuild the link's text by reversing the parts and sticking 786 | # them back together with quotes. 787 | linkparts.reverse() 788 | link_content = '"'.join(linkparts) 789 | # Rebuild the remaining stuff that goes before the link but 790 | # that's already in order. 
791 | pre_link = '"'.join(possible_start_quotes) 792 | # Re-assemble the link starts with a specific marker for the 793 | # next regex. 794 | o = '{0}{1}linkStartMarker:"{2}'.format(pre_link, self.uid, 795 | link_content) 796 | output.append(o) 797 | 798 | # Add the last part back 799 | output.append(last_slice) 800 | # Re-assemble the full text with the start and end markers 801 | text = '":'.join(output) 802 | 803 | return text 804 | 805 | def replaceLinks(self, text): 806 | """Replaces links with tokens and stores them on the shelf.""" 807 | stopchars = r"\s|^'\"*" 808 | pattern = r""" 809 | (?P
\[)?           # Optionally open with a square bracket eg. Look ["here":url]
 810 |             {0}linkStartMarker:"   # marks start of the link
 811 |             (?P(?:.|\n)*?)  # grab the content of the inner "..." part of the link, can be anything but
 812 |                                    # do not worry about matching class, id, lang or title yet
 813 |             ":                     # literal ": marks end of atts + text + title block
 814 |             (?P[^{1}]*)      # url upto a stopchar
 815 |         """.format(self.uid, stopchars)
 816 |         text = re.compile(pattern, flags=re.X | re.U).sub(self.fLink, text)
 817 |         return text
 818 | 
 819 |     def fLink(self, m):
 820 |         in_ = m.group()
 821 |         pre, inner, url = m.groups()
 822 |         pre = pre or ''
 823 | 
 824 |         if inner == '':
 825 |             return '{0}"{1}":{2}'.format(pre, inner, url)
 826 | 
 827 |         m = re.search(r'''^
 828 |             (?P{0})                # $atts (if any)
 829 |             {1}*                         # any optional spaces
 830 |             (?P                    # $text is...
 831 |                 (!.+!)                   #     an image
 832 |             |                            #   else...
 833 |                 .+?                      #     link text
 834 |             )                            # end of $text
 835 |             (?:\((?P[^)]+?)\))?   # $title (if any)
 836 |             $'''.format(cls_re_s, regex_snippets['space']), inner,
 837 |                       flags=re.X | re.U)
 838 | 
 839 |         atts = (m and m.group('atts')) or ''
 840 |         text = (m and m.group('text')) or inner
 841 |         title = (m and m.group('title')) or ''
 842 | 
 843 |         pop, tight = '', ''
 844 |         counts = {'[': None, ']': url.count(']'), '(': None, ')': None}
 845 | 
 846 |         # Look for footnotes or other square-bracket delimited stuff at the end
 847 |         # of the url...
 848 |         #
 849 |         # eg. "text":url][otherstuff... will have "[otherstuff" popped back
 850 |         # out.
 851 |         #
 852 |         # "text":url?q[]=x][123]    will have "[123]" popped off the back, the
 853 |         # remaining closing square brackets will later be tested for balance
 854 |         if (counts[']']):
 855 |             m = re.search(r'(?P<url>^.*\])(?P<tight>\[.*?)$', url, flags=re.U)
 856 |             if m:
 857 |                 url, tight = m.groups()
 858 | 
 859 |         # Split off any trailing text that isn't part of an array assignment.
 860 |         # eg. "text":...?q[]=value1&q[]=value2 ... is ok
 861 |         # "text":...?q[]=value1]following  ... would have "following" popped
 862 |         # back out and the remaining square bracket will later be tested for
 863 |         # balance
 864 |         if (counts[']']):
 865 |             m = re.search(r'(?P<url>^.*\])(?!=)(?P<end>.*?)$', url, flags=re.U)
 866 |             url = m.group('url')
 867 |             tight = '{0}{1}'.format(m.group('end'), tight)
 868 | 
 869 |         # Now we have the array of all the multi-byte chars in the url we will
 870 |         # parse the  uri backwards and pop off  any chars that don't belong
 871 |         # there (like . or , or unmatched brackets of various kinds).
 872 |         first = True
 873 |         popped = True
 874 | 
 875 |         counts[']'] = url.count(']')
 876 |         url_chars = list(url)
 877 | 
 878 |         def _endchar(c, pop, popped, url_chars, counts, pre):
 879 |             """Textile URL shouldn't end in these characters, we pop them off
 880 |             the end and push them out the back of the url again."""
 881 |             pop = '{0}{1}'.format(c, pop)
 882 |             url_chars.pop()
 883 |             popped = True
 884 |             return pop, popped, url_chars, counts, pre
 885 | 
 886 |         def _rightanglebracket(c, pop, popped, url_chars, counts, pre):
 887 |             url_chars.pop()
 888 |             urlLeft = ''.join(url_chars)
 889 | 
 890 |             m = re.search(r'(?P<url_chars>.*)(?P<tag><\/[a-z]+)$', urlLeft)
 891 |             url_chars = m.group('url_chars')
 892 |             pop = '{0}{1}{2}'.format(m.group('tag'), c, pop)
 893 |             popped = True
 894 |             return pop, popped, url_chars, counts, pre
 895 | 
 896 |         def _closingsquarebracket(c, pop, popped, url_chars, counts, pre):
 897 |             """If we find a closing square bracket we are going to see if it is
 898 |             balanced.  If it is balanced with matching opening bracket then it
 899 |             is part of the URL else we spit it back out of the URL."""
 900 |             # If counts['['] is None, count the occurrences of '['
 901 |             counts['['] = counts['['] or url.count('[')
 902 | 
 903 |             if counts['['] == counts[']']:
 904 |                 # It is balanced, so keep it
 905 |                 url_chars.append(c)
 906 |             else:
 907 |                 # In the case of un-matched closing square brackets we just eat
 908 |                 # it
 909 |                 popped = True
 910 |                 url_chars.pop()
 911 |                 counts[']'] = counts[']'] - 1
 912 |                 if first:  # pragma: no branch
 913 |                     pre = ''
 914 |             return pop, popped, url_chars, counts, pre
 915 | 
 916 |         def _closingparenthesis(c, pop, popped, url_chars, counts, pre):
 917 |             if counts[')'] is None:  # pragma: no branch
 918 |                 counts['('] = url.count('(')
 919 |                 counts[')'] = url.count(')')
 920 | 
 921 |             if counts['('] != counts[')']:
 922 |                 # Unbalanced so spit it out the back end
 923 |                 popped = True
 924 |                 pop = '{0}{1}'.format(url_chars.pop(), pop)
 925 |                 counts[')'] = counts[')'] - 1
 926 |             return pop, popped, url_chars, counts, pre
 927 | 
 928 |         def _casesdefault(c, pop, popped, url_chars, counts, pre):
 929 |             return pop, popped, url_chars, counts, pre
 930 | 
 931 |         cases = {
 932 |             '!': _endchar,
 933 |             '?': _endchar,
 934 |             ':': _endchar,
 935 |             ';': _endchar,
 936 |             '.': _endchar,
 937 |             ',': _endchar,
 938 |             '>': _rightanglebracket,
 939 |             ']': _closingsquarebracket,
 940 |             ')': _closingparenthesis,
 941 |         }
 942 |         for c in url_chars[-1::-1]:  # pragma: no branch
 943 |             popped = False
 944 |             pop, popped, url_chars, counts, pre = cases.get(
 945 |                 c, _casesdefault)(c, pop, popped, url_chars, counts, pre)
 946 |             first = False
 947 |             if popped is False:
 948 |                 break
 949 | 
 950 |         url = ''.join(url_chars)
 951 |         uri_parts = urlsplit(url)
 952 | 
 953 |         scheme_in_list = uri_parts.scheme in self.url_schemes
 954 |         valid_scheme = (uri_parts.scheme and scheme_in_list)
 955 |         if not is_valid_url(url) and not valid_scheme:
 956 |             return in_.replace('{0}linkStartMarker:'.format(self.uid), '')
 957 | 
 958 |         if text == '$':
 959 |             if valid_scheme:
 960 |                 text = human_readable_url(url)
 961 |             else:
 962 |                 ref_url = self.urlrefs.get(url)
 963 |                 if ref_url is not None:
 964 |                     text = human_readable_url(ref_url)
 965 |                 else:
 966 |                     text = url
 967 | 
 968 |         text = text.strip()
 969 |         title = encode_html(title)
 970 | 
 971 |         if not self.noimage:  # pragma: no branch
 972 |             text = self.image(text)
 973 |         text = self.span(text)
 974 |         text = self.glyphs(text)
 975 |         url = self.shelveURL(self.encode_url(urlunsplit(uri_parts)))
 976 |         attributes = parse_attributes(atts, restricted=self.restricted)
 977 |         attributes['href'] = url
 978 |         if title:
 979 |             # if the title contains unicode data, it is annoying to get Python
 980 |             # 2.6 and all the latter versions working properly.  But shelving
 981 |             # the title is a quick and dirty solution.
 982 |             attributes['title'] = self.shelve(title)
 983 |         if self.rel:
 984 |             attributes['rel'] = self.rel
 985 |         a_text = generate_tag('a', text, attributes)
 986 |         a_shelf_id = self.shelve(a_text)
 987 | 
 988 |         out = '{0}{1}{2}{3}'.format(pre, a_shelf_id, pop, tight)
 989 | 
 990 |         return out
 991 | 
 992 |     def encode_url(self, url):
 993 |         """
 994 |         Converts a (unicode) URL to an ASCII URL, with the domain part
 995 |         IDNA-encoded and the path part %-encoded (as per RFC 3986).
 996 | 
 997 |         Fixed version of the following code fragment from Stack Overflow:
 998 |             http://stackoverflow.com/a/804380/72656
 999 |         """
1000 |         # parse it
1001 |         parsed = urlsplit(url)
1002 | 
1003 |         if parsed.netloc:
1004 |             # divide the netloc further
1005 |             netloc_pattern = re.compile(r"""
1006 |                 (?:(?P<user>[^:@]+)(?::(?P<password>[^:@]+))?@)?
1007 |                 (?P<host>[^:]+)
1008 |                 (?::(?P<port>[0-9]+))?
1009 |             """, re.X | re.U)
1010 |             netloc_parsed = netloc_pattern.match(parsed.netloc).groupdict()
1011 |         else:
1012 |             netloc_parsed = {'user': '', 'password': '', 'host': '', 'port': ''}
1013 | 
1014 |         # encode each component
1015 |         scheme = parsed.scheme
1016 |         user = netloc_parsed['user'] and quote(netloc_parsed['user'])
1017 |         password = (
1018 |             netloc_parsed['password'] and quote(netloc_parsed['password'])
1019 |         )
1020 |         host = netloc_parsed['host']
1021 |         port = netloc_parsed['port'] and netloc_parsed['port']
1022 |         # the below splits the path portion of the url by slashes, translates
1023 |         # percent-encoded characters back into strings, then re-percent-encodes
1024 |         # what's necessary. Sounds screwy, but the url could include encoded
1025 |         # slashes, and this is a way to clean that up. It branches for PY2/3
1026 |         # because the quote and unquote functions expects different input
1027 |         # types: unicode strings for PY2 and str for PY3.
1028 |         path_parts = (quote(unquote(pce), b'') for pce in
1029 |                       parsed.path.split('/'))
1030 |         path = '/'.join(path_parts)
1031 | 
1032 |         # put it back together
1033 |         netloc = ''
1034 |         if user:
1035 |             netloc = '{0}{1}'.format(netloc, user)
1036 |             if password:
1037 |                 netloc = '{0}:{1}'.format(netloc, password)
1038 |             netloc = '{0}@'.format(netloc)
1039 |         netloc = '{0}{1}'.format(netloc, host)
1040 |         if port:
1041 |             netloc = '{0}:{1}'.format(netloc, port)
1042 |         return urlunsplit((scheme, netloc, path, parsed.query, parsed.fragment))
1043 | 
    def span(self, text):
        """Parse inline span markup (e.g. *strong*, _em_, ??cite??) in
        *text*, handing each match to fSpan for conversion to HTML.

        Spans can nest, so this is re-entered via fSpan; span_depth guards
        against runaway recursion by refusing to parse deeper than
        max_span_depth.
        """
        # Escaped span delimiters.  Double-char forms (**, ??, __) come
        # before their single-char counterparts so they match first.
        qtags = (r'\*\*', r'\*', r'\?\?', r'\-', r'__',
                 r'_', r'%', r'\+', r'~', r'\^')
        # Punctuation allowed immediately around a span.
        pnct = r""".,"'?!;:‹›«»„“”‚‘’"""
        self.span_depth = self.span_depth + 1

        if self.span_depth <= self.max_span_depth:
            for tag in qtags:
                # One pass per delimiter; named groups feed fSpan.
                pattern = re.compile(r"""
                    (?P<pre>^|(?<=[\s>{pnct}\(])|[{{[])
                    (?P<tag>{tag})(?!{tag})
                    (?P<atts>{cls})
                    (?!{tag})
                    (?::(?P<cite>\S+[^{tag}]{space}))?
                    (?P<content>[^{space}{tag}]+|\S.*?[^\s{tag}\n])
                    (?P<end>[{pnct}]*)
                    {tag}
                    (?P<tail>$|[\[\]}}<]|(?=[{pnct}]{{1,2}}[^0-9]|\s|\)))
                """.format(
                    **{'tag': tag, 'cls': cls_re_s, 'pnct': pnct, 'space':
                        regex_snippets['space']}
                ), flags=re.X | re.U)
                text = pattern.sub(self.fSpan, text)
        self.span_depth = self.span_depth - 1
        return text
1069 | 
1070 |     def getSpecialOptions(self, pre, tail):
1071 |         for before, after in self.spanWrappers:
1072 |             if pre == before and tail == after:
1073 |                 pre = tail = ''
1074 |                 break
1075 |         return (pre, tail)
1076 | 
1077 |     def fSpan(self, match):
1078 |         pre, tag, atts, cite, content, end, tail = match.groups()
1079 |         pre, tail = self.getSpecialOptions(pre, tail)
1080 | 
1081 |         qtags = {
1082 |             '*':  'strong',  # noqa: E241
1083 |             '**': 'b',       # noqa: E241
1084 |             '??': 'cite',    # noqa: E241
1085 |             '_':  'em',      # noqa: E241
1086 |             '__': 'i',       # noqa: E241
1087 |             '-':  'del',     # noqa: E241
1088 |             '%':  'span',    # noqa: E241
1089 |             '+':  'ins',     # noqa: E241
1090 |             '~':  'sub',     # noqa: E241
1091 |             '^':  'sup'      # noqa: E241
1092 |         }
1093 | 
1094 |         tag = qtags[tag]
1095 |         atts = pba(atts, restricted=self.restricted)
1096 |         if cite:
1097 |             atts = '{0} cite="{1}"'.format(atts, cite.rstrip())
1098 | 
1099 |         content = self.span(content)
1100 |         opentag = '<{0}{1}>'.format(tag, atts)
1101 |         closetag = '</{0}>'.format(tag)
1102 |         tags = self.storeTags(opentag, closetag)
1103 |         return pre + tags['open'] + content + end + tags['close'] + tail
1104 | 
1105 |     def storeTags(self, opentag, closetag=''):
1106 |         tags = {}
1107 |         self.refIndex += 1
1108 |         self.refCache[self.refIndex] = opentag
1109 |         tags['open'] = self.uid + str(self.refIndex) + ':ospan '
1110 | 
1111 |         self.refIndex += 1
1112 |         self.refCache[self.refIndex] = closetag
1113 |         tags['close'] = ' ' + self.uid + str(self.refIndex) + ':cspan'
1114 |         return tags
1115 | 
1116 |     def retrieveTags(self, text):
1117 |         text = (re.compile('{0}(?P<token>[0-9]+):ospan '.format(self.uid), re.U)
1118 |                 .sub(self.fRetrieveTags, text))
1119 |         text = (re.compile(' {0}(?P<token>[0-9]+):cspan'.format(self.uid), re.U)
1120 |                 .sub(self.fRetrieveTags, text))
1121 |         return text
1122 | 
1123 |     def fRetrieveTags(self, match):
1124 |         return self.refCache[int(match.group('token'))]
1125 | 
    def image(self, text):
        """Search *text* for inline image markup — !src(title)!:href with
        optional alignment and attribute modifiers — and replace each
        occurrence via fImage."""
        pattern = re.compile(r"""
            (?:[\[{{])?                # pre
            \!                         # opening !
            (\<|\=|\>)?                # optional alignment atts
            ({0})                      # optional style,class atts
            (?:\.\s)?                  # optional dot-space
            ([^\s(!]+)                 # presume this is the src
            \s?                        # optional space
            (?:\(([^\)]+)\))?          # optional title
            \!                         # closing
            (?::(\S+)(?<![\]).,]))?    # optional href sans final punct
            (?:[\]}}]|(?=[.,\s)|]|$))  # lookahead: space or end of string
        """.format(cls_re_s), re.U | re.X)
        return pattern.sub(self.fImage, text)
1141 | 
1142 |     def fImage(self, match):
1143 |         # (None, '', '/imgs/myphoto.jpg', None, None)
1144 |         align, attributes, url, title, href = match.groups()
1145 |         atts = OrderedDict()
1146 |         size = None
1147 | 
1148 |         alignments = {'<': 'left', '=': 'center', '>': 'right'}
1149 | 
1150 |         if not title:
1151 |             title = ''
1152 | 
1153 |         if not is_rel_url(url) and self.get_sizes:
1154 |             size = getimagesize(url)
1155 | 
1156 |         if href:
1157 |             href = self.shelveURL(href)
1158 | 
1159 |         url = self.shelveURL(url)
1160 | 
1161 |         if align:
1162 |             atts.update(align=alignments[align])
1163 |         atts.update(alt=title)
1164 |         if size:
1165 |             atts.update(height="{0}".format(size[1]))
1166 |         atts.update(src=url)
1167 |         if attributes:
1168 |             atts.update(parse_attributes(attributes, restricted=self.restricted))
1169 |         if title:
1170 |             atts.update(title=title)
1171 |         if size:
1172 |             atts.update(width="{0}".format(size[0]))
1173 |         img = generate_tag('img', ' /', atts)
1174 |         if href:
1175 |             a_atts = OrderedDict(href=href)
1176 |             if self.rel:
1177 |                 a_atts.update(rel=self.rel)
1178 |             img = generate_tag('a', img, a_atts)
1179 |         return img
1180 | 
1181 |     def code(self, text):
1182 |         text = self.doSpecial(text, '<code>', '</code>', self.fCode)
1183 |         text = self.doSpecial(text, '@', '@', self.fCode)
1184 |         text = self.doSpecial(text, '<pre>', '</pre>', self.fPre)
1185 |         return text
1186 | 
1187 |     def fCode(self, match):
1188 |         before, text, after = match.groups()
1189 |         after = after or ''
1190 |         before, after = self.getSpecialOptions(before, after)
1191 |         # text needs to be escaped
1192 |         text = encode_html(text, quotes=False)
1193 |         return ''.join([before, self.shelve('<code>{0}</code>'.format(text)), after])
1194 | 
1195 |     def fPre(self, match):
1196 |         before, text, after = match.groups()
1197 |         if after is None:
1198 |             after = ''
1199 |         before, after = self.getSpecialOptions(before, after)
1200 |         # text needs to be escaped
1201 |         text = encode_html(text)
1202 |         return ''.join([before, '<pre>', self.shelve(text), '</pre>', after])
1203 | 
1204 |     def doSpecial(self, text, start, end, method):
1205 |         pattern = re.compile(r'(^|\s|[\[({{>|]){0}(.*?){1}($|[\])}}])?'.format(
1206 |             re.escape(start), re.escape(end)), re.M | re.S)
1207 |         return pattern.sub(method, text)
1208 | 
1209 |     def noTextile(self, text):
1210 |         text = self.doSpecial(text, '<notextile>', '</notextile>',
1211 |                               self.fTextile)
1212 |         return self.doSpecial(text, '==', '==', self.fTextile)
1213 | 
1214 |     def fTextile(self, match):
1215 |         before, notextile, after = match.groups()
1216 |         if after is None:  # pragma: no branch
1217 |             after = ''
1218 |         before, after = self.getSpecialOptions(before, after)
1219 |         return ''.join([before, self.shelve(notextile), after])
1220 | 
1221 |     def getHTMLComments(self, text):
1222 |         """Search the string for HTML comments, e.g. <!-- comment text -->.  We
1223 |         send the text that matches this to fParseHTMLComments."""
1224 |         return self.doSpecial(text, '<!--', '-->', self.fParseHTMLComments)
1225 | 
1226 |     def fParseHTMLComments(self, match):
1227 |         """If self.restricted is True, clean the matched contents of the HTML
1228 |         comment.  Otherwise, return the comments unchanged.
1229 |         The original php had an if statement in here regarding restricted mode.
1230 |         nose reported that this line wasn't covered.  It's correct.  In
1231 |         restricted mode, the html comment tags have already been converted to
1232 |         <!*#8212; and —> so they don't match in getHTMLComments,
1233 |         and never arrive here.
1234 |         """
1235 |         before, commenttext, after = match.groups()
1236 |         commenttext = self.shelve(commenttext)
1237 |         return '{0}<!--{1}-->'.format(before, commenttext)
1238 | 
    def redcloth_list(self, text):
        """Parse the text for definition lists and send them to be
        formatted.

        A RedCloth-style definition list is a run of lines that start with
        one or more hyphens and contain a ':=' term/definition separator.
        """
        pattern = re.compile(r"^([-]+{0}[ .].*:=.*)$(?![^-])".format(cls_re_s),
                             re.M | re.U | re.S)
        return pattern.sub(self.fRCList, text)
1245 | 
    def fRCList(self, match):
        """Format a RedCloth-style definition list into <dl>/<dt>/<dd>
        markup.  Each hyphen-started line yields one term/definition
        pair; embedded newlines become <br />."""
        out = []
        # One list item per hyphen-started line.
        text = re.split(r'\n(?=[-])', match.group(), flags=re.M)
        for line in text:
            # parse the attributes and content
            m = re.match(r'^[-]+({0})[ .](.*)$'.format(cls_re_s), line,
                         flags=re.M | re.S)
            if not m:
                continue

            atts, content = m.groups()
            # cleanup
            content = content.strip()
            atts = pba(atts, restricted=self.restricted)

            # split the content into the term and definition
            xm = re.match(
                r'^(.*?){0}*:=(.*?){0}*(=:|:=)?{0}*$'
                .format(regex_snippets['space']),
                content,
                re.S)
            term, definition, _ = xm.groups()
            # cleanup
            term = term.strip()
            definition = definition.strip(' ')

            # if this is the first time through, out as a bool is False
            if not out:
                # NOTE(review): attributes land on the <dl> itself only
                # when the first item has an empty definition — presumably
                # intentional; confirm against reference implementation.
                if definition == '':
                    dltag = "<dl{0}>".format(atts)
                else:
                    dltag = "<dl>"
                out.append(dltag)

            if term != '':
                # A definition that begins on its own line is wrapped in
                # <p>; all internal newlines become <br />.
                is_newline_started_def = definition.startswith('\n')
                definition = (
                    definition
                    .strip()
                    .replace('\n', '<br />'))

                if is_newline_started_def:
                    definition = '<p>{0}</p>'.format(definition)
                term = term.replace('\n', '<br />')

                # Run both halves through the paragraph-level parser.
                term = self.graf(term)
                definition = self.graf(definition)

                out.append('\t<dt{0}>{1}</dt>'.format(atts, term))
                if definition:
                    out.append('\t<dd>{0}</dd>'.format(definition))

        out.append('</dl>')
        out = '\n'.join(out)
        return out
1302 | 
    def placeNoteLists(self, text):
        """Parse the text for endnotes.

        Referenced notes are re-keyed by their sequence number and sorted;
        notes that were defined but never referenced are moved to
        unreferencedNotes so an extended notelist ('+') can still render
        them.  Then any <p>notelist…</p> markers are replaced via
        fNoteLists.
        """
        if self.notes:
            o = OrderedDict()
            for label, info in self.notes.items():
                if 'seq' in info:
                    # Referenced note: index by sequence, keep the label.
                    i = info['seq']
                    info['seq'] = label
                    o[i] = info
                else:
                    self.unreferencedNotes[label] = info

            if o:  # pragma: no branch
                # sort o by key
                o = OrderedDict(sorted(o.items(), key=lambda t: t[0]))
            self.notes = o
        text_re = re.compile(r'<p>notelist({0})(?:\:([\w|{1}]))?([\^!]?)(\+?)'
                             r'\.?[\s]*</p>'.format(cls_re_s, syms_re_s), re.U)
        text = text_re.sub(self.fNoteLists, text)
        return text
1323 | 
    def fNoteLists(self, match):
        """Given the text that matches as a note list marker, format the
        collected notes into an HTML ordered list."""
        att, start_char, g_links, extras = match.groups()
        start_char = start_char or 'a'
        # One rendered list is cached per (links, extras, start) combo.
        index = '{0}{1}{2}'.format(g_links, extras, start_char)
        result = ''

        if index not in self.notelist_cache:  # pragma: no branch
            o = []
            if self.notes:  # pragma: no branch
                for seq, info in self.notes.items():
                    links = self.makeBackrefLink(info, g_links, start_char)
                    atts = ''
                    if 'def' in info:
                        # Defined note: emit its content with an anchor.
                        infoid = info['id']
                        atts = info['def']['atts']
                        content = info['def']['content']
                        li = ('\t\t<li{0}>{1}<span id="note{2}"> '
                              '</span>{3}</li>').format(atts, links, infoid,
                                                        content)
                    else:
                        # Referenced but never defined.
                        li = ('\t\t<li{0}>{1} Undefined Note [#{2}].</li>'
                              ).format(atts, links, info['seq'])
                    o.append(li)
            if '+' == extras and self.unreferencedNotes:
                # 'notelist+' also renders defined-but-unreferenced notes.
                for seq, info in self.unreferencedNotes.items():
                    atts = info['def']['atts']
                    content = info['def']['content']
                    li = '\t\t<li{0}>{1}</li>'.format(atts, content)
                    o.append(li)
            self.notelist_cache[index] = "\n".join(o)
            result = self.notelist_cache[index]
        if result:
            list_atts = pba(att, restricted=self.restricted)
            result = '<ol{0}>\n{1}\n\t</ol>'.format(list_atts, result)
        return result
1360 | 
    def makeBackrefLink(self, info, g_links, i):
        """Given the pieces of a back reference link, create an <a> tag.

        g_links is the default backlink style from the notelist marker; a
        per-note link character stored in the definition overrides it.
        """
        link = ''
        if 'def' in info:
            link = info['def']['link']
        backlink_type = link or g_links
        i_ = encode_high(i)
        # Only step through backref characters when the start char isn't
        # one of the special symbols.
        allow_inc = i not in syms_re_s
        i_ = int(i_)

        if backlink_type == "!":
            # '!' suppresses back reference links entirely.
            return ''
        elif backlink_type == '^':
            # '^' links back to the first reference only.
            return """<sup><a href="#noteref{0}">{1}</a></sup>""".format(
                info['refids'][0], i)
        else:
            # Default: one backlink per reference, with an incrementing
            # character label (a, b, c, …).
            result = []
            for refid in info['refids']:
                i_entity = decode_high(i_)
                sup = """<sup><a href="#noteref{0}">{1}</a></sup>""".format(
                    refid, i_entity)
                if allow_inc:
                    i_ = i_ + 1
                result.append(sup)
            result = ' '.join(result)
            return result
1387 | 
    def fParseNoteDefs(self, m):
        """Parse one note definition, store its formatted pieces on
        self.notes, and return '' so the definition is removed from the
        output text."""
        label = m.group('label')
        link = m.group('link')
        att = m.group('att')
        content = m.group('content')

        # Assign an id if the note reference parse hasn't found the label yet.
        if label not in self.notes:
            self.notes[label] = {'id': '{0}{1}'.format(
                self.linkPrefix, self._increment_link_index())}

        # Ignores subsequent defs using the same label
        if 'def' not in self.notes[label]:  # pragma: no branch
            self.notes[label]['def'] = {
                'atts': pba(att, restricted=self.restricted), 'content':
                self.graf(content), 'link': link}
        return ''
1406 | 
    def noteRef(self, text):
        """Search the text for note references — [#label] with optional
        attributes and a '!' no-link marker — and format each via
        fParseNoteRefs."""
        text_re = re.compile(r"""
        \[          # start
        ({0})       # !atts
        \#
        ([^\]!]+)   # !label
        ([!]?)      # !nolink
        \]""".format(cls_re_s), re.X)
        text = text_re.sub(self.fParseNoteRefs, text)
        return text
1418 | 
    def fParseNoteRefs(self, match):
        """Parse and format the matched text into note references.
        By the time this function is called, all the defs will have been
        processed into the notes array. So now we can resolve the link numbers
        in the order we process the refs..."""
        atts, label, nolink = match.groups()
        atts = pba(atts, restricted=self.restricted)
        # A trailing '!' suppresses the link to the note definition.
        nolink = nolink == '!'

        # Assign a sequence number to this reference if there isn't one already
        if label in self.notes:
            num = self.notes[label]['seq']
        else:
            self.notes[label] = {
                'seq': self.note_index, 'refids': [], 'id': ''
            }
            num = self.note_index
            self.note_index = self.note_index + 1

        # Make our anchor point and stash it for possible use in backlinks when
        # the note list is generated later...
        refid = '{0}{1}'.format(self.linkPrefix, self._increment_link_index())
        self.notes[label]['refids'].append(refid)

        # If we are referencing a note that hasn't had the definition parsed
        # yet, then assign it an ID...
        if not self.notes[label]['id']:
            self.notes[label]['id'] = '{0}{1}'.format(
                self.linkPrefix, self._increment_link_index())
        labelid = self.notes[label]['id']

        # Build the link (if any)...
        result = '<span id="noteref{0}">{1}</span>'.format(refid, num)
        if not nolink:
            result = '<a href="#note{0}">{1}</a>'.format(labelid, result)

        # Build the reference...
        result = '<sup{0}>{1}</sup>'.format(atts, result)
        return result
1458 | 
1459 |     def shelveURL(self, text):
1460 |         if text == '':
1461 |             return ''
1462 |         self.refIndex = self.refIndex + 1
1463 |         self.refCache[self.refIndex] = text
1464 |         output = '{0}{1}{2}'.format(self.uid, self.refIndex, ':url')
1465 |         return output
1466 | 
1467 |     def retrieveURLs(self, text):
1468 |         return re.sub(r'{0}(?P<token>[0-9]+):url'.format(self.uid), self.retrieveURL, text)
1469 | 
1470 |     def retrieveURL(self, match):
1471 |         url = self.refCache.get(int(match.group('token')), '')
1472 |         if url == '':
1473 |             return url
1474 | 
1475 |         if url in self.urlrefs:
1476 |             url = self.urlrefs[url]
1477 | 
1478 |         return url
1479 | 
1480 |     def _increment_link_index(self):
1481 |         """The self.linkIndex property needs to be incremented in various
1482 |         places.  Don't Repeat Yourself."""
1483 |         self.linkIndex = self.linkIndex + 1
1484 |         return self.linkIndex
1485 | 
1486 | 
def textile(text, html_type='xhtml'):
    """
    Apply Textile to a block of text.

    This function takes the following additional parameters:

    html_type - 'xhtml' or 'html5' style tags (default: 'xhtml')

    """
    parser = Textile(html_type=html_type)
    return parser.parse(text)
1497 | 
1498 | 
def textile_restricted(text, lite=True, noimage=True, html_type='xhtml'):
    """
    Apply Textile to a block of text, with restrictions designed for weblog
    comments and other untrusted input.  Raw HTML is escaped, style attributes
    are disabled, and rel='nofollow' is added to external links.

    This function takes the following additional parameters:

    html_type - 'xhtml' or 'html5' style tags (default: 'xhtml')
    lite - restrict block tags to p, bq, and bc, disable tables (default: True)
    noimage - disable image tags (default: True)

    """
    parser = Textile(restricted=True, lite=lite, noimage=noimage,
                     html_type=html_type, rel='nofollow')
    return parser.parse(text)
1514 | 


--------------------------------------------------------------------------------
/textile/objects/__init__.py:
--------------------------------------------------------------------------------
1 | from .block import Block
2 | from .table import Table
3 | 
4 | __all__ = ['Block', 'Table']
5 | 


--------------------------------------------------------------------------------
/textile/objects/block.py:
--------------------------------------------------------------------------------
  1 | # -*- coding: utf-8 -*-
  2 | from collections import OrderedDict
  3 | try:
  4 |     import regex as re
  5 | except ImportError:
  6 |     import re
  7 | 
  8 | from textile.regex_strings import cls_re_s, regex_snippets
  9 | from textile.utils import encode_html, generate_tag, parse_attributes
 10 | 
 11 | 
 12 | class Block(object):
    def __init__(self, textile, tag, atts, ext, cite, content):
        """Hold one parsed textile block and immediately process it.

        textile - the parent Textile parser instance
        tag     - the block signature (e.g. 'p', 'bq', 'fn1')
        atts    - the raw attribute string from the block signature
        ext     - extended-block marker, presumably from '..' blocks —
                  TODO confirm against caller
        cite    - citation URL for blockquotes, if any
        content - the block's text content
        """
        self.textile = textile
        self.tag = tag
        self.atts = atts
        self.ext = ext
        self.cite = cite
        self.content = content

        # Parsed form of atts; the outer/inner tag and attribute dicts are
        # filled in by process() according to the block type.
        self.attributes = parse_attributes(atts, restricted=self.textile.restricted)
        self.outer_tag = ''
        self.inner_tag = ''
        self.outer_atts = OrderedDict()
        self.inner_atts = OrderedDict()
        self.eat = False
        self.process()
 28 | 
 29 |     def process(self):
 30 |         if self.tag == 'p':
 31 |             # is this an anonymous block with a note definition?
 32 |             notedef_re = re.compile(r"""
 33 |             ^note\#                               # start of note def marker
 34 |             (?P<label>[^%<*!@\#^([{{ {space}.]+)  # label
 35 |             (?P<link>[*!^]?)                      # link
 36 |             (?P<att>{cls})                        # att
 37 |             \.?                                   # optional period.
 38 |             [{space}]+                            # whitespace ends def marker
 39 |             (?P<content>.*)$                      # content""".format(
 40 |                 space=regex_snippets['space'], cls=cls_re_s),
 41 |                 flags=re.X | re.U)
 42 |             notedef = notedef_re.sub(self.textile.fParseNoteDefs, self.content)
 43 | 
 44 |             # It will be empty if the regex matched and ate it.
 45 |             if '' == notedef:
 46 |                 self.content = notedef
 47 |                 self.eat = True
 48 | 
 49 |         fns = re.search(r'fn(?P<fnid>{0}+)'.format(regex_snippets['digit']),
 50 |                         self.tag, flags=re.U)
 51 |         if fns:
 52 |             self.tag = 'p'
 53 |             fnid = self.textile.fn.get(fns.group('fnid'), None)
 54 |             if fnid is None:
 55 |                 fnid = '{0}{1}'.format(self.textile.linkPrefix,
 56 |                                        self.textile._increment_link_index())
 57 | 
 58 |             # If there is an author-specified ID goes on the wrapper & the
 59 |             # auto-id gets pushed to the <sup>
 60 |             supp_id = OrderedDict()
 61 | 
 62 |             # if class has not been previously specified, set it to "footnote"
 63 |             if 'class' not in self.attributes:
 64 |                 self.attributes.update({'class': 'footnote'})
 65 | 
 66 |             # if there's no specified id, use the generated one.
 67 |             if 'id' not in self.attributes:
 68 |                 self.attributes.update({'id': 'fn{0}'.format(fnid)})
 69 |             else:
 70 |                 supp_id = parse_attributes('(#fn{0})'.format(fnid), restricted=self.textile.restricted)
 71 | 
 72 |             if '^' not in self.atts:
 73 |                 sup = generate_tag('sup', fns.group('fnid'), supp_id)
 74 |             else:
 75 |                 fnrev = generate_tag('a', fns.group('fnid'), {'href':
 76 |                                      '#fnrev{0}'.format(fnid)})
 77 |                 sup = generate_tag('sup', fnrev, supp_id)
 78 | 
 79 |             self.content = '{0} {1}'.format(sup, self.content)
 80 | 
 81 |         if self.tag == 'bq':
 82 |             if self.cite:
 83 |                 self.cite = self.textile.shelveURL(self.cite)
 84 |                 cite_att = OrderedDict(cite=self.cite)
 85 |                 self.cite = ' cite="{0}"'.format(self.cite)
 86 |             else:
 87 |                 self.cite = ''
 88 |                 cite_att = OrderedDict()
 89 |             cite_att.update(self.attributes)
 90 |             self.outer_tag = 'blockquote'
 91 |             self.outer_atts = cite_att
 92 |             self.inner_tag = 'p'
 93 |             self.inner_atts = self.attributes
 94 |             self.eat = False
 95 | 
 96 |         elif self.tag == 'bc' or self.tag == 'pre':
 97 |             i_tag = ''
 98 |             if self.tag == 'bc':
 99 |                 i_tag = 'code'
100 |             content = encode_html(self.content)
101 |             self.content = self.textile.shelve(content)
102 |             self.outer_tag = 'pre'
103 |             self.outer_atts = self.attributes
104 |             self.inner_tag = i_tag
105 |             self.inner_atts = self.attributes
106 |             self.eat = False
107 | 
108 |         elif self.tag == 'notextile':
109 |             self.content = self.textile.shelve(self.content)
110 | 
111 |         elif self.tag == '###':
112 |             self.eat = True
113 | 
114 |         else:
115 |             self.outer_tag = self.tag
116 |             self.outer_atts = self.attributes
117 | 
118 |         if not self.eat:
119 |             self.content = self.textile.graf(self.content)
120 |         else:
121 |             self.content = ''
122 | 


--------------------------------------------------------------------------------
/textile/objects/table.py:
--------------------------------------------------------------------------------
  1 | # -*- coding: utf-8 -*-
  2 | from xml.etree import ElementTree
  3 | 
  4 | from textile.regex_strings import (align_re_s, cls_re_s, regex_snippets,
  5 |                                    table_span_re_s, valign_re_s, pnct_re_s)
  6 | from textile.utils import generate_tag, parse_attributes
  7 | 
  8 | try:
  9 |     import regex as re
 10 | except ImportError:
 11 |     import re
 12 | 
 13 | 
 14 | class Table(object):
 15 |     caption_re = re.compile(
 16 |         (r'^\|\=(?P<capts>{s}{a}{c})\. '
 17 |          r'(?P<cap>[^\n]*)(?P<row>.*)'
 18 |          .format(**{'s': table_span_re_s, 'a': align_re_s, 'c': cls_re_s})),
 19 |         re.S)
 20 |     colgroup_re = re.compile(
 21 |         r'^\|:(?P<cols>{s}{a}{c}\. .*)'
 22 |         .format(**{'s': table_span_re_s, 'a': align_re_s, 'c': cls_re_s}),
 23 |         re.M)
 24 |     heading_re = re.compile(
 25 |         r'^_(?={0}|{1})'.format(regex_snippets['space'], pnct_re_s))
 26 | 
 27 |     def __init__(self, textile, tatts, rows, summary):
 28 |         self.textile = textile
 29 |         self.attributes = parse_attributes(tatts, 'table', restricted=self.textile.restricted)
 30 |         if summary:
 31 |             self.attributes.update(summary=summary.strip())
 32 |         self.input = rows
 33 |         self.caption = ''
 34 |         self.colgroup = ''
 35 |         self.content = []
 36 | 
 37 |     def process(self):
 38 |         rgrp = None
 39 |         groups = []
 40 |         split = (
 41 |             re.compile(r'\|{0}*?$'.format(regex_snippets['space']), re.M)
 42 |             .split(self.input))
 43 |         for i, row in enumerate([x for x in split if x]):
 44 |             row = row.lstrip()
 45 | 
 46 |             # Caption -- only occurs on row 1, otherwise treat '|=. foo |...'
 47 |             # as a normal center-aligned cell.
 48 |             cmtch = self.caption_re.match(row)
 49 |             if i == 0 and cmtch:
 50 |                 caption = Caption(restricted=self.textile.restricted, **cmtch.groupdict())
 51 |                 self.caption = '\n{0}'.format(caption.caption)
 52 |                 row = cmtch.group('row').lstrip()
 53 |                 if row == '':
 54 |                     continue
 55 | 
 56 |             # Colgroup -- A colgroup row will not necessarily end with a |.
 57 |             # Hence it may include the next row of actual table data.
 58 |             if row[:2] == '|:':
 59 |                 if '\n' in row:
 60 |                     colgroup_data, row = row[2:].split('\n')
 61 |                 else:
 62 |                     colgroup_data, row = row[2:], ''
 63 |                 colgroup_atts, cols = colgroup_data, None
 64 |                 if '|' in colgroup_data:
 65 |                     colgroup_atts, cols = colgroup_data.split('|', 1)
 66 |                 colgrp = Colgroup(cols, colgroup_atts, restricted=self.textile.restricted)
 67 |                 self.colgroup = colgrp.process()
 68 |                 if row == '':
 69 |                     continue
 70 | 
 71 |             # search the row for a table group - thead, tfoot, or tbody
 72 |             grpmatchpattern = (r"(:?^\|(?P<part>{v})(?P<rgrpatts>{s}{a}{c})"
 73 |                                r"\.\s*$\n)?^(?P<row>.*)").format(
 74 |                                    **{'v': valign_re_s, 's': table_span_re_s,
 75 |                                       'a': align_re_s, 'c': cls_re_s})
 76 |             grpmatch_re = re.compile(grpmatchpattern, re.S | re.M)
 77 |             grpmatch = grpmatch_re.match(row.lstrip())
 78 | 
 79 |             grptypes = {'^': Thead, '~': Tfoot, '-': Tbody}
 80 |             if grpmatch.group('part'):
 81 |                 # we're about to start a new group, so process the current one
 82 |                 # and add it to the output
 83 |                 if rgrp:
 84 |                     groups.append('\n\t{0}'.format(rgrp.process()))
 85 |                 rgrp = grptypes[grpmatch.group('part')](grpmatch.group(
 86 |                     'rgrpatts'), restricted=self.textile.restricted)
 87 |             row = grpmatch.group('row')
 88 | 
 89 |             rmtch = re.search(r'^(?P<ratts>{0}{1}\. )(?P<row>.*)'.format(
 90 |                 align_re_s, cls_re_s), row.lstrip())
 91 |             if rmtch:
 92 |                 row_atts = parse_attributes(rmtch.group('ratts'), 'tr', restricted=self.textile.restricted)
 93 |                 row = rmtch.group('row')
 94 |             else:
 95 |                 row_atts = {}
 96 | 
 97 |             # create a row to hold the cells.
 98 |             r = Row(row_atts, row)
 99 |             for cellctr, cell in enumerate(row.split('|')[1:]):
100 |                 ctag = 'td'
101 |                 if self.heading_re.match(cell):
102 |                     ctag = 'th'
103 | 
104 |                 cmtch = re.search(r'^(?P<catts>_?{0}{1}{2}\. )'
105 |                                   '(?P<cell>.*)'.format(
106 |                                       table_span_re_s, align_re_s, cls_re_s),
107 |                                   cell, flags=re.S)
108 |                 if cmtch:
109 |                     catts = cmtch.group('catts')
110 |                     cell_atts = parse_attributes(catts, 'td', restricted=self.textile.restricted)
111 |                     cell = cmtch.group('cell')
112 |                 else:
113 |                     cell_atts = {}
114 | 
115 |                 if not self.textile.lite:
116 |                     a_pattern = r'(?P<space>{0}*)(?P<cell>.*)'.format(
117 |                         regex_snippets['space'])
118 |                     a = re.search(a_pattern, cell, flags=re.S)
119 |                     cell = self.textile.redcloth_list(a.group('cell'))
120 |                     cell = self.textile.textileLists(cell)
121 |                     cell = '{0}{1}'.format(a.group('space'), cell)
122 | 
123 |                 # create a cell
124 |                 c = Cell(ctag, cell, cell_atts)
125 |                 cline_tag = '\n\t\t\t{0}'.format(c.process())
126 |                 # add the cell to the row
127 |                 r.cells.append(self.textile.doTagBr(ctag, cline_tag))
128 | 
129 |             # if we're in a group, add it to the group's rows, else add it
130 |             # directly to the content
131 |             if rgrp:
132 |                 rgrp.rows.append(r.process())
133 |             else:
134 |                 self.content.append(r.process())
135 | 
136 |         # if there's still an rgrp, process it and add it to the output
137 |         if rgrp:
138 |             groups.append('\n\t{0}'.format(rgrp.process()))
139 | 
140 |         content = '{0}{1}{2}{3}\n\t'.format(
141 |             self.caption, self.colgroup, ''.join(groups), ''.join(self.content))
142 |         tbl = generate_tag('table', content, self.attributes)
143 |         return '\t{0}\n\n'.format(tbl)
144 | 
145 | 
class Caption(object):
    """Renders a textile caption row ("|=. ...") into a <caption> tag."""

    def __init__(self, capts, cap, row, restricted):
        # `row` carries the remainder of the table source; it is parsed
        # by the caller, not here.
        self.attributes = parse_attributes(capts, restricted=restricted)
        self.caption = self.process(cap)

    def process(self, cap):
        """Wrap the stripped caption text in a tab-indented <caption>."""
        caption_tag = generate_tag('caption', cap.strip(), self.attributes)
        return '\t{0}'.format(caption_tag)
154 | 
155 | 
class Colgroup(object):
    """Renders a textile colgroup row ("|:...") into <colgroup>/<col>
    markup."""

    def __init__(self, cols, atts, restricted):
        self.row = ''
        self.attributes = atts
        self.cols = cols
        self.restricted = restricted

    def process(self):
        """Return the formatted <colgroup> markup for this row."""
        group_atts = parse_attributes(self.attributes, 'col', restricted=self.restricted)
        colgroup = ElementTree.Element('colgroup', attrib=group_atts)
        colgroup.text = '\n\t'
        if self.cols is not None:
            # The first item in match_cols belongs to the colgroup itself;
            # the remaining items are the individual cols.
            match_cols = self.cols.replace('.', '').split('|')
            for col in match_cols:
                col_atts = parse_attributes(col.strip(), 'col', restricted=self.restricted)
                ElementTree.SubElement(colgroup, 'col', col_atts)
        colgrp = ElementTree.tostring(colgroup, encoding='unicode')
        # cleanup the extra xml declaration if it exists, (python versions
        # differ) and then format the resulting string accordingly: newline and
        # tab between cols and a newline at the end
        xml_declaration = "<?xml version='1.0' encoding='UTF-8'?>\n"
        colgrp = colgrp.replace(xml_declaration, '')
        colgrp = colgrp.replace('><', '>\n\t<')
        return f"\n\t{colgrp}"
184 | 
185 | 
class Row(object):
    """A single table row.  The caller appends already-rendered cell
    strings to ``cells`` before invoking process()."""

    def __init__(self, attributes, row):
        # `row` (the raw textile source) is accepted for interface
        # compatibility but unused; the caller renders the cells itself.
        self.tag = 'tr'
        self.attributes = attributes
        self.cells = []

    def process(self):
        """Return the <tr> element wrapping the accumulated cells."""
        # Use self.tag (previously set but ignored in favor of a
        # hard-coded 'tr') and join the cells directly instead of
        # copying them through an intermediate list.
        cell_data = '{0}\n\t\t'.format(''.join(self.cells))
        tag = generate_tag(self.tag, cell_data, self.attributes)
        return '\n\t\t{0}'.format(tag)
199 | 
200 | 
class Cell(object):
    """One table cell: a tag name ('td' or 'th'), its content string and
    an attribute dict."""

    def __init__(self, tag, content, attributes):
        self.tag = tag
        self.content = content
        self.attributes = attributes

    def process(self):
        """Render the cell as a complete html element string."""
        rendered = generate_tag(self.tag, self.content, self.attributes)
        return rendered
209 | 
210 | 
class _TableSection(object):
    """Common base for the thead/tbody/tfoot row groups: collects
    rendered rows and wraps them in the section tag."""

    def __init__(self, tag, attributes, restricted):
        self.tag = tag
        self.attributes = parse_attributes(attributes, restricted=restricted)
        self.rows = []

    def process(self):
        """Wrap the accumulated rows in this section's element."""
        section_body = '{0}\n\t'.format(''.join(self.rows))
        return generate_tag(self.tag, section_body, self.attributes)
219 | 
220 | 
class Thead(_TableSection):
    """Table header row group (<thead>)."""
    def __init__(self, attributes, restricted):
        # zero-argument super(): the file already targets Python 3.
        super().__init__('thead', attributes, restricted)
224 | 
225 | 
class Tbody(_TableSection):
    """Table body row group (<tbody>)."""
    def __init__(self, attributes, restricted):
        # zero-argument super(): the file already targets Python 3.
        super().__init__('tbody', attributes, restricted)
229 | 
230 | 
class Tfoot(_TableSection):
    """Table footer row group (<tfoot>)."""
    def __init__(self, attributes, restricted):
        # zero-argument super(): the file already targets Python 3.
        super().__init__('tfoot', attributes, restricted)
234 | 


--------------------------------------------------------------------------------
/textile/regex_strings.py:
--------------------------------------------------------------------------------
# -*- coding: utf-8 -*-
try:
    # Use regex module for matching uppercase characters if installed,
    # otherwise fall back to finding all the uppercase chars in a loop.
    import regex as re  # noqa: F401
    upper_re_s = r'\p{Lu}'
    # Named character-class fragments interpolated into the parser's
    # regexes.  The third-party regex module understands unicode
    # property classes (\p{...}), so these cover the full unicode ranges.
    regex_snippets = {
        'acr': r'\p{Lu}\p{Nd}',
        'abr': r'\p{Lu}',
        'nab': r'\p{Ll}',
        'wrd': r'(?:\p{L}|\p{M}|\p{N}|\p{Pc})',
        'cur': r'\p{Sc}',
        'digit': r'\p{N}',
        'space': r'(?:\p{Zs}|\v)',
        'char': r'(?:[^\p{Zs}\v])',
    }
except ImportError:
    from sys import maxunicode
    # Plain re has no \p{Lu}; enumerate every uppercase codepoint into
    # one (large) character-class string instead.
    upper_re_s = "".join(
        [chr(c) for c in range(maxunicode) if chr(c).isupper()]
    )
    regex_snippets = {
        'acr': r'{0}0-9'.format(upper_re_s),
        'abr': r'{0}'.format(upper_re_s),
        'nab': r'a-z',
        'wrd': r'\w',
        # All codepoints identified as currency symbols
        # by the [mrab-regex library](https://pypi.org/project/regex/)
        # and the UNICODE standard.
        'cur': r'$¢-¥֏؋৲৳৻૱௹฿៛\u20a0-\u20cf\ua838﷼﹩$¢£¥₩',
        'digit': r'\d',
        'space': r'(?:\s|\v)',
        'char': r'\S',
    }

# Horizontal alignment modifiers: <, >, <>, =, or runs of parens.
halign_re_s = r'(?:\<(?!>)|(?<!<)\>|\<\>|\=|[()]+(?! ))'
# Vertical alignment modifiers: -, ^, ~.
valign_re_s = r'[\-^~]'
class_re_s = r'(?:\([^)\n]+\))'       # Don't allow classes/ids,
language_re_s = r'(?:\[[^\]\n]+\])'   # languages,
style_re_s = r'(?:\{[^}\n]+\})'       # or styles to span across newlines
colspan_re_s = r'(?:\\\d+)'           # "\N" -- table colspan
rowspan_re_s = r'(?:\/\d+)'           # "/N" -- table rowspan
align_re_s = r'(?:{0}|{1})*'.format(halign_re_s, valign_re_s)
table_span_re_s = r'(?:{0}|{1})*'.format(colspan_re_s, rowspan_re_s)
# regex string to match class, style and language attributes
cls_re_s = (r'(?:'
            r'{c}(?:{l}(?:{s})?|{s}(?:{l})?)?|'
            r'{l}(?:{c}(?:{s})?|{s}(?:{c})?)?|'
            r'{s}(?:{c}(?:{l})?|{l}(?:{c})?)?'
            r')?'
            ).format(c=class_re_s, s=style_re_s, l=language_re_s)
# ascii punctuation characters (as a character class).
pnct_re_s = r'[-!"#$%&()*+,/:;<=>?@\'\[\\\]\.^_`{|}~]'
# standalone symbol characters.
syms_re_s = '¤§µ¶†‡•∗∴◊♠♣♥♦'
54 | 


--------------------------------------------------------------------------------
/textile/textilefactory.py:
--------------------------------------------------------------------------------
 1 | from .core import Textile
 2 | 
 3 | 
 4 | class TextileFactory(object):
 5 |     """ Use TextileFactory to create a Textile object which can be re-used to
 6 |     process multiple strings with the same settings."""
 7 | 
 8 |     def __init__(self, restricted=False, lite=False, sanitize=False,
 9 |                  noimage=None, get_sizes=False, html_type='xhtml'):
10 | 
11 |         self.class_parms = {}
12 |         self.method_parms = {}
13 | 
14 |         if lite and not restricted:
15 |             raise ValueError("lite can only be enabled in restricted mode")
16 | 
17 |         if restricted:
18 |             self.class_parms['restricted'] = True
19 |             self.class_parms['lite'] = lite
20 |             self.method_parms['rel'] = 'nofollow'
21 | 
22 |         if noimage is None:
23 |             noimage = bool(restricted)
24 | 
25 |         self.class_parms['noimage'] = noimage
26 |         self.method_parms['sanitize'] = sanitize
27 |         self.class_parms['get_sizes'] = get_sizes
28 | 
29 |         if html_type not in ['xhtml', 'html5']:
30 |             raise ValueError("html_type must be 'xhtml' or 'html5'")
31 |         else:
32 |             self.class_parms['html_type'] = html_type
33 | 
34 |     def process(self, text):
35 |         return Textile(**self.class_parms).parse(text, **self.method_parms)
36 | 


--------------------------------------------------------------------------------
/textile/utils.py:
--------------------------------------------------------------------------------
  1 | try:
  2 |     import regex as re
  3 | except ImportError:
  4 |     import re
  5 | 
  6 | from urllib.parse import urlparse
  7 | import html
  8 | 
  9 | from collections import OrderedDict
 10 | 
 11 | from xml.etree import ElementTree
 12 | 
 13 | from textile.regex_strings import valign_re_s, halign_re_s
 14 | 
# Regular expressions for stripping chunks of HTML,
# leaving only content not wrapped in a tag or a comment
RAW_TEXT_REVEALERS = (
    # The php version orders the below list of tags differently.  The
    # important thing to note here is that the pre must occur before the p or
    # else the regex module doesn't properly match pre-s. It only matches the
    # p in pre.
    re.compile(r'<(pre|p|blockquote|div|form|table|ul|ol|dl|h[1-6])[^>]*?>.*</\1>',
               re.S),
    re.compile(r'<(hr|br)[^>]*?/>'),
    re.compile(r'<!--.*?-->'),
)
 27 | 
 28 | 
 29 | def decode_high(text):
 30 |     """Decode encoded HTML entities."""
 31 |     text = '&#{0};'.format(text)
 32 |     return html.unescape(text)
 33 | 
 34 | 
 35 | def encode_high(text):
 36 |     """Encode the text so that it is an appropriate HTML entity."""
 37 |     return ord(text)
 38 | 
 39 | 
 40 | def encode_html(text, quotes=True):
 41 |     """Return text that's safe for an HTML attribute."""
 42 |     a = (
 43 |         ('&', '&'),
 44 |         ('<', '<'),
 45 |         ('>', '>'))
 46 | 
 47 |     if quotes:
 48 |         a = a + (("'", '''),
 49 |                  ('"', '"'))
 50 | 
 51 |     for k, v in a:
 52 |         text = text.replace(k, v)
 53 |     return text
 54 | 
 55 | 
 56 | def generate_tag(tag, content, attributes=None):
 57 |     """Generate a complete html tag using the ElementTree module.  tag and
 58 |     content are strings, the attributes argument is a dictionary.  As
 59 |     a convenience, if the content is ' /', a self-closing tag is generated."""
 60 |     enc = 'unicode'
 61 |     if not tag:
 62 |         return content
 63 |     element = ElementTree.Element(tag, attrib=attributes)
 64 |     # Sort attributes for Python 3.8+, as suggested in
 65 |     # https://docs.python.org/3/library/xml.etree.elementtree.html
 66 |     if len(element.attrib) > 1:
 67 |         # adjust attribute order, e.g. by sorting
 68 |         attribs = sorted(element.attrib.items())
 69 |         element.attrib.clear()
 70 |         element.attrib.update(attribs)
 71 |     # FIXME: Kind of an ugly hack.  There *must* be a cleaner way.  I tried
 72 |     # adding text by assigning it to element_tag.text.  That results in
 73 |     # non-ascii text being html-entity encoded.  Not bad, but not entirely
 74 |     # matching php-textile either.
 75 |     element_tag = ElementTree.tostringlist(element, encoding=enc,
 76 |                                            method='html')
 77 |     element_tag.insert(len(element_tag) - 1, content)
 78 |     element_text = ''.join(element_tag)
 79 |     return element_text
 80 | 
 81 | 
 82 | def getimagesize(url):
 83 |     """
 84 |     Attempts to determine an image's width and height, and returns a tuple,
 85 |     (width, height), in pixels or an empty string in case of failure.
 86 |     Requires that PIL is installed.
 87 | 
 88 |     """
 89 | 
 90 |     try:
 91 |         from PIL import ImageFile
 92 |     except ImportError:
 93 |         return ''
 94 | 
 95 |     from urllib.request import urlopen
 96 | 
 97 |     try:
 98 |         p = ImageFile.Parser()
 99 |         f = urlopen(url)
100 |         while True:
101 |             s = f.read(1024)
102 |             if not s:
103 |                 break
104 |             p.feed(s)
105 |             if p.image:
106 |                 return p.image.size
107 |     except (IOError, ValueError):
108 |         return ''
109 | 
110 | 
def has_raw_text(text):
    """checks whether the text has text not already enclosed by a block tag"""
    remainder = text.strip()
    for stripper in RAW_TEXT_REVEALERS:
        remainder = stripper.sub('', remainder).strip()
    return bool(remainder)
117 | 
118 | 
def human_readable_url(url):
    """Strip the scheme from a url for display purposes, e.g.
    'http://example.com/x' -> 'example.com/x' and
    'mailto:a@b' -> 'a@b'."""
    if "://" in url:
        return url.split("://")[1]
    if ":" in url:
        return url.split(":")[1]
    return url
125 | 
126 | 
def is_rel_url(url):
    """Identify relative urls: no scheme and no network location."""
    parsed = urlparse(url)
    return not (parsed.scheme or parsed.netloc)
131 | 
132 | 
def is_valid_url(url):
    """True when the url has no scheme component (i.e. it is a bare or
    relative reference rather than an absolute url)."""
    return urlparse(url).scheme == ''
138 | 
139 | 
def list_type(list_string):
    """Classify a textile list marker string by its last character:
    'u' (unordered) for '*', 'o' (ordered) for '#', 'd' (definition)
    otherwise."""
    if list_string.endswith('*'):
        return 'u'
    if list_string.endswith('#'):
        return 'o'
    return 'd'
148 | 
149 | 
def normalize_newlines(string):
    """Convert CR/CRLF line endings to LF, reduce whitespace-only lines
    to bare newlines, and trim leading/trailing newlines."""
    result = re.sub(r'\r\n?', '\n', string)
    result = re.sub(r'^[ \t]*\n', '\n', result, flags=re.M)
    return result.strip('\n')
155 | 
156 | 
def parse_attributes(block_attributes, element=None, include_id=True, restricted=False):
    """Parse a textile block-attribute string into an OrderedDict of html
    attributes.

    block_attributes -- the textile shorthand, e.g. '(cls#myid){color:red}[en]'.
    element -- optional element context: 'td' and 'tr' enable vertical
        alignment (and, for 'td', col/row spans); 'col' enables span and
        width parsing.
    include_id -- when False, a parsed css id is omitted from the result.
    restricted -- when True, inline styles ({...}) are not honoured.

    Note: the attribute string is consumed destructively -- each matched
    chunk is removed from `matched` before the next pattern runs, so the
    order of the searches below matters.
    """
    vAlign = {'^': 'top', '-': 'middle', '~': 'bottom'}
    hAlign = {'<': 'left', '=': 'center', '>': 'right', '<>': 'justify'}
    style = []
    aclass = ''
    lang = ''
    colspan = ''
    rowspan = ''
    block_id = ''
    span = ''
    width = ''
    result = OrderedDict()

    if not block_attributes:
        return result

    matched = block_attributes
    if element == 'td':
        # "\N" sets a colspan of N...
        m = re.search(r'\\(\d+)', matched)
        if m:
            colspan = m.group(1)

        # ...and "/N" a rowspan of N.
        m = re.search(r'/(\d+)', matched)
        if m:
            rowspan = m.group(1)

    if element == 'td' or element == 'tr':
        # a leading ^ / - / ~ sets the vertical alignment.
        m = re.search(r'(^{0})'.format(valign_re_s), matched)
        if m:
            style.append("vertical-align:{0}".format(vAlign[m.group(1)]))

    if not restricted:
        # inline styles in curly braces, e.g. "{color:red;width:2em}".
        m = re.search(r'\{([^}]*)\}', matched)
        if m:
            style.extend(m.group(1).rstrip(';').split(';'))
            matched = matched.replace(m.group(0), '')

    # language in square brackets, e.g. "[en]".
    m = re.search(r'\[([^\]]+)\]', matched, re.U)
    if m:
        lang = m.group(1)
        matched = matched.replace(m.group(0), '')

    # css class and/or id in parens, e.g. "(myclass#myid)".
    m = re.search(r'\(([^()]+)\)', matched, re.U)
    if m:
        matched = matched.replace(m.group(0), '')
        # Only allow a restricted subset of the CSS standard characters for classes/ids.
        # No encoding markers allowed.
        id_class_match = re.compile(r"^([-a-zA-Z 0-9_\/\[\]\.\:\#]+)$", re.U).match(m.group(1))
        if id_class_match:
            class_regex = re.compile(r"^([-a-zA-Z 0-9_\.\/\[\]]*)$")
            id_class = id_class_match.group(1)
            # If a textile class block attribute was found with a '#' in it
            # split it into the css class and css id...
            hashpos = id_class.find('#')
            if hashpos >= 0:
                id_match = re.match(r"^#([-a-zA-Z0-9_\.\:]*)$", id_class[hashpos:])
                if id_match:
                    block_id = id_match.group(1)

                cls_match = class_regex.match(id_class[:hashpos])
            else:
                cls_match = class_regex.match(id_class)

            if cls_match:
                aclass = cls_match.group(1)

    # each remaining "(" adds 1em of left padding...
    m = re.search(r'([(]+)', matched)
    if m:
        style.append("padding-left:{0}em".format(len(m.group(1))))
        matched = matched.replace(m.group(0), '')

    # ...and each ")" adds 1em of right padding.
    m = re.search(r'([)]+)', matched)
    if m:
        style.append("padding-right:{0}em".format(len(m.group(1))))
        matched = matched.replace(m.group(0), '')

    # horizontal alignment: <, >, =, or <>.
    m = re.search(r'({0})'.format(halign_re_s), matched)
    if m:
        style.append("text-align:{0}".format(hAlign[m.group(1)]))

    if element == 'col':
        # e.g. "\3. 100" -- span 3 columns at width 100.
        pattern = r'(?:\\(\d+)\.?)?\s*(\d+)?'
        csp = re.match(pattern, matched)
        span, width = csp.groups()

    if colspan:
        result['colspan'] = colspan

    if style:
        # Previous splits that created style may have introduced extra
        # whitespace into the list elements.  Clean it up.
        style = [x.strip() for x in style]
        result['style'] = '{0};'.format("; ".join(style))
    if aclass:
        result['class'] = aclass
    if block_id and include_id:
        result['id'] = block_id
    if lang:
        result['lang'] = lang
    if rowspan:
        result['rowspan'] = rowspan
    if span:
        result['span'] = span
    if width:
        result['width'] = width
    return result
263 | 
264 | 
def pba(block_attributes, element=None, include_id=True, restricted=False):
    """Parse block attributes and render them as an html attribute
    string (with a leading space), or '' when nothing was parsed."""
    attrs = parse_attributes(block_attributes, element, include_id, restricted)
    if not attrs:
        return ''
    pairs = ['{0}="{1}"'.format(k, v) for k, v in attrs.items()]
    return ' {0}'.format(' '.join(pairs))
272 | 


--------------------------------------------------------------------------------
/textile/version.py:
--------------------------------------------------------------------------------
# Package version string.
VERSION = '4.0.3'
2 | 


--------------------------------------------------------------------------------