├── .github └── workflows │ └── test.yml ├── .gitignore ├── LICENSE ├── MANIFEST.in ├── README.rst ├── generate_entities.py ├── requirements.txt ├── setup.py └── wikimarkup ├── __init__.py ├── entities.py ├── parser.py └── tests.py /.github/workflows/test.yml: -------------------------------------------------------------------------------- 1 | name: test 2 | on: [push] 3 | 4 | 5 | jobs: 6 | test: 7 | runs-on: ubuntu-latest 8 | strategy: 9 | matrix: 10 | python-version: ['3.7', '3.8', '3.9', '3.10', '3.11'] 11 | name: Python ${{ matrix.python-version }} sample 12 | steps: 13 | - uses: actions/checkout@v3 14 | - name: Set up Python 15 | uses: actions/setup-python@v4 16 | with: 17 | python-version: ${{ matrix.python-version }} 18 | architecture: x64 19 | cache: 'pip' 20 | - run: pip install -r requirements.txt 21 | - run: python -Wdefault -m unittest 22 | -------------------------------------------------------------------------------- /.gitignore: -------------------------------------------------------------------------------- 1 | *.pyc 2 | /dist 3 | /build 4 | py_wikimarkup.egg-info/ -------------------------------------------------------------------------------- /LICENSE: -------------------------------------------------------------------------------- 1 | This program is free software: you can redistribute it and/or modify 2 | it under the terms of the GNU General Public License as published by 3 | the Free Software Foundation, either version 3 of the License, or 4 | (at your option) any later version. 5 | 6 | This program is distributed in the hope that it will be useful, 7 | but WITHOUT ANY WARRANTY; without even the implied warranty of 8 | MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the 9 | GNU General Public License for more details. 10 | 11 | You should have received a copy of the GNU General Public License 12 | along with this program. If not, see . -------------------------------------------------------------------------------- /MANIFEST.in: -------------------------------------------------------------------------------- 1 | include setup.py README.rst LICENSE MANIFEST.in requirements.txt 2 | global-exclude *~ 3 | exclude wikimarkup/tests.py 4 | -------------------------------------------------------------------------------- /README.rst: -------------------------------------------------------------------------------- 1 | Summary 2 | ======= 3 | 4 | Formats text following the `MediaWiki `_ 5 | syntax. 6 | 7 | 8 | Usage 9 | ===== 10 | 11 | To return HTML from Wiki:: 12 | 13 | from wikimarkup.parser import Parser 14 | 15 | parser = Parser() 16 | html = parser.parse(text[, show_toc=True]) 17 | 18 | To return HTML without certain "annoying" (TODO: define annoying) elements, such as headings:: 19 | 20 | from wikimarkup.parser import parselite 21 | 22 | parselite(text) 23 | 24 | 25 | Adding New Tags 26 | =============== 27 | 28 | You can add new tags with the `registerTagHook` method.:: 29 | 30 | from wikimarkup.parser import Parser 31 | import cgi 32 | 33 | def blockquoteTagHook(parser_env, body, attributes={}): 34 | """A paragraph of text.""" 35 | text = ['
'] 36 | if 'cite' in attributes: 37 | text.append('%s' % (cgi.escape(attributes['cite']),)) 38 | text.append(parse(body.strip())) 39 | text.append('
') 40 | return u'\n'.join(text) 41 | 42 | parser = Parser() 43 | parser.registerTagHook('quote', blockquoteTagHook) 44 | 45 | 46 | Adding Internal Links 47 | ===================== 48 | 49 | You can support ``[[internal links]]`` with the `registerInternalLinkHook` 50 | method. There is no default handling for internal links. If no hook 51 | handles the link, it will appear unchanged in the output. An internal 52 | link may have a `namespace:` prefix. Hooks are registered per namespace, 53 | with 'None' for unprefixed links:: 54 | 55 | def internalLinkHook(parser_env, namespace, body): 56 | ... 57 | return replacement 58 | 59 | parser.registerInternalLinkHook(None, internalLinkHook) # called for [[link]] 60 | parser.registerInternalLinkHook('Wikipedia', hook) # called for [[Wikipedia: Link]] 61 | parser.registerInternalLinkHook(':en', hook) # called for [[:en:link] 62 | parser.registerInternalLinkHook(':', hook) # called for [[:any:link]] 63 | parser.registerInternalLinkHook('*', hook) # called for [[anything]] 64 | 65 | 66 | Examples:: 67 | 68 | from wikimarkup.parser import Parser 69 | 70 | def wikipediaLinkHook(parser_env, namespace, body): 71 | # namespace is going to be 'Wikipedia' 72 | (article, pipe, text) = body.partition('|') 73 | href = article.strip().capitalize().replace(' ', '_') 74 | text = (text or article).strip() 75 | return '%s' % (href, text) 76 | 77 | parser = Parser() 78 | parser.registerInternalLinkHook('Wikipedia', wikipediaLinkHook) 79 | 80 | print parser.parse("[[Wikipedia:public transport|public transportation]]") 81 | print parser.parse("[[Wikipedia: bus]]") 82 | 83 | import settings 84 | from pytils.translit import slugify 85 | from blog.models import Post 86 | 87 | def byteflowLinkHook(parser_env, namespace, body): 88 | (article, pipe, text) = body.partition('|') 89 | slug = slugify(article.strip()) 90 | text = (text or article).strip() 91 | try: 92 | post = Post.objects.get(slug=slug) 93 | href = post.get_absolute_url() 94 | except Post.DoesNotExist: 95 | href = '#' 96 | return '%s' % (href, text) 97 | 98 | parser.registerInternalLinkHook(None, byteflowLinkHook) 99 | 100 | parser.parse("[[Blog post title]]") 101 | -------------------------------------------------------------------------------- /generate_entities.py: -------------------------------------------------------------------------------- 1 | import requests 2 | 3 | 4 | def main(): 5 | resp = requests.get("https://html.spec.whatwg.org/entities.json") 6 | resp.raise_for_status() 7 | resp = resp.json() 8 | 9 | names = set() 10 | 11 | output = "# Generated by generate_entities.py - do not edit!\n\n" 12 | output += "html_entities = {\n" 13 | for entity in resp: 14 | name = entity 15 | if name.startswith("&"): 16 | name = name[1:] 17 | if name.endswith(";"): 18 | name = name[:-1] 19 | 20 | if name in names: 21 | continue 22 | names.add(name) 23 | 24 | chars = resp[entity]["characters"].encode("unicode_escape").decode('ascii') 25 | if '"' in chars: 26 | chars = chars.replace('"', '\\"') 27 | output += f' "{name}": "{chars}",\n' 28 | output += "}\n" 29 | 30 | with open("wikimarkup/entities.py", "w") as fd: 31 | fd.write(output) 32 | 33 | 34 | if __name__ == "__main__": 35 | main() -------------------------------------------------------------------------------- /requirements.txt: -------------------------------------------------------------------------------- 1 | bleach[css]==6.* 2 | unidecode==1.* 3 | -------------------------------------------------------------------------------- /setup.py: -------------------------------------------------------------------------------- 1 | #!/usr/bin/env python 2 | 3 | from setuptools import setup, find_packages 4 | 5 | with open('requirements.txt') as fd: 6 | reqs = [row.strip() for row in fd] 7 | 8 | setup( 9 | name='py-wikimarkup', 10 | version='2.3.0', 11 | packages=find_packages(), 12 | description='A basic MediaWiki markup parser.', 13 | long_description=open('README.rst').read(), 14 | author='David Cramer', 15 | author_email='dcramer@gmail.com', 16 | url='http://www.github.com/dgilman/py-wikimarkup/', 17 | classifiers=[ 18 | 'Development Status :: 6 - Mature', 19 | 'License :: OSI Approved :: GNU General Public License v3 or later (GPLv3+)', 20 | 'Programming Language :: Python :: 3 :: Only', 21 | ], 22 | python_requires='>=3.7', 23 | zip_safe=False, 24 | include_package_data=True, 25 | install_requires=reqs, 26 | package_data = { '': ['README.rst'] }, 27 | ) 28 | -------------------------------------------------------------------------------- /wikimarkup/__init__.py: -------------------------------------------------------------------------------- 1 | import os.path 2 | 3 | __all__ = ('__version__', '__build__') 4 | 5 | __version__ = (2, 3, 0) 6 | 7 | def _get_git_revision(path): 8 | revision_file = os.path.join(path, 'refs', 'heads', 'master') 9 | if not os.path.exists(revision_file): 10 | return None 11 | fh = open(revision_file, 'r') 12 | try: 13 | return fh.read() 14 | finally: 15 | fh.close() 16 | 17 | def get_revision(): 18 | """ 19 | :returns: Revision number of this branch/checkout, if available. None if 20 | no revision number can be determined. 21 | """ 22 | package_dir = os.path.dirname(__file__) 23 | checkout_dir = os.path.normpath(os.path.join(package_dir, '..')) 24 | path = os.path.join(checkout_dir, '.git') 25 | if os.path.exists(path): 26 | return _get_git_revision(path) 27 | return None 28 | 29 | __build__ = get_revision() 30 | 31 | -------------------------------------------------------------------------------- /wikimarkup/entities.py: -------------------------------------------------------------------------------- 1 | # Generated by generate_entities.py - do not edit! 2 | 3 | html_entities = { 4 | "AElig": "\xc6", 5 | "AMP": "&", 6 | "Aacute": "\xc1", 7 | "Abreve": "\u0102", 8 | "Acirc": "\xc2", 9 | "Acy": "\u0410", 10 | "Afr": "\U0001d504", 11 | "Agrave": "\xc0", 12 | "Alpha": "\u0391", 13 | "Amacr": "\u0100", 14 | "And": "\u2a53", 15 | "Aogon": "\u0104", 16 | "Aopf": "\U0001d538", 17 | "ApplyFunction": "\u2061", 18 | "Aring": "\xc5", 19 | "Ascr": "\U0001d49c", 20 | "Assign": "\u2254", 21 | "Atilde": "\xc3", 22 | "Auml": "\xc4", 23 | "Backslash": "\u2216", 24 | "Barv": "\u2ae7", 25 | "Barwed": "\u2306", 26 | "Bcy": "\u0411", 27 | "Because": "\u2235", 28 | "Bernoullis": "\u212c", 29 | "Beta": "\u0392", 30 | "Bfr": "\U0001d505", 31 | "Bopf": "\U0001d539", 32 | "Breve": "\u02d8", 33 | "Bscr": "\u212c", 34 | "Bumpeq": "\u224e", 35 | "CHcy": "\u0427", 36 | "COPY": "\xa9", 37 | "Cacute": "\u0106", 38 | "Cap": "\u22d2", 39 | "CapitalDifferentialD": "\u2145", 40 | "Cayleys": "\u212d", 41 | "Ccaron": "\u010c", 42 | "Ccedil": "\xc7", 43 | "Ccirc": "\u0108", 44 | "Cconint": "\u2230", 45 | "Cdot": "\u010a", 46 | "Cedilla": "\xb8", 47 | "CenterDot": "\xb7", 48 | "Cfr": "\u212d", 49 | "Chi": "\u03a7", 50 | "CircleDot": "\u2299", 51 | "CircleMinus": "\u2296", 52 | "CirclePlus": "\u2295", 53 | "CircleTimes": "\u2297", 54 | "ClockwiseContourIntegral": "\u2232", 55 | "CloseCurlyDoubleQuote": "\u201d", 56 | "CloseCurlyQuote": "\u2019", 57 | "Colon": "\u2237", 58 | "Colone": "\u2a74", 59 | "Congruent": "\u2261", 60 | "Conint": "\u222f", 61 | "ContourIntegral": "\u222e", 62 | "Copf": "\u2102", 63 | "Coproduct": "\u2210", 64 | "CounterClockwiseContourIntegral": "\u2233", 65 | "Cross": "\u2a2f", 66 | "Cscr": "\U0001d49e", 67 | "Cup": "\u22d3", 68 | "CupCap": "\u224d", 69 | "DD": "\u2145", 70 | "DDotrahd": "\u2911", 71 | "DJcy": "\u0402", 72 | "DScy": "\u0405", 73 | "DZcy": "\u040f", 74 | "Dagger": "\u2021", 75 | "Darr": "\u21a1", 76 | "Dashv": "\u2ae4", 77 | "Dcaron": "\u010e", 78 | "Dcy": "\u0414", 79 | "Del": "\u2207", 80 | "Delta": "\u0394", 81 | "Dfr": "\U0001d507", 82 | "DiacriticalAcute": "\xb4", 83 | "DiacriticalDot": "\u02d9", 84 | "DiacriticalDoubleAcute": "\u02dd", 85 | "DiacriticalGrave": "`", 86 | "DiacriticalTilde": "\u02dc", 87 | "Diamond": "\u22c4", 88 | "DifferentialD": "\u2146", 89 | "Dopf": "\U0001d53b", 90 | "Dot": "\xa8", 91 | "DotDot": "\u20dc", 92 | "DotEqual": "\u2250", 93 | "DoubleContourIntegral": "\u222f", 94 | "DoubleDot": "\xa8", 95 | "DoubleDownArrow": "\u21d3", 96 | "DoubleLeftArrow": "\u21d0", 97 | "DoubleLeftRightArrow": "\u21d4", 98 | "DoubleLeftTee": "\u2ae4", 99 | "DoubleLongLeftArrow": "\u27f8", 100 | "DoubleLongLeftRightArrow": "\u27fa", 101 | "DoubleLongRightArrow": "\u27f9", 102 | "DoubleRightArrow": "\u21d2", 103 | "DoubleRightTee": "\u22a8", 104 | "DoubleUpArrow": "\u21d1", 105 | "DoubleUpDownArrow": "\u21d5", 106 | "DoubleVerticalBar": "\u2225", 107 | "DownArrow": "\u2193", 108 | "DownArrowBar": "\u2913", 109 | "DownArrowUpArrow": "\u21f5", 110 | "DownBreve": "\u0311", 111 | "DownLeftRightVector": "\u2950", 112 | "DownLeftTeeVector": "\u295e", 113 | "DownLeftVector": "\u21bd", 114 | "DownLeftVectorBar": "\u2956", 115 | "DownRightTeeVector": "\u295f", 116 | "DownRightVector": "\u21c1", 117 | "DownRightVectorBar": "\u2957", 118 | "DownTee": "\u22a4", 119 | "DownTeeArrow": "\u21a7", 120 | "Downarrow": "\u21d3", 121 | "Dscr": "\U0001d49f", 122 | "Dstrok": "\u0110", 123 | "ENG": "\u014a", 124 | "ETH": "\xd0", 125 | "Eacute": "\xc9", 126 | "Ecaron": "\u011a", 127 | "Ecirc": "\xca", 128 | "Ecy": "\u042d", 129 | "Edot": "\u0116", 130 | "Efr": "\U0001d508", 131 | "Egrave": "\xc8", 132 | "Element": "\u2208", 133 | "Emacr": "\u0112", 134 | "EmptySmallSquare": "\u25fb", 135 | "EmptyVerySmallSquare": "\u25ab", 136 | "Eogon": "\u0118", 137 | "Eopf": "\U0001d53c", 138 | "Epsilon": "\u0395", 139 | "Equal": "\u2a75", 140 | "EqualTilde": "\u2242", 141 | "Equilibrium": "\u21cc", 142 | "Escr": "\u2130", 143 | "Esim": "\u2a73", 144 | "Eta": "\u0397", 145 | "Euml": "\xcb", 146 | "Exists": "\u2203", 147 | "ExponentialE": "\u2147", 148 | "Fcy": "\u0424", 149 | "Ffr": "\U0001d509", 150 | "FilledSmallSquare": "\u25fc", 151 | "FilledVerySmallSquare": "\u25aa", 152 | "Fopf": "\U0001d53d", 153 | "ForAll": "\u2200", 154 | "Fouriertrf": "\u2131", 155 | "Fscr": "\u2131", 156 | "GJcy": "\u0403", 157 | "GT": ">", 158 | "Gamma": "\u0393", 159 | "Gammad": "\u03dc", 160 | "Gbreve": "\u011e", 161 | "Gcedil": "\u0122", 162 | "Gcirc": "\u011c", 163 | "Gcy": "\u0413", 164 | "Gdot": "\u0120", 165 | "Gfr": "\U0001d50a", 166 | "Gg": "\u22d9", 167 | "Gopf": "\U0001d53e", 168 | "GreaterEqual": "\u2265", 169 | "GreaterEqualLess": "\u22db", 170 | "GreaterFullEqual": "\u2267", 171 | "GreaterGreater": "\u2aa2", 172 | "GreaterLess": "\u2277", 173 | "GreaterSlantEqual": "\u2a7e", 174 | "GreaterTilde": "\u2273", 175 | "Gscr": "\U0001d4a2", 176 | "Gt": "\u226b", 177 | "HARDcy": "\u042a", 178 | "Hacek": "\u02c7", 179 | "Hat": "^", 180 | "Hcirc": "\u0124", 181 | "Hfr": "\u210c", 182 | "HilbertSpace": "\u210b", 183 | "Hopf": "\u210d", 184 | "HorizontalLine": "\u2500", 185 | "Hscr": "\u210b", 186 | "Hstrok": "\u0126", 187 | "HumpDownHump": "\u224e", 188 | "HumpEqual": "\u224f", 189 | "IEcy": "\u0415", 190 | "IJlig": "\u0132", 191 | "IOcy": "\u0401", 192 | "Iacute": "\xcd", 193 | "Icirc": "\xce", 194 | "Icy": "\u0418", 195 | "Idot": "\u0130", 196 | "Ifr": "\u2111", 197 | "Igrave": "\xcc", 198 | "Im": "\u2111", 199 | "Imacr": "\u012a", 200 | "ImaginaryI": "\u2148", 201 | "Implies": "\u21d2", 202 | "Int": "\u222c", 203 | "Integral": "\u222b", 204 | "Intersection": "\u22c2", 205 | "InvisibleComma": "\u2063", 206 | "InvisibleTimes": "\u2062", 207 | "Iogon": "\u012e", 208 | "Iopf": "\U0001d540", 209 | "Iota": "\u0399", 210 | "Iscr": "\u2110", 211 | "Itilde": "\u0128", 212 | "Iukcy": "\u0406", 213 | "Iuml": "\xcf", 214 | "Jcirc": "\u0134", 215 | "Jcy": "\u0419", 216 | "Jfr": "\U0001d50d", 217 | "Jopf": "\U0001d541", 218 | "Jscr": "\U0001d4a5", 219 | "Jsercy": "\u0408", 220 | "Jukcy": "\u0404", 221 | "KHcy": "\u0425", 222 | "KJcy": "\u040c", 223 | "Kappa": "\u039a", 224 | "Kcedil": "\u0136", 225 | "Kcy": "\u041a", 226 | "Kfr": "\U0001d50e", 227 | "Kopf": "\U0001d542", 228 | "Kscr": "\U0001d4a6", 229 | "LJcy": "\u0409", 230 | "LT": "<", 231 | "Lacute": "\u0139", 232 | "Lambda": "\u039b", 233 | "Lang": "\u27ea", 234 | "Laplacetrf": "\u2112", 235 | "Larr": "\u219e", 236 | "Lcaron": "\u013d", 237 | "Lcedil": "\u013b", 238 | "Lcy": "\u041b", 239 | "LeftAngleBracket": "\u27e8", 240 | "LeftArrow": "\u2190", 241 | "LeftArrowBar": "\u21e4", 242 | "LeftArrowRightArrow": "\u21c6", 243 | "LeftCeiling": "\u2308", 244 | "LeftDoubleBracket": "\u27e6", 245 | "LeftDownTeeVector": "\u2961", 246 | "LeftDownVector": "\u21c3", 247 | "LeftDownVectorBar": "\u2959", 248 | "LeftFloor": "\u230a", 249 | "LeftRightArrow": "\u2194", 250 | "LeftRightVector": "\u294e", 251 | "LeftTee": "\u22a3", 252 | "LeftTeeArrow": "\u21a4", 253 | "LeftTeeVector": "\u295a", 254 | "LeftTriangle": "\u22b2", 255 | "LeftTriangleBar": "\u29cf", 256 | "LeftTriangleEqual": "\u22b4", 257 | "LeftUpDownVector": "\u2951", 258 | "LeftUpTeeVector": "\u2960", 259 | "LeftUpVector": "\u21bf", 260 | "LeftUpVectorBar": "\u2958", 261 | "LeftVector": "\u21bc", 262 | "LeftVectorBar": "\u2952", 263 | "Leftarrow": "\u21d0", 264 | "Leftrightarrow": "\u21d4", 265 | "LessEqualGreater": "\u22da", 266 | "LessFullEqual": "\u2266", 267 | "LessGreater": "\u2276", 268 | "LessLess": "\u2aa1", 269 | "LessSlantEqual": "\u2a7d", 270 | "LessTilde": "\u2272", 271 | "Lfr": "\U0001d50f", 272 | "Ll": "\u22d8", 273 | "Lleftarrow": "\u21da", 274 | "Lmidot": "\u013f", 275 | "LongLeftArrow": "\u27f5", 276 | "LongLeftRightArrow": "\u27f7", 277 | "LongRightArrow": "\u27f6", 278 | "Longleftarrow": "\u27f8", 279 | "Longleftrightarrow": "\u27fa", 280 | "Longrightarrow": "\u27f9", 281 | "Lopf": "\U0001d543", 282 | "LowerLeftArrow": "\u2199", 283 | "LowerRightArrow": "\u2198", 284 | "Lscr": "\u2112", 285 | "Lsh": "\u21b0", 286 | "Lstrok": "\u0141", 287 | "Lt": "\u226a", 288 | "Map": "\u2905", 289 | "Mcy": "\u041c", 290 | "MediumSpace": "\u205f", 291 | "Mellintrf": "\u2133", 292 | "Mfr": "\U0001d510", 293 | "MinusPlus": "\u2213", 294 | "Mopf": "\U0001d544", 295 | "Mscr": "\u2133", 296 | "Mu": "\u039c", 297 | "NJcy": "\u040a", 298 | "Nacute": "\u0143", 299 | "Ncaron": "\u0147", 300 | "Ncedil": "\u0145", 301 | "Ncy": "\u041d", 302 | "NegativeMediumSpace": "\u200b", 303 | "NegativeThickSpace": "\u200b", 304 | "NegativeThinSpace": "\u200b", 305 | "NegativeVeryThinSpace": "\u200b", 306 | "NestedGreaterGreater": "\u226b", 307 | "NestedLessLess": "\u226a", 308 | "NewLine": "\n", 309 | "Nfr": "\U0001d511", 310 | "NoBreak": "\u2060", 311 | "NonBreakingSpace": "\xa0", 312 | "Nopf": "\u2115", 313 | "Not": "\u2aec", 314 | "NotCongruent": "\u2262", 315 | "NotCupCap": "\u226d", 316 | "NotDoubleVerticalBar": "\u2226", 317 | "NotElement": "\u2209", 318 | "NotEqual": "\u2260", 319 | "NotEqualTilde": "\u2242\u0338", 320 | "NotExists": "\u2204", 321 | "NotGreater": "\u226f", 322 | "NotGreaterEqual": "\u2271", 323 | "NotGreaterFullEqual": "\u2267\u0338", 324 | "NotGreaterGreater": "\u226b\u0338", 325 | "NotGreaterLess": "\u2279", 326 | "NotGreaterSlantEqual": "\u2a7e\u0338", 327 | "NotGreaterTilde": "\u2275", 328 | "NotHumpDownHump": "\u224e\u0338", 329 | "NotHumpEqual": "\u224f\u0338", 330 | "NotLeftTriangle": "\u22ea", 331 | "NotLeftTriangleBar": "\u29cf\u0338", 332 | "NotLeftTriangleEqual": "\u22ec", 333 | "NotLess": "\u226e", 334 | "NotLessEqual": "\u2270", 335 | "NotLessGreater": "\u2278", 336 | "NotLessLess": "\u226a\u0338", 337 | "NotLessSlantEqual": "\u2a7d\u0338", 338 | "NotLessTilde": "\u2274", 339 | "NotNestedGreaterGreater": "\u2aa2\u0338", 340 | "NotNestedLessLess": "\u2aa1\u0338", 341 | "NotPrecedes": "\u2280", 342 | "NotPrecedesEqual": "\u2aaf\u0338", 343 | "NotPrecedesSlantEqual": "\u22e0", 344 | "NotReverseElement": "\u220c", 345 | "NotRightTriangle": "\u22eb", 346 | "NotRightTriangleBar": "\u29d0\u0338", 347 | "NotRightTriangleEqual": "\u22ed", 348 | "NotSquareSubset": "\u228f\u0338", 349 | "NotSquareSubsetEqual": "\u22e2", 350 | "NotSquareSuperset": "\u2290\u0338", 351 | "NotSquareSupersetEqual": "\u22e3", 352 | "NotSubset": "\u2282\u20d2", 353 | "NotSubsetEqual": "\u2288", 354 | "NotSucceeds": "\u2281", 355 | "NotSucceedsEqual": "\u2ab0\u0338", 356 | "NotSucceedsSlantEqual": "\u22e1", 357 | "NotSucceedsTilde": "\u227f\u0338", 358 | "NotSuperset": "\u2283\u20d2", 359 | "NotSupersetEqual": "\u2289", 360 | "NotTilde": "\u2241", 361 | "NotTildeEqual": "\u2244", 362 | "NotTildeFullEqual": "\u2247", 363 | "NotTildeTilde": "\u2249", 364 | "NotVerticalBar": "\u2224", 365 | "Nscr": "\U0001d4a9", 366 | "Ntilde": "\xd1", 367 | "Nu": "\u039d", 368 | "OElig": "\u0152", 369 | "Oacute": "\xd3", 370 | "Ocirc": "\xd4", 371 | "Ocy": "\u041e", 372 | "Odblac": "\u0150", 373 | "Ofr": "\U0001d512", 374 | "Ograve": "\xd2", 375 | "Omacr": "\u014c", 376 | "Omega": "\u03a9", 377 | "Omicron": "\u039f", 378 | "Oopf": "\U0001d546", 379 | "OpenCurlyDoubleQuote": "\u201c", 380 | "OpenCurlyQuote": "\u2018", 381 | "Or": "\u2a54", 382 | "Oscr": "\U0001d4aa", 383 | "Oslash": "\xd8", 384 | "Otilde": "\xd5", 385 | "Otimes": "\u2a37", 386 | "Ouml": "\xd6", 387 | "OverBar": "\u203e", 388 | "OverBrace": "\u23de", 389 | "OverBracket": "\u23b4", 390 | "OverParenthesis": "\u23dc", 391 | "PartialD": "\u2202", 392 | "Pcy": "\u041f", 393 | "Pfr": "\U0001d513", 394 | "Phi": "\u03a6", 395 | "Pi": "\u03a0", 396 | "PlusMinus": "\xb1", 397 | "Poincareplane": "\u210c", 398 | "Popf": "\u2119", 399 | "Pr": "\u2abb", 400 | "Precedes": "\u227a", 401 | "PrecedesEqual": "\u2aaf", 402 | "PrecedesSlantEqual": "\u227c", 403 | "PrecedesTilde": "\u227e", 404 | "Prime": "\u2033", 405 | "Product": "\u220f", 406 | "Proportion": "\u2237", 407 | "Proportional": "\u221d", 408 | "Pscr": "\U0001d4ab", 409 | "Psi": "\u03a8", 410 | "QUOT": "\"", 411 | "Qfr": "\U0001d514", 412 | "Qopf": "\u211a", 413 | "Qscr": "\U0001d4ac", 414 | "RBarr": "\u2910", 415 | "REG": "\xae", 416 | "Racute": "\u0154", 417 | "Rang": "\u27eb", 418 | "Rarr": "\u21a0", 419 | "Rarrtl": "\u2916", 420 | "Rcaron": "\u0158", 421 | "Rcedil": "\u0156", 422 | "Rcy": "\u0420", 423 | "Re": "\u211c", 424 | "ReverseElement": "\u220b", 425 | "ReverseEquilibrium": "\u21cb", 426 | "ReverseUpEquilibrium": "\u296f", 427 | "Rfr": "\u211c", 428 | "Rho": "\u03a1", 429 | "RightAngleBracket": "\u27e9", 430 | "RightArrow": "\u2192", 431 | "RightArrowBar": "\u21e5", 432 | "RightArrowLeftArrow": "\u21c4", 433 | "RightCeiling": "\u2309", 434 | "RightDoubleBracket": "\u27e7", 435 | "RightDownTeeVector": "\u295d", 436 | "RightDownVector": "\u21c2", 437 | "RightDownVectorBar": "\u2955", 438 | "RightFloor": "\u230b", 439 | "RightTee": "\u22a2", 440 | "RightTeeArrow": "\u21a6", 441 | "RightTeeVector": "\u295b", 442 | "RightTriangle": "\u22b3", 443 | "RightTriangleBar": "\u29d0", 444 | "RightTriangleEqual": "\u22b5", 445 | "RightUpDownVector": "\u294f", 446 | "RightUpTeeVector": "\u295c", 447 | "RightUpVector": "\u21be", 448 | "RightUpVectorBar": "\u2954", 449 | "RightVector": "\u21c0", 450 | "RightVectorBar": "\u2953", 451 | "Rightarrow": "\u21d2", 452 | "Ropf": "\u211d", 453 | "RoundImplies": "\u2970", 454 | "Rrightarrow": "\u21db", 455 | "Rscr": "\u211b", 456 | "Rsh": "\u21b1", 457 | "RuleDelayed": "\u29f4", 458 | "SHCHcy": "\u0429", 459 | "SHcy": "\u0428", 460 | "SOFTcy": "\u042c", 461 | "Sacute": "\u015a", 462 | "Sc": "\u2abc", 463 | "Scaron": "\u0160", 464 | "Scedil": "\u015e", 465 | "Scirc": "\u015c", 466 | "Scy": "\u0421", 467 | "Sfr": "\U0001d516", 468 | "ShortDownArrow": "\u2193", 469 | "ShortLeftArrow": "\u2190", 470 | "ShortRightArrow": "\u2192", 471 | "ShortUpArrow": "\u2191", 472 | "Sigma": "\u03a3", 473 | "SmallCircle": "\u2218", 474 | "Sopf": "\U0001d54a", 475 | "Sqrt": "\u221a", 476 | "Square": "\u25a1", 477 | "SquareIntersection": "\u2293", 478 | "SquareSubset": "\u228f", 479 | "SquareSubsetEqual": "\u2291", 480 | "SquareSuperset": "\u2290", 481 | "SquareSupersetEqual": "\u2292", 482 | "SquareUnion": "\u2294", 483 | "Sscr": "\U0001d4ae", 484 | "Star": "\u22c6", 485 | "Sub": "\u22d0", 486 | "Subset": "\u22d0", 487 | "SubsetEqual": "\u2286", 488 | "Succeeds": "\u227b", 489 | "SucceedsEqual": "\u2ab0", 490 | "SucceedsSlantEqual": "\u227d", 491 | "SucceedsTilde": "\u227f", 492 | "SuchThat": "\u220b", 493 | "Sum": "\u2211", 494 | "Sup": "\u22d1", 495 | "Superset": "\u2283", 496 | "SupersetEqual": "\u2287", 497 | "Supset": "\u22d1", 498 | "THORN": "\xde", 499 | "TRADE": "\u2122", 500 | "TSHcy": "\u040b", 501 | "TScy": "\u0426", 502 | "Tab": "\t", 503 | "Tau": "\u03a4", 504 | "Tcaron": "\u0164", 505 | "Tcedil": "\u0162", 506 | "Tcy": "\u0422", 507 | "Tfr": "\U0001d517", 508 | "Therefore": "\u2234", 509 | "Theta": "\u0398", 510 | "ThickSpace": "\u205f\u200a", 511 | "ThinSpace": "\u2009", 512 | "Tilde": "\u223c", 513 | "TildeEqual": "\u2243", 514 | "TildeFullEqual": "\u2245", 515 | "TildeTilde": "\u2248", 516 | "Topf": "\U0001d54b", 517 | "TripleDot": "\u20db", 518 | "Tscr": "\U0001d4af", 519 | "Tstrok": "\u0166", 520 | "Uacute": "\xda", 521 | "Uarr": "\u219f", 522 | "Uarrocir": "\u2949", 523 | "Ubrcy": "\u040e", 524 | "Ubreve": "\u016c", 525 | "Ucirc": "\xdb", 526 | "Ucy": "\u0423", 527 | "Udblac": "\u0170", 528 | "Ufr": "\U0001d518", 529 | "Ugrave": "\xd9", 530 | "Umacr": "\u016a", 531 | "UnderBar": "_", 532 | "UnderBrace": "\u23df", 533 | "UnderBracket": "\u23b5", 534 | "UnderParenthesis": "\u23dd", 535 | "Union": "\u22c3", 536 | "UnionPlus": "\u228e", 537 | "Uogon": "\u0172", 538 | "Uopf": "\U0001d54c", 539 | "UpArrow": "\u2191", 540 | "UpArrowBar": "\u2912", 541 | "UpArrowDownArrow": "\u21c5", 542 | "UpDownArrow": "\u2195", 543 | "UpEquilibrium": "\u296e", 544 | "UpTee": "\u22a5", 545 | "UpTeeArrow": "\u21a5", 546 | "Uparrow": "\u21d1", 547 | "Updownarrow": "\u21d5", 548 | "UpperLeftArrow": "\u2196", 549 | "UpperRightArrow": "\u2197", 550 | "Upsi": "\u03d2", 551 | "Upsilon": "\u03a5", 552 | "Uring": "\u016e", 553 | "Uscr": "\U0001d4b0", 554 | "Utilde": "\u0168", 555 | "Uuml": "\xdc", 556 | "VDash": "\u22ab", 557 | "Vbar": "\u2aeb", 558 | "Vcy": "\u0412", 559 | "Vdash": "\u22a9", 560 | "Vdashl": "\u2ae6", 561 | "Vee": "\u22c1", 562 | "Verbar": "\u2016", 563 | "Vert": "\u2016", 564 | "VerticalBar": "\u2223", 565 | "VerticalLine": "|", 566 | "VerticalSeparator": "\u2758", 567 | "VerticalTilde": "\u2240", 568 | "VeryThinSpace": "\u200a", 569 | "Vfr": "\U0001d519", 570 | "Vopf": "\U0001d54d", 571 | "Vscr": "\U0001d4b1", 572 | "Vvdash": "\u22aa", 573 | "Wcirc": "\u0174", 574 | "Wedge": "\u22c0", 575 | "Wfr": "\U0001d51a", 576 | "Wopf": "\U0001d54e", 577 | "Wscr": "\U0001d4b2", 578 | "Xfr": "\U0001d51b", 579 | "Xi": "\u039e", 580 | "Xopf": "\U0001d54f", 581 | "Xscr": "\U0001d4b3", 582 | "YAcy": "\u042f", 583 | "YIcy": "\u0407", 584 | "YUcy": "\u042e", 585 | "Yacute": "\xdd", 586 | "Ycirc": "\u0176", 587 | "Ycy": "\u042b", 588 | "Yfr": "\U0001d51c", 589 | "Yopf": "\U0001d550", 590 | "Yscr": "\U0001d4b4", 591 | "Yuml": "\u0178", 592 | "ZHcy": "\u0416", 593 | "Zacute": "\u0179", 594 | "Zcaron": "\u017d", 595 | "Zcy": "\u0417", 596 | "Zdot": "\u017b", 597 | "ZeroWidthSpace": "\u200b", 598 | "Zeta": "\u0396", 599 | "Zfr": "\u2128", 600 | "Zopf": "\u2124", 601 | "Zscr": "\U0001d4b5", 602 | "aacute": "\xe1", 603 | "abreve": "\u0103", 604 | "ac": "\u223e", 605 | "acE": "\u223e\u0333", 606 | "acd": "\u223f", 607 | "acirc": "\xe2", 608 | "acute": "\xb4", 609 | "acy": "\u0430", 610 | "aelig": "\xe6", 611 | "af": "\u2061", 612 | "afr": "\U0001d51e", 613 | "agrave": "\xe0", 614 | "alefsym": "\u2135", 615 | "aleph": "\u2135", 616 | "alpha": "\u03b1", 617 | "amacr": "\u0101", 618 | "amalg": "\u2a3f", 619 | "amp": "&", 620 | "and": "\u2227", 621 | "andand": "\u2a55", 622 | "andd": "\u2a5c", 623 | "andslope": "\u2a58", 624 | "andv": "\u2a5a", 625 | "ang": "\u2220", 626 | "ange": "\u29a4", 627 | "angle": "\u2220", 628 | "angmsd": "\u2221", 629 | "angmsdaa": "\u29a8", 630 | "angmsdab": "\u29a9", 631 | "angmsdac": "\u29aa", 632 | "angmsdad": "\u29ab", 633 | "angmsdae": "\u29ac", 634 | "angmsdaf": "\u29ad", 635 | "angmsdag": "\u29ae", 636 | "angmsdah": "\u29af", 637 | "angrt": "\u221f", 638 | "angrtvb": "\u22be", 639 | "angrtvbd": "\u299d", 640 | "angsph": "\u2222", 641 | "angst": "\xc5", 642 | "angzarr": "\u237c", 643 | "aogon": "\u0105", 644 | "aopf": "\U0001d552", 645 | "ap": "\u2248", 646 | "apE": "\u2a70", 647 | "apacir": "\u2a6f", 648 | "ape": "\u224a", 649 | "apid": "\u224b", 650 | "apos": "'", 651 | "approx": "\u2248", 652 | "approxeq": "\u224a", 653 | "aring": "\xe5", 654 | "ascr": "\U0001d4b6", 655 | "ast": "*", 656 | "asymp": "\u2248", 657 | "asympeq": "\u224d", 658 | "atilde": "\xe3", 659 | "auml": "\xe4", 660 | "awconint": "\u2233", 661 | "awint": "\u2a11", 662 | "bNot": "\u2aed", 663 | "backcong": "\u224c", 664 | "backepsilon": "\u03f6", 665 | "backprime": "\u2035", 666 | "backsim": "\u223d", 667 | "backsimeq": "\u22cd", 668 | "barvee": "\u22bd", 669 | "barwed": "\u2305", 670 | "barwedge": "\u2305", 671 | "bbrk": "\u23b5", 672 | "bbrktbrk": "\u23b6", 673 | "bcong": "\u224c", 674 | "bcy": "\u0431", 675 | "bdquo": "\u201e", 676 | "becaus": "\u2235", 677 | "because": "\u2235", 678 | "bemptyv": "\u29b0", 679 | "bepsi": "\u03f6", 680 | "bernou": "\u212c", 681 | "beta": "\u03b2", 682 | "beth": "\u2136", 683 | "between": "\u226c", 684 | "bfr": "\U0001d51f", 685 | "bigcap": "\u22c2", 686 | "bigcirc": "\u25ef", 687 | "bigcup": "\u22c3", 688 | "bigodot": "\u2a00", 689 | "bigoplus": "\u2a01", 690 | "bigotimes": "\u2a02", 691 | "bigsqcup": "\u2a06", 692 | "bigstar": "\u2605", 693 | "bigtriangledown": "\u25bd", 694 | "bigtriangleup": "\u25b3", 695 | "biguplus": "\u2a04", 696 | "bigvee": "\u22c1", 697 | "bigwedge": "\u22c0", 698 | "bkarow": "\u290d", 699 | "blacklozenge": "\u29eb", 700 | "blacksquare": "\u25aa", 701 | "blacktriangle": "\u25b4", 702 | "blacktriangledown": "\u25be", 703 | "blacktriangleleft": "\u25c2", 704 | "blacktriangleright": "\u25b8", 705 | "blank": "\u2423", 706 | "blk12": "\u2592", 707 | "blk14": "\u2591", 708 | "blk34": "\u2593", 709 | "block": "\u2588", 710 | "bne": "=\u20e5", 711 | "bnequiv": "\u2261\u20e5", 712 | "bnot": "\u2310", 713 | "bopf": "\U0001d553", 714 | "bot": "\u22a5", 715 | "bottom": "\u22a5", 716 | "bowtie": "\u22c8", 717 | "boxDL": "\u2557", 718 | "boxDR": "\u2554", 719 | "boxDl": "\u2556", 720 | "boxDr": "\u2553", 721 | "boxH": "\u2550", 722 | "boxHD": "\u2566", 723 | "boxHU": "\u2569", 724 | "boxHd": "\u2564", 725 | "boxHu": "\u2567", 726 | "boxUL": "\u255d", 727 | "boxUR": "\u255a", 728 | "boxUl": "\u255c", 729 | "boxUr": "\u2559", 730 | "boxV": "\u2551", 731 | "boxVH": "\u256c", 732 | "boxVL": "\u2563", 733 | "boxVR": "\u2560", 734 | "boxVh": "\u256b", 735 | "boxVl": "\u2562", 736 | "boxVr": "\u255f", 737 | "boxbox": "\u29c9", 738 | "boxdL": "\u2555", 739 | "boxdR": "\u2552", 740 | "boxdl": "\u2510", 741 | "boxdr": "\u250c", 742 | "boxh": "\u2500", 743 | "boxhD": "\u2565", 744 | "boxhU": "\u2568", 745 | "boxhd": "\u252c", 746 | "boxhu": "\u2534", 747 | "boxminus": "\u229f", 748 | "boxplus": "\u229e", 749 | "boxtimes": "\u22a0", 750 | "boxuL": "\u255b", 751 | "boxuR": "\u2558", 752 | "boxul": "\u2518", 753 | "boxur": "\u2514", 754 | "boxv": "\u2502", 755 | "boxvH": "\u256a", 756 | "boxvL": "\u2561", 757 | "boxvR": "\u255e", 758 | "boxvh": "\u253c", 759 | "boxvl": "\u2524", 760 | "boxvr": "\u251c", 761 | "bprime": "\u2035", 762 | "breve": "\u02d8", 763 | "brvbar": "\xa6", 764 | "bscr": "\U0001d4b7", 765 | "bsemi": "\u204f", 766 | "bsim": "\u223d", 767 | "bsime": "\u22cd", 768 | "bsol": "\\", 769 | "bsolb": "\u29c5", 770 | "bsolhsub": "\u27c8", 771 | "bull": "\u2022", 772 | "bullet": "\u2022", 773 | "bump": "\u224e", 774 | "bumpE": "\u2aae", 775 | "bumpe": "\u224f", 776 | "bumpeq": "\u224f", 777 | "cacute": "\u0107", 778 | "cap": "\u2229", 779 | "capand": "\u2a44", 780 | "capbrcup": "\u2a49", 781 | "capcap": "\u2a4b", 782 | "capcup": "\u2a47", 783 | "capdot": "\u2a40", 784 | "caps": "\u2229\ufe00", 785 | "caret": "\u2041", 786 | "caron": "\u02c7", 787 | "ccaps": "\u2a4d", 788 | "ccaron": "\u010d", 789 | "ccedil": "\xe7", 790 | "ccirc": "\u0109", 791 | "ccups": "\u2a4c", 792 | "ccupssm": "\u2a50", 793 | "cdot": "\u010b", 794 | "cedil": "\xb8", 795 | "cemptyv": "\u29b2", 796 | "cent": "\xa2", 797 | "centerdot": "\xb7", 798 | "cfr": "\U0001d520", 799 | "chcy": "\u0447", 800 | "check": "\u2713", 801 | "checkmark": "\u2713", 802 | "chi": "\u03c7", 803 | "cir": "\u25cb", 804 | "cirE": "\u29c3", 805 | "circ": "\u02c6", 806 | "circeq": "\u2257", 807 | "circlearrowleft": "\u21ba", 808 | "circlearrowright": "\u21bb", 809 | "circledR": "\xae", 810 | "circledS": "\u24c8", 811 | "circledast": "\u229b", 812 | "circledcirc": "\u229a", 813 | "circleddash": "\u229d", 814 | "cire": "\u2257", 815 | "cirfnint": "\u2a10", 816 | "cirmid": "\u2aef", 817 | "cirscir": "\u29c2", 818 | "clubs": "\u2663", 819 | "clubsuit": "\u2663", 820 | "colon": ":", 821 | "colone": "\u2254", 822 | "coloneq": "\u2254", 823 | "comma": ",", 824 | "commat": "@", 825 | "comp": "\u2201", 826 | "compfn": "\u2218", 827 | "complement": "\u2201", 828 | "complexes": "\u2102", 829 | "cong": "\u2245", 830 | "congdot": "\u2a6d", 831 | "conint": "\u222e", 832 | "copf": "\U0001d554", 833 | "coprod": "\u2210", 834 | "copy": "\xa9", 835 | "copysr": "\u2117", 836 | "crarr": "\u21b5", 837 | "cross": "\u2717", 838 | "cscr": "\U0001d4b8", 839 | "csub": "\u2acf", 840 | "csube": "\u2ad1", 841 | "csup": "\u2ad0", 842 | "csupe": "\u2ad2", 843 | "ctdot": "\u22ef", 844 | "cudarrl": "\u2938", 845 | "cudarrr": "\u2935", 846 | "cuepr": "\u22de", 847 | "cuesc": "\u22df", 848 | "cularr": "\u21b6", 849 | "cularrp": "\u293d", 850 | "cup": "\u222a", 851 | "cupbrcap": "\u2a48", 852 | "cupcap": "\u2a46", 853 | "cupcup": "\u2a4a", 854 | "cupdot": "\u228d", 855 | "cupor": "\u2a45", 856 | "cups": "\u222a\ufe00", 857 | "curarr": "\u21b7", 858 | "curarrm": "\u293c", 859 | "curlyeqprec": "\u22de", 860 | "curlyeqsucc": "\u22df", 861 | "curlyvee": "\u22ce", 862 | "curlywedge": "\u22cf", 863 | "curren": "\xa4", 864 | "curvearrowleft": "\u21b6", 865 | "curvearrowright": "\u21b7", 866 | "cuvee": "\u22ce", 867 | "cuwed": "\u22cf", 868 | "cwconint": "\u2232", 869 | "cwint": "\u2231", 870 | "cylcty": "\u232d", 871 | "dArr": "\u21d3", 872 | "dHar": "\u2965", 873 | "dagger": "\u2020", 874 | "daleth": "\u2138", 875 | "darr": "\u2193", 876 | "dash": "\u2010", 877 | "dashv": "\u22a3", 878 | "dbkarow": "\u290f", 879 | "dblac": "\u02dd", 880 | "dcaron": "\u010f", 881 | "dcy": "\u0434", 882 | "dd": "\u2146", 883 | "ddagger": "\u2021", 884 | "ddarr": "\u21ca", 885 | "ddotseq": "\u2a77", 886 | "deg": "\xb0", 887 | "delta": "\u03b4", 888 | "demptyv": "\u29b1", 889 | "dfisht": "\u297f", 890 | "dfr": "\U0001d521", 891 | "dharl": "\u21c3", 892 | "dharr": "\u21c2", 893 | "diam": "\u22c4", 894 | "diamond": "\u22c4", 895 | "diamondsuit": "\u2666", 896 | "diams": "\u2666", 897 | "die": "\xa8", 898 | "digamma": "\u03dd", 899 | "disin": "\u22f2", 900 | "div": "\xf7", 901 | "divide": "\xf7", 902 | "divideontimes": "\u22c7", 903 | "divonx": "\u22c7", 904 | "djcy": "\u0452", 905 | "dlcorn": "\u231e", 906 | "dlcrop": "\u230d", 907 | "dollar": "$", 908 | "dopf": "\U0001d555", 909 | "dot": "\u02d9", 910 | "doteq": "\u2250", 911 | "doteqdot": "\u2251", 912 | "dotminus": "\u2238", 913 | "dotplus": "\u2214", 914 | "dotsquare": "\u22a1", 915 | "doublebarwedge": "\u2306", 916 | "downarrow": "\u2193", 917 | "downdownarrows": "\u21ca", 918 | "downharpoonleft": "\u21c3", 919 | "downharpoonright": "\u21c2", 920 | "drbkarow": "\u2910", 921 | "drcorn": "\u231f", 922 | "drcrop": "\u230c", 923 | "dscr": "\U0001d4b9", 924 | "dscy": "\u0455", 925 | "dsol": "\u29f6", 926 | "dstrok": "\u0111", 927 | "dtdot": "\u22f1", 928 | "dtri": "\u25bf", 929 | "dtrif": "\u25be", 930 | "duarr": "\u21f5", 931 | "duhar": "\u296f", 932 | "dwangle": "\u29a6", 933 | "dzcy": "\u045f", 934 | "dzigrarr": "\u27ff", 935 | "eDDot": "\u2a77", 936 | "eDot": "\u2251", 937 | "eacute": "\xe9", 938 | "easter": "\u2a6e", 939 | "ecaron": "\u011b", 940 | "ecir": "\u2256", 941 | "ecirc": "\xea", 942 | "ecolon": "\u2255", 943 | "ecy": "\u044d", 944 | "edot": "\u0117", 945 | "ee": "\u2147", 946 | "efDot": "\u2252", 947 | "efr": "\U0001d522", 948 | "eg": "\u2a9a", 949 | "egrave": "\xe8", 950 | "egs": "\u2a96", 951 | "egsdot": "\u2a98", 952 | "el": "\u2a99", 953 | "elinters": "\u23e7", 954 | "ell": "\u2113", 955 | "els": "\u2a95", 956 | "elsdot": "\u2a97", 957 | "emacr": "\u0113", 958 | "empty": "\u2205", 959 | "emptyset": "\u2205", 960 | "emptyv": "\u2205", 961 | "emsp13": "\u2004", 962 | "emsp14": "\u2005", 963 | "emsp": "\u2003", 964 | "eng": "\u014b", 965 | "ensp": "\u2002", 966 | "eogon": "\u0119", 967 | "eopf": "\U0001d556", 968 | "epar": "\u22d5", 969 | "eparsl": "\u29e3", 970 | "eplus": "\u2a71", 971 | "epsi": "\u03b5", 972 | "epsilon": "\u03b5", 973 | "epsiv": "\u03f5", 974 | "eqcirc": "\u2256", 975 | "eqcolon": "\u2255", 976 | "eqsim": "\u2242", 977 | "eqslantgtr": "\u2a96", 978 | "eqslantless": "\u2a95", 979 | "equals": "=", 980 | "equest": "\u225f", 981 | "equiv": "\u2261", 982 | "equivDD": "\u2a78", 983 | "eqvparsl": "\u29e5", 984 | "erDot": "\u2253", 985 | "erarr": "\u2971", 986 | "escr": "\u212f", 987 | "esdot": "\u2250", 988 | "esim": "\u2242", 989 | "eta": "\u03b7", 990 | "eth": "\xf0", 991 | "euml": "\xeb", 992 | "euro": "\u20ac", 993 | "excl": "!", 994 | "exist": "\u2203", 995 | "expectation": "\u2130", 996 | "exponentiale": "\u2147", 997 | "fallingdotseq": "\u2252", 998 | "fcy": "\u0444", 999 | "female": "\u2640", 1000 | "ffilig": "\ufb03", 1001 | "fflig": "\ufb00", 1002 | "ffllig": "\ufb04", 1003 | "ffr": "\U0001d523", 1004 | "filig": "\ufb01", 1005 | "fjlig": "fj", 1006 | "flat": "\u266d", 1007 | "fllig": "\ufb02", 1008 | "fltns": "\u25b1", 1009 | "fnof": "\u0192", 1010 | "fopf": "\U0001d557", 1011 | "forall": "\u2200", 1012 | "fork": "\u22d4", 1013 | "forkv": "\u2ad9", 1014 | "fpartint": "\u2a0d", 1015 | "frac12": "\xbd", 1016 | "frac13": "\u2153", 1017 | "frac14": "\xbc", 1018 | "frac15": "\u2155", 1019 | "frac16": "\u2159", 1020 | "frac18": "\u215b", 1021 | "frac23": "\u2154", 1022 | "frac25": "\u2156", 1023 | "frac34": "\xbe", 1024 | "frac35": "\u2157", 1025 | "frac38": "\u215c", 1026 | "frac45": "\u2158", 1027 | "frac56": "\u215a", 1028 | "frac58": "\u215d", 1029 | "frac78": "\u215e", 1030 | "frasl": "\u2044", 1031 | "frown": "\u2322", 1032 | "fscr": "\U0001d4bb", 1033 | "gE": "\u2267", 1034 | "gEl": "\u2a8c", 1035 | "gacute": "\u01f5", 1036 | "gamma": "\u03b3", 1037 | "gammad": "\u03dd", 1038 | "gap": "\u2a86", 1039 | "gbreve": "\u011f", 1040 | "gcirc": "\u011d", 1041 | "gcy": "\u0433", 1042 | "gdot": "\u0121", 1043 | "ge": "\u2265", 1044 | "gel": "\u22db", 1045 | "geq": "\u2265", 1046 | "geqq": "\u2267", 1047 | "geqslant": "\u2a7e", 1048 | "ges": "\u2a7e", 1049 | "gescc": "\u2aa9", 1050 | "gesdot": "\u2a80", 1051 | "gesdoto": "\u2a82", 1052 | "gesdotol": "\u2a84", 1053 | "gesl": "\u22db\ufe00", 1054 | "gesles": "\u2a94", 1055 | "gfr": "\U0001d524", 1056 | "gg": "\u226b", 1057 | "ggg": "\u22d9", 1058 | "gimel": "\u2137", 1059 | "gjcy": "\u0453", 1060 | "gl": "\u2277", 1061 | "glE": "\u2a92", 1062 | "gla": "\u2aa5", 1063 | "glj": "\u2aa4", 1064 | "gnE": "\u2269", 1065 | "gnap": "\u2a8a", 1066 | "gnapprox": "\u2a8a", 1067 | "gne": "\u2a88", 1068 | "gneq": "\u2a88", 1069 | "gneqq": "\u2269", 1070 | "gnsim": "\u22e7", 1071 | "gopf": "\U0001d558", 1072 | "grave": "`", 1073 | "gscr": "\u210a", 1074 | "gsim": "\u2273", 1075 | "gsime": "\u2a8e", 1076 | "gsiml": "\u2a90", 1077 | "gt": ">", 1078 | "gtcc": "\u2aa7", 1079 | "gtcir": "\u2a7a", 1080 | "gtdot": "\u22d7", 1081 | "gtlPar": "\u2995", 1082 | "gtquest": "\u2a7c", 1083 | "gtrapprox": "\u2a86", 1084 | "gtrarr": "\u2978", 1085 | "gtrdot": "\u22d7", 1086 | "gtreqless": "\u22db", 1087 | "gtreqqless": "\u2a8c", 1088 | "gtrless": "\u2277", 1089 | "gtrsim": "\u2273", 1090 | "gvertneqq": "\u2269\ufe00", 1091 | "gvnE": "\u2269\ufe00", 1092 | "hArr": "\u21d4", 1093 | "hairsp": "\u200a", 1094 | "half": "\xbd", 1095 | "hamilt": "\u210b", 1096 | "hardcy": "\u044a", 1097 | "harr": "\u2194", 1098 | "harrcir": "\u2948", 1099 | "harrw": "\u21ad", 1100 | "hbar": "\u210f", 1101 | "hcirc": "\u0125", 1102 | "hearts": "\u2665", 1103 | "heartsuit": "\u2665", 1104 | "hellip": "\u2026", 1105 | "hercon": "\u22b9", 1106 | "hfr": "\U0001d525", 1107 | "hksearow": "\u2925", 1108 | "hkswarow": "\u2926", 1109 | "hoarr": "\u21ff", 1110 | "homtht": "\u223b", 1111 | "hookleftarrow": "\u21a9", 1112 | "hookrightarrow": "\u21aa", 1113 | "hopf": "\U0001d559", 1114 | "horbar": "\u2015", 1115 | "hscr": "\U0001d4bd", 1116 | "hslash": "\u210f", 1117 | "hstrok": "\u0127", 1118 | "hybull": "\u2043", 1119 | "hyphen": "\u2010", 1120 | "iacute": "\xed", 1121 | "ic": "\u2063", 1122 | "icirc": "\xee", 1123 | "icy": "\u0438", 1124 | "iecy": "\u0435", 1125 | "iexcl": "\xa1", 1126 | "iff": "\u21d4", 1127 | "ifr": "\U0001d526", 1128 | "igrave": "\xec", 1129 | "ii": "\u2148", 1130 | "iiiint": "\u2a0c", 1131 | "iiint": "\u222d", 1132 | "iinfin": "\u29dc", 1133 | "iiota": "\u2129", 1134 | "ijlig": "\u0133", 1135 | "imacr": "\u012b", 1136 | "image": "\u2111", 1137 | "imagline": "\u2110", 1138 | "imagpart": "\u2111", 1139 | "imath": "\u0131", 1140 | "imof": "\u22b7", 1141 | "imped": "\u01b5", 1142 | "in": "\u2208", 1143 | "incare": "\u2105", 1144 | "infin": "\u221e", 1145 | "infintie": "\u29dd", 1146 | "inodot": "\u0131", 1147 | "int": "\u222b", 1148 | "intcal": "\u22ba", 1149 | "integers": "\u2124", 1150 | "intercal": "\u22ba", 1151 | "intlarhk": "\u2a17", 1152 | "intprod": "\u2a3c", 1153 | "iocy": "\u0451", 1154 | "iogon": "\u012f", 1155 | "iopf": "\U0001d55a", 1156 | "iota": "\u03b9", 1157 | "iprod": "\u2a3c", 1158 | "iquest": "\xbf", 1159 | "iscr": "\U0001d4be", 1160 | "isin": "\u2208", 1161 | "isinE": "\u22f9", 1162 | "isindot": "\u22f5", 1163 | "isins": "\u22f4", 1164 | "isinsv": "\u22f3", 1165 | "isinv": "\u2208", 1166 | "it": "\u2062", 1167 | "itilde": "\u0129", 1168 | "iukcy": "\u0456", 1169 | "iuml": "\xef", 1170 | "jcirc": "\u0135", 1171 | "jcy": "\u0439", 1172 | "jfr": "\U0001d527", 1173 | "jmath": "\u0237", 1174 | "jopf": "\U0001d55b", 1175 | "jscr": "\U0001d4bf", 1176 | "jsercy": "\u0458", 1177 | "jukcy": "\u0454", 1178 | "kappa": "\u03ba", 1179 | "kappav": "\u03f0", 1180 | "kcedil": "\u0137", 1181 | "kcy": "\u043a", 1182 | "kfr": "\U0001d528", 1183 | "kgreen": "\u0138", 1184 | "khcy": "\u0445", 1185 | "kjcy": "\u045c", 1186 | "kopf": "\U0001d55c", 1187 | "kscr": "\U0001d4c0", 1188 | "lAarr": "\u21da", 1189 | "lArr": "\u21d0", 1190 | "lAtail": "\u291b", 1191 | "lBarr": "\u290e", 1192 | "lE": "\u2266", 1193 | "lEg": "\u2a8b", 1194 | "lHar": "\u2962", 1195 | "lacute": "\u013a", 1196 | "laemptyv": "\u29b4", 1197 | "lagran": "\u2112", 1198 | "lambda": "\u03bb", 1199 | "lang": "\u27e8", 1200 | "langd": "\u2991", 1201 | "langle": "\u27e8", 1202 | "lap": "\u2a85", 1203 | "laquo": "\xab", 1204 | "larr": "\u2190", 1205 | "larrb": "\u21e4", 1206 | "larrbfs": "\u291f", 1207 | "larrfs": "\u291d", 1208 | "larrhk": "\u21a9", 1209 | "larrlp": "\u21ab", 1210 | "larrpl": "\u2939", 1211 | "larrsim": "\u2973", 1212 | "larrtl": "\u21a2", 1213 | "lat": "\u2aab", 1214 | "latail": "\u2919", 1215 | "late": "\u2aad", 1216 | "lates": "\u2aad\ufe00", 1217 | "lbarr": "\u290c", 1218 | "lbbrk": "\u2772", 1219 | "lbrace": "{", 1220 | "lbrack": "[", 1221 | "lbrke": "\u298b", 1222 | "lbrksld": "\u298f", 1223 | "lbrkslu": "\u298d", 1224 | "lcaron": "\u013e", 1225 | "lcedil": "\u013c", 1226 | "lceil": "\u2308", 1227 | "lcub": "{", 1228 | "lcy": "\u043b", 1229 | "ldca": "\u2936", 1230 | "ldquo": "\u201c", 1231 | "ldquor": "\u201e", 1232 | "ldrdhar": "\u2967", 1233 | "ldrushar": "\u294b", 1234 | "ldsh": "\u21b2", 1235 | "le": "\u2264", 1236 | "leftarrow": "\u2190", 1237 | "leftarrowtail": "\u21a2", 1238 | "leftharpoondown": "\u21bd", 1239 | "leftharpoonup": "\u21bc", 1240 | "leftleftarrows": "\u21c7", 1241 | "leftrightarrow": "\u2194", 1242 | "leftrightarrows": "\u21c6", 1243 | "leftrightharpoons": "\u21cb", 1244 | "leftrightsquigarrow": "\u21ad", 1245 | "leftthreetimes": "\u22cb", 1246 | "leg": "\u22da", 1247 | "leq": "\u2264", 1248 | "leqq": "\u2266", 1249 | "leqslant": "\u2a7d", 1250 | "les": "\u2a7d", 1251 | "lescc": "\u2aa8", 1252 | "lesdot": "\u2a7f", 1253 | "lesdoto": "\u2a81", 1254 | "lesdotor": "\u2a83", 1255 | "lesg": "\u22da\ufe00", 1256 | "lesges": "\u2a93", 1257 | "lessapprox": "\u2a85", 1258 | "lessdot": "\u22d6", 1259 | "lesseqgtr": "\u22da", 1260 | "lesseqqgtr": "\u2a8b", 1261 | "lessgtr": "\u2276", 1262 | "lesssim": "\u2272", 1263 | "lfisht": "\u297c", 1264 | "lfloor": "\u230a", 1265 | "lfr": "\U0001d529", 1266 | "lg": "\u2276", 1267 | "lgE": "\u2a91", 1268 | "lhard": "\u21bd", 1269 | "lharu": "\u21bc", 1270 | "lharul": "\u296a", 1271 | "lhblk": "\u2584", 1272 | "ljcy": "\u0459", 1273 | "ll": "\u226a", 1274 | "llarr": "\u21c7", 1275 | "llcorner": "\u231e", 1276 | "llhard": "\u296b", 1277 | "lltri": "\u25fa", 1278 | "lmidot": "\u0140", 1279 | "lmoust": "\u23b0", 1280 | "lmoustache": "\u23b0", 1281 | "lnE": "\u2268", 1282 | "lnap": "\u2a89", 1283 | "lnapprox": "\u2a89", 1284 | "lne": "\u2a87", 1285 | "lneq": "\u2a87", 1286 | "lneqq": "\u2268", 1287 | "lnsim": "\u22e6", 1288 | "loang": "\u27ec", 1289 | "loarr": "\u21fd", 1290 | "lobrk": "\u27e6", 1291 | "longleftarrow": "\u27f5", 1292 | "longleftrightarrow": "\u27f7", 1293 | "longmapsto": "\u27fc", 1294 | "longrightarrow": "\u27f6", 1295 | "looparrowleft": "\u21ab", 1296 | "looparrowright": "\u21ac", 1297 | "lopar": "\u2985", 1298 | "lopf": "\U0001d55d", 1299 | "loplus": "\u2a2d", 1300 | "lotimes": "\u2a34", 1301 | "lowast": "\u2217", 1302 | "lowbar": "_", 1303 | "loz": "\u25ca", 1304 | "lozenge": "\u25ca", 1305 | "lozf": "\u29eb", 1306 | "lpar": "(", 1307 | "lparlt": "\u2993", 1308 | "lrarr": "\u21c6", 1309 | "lrcorner": "\u231f", 1310 | "lrhar": "\u21cb", 1311 | "lrhard": "\u296d", 1312 | "lrm": "\u200e", 1313 | "lrtri": "\u22bf", 1314 | "lsaquo": "\u2039", 1315 | "lscr": "\U0001d4c1", 1316 | "lsh": "\u21b0", 1317 | "lsim": "\u2272", 1318 | "lsime": "\u2a8d", 1319 | "lsimg": "\u2a8f", 1320 | "lsqb": "[", 1321 | "lsquo": "\u2018", 1322 | "lsquor": "\u201a", 1323 | "lstrok": "\u0142", 1324 | "lt": "<", 1325 | "ltcc": "\u2aa6", 1326 | "ltcir": "\u2a79", 1327 | "ltdot": "\u22d6", 1328 | "lthree": "\u22cb", 1329 | "ltimes": "\u22c9", 1330 | "ltlarr": "\u2976", 1331 | "ltquest": "\u2a7b", 1332 | "ltrPar": "\u2996", 1333 | "ltri": "\u25c3", 1334 | "ltrie": "\u22b4", 1335 | "ltrif": "\u25c2", 1336 | "lurdshar": "\u294a", 1337 | "luruhar": "\u2966", 1338 | "lvertneqq": "\u2268\ufe00", 1339 | "lvnE": "\u2268\ufe00", 1340 | "mDDot": "\u223a", 1341 | "macr": "\xaf", 1342 | "male": "\u2642", 1343 | "malt": "\u2720", 1344 | "maltese": "\u2720", 1345 | "map": "\u21a6", 1346 | "mapsto": "\u21a6", 1347 | "mapstodown": "\u21a7", 1348 | "mapstoleft": "\u21a4", 1349 | "mapstoup": "\u21a5", 1350 | "marker": "\u25ae", 1351 | "mcomma": "\u2a29", 1352 | "mcy": "\u043c", 1353 | "mdash": "\u2014", 1354 | "measuredangle": "\u2221", 1355 | "mfr": "\U0001d52a", 1356 | "mho": "\u2127", 1357 | "micro": "\xb5", 1358 | "mid": "\u2223", 1359 | "midast": "*", 1360 | "midcir": "\u2af0", 1361 | "middot": "\xb7", 1362 | "minus": "\u2212", 1363 | "minusb": "\u229f", 1364 | "minusd": "\u2238", 1365 | "minusdu": "\u2a2a", 1366 | "mlcp": "\u2adb", 1367 | "mldr": "\u2026", 1368 | "mnplus": "\u2213", 1369 | "models": "\u22a7", 1370 | "mopf": "\U0001d55e", 1371 | "mp": "\u2213", 1372 | "mscr": "\U0001d4c2", 1373 | "mstpos": "\u223e", 1374 | "mu": "\u03bc", 1375 | "multimap": "\u22b8", 1376 | "mumap": "\u22b8", 1377 | "nGg": "\u22d9\u0338", 1378 | "nGt": "\u226b\u20d2", 1379 | "nGtv": "\u226b\u0338", 1380 | "nLeftarrow": "\u21cd", 1381 | "nLeftrightarrow": "\u21ce", 1382 | "nLl": "\u22d8\u0338", 1383 | "nLt": "\u226a\u20d2", 1384 | "nLtv": "\u226a\u0338", 1385 | "nRightarrow": "\u21cf", 1386 | "nVDash": "\u22af", 1387 | "nVdash": "\u22ae", 1388 | "nabla": "\u2207", 1389 | "nacute": "\u0144", 1390 | "nang": "\u2220\u20d2", 1391 | "nap": "\u2249", 1392 | "napE": "\u2a70\u0338", 1393 | "napid": "\u224b\u0338", 1394 | "napos": "\u0149", 1395 | "napprox": "\u2249", 1396 | "natur": "\u266e", 1397 | "natural": "\u266e", 1398 | "naturals": "\u2115", 1399 | "nbsp": "\xa0", 1400 | "nbump": "\u224e\u0338", 1401 | "nbumpe": "\u224f\u0338", 1402 | "ncap": "\u2a43", 1403 | "ncaron": "\u0148", 1404 | "ncedil": "\u0146", 1405 | "ncong": "\u2247", 1406 | "ncongdot": "\u2a6d\u0338", 1407 | "ncup": "\u2a42", 1408 | "ncy": "\u043d", 1409 | "ndash": "\u2013", 1410 | "ne": "\u2260", 1411 | "neArr": "\u21d7", 1412 | "nearhk": "\u2924", 1413 | "nearr": "\u2197", 1414 | "nearrow": "\u2197", 1415 | "nedot": "\u2250\u0338", 1416 | "nequiv": "\u2262", 1417 | "nesear": "\u2928", 1418 | "nesim": "\u2242\u0338", 1419 | "nexist": "\u2204", 1420 | "nexists": "\u2204", 1421 | "nfr": "\U0001d52b", 1422 | "ngE": "\u2267\u0338", 1423 | "nge": "\u2271", 1424 | "ngeq": "\u2271", 1425 | "ngeqq": "\u2267\u0338", 1426 | "ngeqslant": "\u2a7e\u0338", 1427 | "nges": "\u2a7e\u0338", 1428 | "ngsim": "\u2275", 1429 | "ngt": "\u226f", 1430 | "ngtr": "\u226f", 1431 | "nhArr": "\u21ce", 1432 | "nharr": "\u21ae", 1433 | "nhpar": "\u2af2", 1434 | "ni": "\u220b", 1435 | "nis": "\u22fc", 1436 | "nisd": "\u22fa", 1437 | "niv": "\u220b", 1438 | "njcy": "\u045a", 1439 | "nlArr": "\u21cd", 1440 | "nlE": "\u2266\u0338", 1441 | "nlarr": "\u219a", 1442 | "nldr": "\u2025", 1443 | "nle": "\u2270", 1444 | "nleftarrow": "\u219a", 1445 | "nleftrightarrow": "\u21ae", 1446 | "nleq": "\u2270", 1447 | "nleqq": "\u2266\u0338", 1448 | "nleqslant": "\u2a7d\u0338", 1449 | "nles": "\u2a7d\u0338", 1450 | "nless": "\u226e", 1451 | "nlsim": "\u2274", 1452 | "nlt": "\u226e", 1453 | "nltri": "\u22ea", 1454 | "nltrie": "\u22ec", 1455 | "nmid": "\u2224", 1456 | "nopf": "\U0001d55f", 1457 | "not": "\xac", 1458 | "notin": "\u2209", 1459 | "notinE": "\u22f9\u0338", 1460 | "notindot": "\u22f5\u0338", 1461 | "notinva": "\u2209", 1462 | "notinvb": "\u22f7", 1463 | "notinvc": "\u22f6", 1464 | "notni": "\u220c", 1465 | "notniva": "\u220c", 1466 | "notnivb": "\u22fe", 1467 | "notnivc": "\u22fd", 1468 | "npar": "\u2226", 1469 | "nparallel": "\u2226", 1470 | "nparsl": "\u2afd\u20e5", 1471 | "npart": "\u2202\u0338", 1472 | "npolint": "\u2a14", 1473 | "npr": "\u2280", 1474 | "nprcue": "\u22e0", 1475 | "npre": "\u2aaf\u0338", 1476 | "nprec": "\u2280", 1477 | "npreceq": "\u2aaf\u0338", 1478 | "nrArr": "\u21cf", 1479 | "nrarr": "\u219b", 1480 | "nrarrc": "\u2933\u0338", 1481 | "nrarrw": "\u219d\u0338", 1482 | "nrightarrow": "\u219b", 1483 | "nrtri": "\u22eb", 1484 | "nrtrie": "\u22ed", 1485 | "nsc": "\u2281", 1486 | "nsccue": "\u22e1", 1487 | "nsce": "\u2ab0\u0338", 1488 | "nscr": "\U0001d4c3", 1489 | "nshortmid": "\u2224", 1490 | "nshortparallel": "\u2226", 1491 | "nsim": "\u2241", 1492 | "nsime": "\u2244", 1493 | "nsimeq": "\u2244", 1494 | "nsmid": "\u2224", 1495 | "nspar": "\u2226", 1496 | "nsqsube": "\u22e2", 1497 | "nsqsupe": "\u22e3", 1498 | "nsub": "\u2284", 1499 | "nsubE": "\u2ac5\u0338", 1500 | "nsube": "\u2288", 1501 | "nsubset": "\u2282\u20d2", 1502 | "nsubseteq": "\u2288", 1503 | "nsubseteqq": "\u2ac5\u0338", 1504 | "nsucc": "\u2281", 1505 | "nsucceq": "\u2ab0\u0338", 1506 | "nsup": "\u2285", 1507 | "nsupE": "\u2ac6\u0338", 1508 | "nsupe": "\u2289", 1509 | "nsupset": "\u2283\u20d2", 1510 | "nsupseteq": "\u2289", 1511 | "nsupseteqq": "\u2ac6\u0338", 1512 | "ntgl": "\u2279", 1513 | "ntilde": "\xf1", 1514 | "ntlg": "\u2278", 1515 | "ntriangleleft": "\u22ea", 1516 | "ntrianglelefteq": "\u22ec", 1517 | "ntriangleright": "\u22eb", 1518 | "ntrianglerighteq": "\u22ed", 1519 | "nu": "\u03bd", 1520 | "num": "#", 1521 | "numero": "\u2116", 1522 | "numsp": "\u2007", 1523 | "nvDash": "\u22ad", 1524 | "nvHarr": "\u2904", 1525 | "nvap": "\u224d\u20d2", 1526 | "nvdash": "\u22ac", 1527 | "nvge": "\u2265\u20d2", 1528 | "nvgt": ">\u20d2", 1529 | "nvinfin": "\u29de", 1530 | "nvlArr": "\u2902", 1531 | "nvle": "\u2264\u20d2", 1532 | "nvlt": "<\u20d2", 1533 | "nvltrie": "\u22b4\u20d2", 1534 | "nvrArr": "\u2903", 1535 | "nvrtrie": "\u22b5\u20d2", 1536 | "nvsim": "\u223c\u20d2", 1537 | "nwArr": "\u21d6", 1538 | "nwarhk": "\u2923", 1539 | "nwarr": "\u2196", 1540 | "nwarrow": "\u2196", 1541 | "nwnear": "\u2927", 1542 | "oS": "\u24c8", 1543 | "oacute": "\xf3", 1544 | "oast": "\u229b", 1545 | "ocir": "\u229a", 1546 | "ocirc": "\xf4", 1547 | "ocy": "\u043e", 1548 | "odash": "\u229d", 1549 | "odblac": "\u0151", 1550 | "odiv": "\u2a38", 1551 | "odot": "\u2299", 1552 | "odsold": "\u29bc", 1553 | "oelig": "\u0153", 1554 | "ofcir": "\u29bf", 1555 | "ofr": "\U0001d52c", 1556 | "ogon": "\u02db", 1557 | "ograve": "\xf2", 1558 | "ogt": "\u29c1", 1559 | "ohbar": "\u29b5", 1560 | "ohm": "\u03a9", 1561 | "oint": "\u222e", 1562 | "olarr": "\u21ba", 1563 | "olcir": "\u29be", 1564 | "olcross": "\u29bb", 1565 | "oline": "\u203e", 1566 | "olt": "\u29c0", 1567 | "omacr": "\u014d", 1568 | "omega": "\u03c9", 1569 | "omicron": "\u03bf", 1570 | "omid": "\u29b6", 1571 | "ominus": "\u2296", 1572 | "oopf": "\U0001d560", 1573 | "opar": "\u29b7", 1574 | "operp": "\u29b9", 1575 | "oplus": "\u2295", 1576 | "or": "\u2228", 1577 | "orarr": "\u21bb", 1578 | "ord": "\u2a5d", 1579 | "order": "\u2134", 1580 | "orderof": "\u2134", 1581 | "ordf": "\xaa", 1582 | "ordm": "\xba", 1583 | "origof": "\u22b6", 1584 | "oror": "\u2a56", 1585 | "orslope": "\u2a57", 1586 | "orv": "\u2a5b", 1587 | "oscr": "\u2134", 1588 | "oslash": "\xf8", 1589 | "osol": "\u2298", 1590 | "otilde": "\xf5", 1591 | "otimes": "\u2297", 1592 | "otimesas": "\u2a36", 1593 | "ouml": "\xf6", 1594 | "ovbar": "\u233d", 1595 | "par": "\u2225", 1596 | "para": "\xb6", 1597 | "parallel": "\u2225", 1598 | "parsim": "\u2af3", 1599 | "parsl": "\u2afd", 1600 | "part": "\u2202", 1601 | "pcy": "\u043f", 1602 | "percnt": "%", 1603 | "period": ".", 1604 | "permil": "\u2030", 1605 | "perp": "\u22a5", 1606 | "pertenk": "\u2031", 1607 | "pfr": "\U0001d52d", 1608 | "phi": "\u03c6", 1609 | "phiv": "\u03d5", 1610 | "phmmat": "\u2133", 1611 | "phone": "\u260e", 1612 | "pi": "\u03c0", 1613 | "pitchfork": "\u22d4", 1614 | "piv": "\u03d6", 1615 | "planck": "\u210f", 1616 | "planckh": "\u210e", 1617 | "plankv": "\u210f", 1618 | "plus": "+", 1619 | "plusacir": "\u2a23", 1620 | "plusb": "\u229e", 1621 | "pluscir": "\u2a22", 1622 | "plusdo": "\u2214", 1623 | "plusdu": "\u2a25", 1624 | "pluse": "\u2a72", 1625 | "plusmn": "\xb1", 1626 | "plussim": "\u2a26", 1627 | "plustwo": "\u2a27", 1628 | "pm": "\xb1", 1629 | "pointint": "\u2a15", 1630 | "popf": "\U0001d561", 1631 | "pound": "\xa3", 1632 | "pr": "\u227a", 1633 | "prE": "\u2ab3", 1634 | "prap": "\u2ab7", 1635 | "prcue": "\u227c", 1636 | "pre": "\u2aaf", 1637 | "prec": "\u227a", 1638 | "precapprox": "\u2ab7", 1639 | "preccurlyeq": "\u227c", 1640 | "preceq": "\u2aaf", 1641 | "precnapprox": "\u2ab9", 1642 | "precneqq": "\u2ab5", 1643 | "precnsim": "\u22e8", 1644 | "precsim": "\u227e", 1645 | "prime": "\u2032", 1646 | "primes": "\u2119", 1647 | "prnE": "\u2ab5", 1648 | "prnap": "\u2ab9", 1649 | "prnsim": "\u22e8", 1650 | "prod": "\u220f", 1651 | "profalar": "\u232e", 1652 | "profline": "\u2312", 1653 | "profsurf": "\u2313", 1654 | "prop": "\u221d", 1655 | "propto": "\u221d", 1656 | "prsim": "\u227e", 1657 | "prurel": "\u22b0", 1658 | "pscr": "\U0001d4c5", 1659 | "psi": "\u03c8", 1660 | "puncsp": "\u2008", 1661 | "qfr": "\U0001d52e", 1662 | "qint": "\u2a0c", 1663 | "qopf": "\U0001d562", 1664 | "qprime": "\u2057", 1665 | "qscr": "\U0001d4c6", 1666 | "quaternions": "\u210d", 1667 | "quatint": "\u2a16", 1668 | "quest": "?", 1669 | "questeq": "\u225f", 1670 | "quot": "\"", 1671 | "rAarr": "\u21db", 1672 | "rArr": "\u21d2", 1673 | "rAtail": "\u291c", 1674 | "rBarr": "\u290f", 1675 | "rHar": "\u2964", 1676 | "race": "\u223d\u0331", 1677 | "racute": "\u0155", 1678 | "radic": "\u221a", 1679 | "raemptyv": "\u29b3", 1680 | "rang": "\u27e9", 1681 | "rangd": "\u2992", 1682 | "range": "\u29a5", 1683 | "rangle": "\u27e9", 1684 | "raquo": "\xbb", 1685 | "rarr": "\u2192", 1686 | "rarrap": "\u2975", 1687 | "rarrb": "\u21e5", 1688 | "rarrbfs": "\u2920", 1689 | "rarrc": "\u2933", 1690 | "rarrfs": "\u291e", 1691 | "rarrhk": "\u21aa", 1692 | "rarrlp": "\u21ac", 1693 | "rarrpl": "\u2945", 1694 | "rarrsim": "\u2974", 1695 | "rarrtl": "\u21a3", 1696 | "rarrw": "\u219d", 1697 | "ratail": "\u291a", 1698 | "ratio": "\u2236", 1699 | "rationals": "\u211a", 1700 | "rbarr": "\u290d", 1701 | "rbbrk": "\u2773", 1702 | "rbrace": "}", 1703 | "rbrack": "]", 1704 | "rbrke": "\u298c", 1705 | "rbrksld": "\u298e", 1706 | "rbrkslu": "\u2990", 1707 | "rcaron": "\u0159", 1708 | "rcedil": "\u0157", 1709 | "rceil": "\u2309", 1710 | "rcub": "}", 1711 | "rcy": "\u0440", 1712 | "rdca": "\u2937", 1713 | "rdldhar": "\u2969", 1714 | "rdquo": "\u201d", 1715 | "rdquor": "\u201d", 1716 | "rdsh": "\u21b3", 1717 | "real": "\u211c", 1718 | "realine": "\u211b", 1719 | "realpart": "\u211c", 1720 | "reals": "\u211d", 1721 | "rect": "\u25ad", 1722 | "reg": "\xae", 1723 | "rfisht": "\u297d", 1724 | "rfloor": "\u230b", 1725 | "rfr": "\U0001d52f", 1726 | "rhard": "\u21c1", 1727 | "rharu": "\u21c0", 1728 | "rharul": "\u296c", 1729 | "rho": "\u03c1", 1730 | "rhov": "\u03f1", 1731 | "rightarrow": "\u2192", 1732 | "rightarrowtail": "\u21a3", 1733 | "rightharpoondown": "\u21c1", 1734 | "rightharpoonup": "\u21c0", 1735 | "rightleftarrows": "\u21c4", 1736 | "rightleftharpoons": "\u21cc", 1737 | "rightrightarrows": "\u21c9", 1738 | "rightsquigarrow": "\u219d", 1739 | "rightthreetimes": "\u22cc", 1740 | "ring": "\u02da", 1741 | "risingdotseq": "\u2253", 1742 | "rlarr": "\u21c4", 1743 | "rlhar": "\u21cc", 1744 | "rlm": "\u200f", 1745 | "rmoust": "\u23b1", 1746 | "rmoustache": "\u23b1", 1747 | "rnmid": "\u2aee", 1748 | "roang": "\u27ed", 1749 | "roarr": "\u21fe", 1750 | "robrk": "\u27e7", 1751 | "ropar": "\u2986", 1752 | "ropf": "\U0001d563", 1753 | "roplus": "\u2a2e", 1754 | "rotimes": "\u2a35", 1755 | "rpar": ")", 1756 | "rpargt": "\u2994", 1757 | "rppolint": "\u2a12", 1758 | "rrarr": "\u21c9", 1759 | "rsaquo": "\u203a", 1760 | "rscr": "\U0001d4c7", 1761 | "rsh": "\u21b1", 1762 | "rsqb": "]", 1763 | "rsquo": "\u2019", 1764 | "rsquor": "\u2019", 1765 | "rthree": "\u22cc", 1766 | "rtimes": "\u22ca", 1767 | "rtri": "\u25b9", 1768 | "rtrie": "\u22b5", 1769 | "rtrif": "\u25b8", 1770 | "rtriltri": "\u29ce", 1771 | "ruluhar": "\u2968", 1772 | "rx": "\u211e", 1773 | "sacute": "\u015b", 1774 | "sbquo": "\u201a", 1775 | "sc": "\u227b", 1776 | "scE": "\u2ab4", 1777 | "scap": "\u2ab8", 1778 | "scaron": "\u0161", 1779 | "sccue": "\u227d", 1780 | "sce": "\u2ab0", 1781 | "scedil": "\u015f", 1782 | "scirc": "\u015d", 1783 | "scnE": "\u2ab6", 1784 | "scnap": "\u2aba", 1785 | "scnsim": "\u22e9", 1786 | "scpolint": "\u2a13", 1787 | "scsim": "\u227f", 1788 | "scy": "\u0441", 1789 | "sdot": "\u22c5", 1790 | "sdotb": "\u22a1", 1791 | "sdote": "\u2a66", 1792 | "seArr": "\u21d8", 1793 | "searhk": "\u2925", 1794 | "searr": "\u2198", 1795 | "searrow": "\u2198", 1796 | "sect": "\xa7", 1797 | "semi": ";", 1798 | "seswar": "\u2929", 1799 | "setminus": "\u2216", 1800 | "setmn": "\u2216", 1801 | "sext": "\u2736", 1802 | "sfr": "\U0001d530", 1803 | "sfrown": "\u2322", 1804 | "sharp": "\u266f", 1805 | "shchcy": "\u0449", 1806 | "shcy": "\u0448", 1807 | "shortmid": "\u2223", 1808 | "shortparallel": "\u2225", 1809 | "shy": "\xad", 1810 | "sigma": "\u03c3", 1811 | "sigmaf": "\u03c2", 1812 | "sigmav": "\u03c2", 1813 | "sim": "\u223c", 1814 | "simdot": "\u2a6a", 1815 | "sime": "\u2243", 1816 | "simeq": "\u2243", 1817 | "simg": "\u2a9e", 1818 | "simgE": "\u2aa0", 1819 | "siml": "\u2a9d", 1820 | "simlE": "\u2a9f", 1821 | "simne": "\u2246", 1822 | "simplus": "\u2a24", 1823 | "simrarr": "\u2972", 1824 | "slarr": "\u2190", 1825 | "smallsetminus": "\u2216", 1826 | "smashp": "\u2a33", 1827 | "smeparsl": "\u29e4", 1828 | "smid": "\u2223", 1829 | "smile": "\u2323", 1830 | "smt": "\u2aaa", 1831 | "smte": "\u2aac", 1832 | "smtes": "\u2aac\ufe00", 1833 | "softcy": "\u044c", 1834 | "sol": "/", 1835 | "solb": "\u29c4", 1836 | "solbar": "\u233f", 1837 | "sopf": "\U0001d564", 1838 | "spades": "\u2660", 1839 | "spadesuit": "\u2660", 1840 | "spar": "\u2225", 1841 | "sqcap": "\u2293", 1842 | "sqcaps": "\u2293\ufe00", 1843 | "sqcup": "\u2294", 1844 | "sqcups": "\u2294\ufe00", 1845 | "sqsub": "\u228f", 1846 | "sqsube": "\u2291", 1847 | "sqsubset": "\u228f", 1848 | "sqsubseteq": "\u2291", 1849 | "sqsup": "\u2290", 1850 | "sqsupe": "\u2292", 1851 | "sqsupset": "\u2290", 1852 | "sqsupseteq": "\u2292", 1853 | "squ": "\u25a1", 1854 | "square": "\u25a1", 1855 | "squarf": "\u25aa", 1856 | "squf": "\u25aa", 1857 | "srarr": "\u2192", 1858 | "sscr": "\U0001d4c8", 1859 | "ssetmn": "\u2216", 1860 | "ssmile": "\u2323", 1861 | "sstarf": "\u22c6", 1862 | "star": "\u2606", 1863 | "starf": "\u2605", 1864 | "straightepsilon": "\u03f5", 1865 | "straightphi": "\u03d5", 1866 | "strns": "\xaf", 1867 | "sub": "\u2282", 1868 | "subE": "\u2ac5", 1869 | "subdot": "\u2abd", 1870 | "sube": "\u2286", 1871 | "subedot": "\u2ac3", 1872 | "submult": "\u2ac1", 1873 | "subnE": "\u2acb", 1874 | "subne": "\u228a", 1875 | "subplus": "\u2abf", 1876 | "subrarr": "\u2979", 1877 | "subset": "\u2282", 1878 | "subseteq": "\u2286", 1879 | "subseteqq": "\u2ac5", 1880 | "subsetneq": "\u228a", 1881 | "subsetneqq": "\u2acb", 1882 | "subsim": "\u2ac7", 1883 | "subsub": "\u2ad5", 1884 | "subsup": "\u2ad3", 1885 | "succ": "\u227b", 1886 | "succapprox": "\u2ab8", 1887 | "succcurlyeq": "\u227d", 1888 | "succeq": "\u2ab0", 1889 | "succnapprox": "\u2aba", 1890 | "succneqq": "\u2ab6", 1891 | "succnsim": "\u22e9", 1892 | "succsim": "\u227f", 1893 | "sum": "\u2211", 1894 | "sung": "\u266a", 1895 | "sup1": "\xb9", 1896 | "sup2": "\xb2", 1897 | "sup3": "\xb3", 1898 | "sup": "\u2283", 1899 | "supE": "\u2ac6", 1900 | "supdot": "\u2abe", 1901 | "supdsub": "\u2ad8", 1902 | "supe": "\u2287", 1903 | "supedot": "\u2ac4", 1904 | "suphsol": "\u27c9", 1905 | "suphsub": "\u2ad7", 1906 | "suplarr": "\u297b", 1907 | "supmult": "\u2ac2", 1908 | "supnE": "\u2acc", 1909 | "supne": "\u228b", 1910 | "supplus": "\u2ac0", 1911 | "supset": "\u2283", 1912 | "supseteq": "\u2287", 1913 | "supseteqq": "\u2ac6", 1914 | "supsetneq": "\u228b", 1915 | "supsetneqq": "\u2acc", 1916 | "supsim": "\u2ac8", 1917 | "supsub": "\u2ad4", 1918 | "supsup": "\u2ad6", 1919 | "swArr": "\u21d9", 1920 | "swarhk": "\u2926", 1921 | "swarr": "\u2199", 1922 | "swarrow": "\u2199", 1923 | "swnwar": "\u292a", 1924 | "szlig": "\xdf", 1925 | "target": "\u2316", 1926 | "tau": "\u03c4", 1927 | "tbrk": "\u23b4", 1928 | "tcaron": "\u0165", 1929 | "tcedil": "\u0163", 1930 | "tcy": "\u0442", 1931 | "tdot": "\u20db", 1932 | "telrec": "\u2315", 1933 | "tfr": "\U0001d531", 1934 | "there4": "\u2234", 1935 | "therefore": "\u2234", 1936 | "theta": "\u03b8", 1937 | "thetasym": "\u03d1", 1938 | "thetav": "\u03d1", 1939 | "thickapprox": "\u2248", 1940 | "thicksim": "\u223c", 1941 | "thinsp": "\u2009", 1942 | "thkap": "\u2248", 1943 | "thksim": "\u223c", 1944 | "thorn": "\xfe", 1945 | "tilde": "\u02dc", 1946 | "times": "\xd7", 1947 | "timesb": "\u22a0", 1948 | "timesbar": "\u2a31", 1949 | "timesd": "\u2a30", 1950 | "tint": "\u222d", 1951 | "toea": "\u2928", 1952 | "top": "\u22a4", 1953 | "topbot": "\u2336", 1954 | "topcir": "\u2af1", 1955 | "topf": "\U0001d565", 1956 | "topfork": "\u2ada", 1957 | "tosa": "\u2929", 1958 | "tprime": "\u2034", 1959 | "trade": "\u2122", 1960 | "triangle": "\u25b5", 1961 | "triangledown": "\u25bf", 1962 | "triangleleft": "\u25c3", 1963 | "trianglelefteq": "\u22b4", 1964 | "triangleq": "\u225c", 1965 | "triangleright": "\u25b9", 1966 | "trianglerighteq": "\u22b5", 1967 | "tridot": "\u25ec", 1968 | "trie": "\u225c", 1969 | "triminus": "\u2a3a", 1970 | "triplus": "\u2a39", 1971 | "trisb": "\u29cd", 1972 | "tritime": "\u2a3b", 1973 | "trpezium": "\u23e2", 1974 | "tscr": "\U0001d4c9", 1975 | "tscy": "\u0446", 1976 | "tshcy": "\u045b", 1977 | "tstrok": "\u0167", 1978 | "twixt": "\u226c", 1979 | "twoheadleftarrow": "\u219e", 1980 | "twoheadrightarrow": "\u21a0", 1981 | "uArr": "\u21d1", 1982 | "uHar": "\u2963", 1983 | "uacute": "\xfa", 1984 | "uarr": "\u2191", 1985 | "ubrcy": "\u045e", 1986 | "ubreve": "\u016d", 1987 | "ucirc": "\xfb", 1988 | "ucy": "\u0443", 1989 | "udarr": "\u21c5", 1990 | "udblac": "\u0171", 1991 | "udhar": "\u296e", 1992 | "ufisht": "\u297e", 1993 | "ufr": "\U0001d532", 1994 | "ugrave": "\xf9", 1995 | "uharl": "\u21bf", 1996 | "uharr": "\u21be", 1997 | "uhblk": "\u2580", 1998 | "ulcorn": "\u231c", 1999 | "ulcorner": "\u231c", 2000 | "ulcrop": "\u230f", 2001 | "ultri": "\u25f8", 2002 | "umacr": "\u016b", 2003 | "uml": "\xa8", 2004 | "uogon": "\u0173", 2005 | "uopf": "\U0001d566", 2006 | "uparrow": "\u2191", 2007 | "updownarrow": "\u2195", 2008 | "upharpoonleft": "\u21bf", 2009 | "upharpoonright": "\u21be", 2010 | "uplus": "\u228e", 2011 | "upsi": "\u03c5", 2012 | "upsih": "\u03d2", 2013 | "upsilon": "\u03c5", 2014 | "upuparrows": "\u21c8", 2015 | "urcorn": "\u231d", 2016 | "urcorner": "\u231d", 2017 | "urcrop": "\u230e", 2018 | "uring": "\u016f", 2019 | "urtri": "\u25f9", 2020 | "uscr": "\U0001d4ca", 2021 | "utdot": "\u22f0", 2022 | "utilde": "\u0169", 2023 | "utri": "\u25b5", 2024 | "utrif": "\u25b4", 2025 | "uuarr": "\u21c8", 2026 | "uuml": "\xfc", 2027 | "uwangle": "\u29a7", 2028 | "vArr": "\u21d5", 2029 | "vBar": "\u2ae8", 2030 | "vBarv": "\u2ae9", 2031 | "vDash": "\u22a8", 2032 | "vangrt": "\u299c", 2033 | "varepsilon": "\u03f5", 2034 | "varkappa": "\u03f0", 2035 | "varnothing": "\u2205", 2036 | "varphi": "\u03d5", 2037 | "varpi": "\u03d6", 2038 | "varpropto": "\u221d", 2039 | "varr": "\u2195", 2040 | "varrho": "\u03f1", 2041 | "varsigma": "\u03c2", 2042 | "varsubsetneq": "\u228a\ufe00", 2043 | "varsubsetneqq": "\u2acb\ufe00", 2044 | "varsupsetneq": "\u228b\ufe00", 2045 | "varsupsetneqq": "\u2acc\ufe00", 2046 | "vartheta": "\u03d1", 2047 | "vartriangleleft": "\u22b2", 2048 | "vartriangleright": "\u22b3", 2049 | "vcy": "\u0432", 2050 | "vdash": "\u22a2", 2051 | "vee": "\u2228", 2052 | "veebar": "\u22bb", 2053 | "veeeq": "\u225a", 2054 | "vellip": "\u22ee", 2055 | "verbar": "|", 2056 | "vert": "|", 2057 | "vfr": "\U0001d533", 2058 | "vltri": "\u22b2", 2059 | "vnsub": "\u2282\u20d2", 2060 | "vnsup": "\u2283\u20d2", 2061 | "vopf": "\U0001d567", 2062 | "vprop": "\u221d", 2063 | "vrtri": "\u22b3", 2064 | "vscr": "\U0001d4cb", 2065 | "vsubnE": "\u2acb\ufe00", 2066 | "vsubne": "\u228a\ufe00", 2067 | "vsupnE": "\u2acc\ufe00", 2068 | "vsupne": "\u228b\ufe00", 2069 | "vzigzag": "\u299a", 2070 | "wcirc": "\u0175", 2071 | "wedbar": "\u2a5f", 2072 | "wedge": "\u2227", 2073 | "wedgeq": "\u2259", 2074 | "weierp": "\u2118", 2075 | "wfr": "\U0001d534", 2076 | "wopf": "\U0001d568", 2077 | "wp": "\u2118", 2078 | "wr": "\u2240", 2079 | "wreath": "\u2240", 2080 | "wscr": "\U0001d4cc", 2081 | "xcap": "\u22c2", 2082 | "xcirc": "\u25ef", 2083 | "xcup": "\u22c3", 2084 | "xdtri": "\u25bd", 2085 | "xfr": "\U0001d535", 2086 | "xhArr": "\u27fa", 2087 | "xharr": "\u27f7", 2088 | "xi": "\u03be", 2089 | "xlArr": "\u27f8", 2090 | "xlarr": "\u27f5", 2091 | "xmap": "\u27fc", 2092 | "xnis": "\u22fb", 2093 | "xodot": "\u2a00", 2094 | "xopf": "\U0001d569", 2095 | "xoplus": "\u2a01", 2096 | "xotime": "\u2a02", 2097 | "xrArr": "\u27f9", 2098 | "xrarr": "\u27f6", 2099 | "xscr": "\U0001d4cd", 2100 | "xsqcup": "\u2a06", 2101 | "xuplus": "\u2a04", 2102 | "xutri": "\u25b3", 2103 | "xvee": "\u22c1", 2104 | "xwedge": "\u22c0", 2105 | "yacute": "\xfd", 2106 | "yacy": "\u044f", 2107 | "ycirc": "\u0177", 2108 | "ycy": "\u044b", 2109 | "yen": "\xa5", 2110 | "yfr": "\U0001d536", 2111 | "yicy": "\u0457", 2112 | "yopf": "\U0001d56a", 2113 | "yscr": "\U0001d4ce", 2114 | "yucy": "\u044e", 2115 | "yuml": "\xff", 2116 | "zacute": "\u017a", 2117 | "zcaron": "\u017e", 2118 | "zcy": "\u0437", 2119 | "zdot": "\u017c", 2120 | "zeetrf": "\u2128", 2121 | "zeta": "\u03b6", 2122 | "zfr": "\U0001d537", 2123 | "zhcy": "\u0436", 2124 | "zigrarr": "\u21dd", 2125 | "zopf": "\U0001d56b", 2126 | "zscr": "\U0001d4cf", 2127 | "zwj": "\u200d", 2128 | "zwnj": "\u200c", 2129 | } 2130 | -------------------------------------------------------------------------------- /wikimarkup/parser.py: -------------------------------------------------------------------------------- 1 | """ 2 | MediaWiki-style markup 3 | 4 | Copyright (C) 2008 David Cramer 5 | 6 | This program is free software: you can redistribute it and/or modify 7 | it under the terms of the GNU General Public License as published by 8 | the Free Software Foundation, either version 3 of the License, or 9 | (at your option) any later version. 10 | 11 | This program is distributed in the hope that it will be useful, 12 | but WITHOUT ANY WARRANTY; without even the implied warranty of 13 | MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the 14 | GNU General Public License for more details. 15 | 16 | You should have received a copy of the GNU General Public License 17 | along with this program. If not, see . 18 | """ 19 | 20 | import re, random 21 | from base64 import b64decode 22 | import unidecode 23 | 24 | import bleach 25 | from bleach.css_sanitizer import CSSSanitizer 26 | 27 | from .entities import html_entities as _htmlEntities 28 | 29 | # a few patterns we use later 30 | 31 | MW_COLON_STATE_TEXT = 0 32 | MW_COLON_STATE_TAG = 1 33 | MW_COLON_STATE_TAGSTART = 2 34 | MW_COLON_STATE_CLOSETAG = 3 35 | MW_COLON_STATE_TAGSLASH = 4 36 | MW_COLON_STATE_COMMENT = 5 37 | MW_COLON_STATE_COMMENTDASH = 6 38 | MW_COLON_STATE_COMMENTDASHDASH = 7 39 | 40 | _attributePat = re.compile(r"(?:^|\s)([A-Za-z0-9]+)(?:\s*=\s*(?:\"([^<\"]*)\"|\'([^<\']*)\'|([a-zA-Z0-9!#$%&()*,\-./:;<>?@\[\]^_{|}~]+)|#([0-9a-fA-F]+)))") 41 | _space = re.compile(r'\s+') 42 | _closePrePat = re.compile("]*?)(/?>)([^<]*)$') 46 | 47 | _htmlpairs = ( # Tags that must be closed 48 | 'b', 'del', 'i', 'ins', 'u', 'font', 'big', 'small', 'sub', 49 | 'sup', 'h1', 'h2', 'h3', 'h4', 'h5', 'h6', 'cite', 'code', 50 | 'em', 's', 'strike', 'strong', 'tt', 'var', 'div', 'center', 51 | 'blockquote', 'ol', 'ul', 'dl', 'table', 'caption', 'pre', 52 | 'p', 'span', 'u', 'li', 'dd', 'dt', 'video', 'section', 'noscript' 53 | ) 54 | _htmlsingle = ( # Elements that cannot have close tags 55 | 'br', 'hr', 'img', 'source', 56 | ) 57 | _htmlnest = ( # Tags that can be nested--?? 58 | 'table', 'tr', 'td', 'th', 'div', 'blockquote', 'ol', 'ul', 59 | 'dl', 'font', 'big', 'small', 'sub', 'sup', 'span', 'img', 60 | 'tbody', 'thead', 'tfoot', 'colgroup', 'col', 'section', 61 | ) 62 | _tabletags = ( # Can only appear inside table 63 | 'td', 'th', 'tr', 'tbody', 'thead', 'tfoot', 'colgroup', 'col', 64 | ) 65 | _htmllist = ( # Tags used by list 66 | 'ul', 'ol', 67 | ) 68 | _listtags = ( # Tags that can appear in a list 69 | 'li', 70 | ) 71 | _htmlsingleallowed = _htmlsingle + _tabletags 72 | _htmlelements = _htmlsingle + _htmlpairs + _htmlnest 73 | 74 | _charRefsPat = re.compile(r'''(&([A-Za-z0-9]+);|&#([0-9]+);|&#[xX]([0-9A-Za-z]+);|(&))''') 75 | _cssCommentPat = re.compile(r'''\*.*?\*''') 76 | _toUTFPat = re.compile(r'''\\([0-9A-Fa-f]{1,6})[\s]?''') 77 | _hackPat = re.compile(r'''(expression|tps*://|url\s*\().*''', re.IGNORECASE) 78 | _hrPat = re.compile('''^-----*''', re.MULTILINE) 79 | _h1Pat = re.compile(r'^=(.+)=\s*$', re.MULTILINE) 80 | _h2Pat = re.compile(r'^==(.+)==\s*$', re.MULTILINE) 81 | _h3Pat = re.compile(r'^===(.+)===\s*$', re.MULTILINE) 82 | _h4Pat = re.compile(r'^====(.+)====\s*$', re.MULTILINE) 83 | _h5Pat = re.compile(r'^=====(.+)=====\s*$', re.MULTILINE) 84 | _h6Pat = re.compile(r'^======(.+)======\s*$', re.MULTILINE) 85 | _quotePat = re.compile("""(''+)""") 86 | _removePat = re.compile(r'\b(' + r'|'.join((u"a", "an", "as", "at", "before", "but", "by", "for", "from", 87 | "is", "in", "into", "like", "of", "off", "on", "onto", "per", 88 | "since", "than", "the", "this", "that", "to", "up", "via", 89 | "with")) + r')\b', re.IGNORECASE) 90 | _nonWordSpaceDashPat = re.compile(r'[^\w\s\-\./]') 91 | _multiSpacePat = re.compile(r'[\s\-_\./]+') 92 | _spacePat = re.compile(r' ') 93 | _linkPat = re.compile(r'^(?:([A-Za-z0-9]+):)?([^\|]+)(?:\|([^\n]+?))?\]\](.*)$', re.DOTALL) 94 | _bracketedLinkPat = re.compile(r'(?:\[((?:mailto:|git://|irc://|https?://|ftp://|/)[^<>\]\[' + "\x00-\x20\x7f" + r']*)\s*(.*?)\])') 95 | _internalLinkPat = re.compile(r'\[\[(?:(:?[^:\]]*?):\s*)?(.*?)\]\]') 96 | _protocolPat = re.compile(r'(\b(?:mailto:|irc://|https?://|ftp://))') 97 | _specialUrlPat = re.compile(r'^([^<>\]\[' + "\x00-\x20\x7f" + r']+)(.*)$') 98 | _protocolsPat = re.compile(r'^(mailto:|irc://|https?://|ftp://)$') 99 | _controlCharsPat = re.compile(r'[\]\[<>"' + "\\x00-\\x20\\x7F" + r']]') 100 | _hostnamePat = re.compile(r'^([^:]+:)(//[^/]+)?(.*)$') 101 | _stripPat = re.compile('\\s|\u00ad|\u1806|\u200b|\u2060|\ufeff|\u03f4|\u034f|\u180b|\u180c|\u180d|\u200c|\u200d|[\ufe00-\ufe0f]') 102 | _zomgPat = re.compile(r'^(:*)\{\|(.*)$') 103 | _headerPat = re.compile(r"<[Hh]([1-6])(.*?)>(.*?)") 104 | _templateSectionPat = re.compile(r"") 105 | _tagPat = re.compile(r"<.*?>") 106 | _startRegexHash = {} 107 | _endRegexHash = {} 108 | _endCommentPat = re.compile(r'(-->)') 109 | _extractTagsAndParams_n = 1 110 | _guillemetLeftPat = re.compile(r'(.) (\?|:|;|!|\302\273)') 111 | _guillemetRightPat = re.compile(r'(\302\253) ') 112 | 113 | def setupAttributeWhitelist(): 114 | common = ( 'id', 'class', 'lang', 'dir', 'title', 'style' ) 115 | block = common + ('align',) 116 | tablealign = ( 'align', 'char', 'charoff', 'valign' ) 117 | tablecell = ( 'abbr', 118 | 'axis', 119 | 'headers', 120 | 'scope', 121 | 'rowspan', 122 | 'colspan', 123 | 'nowrap', # deprecated 124 | 'width', # deprecated 125 | 'height', # deprecated 126 | 'bgcolor' # deprecated 127 | ) 128 | return { 129 | 'div': block, 130 | 'center': common, # deprecated 131 | 'span': block, # ?? 132 | 'h1': block, 133 | 'h2': block, 134 | 'h3': block, 135 | 'h4': block, 136 | 'h5': block, 137 | 'h6': block, 138 | 'em': common, 139 | 'strong': common, 140 | 'cite': common, 141 | 'code': common, 142 | 'var': common, 143 | 'img': common + ('src', 'alt', 'width', 'height',), 144 | 'blockquote': common + ('cite',), 145 | 'sub': common, 146 | 'sup': common, 147 | 'p': block, 148 | 'br': ('id', 'class', 'title', 'style', 'clear',), 149 | 'pre': common + ('width',), 150 | 'ins': common + ('cite', 'datetime'), 151 | 'del': common + ('cite', 'datetime'), 152 | 'ul': common + ('type',), 153 | 'ol': common + ('type', 'start'), 154 | 'li': common + ('type', 'value'), 155 | 'dl': common, 156 | 'dd': common, 157 | 'dt': common, 158 | 'table': common + ( 'summary', 'width', 'border', 'frame', 159 | 'rules', 'cellspacing', 'cellpadding', 160 | 'align', 'bgcolor', 161 | ), 162 | 'caption': common + ('align',), 163 | 'thead': common + tablealign, 164 | 'tfoot': common + tablealign, 165 | 'tbody': common + tablealign, 166 | 'colgroup': common + ( 'span', 'width' ) + tablealign, 167 | 'col': common + ( 'span', 'width' ) + tablealign, 168 | 'tr': common + ( 'bgcolor', ) + tablealign, 169 | 'td': common + tablecell + tablealign, 170 | 'th': common + tablecell + tablealign, 171 | 'tt': common, 172 | 'b': common, 173 | 'i': common, 174 | 'big': common, 175 | 'small': common, 176 | 'strike': common, 177 | 's': common, 178 | 'u': common, 179 | 'font': common + ( 'size', 'color', 'face' ), 180 | 'hr': common + ( 'noshade', 'size', 'width' ), 181 | 'video': common + ( 'width', 'height', 'controls' ), 182 | 'source': common + ( 'src', 'type' ), 183 | } 184 | _whitelist = setupAttributeWhitelist() 185 | _page_cache = {} 186 | 187 | # Used for bleach, list of allowed tags 188 | ALLOWED_TAGS = list(_htmlelements + ('a',)) 189 | 190 | ALLOWED_ATTRIBUTES = { 191 | 'a': ['href', 'title', 'rel'], 192 | 'div': ['id'], 193 | 'h1': ['id'], 194 | 'h2': ['id'], 195 | 'h3': ['id'], 196 | 'h4': ['id'], 197 | 'h5': ['id'], 198 | 'h6': ['id'], 199 | 'li': ['class'], 200 | 'span': ['class'], 201 | } 202 | 203 | 204 | class BaseParser(object): 205 | def __init__(self): 206 | self.uniq_prefix = "\x07UNIQ" + str(random.randint(1, 1000000000)) 207 | self.strip_state = {} 208 | self.arg_stack = [] 209 | # tag hooks 210 | self.tagHooks = {} 211 | # [[internal link]] hooks 212 | self.internalLinkHooks = {} 213 | self.closeMatchPat = re.compile(r"(', '>') 293 | else: 294 | if tagName in self.tagHooks: 295 | output = self.tagHooks[tagName](self, content, params) 296 | else: 297 | output = content.replace('&', '&').replace('<', '<').replace('>', '>') 298 | else: 299 | # Just stripping tags; keep the source 300 | output = tag 301 | 302 | # Unstrip the output, because unstrip() is no longer recursive so 303 | # it won't do it itself 304 | output = self.unstrip(output) 305 | 306 | if not stripcomments and element == '!--': 307 | commentState[marker] = output 308 | elif element == 'html' or element == 'nowiki': 309 | if 'nowiki' not in self.strip_state: 310 | self.strip_state['nowiki'] = {} 311 | self.strip_state['nowiki'][marker] = output 312 | else: 313 | if 'general' not in self.strip_state: 314 | self.strip_state['general'] = {} 315 | self.strip_state['general'][marker] = output 316 | 317 | # Unstrip comments unless explicitly told otherwise. 318 | # (The comments are always stripped prior to this point, so as to 319 | # not invoke any extension tags / parser hooks contained within 320 | # a comment.) 321 | if not stripcomments: 322 | # Put them all back and forget them 323 | for k in commentState: 324 | v = commentState[k] 325 | text = text.replace(k, v) 326 | 327 | return text 328 | 329 | def removeHtmlTags(self, text): 330 | """convert bad tags into HTML identities""" 331 | sb = [] 332 | bits = text.split('<') 333 | sb.append(bits.pop(0)) 334 | tagstack = [] 335 | tablestack = tagstack 336 | for x in bits: 337 | m = _tagPattern.match(x) 338 | if not m: 339 | # If it isn't a tag, leave it in place and move on 340 | sb.append('<%s' % x) 341 | continue 342 | slash, t, params, brace, rest = m.groups() 343 | t = t.lower() 344 | badtag = False 345 | if t in _htmlelements: 346 | # Check our stack 347 | if slash: 348 | # Closing a tag... 349 | if t in _htmlsingle or len(tagstack) == 0: 350 | badtag = True 351 | else: 352 | ot = tagstack.pop() 353 | if ot != t: 354 | if ot in _htmlsingleallowed: 355 | # Pop all elements with an optional close tag 356 | # and see if we find a match below them 357 | optstack = [] 358 | optstack.append(ot) 359 | while True: 360 | if len(tagstack) == 0: 361 | break 362 | ot = tagstack.pop() 363 | if ot == t or ot not in _htmlsingleallowed: 364 | break 365 | optstack.append(ot) 366 | if t != ot: 367 | # No match. Push the optinal elements back again 368 | badtag = True 369 | tagstack += reversed(optstack) 370 | else: 371 | tagstack.append(ot) 372 | #
  • can be nested in
      or
        , skip those cases: 373 | if ot not in _htmllist and t in _listtags: 374 | badtag = True 375 | elif t == 'table': 376 | if len(tablestack) == 0: 377 | bagtag = True 378 | else: 379 | tagstack = tablestack.pop() 380 | newparams = '' 381 | else: 382 | # Keep track for later 383 | if t in _tabletags and 'table' not in tagstack: 384 | badtag = True 385 | elif t in tagstack and t not in _htmlnest: 386 | badtag = True 387 | # Is it a self-closed htmlpair? (bug 5487) 388 | elif brace == '/>' and t in _htmlpairs: 389 | badTag = True 390 | elif t in _htmlsingle: 391 | # Hack to force empty tag for uncloseable elements 392 | brace = '/>' 393 | else: 394 | if t == 'table': 395 | tablestack.append(tagstack) 396 | tagstack = [] 397 | tagstack.append(t) 398 | newparams = self.fixTagAttributes(params, t) 399 | if not badtag: 400 | rest = rest.replace('>', u'>') 401 | if brace == '/>': 402 | close = ' /' 403 | else: 404 | close = '' 405 | sb.append('<') 406 | sb.append(slash) 407 | sb.append(t) 408 | sb.append(newparams) 409 | sb.append(close) 410 | sb.append('>') 411 | sb.append(rest) 412 | continue 413 | sb.append('<') 414 | sb.append(x.replace('>', '>')) 415 | 416 | # Close off any remaining tags 417 | while tagstack: 418 | t = tagstack.pop() 419 | sb.append('\n') 422 | if t == 'table': 423 | if not tablestack: 424 | break 425 | tagstack = tablestack.pop() 426 | 427 | return ''.join(sb) 428 | 429 | def removeHtmlComments(self, text): 430 | """remove comments from given text""" 431 | sb = [] 432 | start = text.find('', start) 436 | if end == -1: 437 | break 438 | end += 3 439 | 440 | spaceStart = max(0, start-1) 441 | spaceEnd = end 442 | while text[spaceStart] == ' ' and spaceStart > 0: 443 | spaceStart -= 1 444 | while text[spaceEnd] == ' ': 445 | spaceEnd += 1 446 | 447 | if text[spaceStart] == '\n' and text[spaceEnd] == '\n': 448 | sb.append(text[last:spaceStart]) 449 | sb.append('\n') 450 | last = spaceEnd+1 451 | else: 452 | sb.append(text[last:spaceStart+1]) 453 | last = spaceEnd 454 | 455 | start = text.find('' 1099 | 1100 | return result, mDTopen 1101 | 1102 | def nextItem(self, char, mDTopen): 1103 | if char == '*' or char == '#': 1104 | return '
      1. ', None 1105 | elif char == ':' or char == ';': 1106 | close = '' 1107 | if mDTopen: 1108 | close = '' 1109 | if char == ';': 1110 | return close + '
        ', True 1111 | else: 1112 | return close + '
        ', False 1113 | return '' 1114 | 1115 | def closeList(self, char, mDTopen): 1116 | if char == '*': 1117 | return '
    \n' 1118 | elif char == '#': 1119 | return '
  • \n' 1120 | elif char == ':': 1121 | if mDTopen: 1122 | return '\n' 1123 | else: 1124 | return '\n' 1125 | else: 1126 | return '' 1127 | 1128 | def findColonNoLinks(self, text, before, after): 1129 | try: 1130 | pos = text.search(':') 1131 | except: 1132 | return False 1133 | 1134 | lt = text.find('<') 1135 | if lt == -1 or lt > pos: 1136 | # Easy; no tag nesting to worry about 1137 | before = text[0:pos] 1138 | after = text[0:pos+1] 1139 | return before, after, pos 1140 | 1141 | # Ugly state machine to walk through avoiding tags. 1142 | state = MW_COLON_STATE_TEXT 1143 | stack = 0 1144 | i = 0 1145 | while i < len(text): 1146 | c = text[i] 1147 | 1148 | if state == 0: # MW_COLON_STATE_TEXT: 1149 | if text[i] == '<': 1150 | # Could be either a tag or an tag 1151 | state = MW_COLON_STATE_TAGSTART 1152 | elif text[i] == ':': 1153 | if stack == 0: 1154 | # we found it 1155 | return text[0:i], text[i+1], i 1156 | else: 1157 | # Skip ahead looking for something interesting 1158 | try: 1159 | colon = text.search(':', i) 1160 | except: 1161 | return False 1162 | lt = text.find('<', i) 1163 | if stack == 0: 1164 | if lt == -1 or colon < lt: 1165 | # we found it 1166 | return text[0:colon], text[colon+1], i 1167 | if lt == -1: 1168 | break 1169 | # Skip ahead to next tag start 1170 | i = lt 1171 | state = MW_COLON_STATE_TAGSTART 1172 | elif state == 1: # MW_COLON_STATE_TAG: 1173 | # In a 1174 | if text[i] == '>': 1175 | stack += 1 1176 | state = MW_COLON_STATE_TEXT 1177 | elif text[i] == '/': 1178 | state = MW_COLON_STATE_TAGSLASH 1179 | elif state == 2: # MW_COLON_STATE_TAGSTART: 1180 | if text[i] == '/': 1181 | state = MW_COLON_STATE_CLOSETAG 1182 | elif text[i] == '!': 1183 | state = MW_COLON_STATE_COMMENT 1184 | elif text[i] == '>': 1185 | # Illegal early close? This shouldn't happen D: 1186 | state = MW_COLON_STATE_TEXT 1187 | else: 1188 | state = MW_COLON_STATE_TAG 1189 | elif state == 3: # MW_COLON_STATE_CLOSETAG: 1190 | # In a 1191 | if text[i] == '>': 1192 | stack -= 1 1193 | if stack < 0: 1194 | return False 1195 | state = MW_COLON_STATE_TEXT 1196 | elif state == MW_COLON_STATE_TAGSLASH: 1197 | if text[i] == '>': 1198 | # Yes, a self-closed tag 1199 | state = MW_COLON_STATE_TEXT 1200 | else: 1201 | # Probably we're jumping the gun, and this is an attribute 1202 | state = MW_COLON_STATE_TAG 1203 | elif state == 5: # MW_COLON_STATE_COMMENT: 1204 | if text[i] == '-': 1205 | state = MW_COLON_STATE_COMMENTDASH 1206 | elif state == MW_COLON_STATE_COMMENTDASH: 1207 | if text[i] == '-': 1208 | state = MW_COLON_STATE_COMMENTDASHDASH 1209 | else: 1210 | state = MW_COLON_STATE_COMMENT 1211 | elif state == MW_COLON_STATE_COMMENTDASHDASH: 1212 | if text[i] == '>': 1213 | state = MW_COLON_STATE_TEXT 1214 | else: 1215 | state = MW_COLON_STATE_COMMENT 1216 | else: 1217 | raise 1218 | if stack > 0: 1219 | return False 1220 | return False 1221 | 1222 | def doBlockLevels(self, text, linestart): 1223 | # Parsing through the text line by line. The main thing 1224 | # happening here is handling of block-level elements p, pre, 1225 | # and making lists from lines starting with * # : etc. 1226 | lastPrefix = u'' 1227 | mDTopen = inBlockElem = False 1228 | prefixLength = 0 1229 | paragraphStack = False 1230 | mInPre = False 1231 | mLastSection = '' 1232 | mDTopen = False 1233 | output = [] 1234 | for oLine in text.split('\n')[not linestart and 1 or 0:]: 1235 | lastPrefixLength = len(lastPrefix) 1236 | preCloseMatch = _closePrePat.search(oLine) 1237 | preOpenMatch = _openPrePat.search(oLine) 1238 | if not mInPre: 1239 | chars = '*#:;' 1240 | prefixLength = 0 1241 | for c in oLine: 1242 | if c in chars: 1243 | prefixLength += 1 1244 | else: 1245 | break 1246 | pref = oLine[0:prefixLength] 1247 | 1248 | # eh? 1249 | pref2 = pref.replace(';', ':') 1250 | t = oLine[prefixLength:] 1251 | mInPre = bool(preOpenMatch) 1252 | else: 1253 | # Don't interpret any other prefixes in preformatted text 1254 | prefixLength = 0 1255 | pref = pref2 = u'' 1256 | t = oLine 1257 | 1258 | # List generation 1259 | if prefixLength and lastPrefix == pref2: 1260 | # Same as the last item, so no need to deal with nesting or opening stuff 1261 | tmpOutput, tmpMDTopen = self.nextItem(pref[-1:], mDTopen) 1262 | output.append(tmpOutput) 1263 | if tmpMDTopen is not None: 1264 | mDTopen = tmpMDTopen 1265 | paragraphStack = False 1266 | 1267 | if pref[-1:] == ';': 1268 | # The one nasty exception: definition lists work like this: 1269 | # ; title : definition text 1270 | # So we check for : in the remainder text to split up the 1271 | # title and definition, without b0rking links. 1272 | term = t2 = '' 1273 | z = self.findColonNoLinks(t, term, t2) 1274 | if z is not False: 1275 | term, t2 = z[1:2] 1276 | t = t2 1277 | output.append(term) 1278 | tmpOutput, tmpMDTopen = self.nextItem(':', mDTopen) 1279 | output.append(tmpOutput) 1280 | if tmpMDTopen is not None: 1281 | mDTopen = tmpMDTopen 1282 | 1283 | elif prefixLength or lastPrefixLength: 1284 | # Either open or close a level... 1285 | commonPrefixLength = self.getCommon(pref, lastPrefix) 1286 | paragraphStack = False 1287 | while commonPrefixLength < lastPrefixLength: 1288 | tmp = self.closeList(lastPrefix[lastPrefixLength-1], mDTopen) 1289 | output.append(tmp) 1290 | mDTopen = False 1291 | lastPrefixLength -= 1 1292 | if prefixLength <= commonPrefixLength and commonPrefixLength > 0: 1293 | tmpOutput, tmpMDTopen = self.nextItem(pref[commonPrefixLength-1], mDTopen) 1294 | output.append(tmpOutput) 1295 | if tmpMDTopen is not None: 1296 | mDTopen = tmpMDTopen 1297 | 1298 | while prefixLength > commonPrefixLength: 1299 | char = pref[commonPrefixLength:commonPrefixLength+1] 1300 | tmpOutput, tmpMDTOpen = self.openList(char, mLastSection) 1301 | if tmpMDTOpen: 1302 | mDTopen = True 1303 | output.append(tmpOutput) 1304 | mLastSection = '' 1305 | mInPre = False 1306 | 1307 | if char == ';': 1308 | # FIXME: This is dupe of code above 1309 | term = t2 = '' 1310 | z = self.findColonNoLinks(t, term, t2) 1311 | if z is not False: 1312 | term, t2 = z[1:2] 1313 | t = t2 1314 | output.append(term) 1315 | tmpOutput, tmpMDTopen = self.nextItem(':', mDTopen) 1316 | output.append(tmpOutput) 1317 | if tmpMDTopen is not None: 1318 | mDTopen = tmpMDTopen 1319 | 1320 | commonPrefixLength += 1 1321 | 1322 | lastPrefix = pref2 1323 | 1324 | if prefixLength == 0: 1325 | # No prefix (not in list)--go to paragraph mode 1326 | # XXX: use a stack for nestable elements like span, table and div 1327 | openmatch = _openMatchPat.search(t) 1328 | closematch = self.closeMatchPat.search(t) 1329 | if openmatch or closematch: 1330 | paragraphStack = False 1331 | output.append(self.closeParagraph(mLastSection)) 1332 | mLastSection = '' 1333 | if preCloseMatch: 1334 | mInPre = False 1335 | if preOpenMatch: 1336 | mInPre = True 1337 | inBlockElem = bool(not closematch) 1338 | elif not inBlockElem and not mInPre: 1339 | if t[0:1] == ' ' and (mLastSection == 'pre' or t.strip() != ''): 1340 | # pre 1341 | if mLastSection != 'pre': 1342 | paragraphStack = False 1343 | output.append(self.closeParagraph('') + '
    ')
    1344 |                             mInPre = False
    1345 |                             mLastSection = 'pre'
    1346 |                         t = t[1:]
    1347 |                     else:
    1348 |                         # paragraph
    1349 |                         if t.strip() == '':
    1350 |                             if paragraphStack:
    1351 |                                 output.append(paragraphStack + '
    ') 1352 | paragraphStack = False 1353 | mLastSection = 'p' 1354 | else: 1355 | if mLastSection != 'p': 1356 | output.append(self.closeParagraph(mLastSection)) 1357 | mLastSection = '' 1358 | mInPre = False 1359 | paragraphStack = '

    ' 1360 | else: 1361 | paragraphStack = '

    ' 1362 | else: 1363 | if paragraphStack: 1364 | output.append(paragraphStack) 1365 | paragraphStack = False 1366 | mLastSection = 'p' 1367 | elif mLastSection != 'p': 1368 | output.append(self.closeParagraph(mLastSection) + '

    ') 1369 | mLastSection = 'p' 1370 | mInPre = False 1371 | 1372 | # somewhere above we forget to get out of pre block (bug 785) 1373 | if preCloseMatch and mInPre: 1374 | mInPre = False 1375 | 1376 | if paragraphStack is False: 1377 | output.append(t + "\n") 1378 | 1379 | while prefixLength: 1380 | output.append(self.closeList(pref2[prefixLength-1], mDTopen)) 1381 | mDTopen = False 1382 | prefixLength -= 1 1383 | 1384 | if mLastSection != '': 1385 | output.append('') 1386 | mLastSection = '' 1387 | 1388 | return ''.join(output) 1389 | 1390 | 1391 | class Parser(BaseParser): 1392 | def __init__(self, base_url=None): 1393 | super(Parser, self).__init__() 1394 | self.base_url = base_url 1395 | self.show_toc = True 1396 | 1397 | def parse(self, text, show_toc=True, tags=ALLOWED_TAGS, 1398 | attributes=ALLOWED_ATTRIBUTES, styles=[], nofollow=False, 1399 | strip_comments=False, toc_string='Table of Contents'): 1400 | """Returns HTML from MediaWiki markup""" 1401 | self.show_toc = show_toc 1402 | self.tags = tags 1403 | if text[-1:] != '\n': 1404 | text = text + '\n' 1405 | taggedNewline = True 1406 | else: 1407 | taggedNewline = False 1408 | 1409 | text = self.strip(text, stripcomments=strip_comments) 1410 | text = self.removeHtmlTags(text) 1411 | if self.base_url: 1412 | text = self.replaceVariables(text) 1413 | text = self.doTableStuff(text) 1414 | text = self.parseHorizontalRule(text) 1415 | text = self.checkTOC(text) 1416 | text = self.parseHeaders(text) 1417 | text = self.parseAllQuotes(text) 1418 | text = self.replaceExternalLinks(text) 1419 | text = self.formatHeadings(text, True, toc_string) 1420 | text = self.unstrip(text) 1421 | text = self.fixtags(text) 1422 | text = self.doBlockLevels(text, True) 1423 | text = self.replaceInternalLinks(text) 1424 | text = self.unstripNoWiki(text) 1425 | text = text.split('\n') 1426 | text = '\n'.join(text) 1427 | if taggedNewline and text[-1:] == '\n': 1428 | text = text[:-1] 1429 | text = self.bleach(text, nofollow, attributes, styles, strip_comments) 1430 | return text 1431 | 1432 | def bleach(self, text, nofollow, attributes, styles, strip_comments): 1433 | # Pass output through bleach and linkify 1434 | if nofollow: 1435 | callbacks = [bleach.callbacks.nofollow] 1436 | else: 1437 | callbacks = [] 1438 | text = bleach.linkify(text, callbacks=callbacks) 1439 | css_sanitizer = CSSSanitizer(allowed_css_properties=styles) 1440 | return bleach.clean(text, tags=self.tags, attributes=attributes, 1441 | css_sanitizer=css_sanitizer, strip_comments=strip_comments) 1442 | 1443 | def checkTOC(self, text): 1444 | if text.find("__NOTOC__") != -1: 1445 | text = text.replace("__NOTOC__", "") 1446 | self.show_toc = False 1447 | if text.find("__TOC__") != -1: 1448 | text = text.replace("__TOC__", "") 1449 | self.show_toc = True 1450 | return text 1451 | 1452 | def replaceVariables(self, text, args={}, argsOnly=False): 1453 | """ 1454 | Replace magic variables, templates, and template arguments 1455 | with the appropriate text. Templates are substituted recursively, 1456 | taking care to avoid infinite loops. 1457 | """ 1458 | 1459 | # Prevent too big inclusions 1460 | # if (len(text) > self.max_include_size: 1461 | # return text 1462 | 1463 | # This function is called recursively. To keep track of arguments we need a stack: 1464 | self.arg_stack.append(args) 1465 | 1466 | braceCallbacks = {} 1467 | if not argsOnly: 1468 | braceCallbacks[2] = [None, self.braceSubstitution] 1469 | braceCallbacks[3] = [None, self.argSubstitution] 1470 | 1471 | callbacks = { 1472 | '{': { 1473 | 'end': '}', 1474 | 'cb': braceCallbacks, 1475 | 'min': argsOnly and 3 or 2, 1476 | 'max': 3 1477 | }, 1478 | '[': { 1479 | 'end': ']', 1480 | 'cb': {2: None}, 1481 | 'min': 2, 1482 | 'max': 2 1483 | } 1484 | } 1485 | text = replace_callback(text, callbacks) 1486 | mArgStack.pop() 1487 | 1488 | return text 1489 | 1490 | def replace_callback(self, text, callbacks): 1491 | """ 1492 | parse any parentheses in format ((title|part|part)) 1493 | and call callbacks to get a replacement text for any found piece 1494 | """ 1495 | openingBraceStack = [] # this array will hold a stack of parentheses which are not closed yet 1496 | lastOpeningBrace = -1 # last not closed parentheses 1497 | 1498 | validOpeningBraces = ''.join(callbacks.keys()) 1499 | 1500 | i = 0 1501 | while i < len(text): 1502 | if lastOpeningBrace == -1: 1503 | currentClosing = '' 1504 | search = validOpeningBraces 1505 | else: 1506 | currentClosing = openingBraceStack[lastOpeningBrace]['braceEnd'] 1507 | search = validOpeningBraces + '|' + currentClosing 1508 | rule = None 1509 | pos = -1 1510 | for c in search: 1511 | pos = max(pos, text.find(c, i)) 1512 | pos -= i 1513 | pos += 1 1514 | if pos == 0: 1515 | pos = len(text)-i 1516 | i += pos 1517 | if i < len(text): 1518 | if text[i] == '|': 1519 | found = 'pipe' 1520 | elif text[i] == currentClosing: 1521 | found = 'close' 1522 | elif text[i] in callbacks: 1523 | found = 'open' 1524 | rule = callbacks[text[i]] 1525 | else: 1526 | i += 1 1527 | continue 1528 | else: 1529 | break 1530 | 1531 | if found == 'open': 1532 | # found opening brace, let's add it to parentheses stack 1533 | piece = { 1534 | 'brace': text[i], 1535 | 'braceEnd': rule['end'], 1536 | 'title': u'', 1537 | 'parts': None 1538 | } 1539 | 1540 | # count opening brace characters 1541 | count = 0 1542 | while True: 1543 | if text[i+count:i+1+count] == piece['brace']: 1544 | count += 1 1545 | else: 1546 | break 1547 | piece['count'] = count 1548 | i += piece['count'] 1549 | piece['startAt'] = piece['partStart'] = i 1550 | 1551 | # we need to add to stack only if opening brace count is enough for one of the rules 1552 | if piece['count'] >= rule['min']: 1553 | lastOpeningBrace += 1 1554 | openingBraceStack[lastOpeningBrace] = piece 1555 | elif found == 'close': 1556 | maxCount = openingBraceStack[lastOpeningBrace]['count'] 1557 | count = 0 1558 | while count < maxCount: 1559 | if text[i+count:i+1+count] == text[i]: 1560 | count += 1 1561 | else: 1562 | break 1563 | 1564 | # check for maximum matching characters (if there are 5 closing 1565 | # characters, we will probably need only 3 - depending on the rules) 1566 | matchingCount = 0 1567 | matchingCallback = None 1568 | cbType = callbacks[openingBraceStack[lastOpeningBrace]['brace']] 1569 | if count > cbType['max']: 1570 | # The specified maximum exists in the callback array, unless the caller 1571 | # has made an error 1572 | matchingCount = cbType['max'] 1573 | else: 1574 | # Count is less than the maximum 1575 | # Skip any gaps in the callback array to find the true largest match 1576 | # Need to use array_key_exists not isset because the callback can be null 1577 | matchingCount = count 1578 | while matchingCount > 0 and matchingCount not in cbType['cb']: 1579 | matchingCount -= 1 1580 | 1581 | if matchingCount <= 0: 1582 | i += count 1583 | continue 1584 | matchingCallback = cbType['cb'][matchingCount] 1585 | 1586 | # let's set a title or last part (if '|' was found) 1587 | if openingBraceStack[lastOpeningBrace]['parts'] is None: 1588 | openingBraceStack[lastOpeningBrace]['title'] = \ 1589 | text[openingBraceStack[lastOpeningBrace]['partStart']:i] 1590 | else: 1591 | openingBraceStack[lastOpeningBrace]['parts'].append( 1592 | text[openingBraceStack[lastOpeningBrace]['partStart']:i] 1593 | ) 1594 | 1595 | pieceStart = openingBraceStack[lastOpeningBrace]['startAt'] - matchingCount 1596 | pieceEnd = i + matchingCount 1597 | 1598 | if callable(matchingCallback): 1599 | cbArgs = { 1600 | 'text': text[pieceStart:pieceEnd], 1601 | 'title': openingBraceStack[lastOpeningBrace]['title'].strip(), 1602 | 'parts': openingBraceStack[lastOpeningBrace]['parts'], 1603 | 'lineStart': pieceStart > 0 and text[pieceStart-1] == "\n" 1604 | } 1605 | # finally we can call a user callback and replace piece of text 1606 | replaceWith = matchingCallback(cbArgs) 1607 | text = text[:pieceStart] + replaceWith + text[pieceEnd:] 1608 | i = pieceStart + len(replaceWith) 1609 | else: 1610 | # null value for callback means that parentheses should be parsed, but not replaced 1611 | i += matchingCount 1612 | 1613 | # reset last opening parentheses, but keep it in case there are unused characters 1614 | piece = { 1615 | 'brace': openingBraceStack[lastOpeningBrace]['brace'], 1616 | 'braceEnd': openingBraceStack[lastOpeningBrace]['braceEnd'], 1617 | 'count': openingBraceStack[lastOpeningBrace]['count'], 1618 | 'title': '', 1619 | 'parts': None, 1620 | 'startAt': openingBraceStack[lastOpeningBrace]['startAt'] 1621 | } 1622 | openingBraceStack[lastOpeningBrace] = None 1623 | lastOpeningBrace -= 1 1624 | 1625 | if matchingCount < piece['count']: 1626 | piece['count'] -= matchingCount 1627 | piece['startAt'] -= matchingCount 1628 | piece['partStart'] = piece['startAt'] 1629 | # do we still qualify for any callback with remaining count? 1630 | currentCbList = callbacks[piece['brace']]['cb'] 1631 | while piece['count']: 1632 | if piece['count'] in currentCbList: 1633 | lastOpeningBrace += 1 1634 | openingBraceStack[lastOpeningBrace] = piece 1635 | break 1636 | 1637 | piece['count'] -= 1 1638 | 1639 | elif found == 'pipe': 1640 | # lets set a title if it is a first separator, or next part otherwise 1641 | if openingBraceStack[lastOpeningBrace]['parts'] is None: 1642 | openingBraceStack[lastOpeningBrace]['title'] = \ 1643 | text[openingBraceStack[lastOpeningBrace]['partStart']:i] 1644 | openingBraceStack[lastOpeningBrace]['parts'] = [] 1645 | else: 1646 | openingBraceStack[lastOpeningBrace]['parts'].append( 1647 | text[openingBraceStack[lastOpeningBrace]['partStart']:i] 1648 | ) 1649 | i += 1 1650 | openingBraceStack[lastOpeningBrace]['partStart'] = i 1651 | 1652 | return text 1653 | 1654 | def doTableStuff(self, text): 1655 | t = text.split("\n") 1656 | td = [] # Is currently a td tag open? 1657 | ltd = [] # Was it TD or TH? 1658 | tr = [] # Is currently a tr tag open? 1659 | ltr = [] # tr attributes 1660 | has_opened_tr = [] # Did this table open a element? 1661 | indent_level = 0 # indent level of the table 1662 | 1663 | for k, x in zip(range(len(t)), t): 1664 | x = x.strip() 1665 | fc = x[0:1] 1666 | matches = _zomgPat.match(x) 1667 | if matches: 1668 | indent_level = len(matches.group(1)) 1669 | 1670 | attributes = self.unstripForHTML(matches.group(2)) 1671 | 1672 | t[k] = '

    '*indent_level + '' 1673 | td.append(False) 1674 | ltd.append('') 1675 | tr.append(False) 1676 | ltr.append('') 1677 | has_opened_tr.append(False) 1678 | elif len(td) == 0: 1679 | pass 1680 | elif '|}' == x[0:2]: 1681 | z = "" + x[2:] 1682 | l = ltd.pop() 1683 | if not has_opened_tr.pop(): 1684 | z = "" + z 1685 | if tr.pop(): 1686 | z = "" + z 1687 | if td.pop(): 1688 | z = '' + z 1689 | ltr.pop() 1690 | t[k] = z + '
    '*indent_level 1691 | elif '|-' == x[0:2]: # Allows for |------------- 1692 | x = x[1:] 1693 | while x != '' and x[0:1] == '-': 1694 | x = x[1:] 1695 | z = '' 1696 | l = ltd.pop() 1697 | has_opened_tr.pop() 1698 | has_opened_tr.append(True) 1699 | if tr.pop(): 1700 | z = '' + z 1701 | if td.pop(): 1702 | z = '' + z 1703 | ltr.pop() 1704 | t[k] = z 1705 | tr.append(False) 1706 | td.append(False) 1707 | ltd.append('') 1708 | attributes = self.unstripForHTML(x) 1709 | ltr.append(self.fixTagAttributes(attributes, 'tr')) 1710 | elif '|' == fc or '!' == fc or '|+' == x[0:2]: # Caption 1711 | # x is a table row 1712 | if '|+' == x[0:2]: 1713 | fc = '+' 1714 | x = x[1:] 1715 | x = x[1:] 1716 | if fc == '!': 1717 | x = x.replace('!!', '||') 1718 | # Split up multiple cells on the same line. 1719 | # FIXME: This can result in improper nesting of tags processed 1720 | # by earlier parser steps, but should avoid splitting up eg 1721 | # attribute values containing literal "||". 1722 | x = x.split('||') 1723 | 1724 | t[k] = '' 1725 | 1726 | # Loop through each table cell 1727 | for theline in x: 1728 | z = '' 1729 | if fc != '+': 1730 | tra = ltr.pop() 1731 | if not tr.pop(): 1732 | z = '\n' 1733 | tr.append(True) 1734 | ltr.append('') 1735 | has_opened_tr.pop() 1736 | has_opened_tr.append(True) 1737 | l = ltd.pop() 1738 | if td.pop(): 1739 | z = '' + z 1740 | if fc == '|': 1741 | l = 'td' 1742 | elif fc == '!': 1743 | l = 'th' 1744 | elif fc == '+': 1745 | l = 'caption' 1746 | else: 1747 | l = '' 1748 | ltd.append(l) 1749 | 1750 | #Cell parameters 1751 | y = theline.split('|', 1) 1752 | # Note that a '|' inside an invalid link should not 1753 | # be mistaken as delimiting cell parameters 1754 | if y[0].find('[[') != -1: 1755 | y = [theline] 1756 | 1757 | if len(y) == 1: 1758 | y = z + "<" + l + ">" + y[0] 1759 | else: 1760 | attributes = self.unstripForHTML(y[0]) 1761 | y = z + "<" + l + self.fixTagAttributes(attributes, l) + ">" + y[1] 1762 | 1763 | t[k] += y 1764 | td.append(True) 1765 | 1766 | while len(td) > 0: 1767 | l = ltd.pop() 1768 | if td.pop(): 1769 | t.append('') 1770 | if tr.pop(): 1771 | t.append('') 1772 | if not has_opened_tr.pop(): 1773 | t.append('') 1774 | t.append('') 1775 | 1776 | text = '\n'.join(t) 1777 | # special case: don't return empty table 1778 | if text == "\n\n
    ": 1779 | text = '' 1780 | 1781 | return text 1782 | 1783 | def formatToc(self, toc_string): 1784 | return '

    {0}

    '.format(toc_string) 1785 | 1786 | def formatHeadings(self, text, isMain, toc_string): 1787 | """ 1788 | This function accomplishes several tasks: 1789 | 1) Auto-number headings if that option is enabled 1790 | 2) Add an [edit] link to sections for logged in users who have enabled the option 1791 | 3) Add a Table of contents on the top for users who have enabled the option 1792 | 4) Auto-anchor headings 1793 | 1794 | It loops through all headlines, collects the necessary data, then splits up the 1795 | string and re-inserts the newly formatted headlines. 1796 | """ 1797 | doNumberHeadings = False 1798 | 1799 | # For compatibility with MediaWiki and backwards compatibility with ourselves 1800 | # we'll strip these 1801 | text = text.replace("__NOEDITSECTION__", "") 1802 | text = text.replace("__NEWSECTIONLINK__", "") 1803 | 1804 | # Get all headlines for numbering them and adding funky stuff like [edit] 1805 | # links - this is for later, but we need the number of headlines right now 1806 | matches = _headerPat.findall(text) 1807 | numMatches = len(matches) 1808 | 1809 | # if there are fewer than 4 headlines in the article, do not show TOC 1810 | # unless it's been explicitly enabled. 1811 | enoughToc = self.show_toc and (numMatches >= 4 or text.find("") != -1) 1812 | 1813 | # if the string __FORCETOC__ (not case-sensitive) occurs in the HTML, 1814 | # override above conditions and always show TOC above first header 1815 | if text.find("__FORCETOC__") != -1: 1816 | self.show_toc = True 1817 | enoughToc = True 1818 | text = text.replace("__FORCETOC__", "") 1819 | # Never ever show TOC if no headers 1820 | if numMatches < 1: 1821 | enoughToc = False 1822 | 1823 | # headline counter 1824 | headlineCount = 0 1825 | sectionCount = 0 # headlineCount excluding template sections 1826 | 1827 | # Ugh .. the TOC should have neat indentation levels which can be 1828 | # passed to the skin functions. These are determined here 1829 | toc = [] 1830 | head = {} 1831 | sublevelCount = {} 1832 | levelCount = {} 1833 | level = 0 1834 | prevlevel = 0 1835 | toclevel = 0 1836 | prevtoclevel = 0 1837 | refers = {} 1838 | refcount = {} 1839 | wgMaxTocLevel = 5 1840 | 1841 | for match in matches: 1842 | headline = match[2] 1843 | istemplate = False 1844 | templatetitle = '' 1845 | templatesection = 0 1846 | numbering = [] 1847 | 1848 | m = _templateSectionPat.search(headline) 1849 | if m: 1850 | istemplate = True 1851 | templatetitle = b64decode(m[0]) 1852 | templatesection = 1 + int(b64decode(m[1])) 1853 | headline = _templateSectionPat.sub('', headline) 1854 | 1855 | if toclevel: 1856 | prevlevel = level 1857 | prevtoclevel = toclevel 1858 | 1859 | level = int(matches[headlineCount][0]) 1860 | 1861 | if doNumberHeadings or enoughToc: 1862 | if level > prevlevel: 1863 | toclevel += 1 1864 | sublevelCount[toclevel] = 0 1865 | if toclevel < wgMaxTocLevel: 1866 | toc.append('
      ') 1867 | elif level < prevlevel and toclevel > 1: 1868 | # Decrease TOC level, find level to jump to 1869 | 1870 | if toclevel == 2 and level < levelCount[1]: 1871 | toclevel = 1 1872 | else: 1873 | for i in range(toclevel, 0, -1): 1874 | if levelCount[i] == level: 1875 | # Found last matching level 1876 | toclevel = i 1877 | break 1878 | elif levelCount[i] < level: 1879 | toclevel = i + 1 1880 | break 1881 | if toclevel < wgMaxTocLevel: 1882 | toc.append("") 1883 | toc.append("
    " * max(prevtoclevel - toclevel, 0)) 1884 | else: 1885 | toc.append("") 1886 | 1887 | levelCount[toclevel] = level 1888 | 1889 | # count number of headlines for each level 1890 | sublevelCount[toclevel] += 1 1891 | for i in range(1, toclevel+1): 1892 | if sublevelCount[i]: 1893 | numbering.append(sublevelCount[i]) 1894 | 1895 | # The canonized header is a version of the header text safe to use for links 1896 | # Avoid insertion of weird stuff like by expanding the relevant sections 1897 | canonized_headline = self.unstrip(headline) 1898 | canonized_headline = self.unstripNoWiki(canonized_headline) 1899 | 1900 | # -- don't know what to do with this yet. 1901 | # Remove link placeholders by the link text. 1902 | # 1903 | # turns into 1904 | # link text with suffix 1905 | # $canonized_headline = preg_replace( '//e', 1906 | # "\$this->mLinkHolders['texts'][\$1]", 1907 | # $canonized_headline ); 1908 | # $canonized_headline = preg_replace( '//e', 1909 | # "\$this->mInterwikiLinkHolders['texts'][\$1]", 1910 | # $canonized_headline ); 1911 | 1912 | # strip out HTML 1913 | canonized_headline = _tagPat.sub('', canonized_headline) 1914 | tocline = canonized_headline.strip() 1915 | # Save headline for section edit hint before it's escaped 1916 | headline_hint = tocline 1917 | canonized_headline = self.escapeId(tocline) 1918 | refers[headlineCount] = canonized_headline 1919 | 1920 | # count how many in assoc. array so we can track dupes in anchors 1921 | if canonized_headline not in refers: 1922 | refers[canonized_headline] = 1 1923 | else: 1924 | refers[canonized_headline] += 1 1925 | refcount[headlineCount] = refers[canonized_headline] 1926 | 1927 | numbering = '.'.join(str(x) for x in numbering) 1928 | 1929 | # Don't number the heading if it is the only one (looks silly) 1930 | if doNumberHeadings and numMatches > 1: 1931 | # the two are different if the line contains a link 1932 | headline = numbering + ' ' + headline 1933 | 1934 | # Create the anchor for linking from the TOC to the section 1935 | anchor = canonized_headline 1936 | if refcount[headlineCount] > 1: 1937 | anchor += '_' + str(refcount[headlineCount]) 1938 | 1939 | if enoughToc: 1940 | toc.append('
  • ') 1945 | toc.append(numbering) 1946 | toc.append(' ') 1947 | toc.append(tocline) 1948 | toc.append('') 1949 | 1950 | # if showEditLink and (not istemplate or templatetitle != u""): 1951 | # if not head[headlineCount]: 1952 | # head[headlineCount] = u'' 1953 | # 1954 | # if istemplate: 1955 | # head[headlineCount] += sk.editSectionLinkForOther(templatetile, templatesection) 1956 | # else: 1957 | # head[headlineCount] += sk.editSectionLink(mTitle, sectionCount+1, headline_hint) 1958 | 1959 | # give headline the correct tag 1960 | if headlineCount not in head: 1961 | head[headlineCount] = [] 1962 | h = head[headlineCount] 1963 | h.append('') 1968 | h.append(matches[headlineCount][1].strip()) 1969 | h.append(headline.strip()) 1970 | h.append('') 1973 | 1974 | headlineCount += 1 1975 | 1976 | if not istemplate: 1977 | sectionCount += 1 1978 | 1979 | if enoughToc: 1980 | if toclevel < wgMaxTocLevel: 1981 | toc.append("
  • ") 1982 | toc.append("" * max(0, toclevel - 1)) 1983 | toc.insert(0, self.formatToc(toc_string)) 1984 | toc.append('
    ') 1985 | 1986 | # split up and insert constructed headlines 1987 | 1988 | blocks = _headerPat.split(text) 1989 | 1990 | i = 0 1991 | len_blocks = len(blocks) 1992 | forceTocPosition = text.find("") 1993 | full = [] 1994 | while i < len_blocks: 1995 | j = i//4 1996 | full.append(blocks[i]) 1997 | if enoughToc and not i and isMain and forceTocPosition == -1: 1998 | full += toc 1999 | toc = None 2000 | if j in head and head[j]: 2001 | full += head[j] 2002 | head[j] = None 2003 | i += 4 2004 | full = ''.join(str(x) for x in full) 2005 | if forceTocPosition != -1: 2006 | return full.replace("", ''.join(str(x) for x in toc), 1) 2007 | else: 2008 | return full 2009 | 2010 | 2011 | def parse(text, show_toc=True, tags=ALLOWED_TAGS, 2012 | attributes=ALLOWED_ATTRIBUTES, nofollow=False, 2013 | toc_string='Table of Contents'): 2014 | """Returns HTML from MediaWiki markup""" 2015 | p = Parser() 2016 | return p.parse(text, show_toc=show_toc, tags=tags, attributes=attributes, 2017 | nofollow=nofollow, toc_string=toc_string) 2018 | 2019 | 2020 | def parselite(text): 2021 | """Returns HTML from MediaWiki markup ignoring 2022 | without headings""" 2023 | p = BaseParser() 2024 | return p.parse(text) 2025 | 2026 | 2027 | TRUNCATE_URL_PATTERN = re.compile(r'(/[^/]+/?)$') 2028 | 2029 | 2030 | def truncate_url(url, length=40): 2031 | if len(url) <= length: 2032 | return url 2033 | match = TRUNCATE_URL_PATTERN.search(url) 2034 | if not match: 2035 | return url 2036 | l = len(match.group(1)) 2037 | firstpart = url[0:len(url)-l] 2038 | secondpart = match.group(1) 2039 | if firstpart == firstpart[0:length-3]: 2040 | if secondpart != secondpart[0:length-3]: 2041 | secondpart = secondpart[0:length-3] + '...' 2042 | else: 2043 | firstpart = firstpart[0:length-3] 2044 | secondpart = '...' + secondpart 2045 | t_url = firstpart+secondpart 2046 | return t_url 2047 | 2048 | 2049 | SAFE_NAME_REMOVE_SLASHES = re.compile(r"[^a-zA-Z0-9\-_\s.]") 2050 | SAFE_NAME_NO_REMOVE_SLASHES = re.compile(r"[^a-zA-Z0-9\-_\s./]") 2051 | SAFE_NAME_PUNCTUATION_TO_HYPHEN = re.compile(r"[\s._]") 2052 | SAFE_NAME_REMOVE_DUPLICATE_HYPHEN = re.compile(r"[-]+") 2053 | 2054 | 2055 | def safe_name(name=None, remove_slashes=True): 2056 | if name is None: 2057 | return None 2058 | name = str2url(name) 2059 | if remove_slashes: 2060 | name = SAFE_NAME_REMOVE_SLASHES.sub("", name) 2061 | else: 2062 | name = SAFE_NAME_NO_REMOVE_SLASHES.sub("", name) 2063 | name = SAFE_NAME_PUNCTUATION_TO_HYPHEN.sub("-", name) 2064 | name = SAFE_NAME_REMOVE_DUPLICATE_HYPHEN.sub("-", name) 2065 | return name.strip("-").lower() 2066 | 2067 | 2068 | def str2url(strng): 2069 | """ 2070 | Takes a UTF-8 string and replaces all characters with the equivalent in 7-bit 2071 | ASCII. It returns a plain ASCII string usable in URLs. 2072 | """ 2073 | return unidecode.unidecode(strng, errors="replace") 2074 | -------------------------------------------------------------------------------- /wikimarkup/tests.py: -------------------------------------------------------------------------------- 1 | import unittest 2 | 3 | from .parser import parse, Parser, ALLOWED_ATTRIBUTES 4 | 5 | 6 | class WikimarkupTestCase(unittest.TestCase): 7 | def testHeadings(self): 8 | """ 9 | Test the parsing of all heading levels 10 | """ 11 | text = "=heading1=" 12 | self.assertEqual(parse(text), '

    heading1

    ') 13 | 14 | text = "==heading2==" 15 | self.assertEqual(parse(text), '

    heading2

    ') 16 | 17 | text = """=heading1=\n==heading2==\n===heading3===\n====heading4====\n=====heading5=====\n======heading6======""" 18 | assumed = """

    heading1

    \n

    heading2

    \n

    heading3

    \n

    heading4

    \n
    heading5
    \n
    heading6
    """ 19 | self.assertEqual(parse(text), assumed) 20 | 21 | def testLinks(self): 22 | """ 23 | Test the parsing of all link formats 24 | """ 25 | text = "[http://mydomain.com/ mydomain.com]" 26 | self.assertEqual(parse(text), '

    mydomain.com\n

    ') 27 | 28 | text = "[http://mydomain.com/]" 29 | self.assertEqual(parse(text), '

    http://mydomain.com/\n

    ') 30 | 31 | text = "[mydomain.com/]" 32 | self.assertEqual( 33 | parse(text), 34 | '

    [mydomain.com/]\n

    ') 35 | 36 | text = "[/hello/world]" 37 | self.assertEqual(parse(text), '

    /hello/world\n

    ') 38 | 39 | def testNofollowLinks(self): 40 | """ 41 | Test the parsing with nofollow set to True. 42 | """ 43 | text = 'http://mydomain.com/test' 44 | expected = '

    http://mydomain.com/test\n

    ' 45 | parsed = parse(text, nofollow=True) 46 | try: 47 | self.assertEqual(parsed, expected) 48 | except AssertionError: 49 | expected = ('

    ' 50 | 'http://mydomain.com/test\n

    ') 51 | self.assertEqual(parsed, expected) 52 | 53 | def testStyles(self): 54 | """ 55 | Test the parsing of all styles, including italic and bold 56 | """ 57 | text = "'''hey''' dude" 58 | self.assertEqual(parse(text), '

    hey dude\n

    ') 59 | 60 | text = "'''hey''' ''dude''" 61 | self.assertEqual(parse(text), '

    hey dude\n

    ') 62 | 63 | text = "''hey'' dude" 64 | self.assertEqual(parse(text), '

    hey dude\n

    ') 65 | 66 | text = "'''''hey dude'''''" 67 | self.assertEqual(parse(text), '

    hey dude\n

    ') 68 | 69 | text = "'''''hey'' dude'''" 70 | self.assertEqual(parse(text), '

    hey dude\n

    ') 71 | 72 | def testParagraphs(self): 73 | """ 74 | Test the parsing of newlines into paragraphs and linebreaks 75 | """ 76 | text = "hello\nhow are you\n\ngoodbye" 77 | self.assertEqual(parse(text), '

    hello\nhow are you\n

    goodbye\n

    ') 78 | 79 | text = "hello
    how are you\n\ngoodbye" 80 | self.assertEqual(parse(text), '

    hello
    how are you\n

    goodbye\n

    ') 81 | 82 | def testSafeHTML(self): 83 | """ 84 | Test the stripping of unsafe HTML code 85 | """ 86 | text = "" 87 | self.assertEqual(parse(text), '

    <script>alert(\'hi\');</script>\n

    ') 88 | 89 | text = '
    Hi
    ' 90 | self.assertEqual(parse(text), '

    <form method="post">Hi</form>\n

    ') 91 | 92 | def testLists(self): 93 | """ 94 | Test formatting ordered and unordered lists 95 | """ 96 | text = "* hello\n* world\n* list" 97 | self.assertEqual(parse(text), '
    • hello\n
    • world\n
    • list\n
    ') 98 | 99 | text = "# hello\n# world\n# list" 100 | self.assertEqual(parse(text), '
    1. hello\n
    2. world\n
    3. list\n
    ') 101 | 102 | text = "* hello\n* world\n# list\n* fun\n** isnt it\n*# yep" 103 | self.assertEqual(parse(text), '
    • hello\n
    • world\n
    \n
    1. list\n
    \n
    • fun\n
      • isnt it\n
      \n
      1. yep\n
      \n
    ') 104 | 105 | def testMixed(self): 106 | """ 107 | Test many formatting methods together 108 | """ 109 | text = """=My Heading=\n* here\n* is\n* a\n* [http://mydomain.com list]\n==Subheading==\n==show me a toc==\n===pretty please===""" 110 | assumed = '

    My Heading

    \n
    • here\n
    • is\n
    • a\n
    • list\n
    \n

    Subheading

    \n

    show me a toc

    \n

    pretty please

    ' 111 | self.assertEqual(parse(text), assumed) 112 | 113 | def testCommentsNotRemoved(self): 114 | """ 115 | Comments are removed. 116 | """ 117 | text = '' 118 | assumed = '

    \n

    ' 119 | self.assertEqual(parse(text), assumed) 120 | 121 | text = ('\n\n# follow\n# by\n\n= Another heading =') 124 | assumed = '

    ' 128 | self.assertEqual(parse(text), assumed) 129 | 130 | def testLessThanBracket(self): 131 | """ 132 | Make sure strings like "<- test" aren't removed 133 | """ 134 | text = '<- test' 135 | assumed = '

    <- test\n

    ' 136 | self.assertEqual(parse(text), assumed) 137 | 138 | text = '<- some text' 139 | assumed = '

    <- some text\n

    ' 140 | self.assertEqual(parse(text), assumed) 141 | 142 | text = '<- some text ->' 143 | assumed = '

    <- some text ->\n

    ' 144 | self.assertEqual(parse(text), assumed) 145 | 146 | text = '<- <->' 147 | assumed = '

    <- <->\n

    ' 148 | self.assertEqual(parse(text), assumed) 149 | 150 | text = '< ' 151 | assumed = '

    < <script type="text/javascript">alert("evil")</script>\n

    ' 152 | self.assertEqual(parse(text), assumed) 153 | 154 | text = '<> click here' 155 | assumed = '

    <> <a href="javascript:alert(\'boo!\')">click here</a>\n

    ' 156 | self.assertEqual(parse(text), assumed) 157 | 158 | text = '<- ' 159 | assumed = '

    <- \n

    ' 160 | self.assertEqual(parse(text), assumed) 161 | 162 | text= '<<< ' 163 | assumed = '

    <<< \n

    ' 164 | self.assertEqual(parse(text), assumed) 165 | 166 | text= '<
    010
    ' 167 | assumed = '<
    010
    ' 168 | self.assertEqual(parse(text), assumed) 169 | 170 | text= '<-script -->>alert("hi")' 171 | assumed = '

    <-script -->>alert("hi")</script>\n

    ' 172 | self.assertEqual(parse(text), assumed) 173 | 174 | text='' 175 | assumed = '

    <script>alert("foo");</script>\n</p></script>

    ' 176 | self.assertEqual(parse(text), assumed) 177 | 178 | def test_internal_link_hook_simple(self): 179 | p = Parser() 180 | """Internal link simple hook works""" 181 | def testHook(parser, space, name): 182 | return '%s' % (name.replace(' ', '+'), name) 183 | p.registerInternalLinkHook(None, testHook) 184 | text = '[[Some link]]' 185 | assumed = '

    Some link\n

    ' 186 | self.assertEqual(p.parse(text), assumed) 187 | 188 | def test_internal_link_hook_namespace(self): 189 | p = Parser() 190 | """Internal link namespace works""" 191 | def testHook(parser, space, name): 192 | return 'space:%s,name:%s' % (space, name) 193 | p.registerInternalLinkHook('Space', testHook) 194 | text = '[[Space:Name]]' 195 | assumed = '

    space:Space,name:Name\n

    ' 196 | self.assertEqual(p.parse(text), assumed) 197 | 198 | def test_nowiki(self): 199 | """ escapes formatting""" 200 | p = Parser() 201 | text = "\n\n# item '''1'''\nitem ''2''\n\n" 202 | assumed = u"

    \n\n# item '''1'''\nitem ''2''\n\n\n

    " 203 | 204 | self.assertEqual(p.parse(text), assumed) 205 | 206 | def testToc(self): 207 | """ 208 | Test that the table of contents can be configured. 209 | """ 210 | text = """=My Heading=\n* here\n* is\n* a\n* [http://mydomain.com list]\n==Subheading==\n==show me a toc==\n===pretty please===""" 211 | assumed = '

    My Heading

    \n
    • here\n
    • is\n
    • a\n
    • list\n
    \n

    Subheading

    \n

    show me a toc

    \n

    pretty please

    ' 212 | self.assertEqual(parse(text, toc_string='TOC'), assumed) 213 | 214 | def test_toc_placement(self): 215 | """ 216 | TOC is placed where the comment is within the text. 217 | """ 218 | text = """=First Heading=\n\nstuff""" 219 | assumed = '

    First Heading

    \n

    Table of Contents

    \n

    stuff\n

    ' 220 | self.assertEqual(parse(text), assumed) 221 | 222 | def test_forced_toc(self): 223 | """ 224 | Text containing __FORCETOC__ will have a toc regardless of length. 225 | """ 226 | text = """=First Heading=\n__FORCETOC__\nstuff""" 227 | assumed = '

    Table of Contents

    First Heading

    \n

    stuff\n

    ' 228 | self.assertEqual(parse(text), assumed) 229 | 230 | def test_entities_in_attributes(self): 231 | """ 232 | HTML entities should render in attributes without problems. 233 | """ 234 | assumed = '
    ' 235 | 236 | text = '
    ' 237 | self.assertEqual(parse(text), assumed) 238 | 239 | text = '
    ' 240 | self.assertEqual(parse(text), assumed) 241 | 242 | def test_styles(self): 243 | """Test the CSS style sanitizer.""" 244 | # border-color is whitelisted by default 245 | text = '
    Anyway, here\'s wonderwall
    ' 246 | pp = Parser() 247 | attribs = ALLOWED_ATTRIBUTES.copy() 248 | attribs['div'].append('style') 249 | 250 | self.assertEqual(pp.parse(text, styles=['font-weight'], attributes=attribs), text) 251 | 252 | text = '
    Anyway, here\'s wonderwall
    ' 253 | assumed = '
    Anyway, here\'s wonderwall
    ' 254 | self.assertEqual(pp.parse(text), assumed) 255 | 256 | 257 | 258 | 259 | 260 | --------------------------------------------------------------------------------