tags.
1208 | """
1209 | yield 0, ""
1210 | for tup in inner:
1211 | yield tup
1212 | yield 0, ""
1213 |
1214 | def wrap(self, source, outfile):
1215 | """Return the source with a code, pre, and div."""
1216 | return self._wrap_div(self._wrap_pre(self._wrap_code(source)))
1217 |
1218 | formatter = HtmlCodeFormatter(cssclass="codehilite", **formatter_opts)
1219 | return pygments.highlight(codeblock, lexer, formatter)
1220 |
1221 | def _code_block_sub(self, match):
1222 | codeblock = match.group(1)
1223 | codeblock = self._outdent(codeblock)
1224 | codeblock = self._detab(codeblock)
1225 | codeblock = codeblock.lstrip('\n') # trim leading newlines
1226 | codeblock = codeblock.rstrip() # trim trailing whitespace
1227 |
1228 | if "code-color" in self.extras and codeblock.startswith(":::"):
1229 | lexer_name, rest = codeblock.split('\n', 1)
1230 | lexer_name = lexer_name[3:].strip()
1231 | lexer = self._get_pygments_lexer(lexer_name)
1232 | codeblock = rest.lstrip("\n") # Remove lexer declaration line.
1233 | if lexer:
1234 | formatter_opts = self.extras['code-color'] or {}
1235 | colored = self._color_with_pygments(codeblock, lexer,
1236 | **formatter_opts)
1237 | return "\n\n%s\n\n" % colored
1238 |
1239 | codeblock = self._encode_code(codeblock)
1240 | return "\n\n%s\n
\n\n" % codeblock
1241 |
    def _do_code_blocks(self, text):
        """Process Markdown `<pre><code>` blocks (tab/4-space indented)."""
        # Built per-call because the indent width depends on self.tab_width.
        code_block_re = re.compile(r'''
            (?:\n\n|\A)
            ( # $1 = the code block -- one or more lines, starting with a space/tab
              (?:
                (?:[ ]{%d} | \t)  # Lines must start with a tab or a tab-width of spaces
                .*\n+
              )+
            )
            ((?=^[ ]{0,%d}\S)|\Z) # Lookahead for non-space at line-start, or end of doc
            ''' % (self.tab_width, self.tab_width),
            re.M | re.X)

        return code_block_re.sub(self._code_block_sub, text)
1257 |
1258 | # Rules for a code span:
1259 | # - backslash escapes are not interpreted in a code span
1260 | # - to include one or or a run of more backticks the delimiters must
1261 | # be a longer run of backticks
1262 | # - cannot start or end a code span with a backtick; pad with a
1263 | # space and that space will be removed in the emitted HTML
1264 | # See `test/tm-cases/escapes.text` for a number of edge-case
1265 | # examples.
1266 | _code_span_re = re.compile(r'''
1267 | (?%s" % c
1280 |
    def _do_code_spans(self, text):
        # * Backtick quotes are used for <code></code> spans.
        #
        # * You can use multiple backticks as the delimiters if you want to
        #   include literal backticks in the code span. So, this input:
        #
        #     Just type ``foo `bar` baz`` at the prompt.
        #
        #   Will translate to:
        #
        #     <p>Just type <code>foo `bar` baz</code> at the prompt.</p>
        #
        #   There's no arbitrary limit to the number of backticks you
        #   can use as delimiters. If you need three consecutive backticks
        #   in your code, use four for delimiters, etc.
        #
        # * You can use spaces to get literal backticks at the edges:
        #
        #     ... type `` `bar` `` ...
        #
        #   Turns to:
        #
        #     ... type <code>`bar`</code> ...
        return self._code_span_re.sub(self._code_span_sub, text)
1305 |
1306 | def _encode_code(self, text):
1307 | """Encode/escape certain characters inside Markdown code runs.
1308 | The point is that in code, these characters are literals,
1309 | and lose their special Markdown meanings.
1310 | """
1311 | replacements = [
1312 | # Encode all ampersands; HTML entities are not
1313 | # entities within a Markdown code span.
1314 | ('&', '&'),
1315 | # Do the angle bracket song and dance:
1316 | ('<', '<'),
1317 | ('>', '>'),
1318 | # Now, escape characters that are magic in Markdown:
1319 | ('*', g_escape_table['*']),
1320 | ('_', g_escape_table['_']),
1321 | ('{', g_escape_table['{']),
1322 | ('}', g_escape_table['}']),
1323 | ('[', g_escape_table['[']),
1324 | (']', g_escape_table[']']),
1325 | ('\\', g_escape_table['\\']),
1326 | ]
1327 | for before, after in replacements:
1328 | text = text.replace(before, after)
1329 | return text
1330 |
    # Emphasis regexes: \1 is the delimiter run (** or __, * or _); the
    # (?=\S)/(?<=\S) lookarounds forbid whitespace just inside the delimiters.
    _strong_re = re.compile(r"(\*\*|__)(?=\S)(.+?[*_]*)(?<=\S)\1", re.S)
    _em_re = re.compile(r"(\*|_)(?=\S)(.+?)(?<=\S)\1", re.S)
    # "code-friendly" variants: asterisk forms only, so underscores inside
    # identifiers are left alone.
    _code_friendly_strong_re = re.compile(
        r"\*\*(?=\S)(.+?[*_]*)(?<=\S)\*\*", re.S)
    _code_friendly_em_re = re.compile(r"\*(?=\S)(.+?)(?<=\S)\*", re.S)
1336 |
1337 | def _do_italics_and_bold(self, text):
1338 | # must go first:
1339 | if "code-friendly" in self.extras:
1340 | text = self._code_friendly_strong_re.sub(
1341 | r"\1", text)
1342 | text = self._code_friendly_em_re.sub(r"\1", text)
1343 | else:
1344 | text = self._strong_re.sub(r"\2", text)
1345 | text = self._em_re.sub(r"\2", text)
1346 | return text
1347 |
1348 | _block_quote_re = re.compile(r'''
1349 | ( # Wrap whole match in \1
1350 | (
1351 | ^[ \t]*>[ \t]? # '>' at the start of a line
1352 | .+\n # rest of the first line
1353 | (.+\n)* # subsequent consecutive lines
1354 | \n* # blanks
1355 | )+
1356 | )
1357 | ''', re.M | re.X)
1358 | _bq_one_level_re = re.compile('^[ \t]*>[ \t]?', re.M)
1359 |
1360 | _html_pre_block_re = re.compile(r'(\s*.+?
)', re.S)
1361 |
1362 | def _dedent_two_spaces_sub(self, match):
1363 | return re.sub(r'(?m)^ ', '', match.group(1))
1364 |
1365 | def _block_quote_sub(self, match):
1366 | bq = match.group(1)
1367 | bq = self._bq_one_level_re.sub('', bq) # trim one level of quoting
1368 | bq = self._ws_only_line_re.sub('', bq) # trim whitespace-only lines
1369 | bq = self._run_block_gamut(bq) # recurse
1370 |
1371 | bq = re.sub('(?m)^', ' ', bq)
1372 | # These leading spaces screw with content, so we need to fix
1373 | # that:
1374 | bq = self._html_pre_block_re.sub(self._dedent_two_spaces_sub, bq)
1375 |
1376 | return "\n%s\n
\n\n" % bq
1377 |
1378 | def _do_block_quotes(self, text):
1379 | if '>' not in text:
1380 | return text
1381 | return self._block_quote_re.sub(self._block_quote_sub, text)
1382 |
1383 | def _form_paragraphs(self, text):
1384 | # Strip leading and trailing lines:
1385 | text = text.strip('\n')
1386 |
1387 | # Wrap tags.
1388 | grafs = re.split(r"\n{2,}", text)
1389 | for i, graf in enumerate(grafs):
1390 | if graf in self.html_blocks:
1391 | # Unhashify HTML blocks
1392 | grafs[i] = self.html_blocks[graf]
1393 | else:
1394 | # Wrap
tags.
1395 | graf = self._run_span_gamut(graf)
1396 | grafs[i] = "
" + graf.lstrip(" \t") + "
"
1397 |
1398 | return "\n\n".join(grafs)
1399 |
1400 | def _add_footnotes(self, text):
1401 | if self.footnotes:
1402 | footer = [
1403 | '')
1424 | return text + '\n\n' + '\n'.join(footer)
1425 | else:
1426 | return text
1427 |
1428 | # Ampersand-encoding based entirely on Nat Irons's Amputator MT plugin:
1429 | # http://bumppo.net/projects/amputator/
1430 | _ampersand_re = re.compile(r'&(?!#?[xX]?(?:[0-9a-fA-F]+|\w+);)')
1431 | _naked_lt_re = re.compile(r'<(?![a-z/?\$!])', re.I)
1432 | _naked_gt_re = re.compile(r'''(?''', re.I)
1433 |
1434 | def _encode_amps_and_angles(self, text):
1435 | # Smart processing for ampersands and angle brackets that need
1436 | # to be encoded.
1437 | text = self._ampersand_re.sub('&', text)
1438 |
1439 | # Encode naked <'s
1440 | text = self._naked_lt_re.sub('<', text)
1441 |
1442 | # Encode naked >'s
1443 | # Note: Other markdown implementations (e.g. Markdown.pl, PHP
1444 | # Markdown) don't do this.
1445 | text = self._naked_gt_re.sub('>', text)
1446 | return text
1447 |
1448 | def _encode_backslash_escapes(self, text):
1449 | for ch, escape in g_escape_table.items():
1450 | text = text.replace("\\" + ch, escape)
1451 | return text
1452 |
    # Matches an angle-bracketed absolute URL autolink, e.g. <http://a/b>.
    _auto_link_re = re.compile(r'<((https?|ftp):[^\'">\s]+)>', re.I)
1454 |
1455 | def _auto_link_sub(self, match):
1456 | g1 = match.group(1)
1457 | return '%s' % (g1, g1)
1458 |
    # Matches an angle-bracketed email autolink, with optional "mailto:"
    # prefix; \1 is the bare address.
    _auto_email_link_re = re.compile(r"""
          <
           (?:mailto:)?
          (
              [-.\w]+
              \@
              [-\w]+(\.[-\w]+)*\.[a-z]+
          )
          >
        """, re.I | re.X | re.U)
1469 |
    def _auto_email_link_sub(self, match):
        """re.sub callback: emit the obfuscated mailto link for \\1."""
        # Unescape first so hashed special chars inside the address are
        # restored before entity-encoding.
        return self._encode_email_address(
            self._unescape_special_chars(match.group(1)))
1473 |
1474 | def _do_auto_links(self, text):
1475 | text = self._auto_link_re.sub(self._auto_link_sub, text)
1476 | text = self._auto_email_link_re.sub(self._auto_email_link_sub, text)
1477 | return text
1478 |
1479 | def _encode_email_address(self, addr):
1480 | # Input: an email address, e.g. "foo@example.com"
1481 | #
1482 | # Output: the email address as a mailto link, with each character
1483 | # of the address encoded as either a decimal or hex entity, in
1484 | # the hopes of foiling most address harvesting spam bots. E.g.:
1485 | #
1486 | # foo
1488 | # @example.com
1489 | #
1490 | # Based on a filter by Matthew Wickline, posted to the BBEdit-Talk
1491 | # mailing list:
1492 | chars = [_xml_encode_email_char_at_random(ch)
1493 | for ch in "mailto:" + addr]
1494 | # Strip the mailto: from the visible part.
1495 | addr = '%s' \
1496 | % (''.join(chars), ''.join(chars[7:]))
1497 | return addr
1498 |
1499 | def _do_link_patterns(self, text):
1500 | """Caveat emptor: there isn't much guarding against link
1501 | patterns being formed inside other standard Markdown links, e.g.
1502 | inside a [link def][like this].
1503 |
1504 | Dev Notes: *Could* consider prefixing regexes with a negative
1505 | lookbehind assertion to attempt to guard against this.
1506 | """
1507 | link_from_hash = {}
1508 | for regex, repl in self.link_patterns:
1509 | replacements = []
1510 | for match in regex.finditer(text):
1511 | if hasattr(repl, "__call__"):
1512 | href = repl(match)
1513 | else:
1514 | href = match.expand(repl)
1515 | replacements.append((match.span(), href))
1516 | for (start, end), href in reversed(replacements):
1517 | escaped_href = (
1518 | href.replace('"', '"') # b/c of attr quote
1519 | # To avoid markdown and :
1520 | .replace('*', g_escape_table['*'])
1521 | .replace('_', g_escape_table['_']))
1522 | link = '%s' % (escaped_href, text[start:end])
1523 | hash = md5(link).hexdigest()
1524 | link_from_hash[hash] = link
1525 | text = text[:start] + hash + text[end:]
1526 | for hash, link in link_from_hash.items():
1527 | text = text.replace(hash, link)
1528 | return text
1529 |
1530 | def _unescape_special_chars(self, text):
1531 | # Swap back in all the special characters we've hidden.
1532 | for ch, hash in g_escape_table.items():
1533 | text = text.replace(hash, ch)
1534 | return text
1535 |
    def _outdent(self, text):
        """Remove one level of line-leading tabs or spaces from `text`."""
        # Remove one level of line-leading tabs or spaces
        return self._outdent_re.sub('', text)
1539 |
1540 |
class MarkdownWithExtras(Markdown):
    """A markdowner class that enables most extras:

    - footnotes
    - code-color (only has effect if 'pygments' Python module on path)

    These are not included:
    - pyshell (specific to Python-related documenting)
    - code-friendly (because it *disables* part of the syntax)
    - link-patterns (because you need to specify some actual
      link-patterns anyway)
    """
    # Class-level default; overrides the base class's extras setting.
    extras = ["footnotes", "code-color"]
1554 |
1555 |
1556 | #---- internal support functions
1557 |
1558 | # From http://aspn.activestate.com/ASPN/Cookbook/Python/Recipe/52549
1559 | def _curry(*args, **kwargs):
1560 | function, args = args[0], args[1:]
1561 |
1562 | def result(*rest, **kwrest):
1563 | combined = kwargs.copy()
1564 | combined.update(kwrest)
1565 | return function(*args + rest, **combined)
1566 | return result
1567 |
1568 | # Recipe: regex_from_encoded_pattern (1.0)
1569 |
1570 |
1571 | def _regex_from_encoded_pattern(s):
1572 | """'foo' -> re.compile(re.escape('foo'))
1573 | '/foo/' -> re.compile('foo')
1574 | '/foo/i' -> re.compile('foo', re.I)
1575 | """
1576 | if s.startswith('/') and s.rfind('/') != 0:
1577 | # Parse it: /PATTERN/FLAGS
1578 | idx = s.rfind('/')
1579 | pattern, flags_str = s[1:idx], s[idx + 1:]
1580 | flag_from_char = {
1581 | "i": re.IGNORECASE,
1582 | "l": re.LOCALE,
1583 | "s": re.DOTALL,
1584 | "m": re.MULTILINE,
1585 | "u": re.UNICODE,
1586 | }
1587 | flags = 0
1588 | for char in flags_str:
1589 | try:
1590 | flags |= flag_from_char[char]
1591 | except KeyError:
1592 | raise ValueError("unsupported regex flag: '%s' in '%s' "
1593 | "(must be one of '%s')"
1594 | % (char, s, ''.join(flag_from_char.keys())))
1595 | return re.compile(s[1:idx], flags)
1596 | else: # not an encoded regex
1597 | return re.compile(re.escape(s))
1598 |
1599 | # Recipe: dedent (0.1.2)
1600 |
1601 |
def _dedentlines(lines, tabsize=8, skip_first_line=False):
    """_dedentlines(lines, tabsize=8, skip_first_line=False) -> dedented lines

    "lines" is a list of lines to dedent.
    "tabsize" is the tab width to use for indent width calculations.
    "skip_first_line" is a boolean indicating if the first line should
        be skipped for calculating the indent width and for dedenting.
        This is sometimes useful for docstrings and similar.

    Same as dedent() except operates on a sequence of lines. Note: the
    lines list is modified **in-place**.
    """
    DEBUG = False
    if DEBUG:
        print "dedent: dedent(..., tabsize=%d, skip_first_line=%r)"\
              % (tabsize, skip_first_line)
    indents = []
    margin = None
    # Pass 1: compute the margin -- the smallest indent width over all
    # lines that contain non-whitespace.
    for i, line in enumerate(lines):
        if i == 0 and skip_first_line:
            continue
        indent = 0
        for ch in line:
            if ch == ' ':
                indent += 1
            elif ch == '\t':
                # Tabs advance to the next tab stop.
                indent += tabsize - (indent % tabsize)
            elif ch in '\r\n':
                continue  # skip all-whitespace lines
            else:
                break
        else:
            continue  # skip all-whitespace lines
        if DEBUG:
            print "dedent: indent=%d: %r" % (indent, line)
        if margin is None:
            margin = indent
        else:
            margin = min(margin, indent)
    if DEBUG:
        print "dedent: margin=%r" % margin

    # Pass 2: strip `margin` columns of leading whitespace from each line.
    if margin is not None and margin > 0:
        for i, line in enumerate(lines):
            if i == 0 and skip_first_line:
                continue
            removed = 0
            for j, ch in enumerate(line):
                if ch == ' ':
                    removed += 1
                elif ch == '\t':
                    removed += tabsize - (removed % tabsize)
                elif ch in '\r\n':
                    if DEBUG:
                        print "dedent: %r: EOL -> strip up to EOL" % line
                    lines[i] = lines[i][j:]
                    break
                else:
                    raise ValueError("unexpected non-whitespace char %r in "
                                     "line %r while removing %d-space margin"
                                     % (ch, line, margin))
                if DEBUG:
                    print "dedent: %r: %r -> removed %d/%d"\
                          % (line, ch, removed, margin)
                if removed == margin:
                    lines[i] = lines[i][j + 1:]
                    break
                elif removed > margin:
                    # A tab expanded past the margin: keep the overshoot
                    # as literal spaces.
                    lines[i] = ' ' * (removed - margin) + lines[i][j + 1:]
                    break
            else:
                if removed:
                    lines[i] = lines[i][removed:]
    return lines
1676 |
1677 |
def _dedent(text, tabsize=8, skip_first_line=False):
    """Dedent `text` (like textwrap.dedent, but tabs are NOT expanded).

    "tabsize" is the tab width used for indent-width calculations.
    "skip_first_line" excludes the first line from the margin calculation
    and from dedenting (handy for docstrings and similar).
    """
    lines = text.splitlines(1)
    _dedentlines(lines, tabsize=tabsize, skip_first_line=skip_first_line)
    return ''.join(lines)
1692 |
1693 |
1694 | class _memoized(object):
1695 | """Decorator that caches a function's return value each time it is called.
1696 | If called later with the same arguments, the cached value is returned, and
1697 | not re-evaluated.
1698 |
1699 | http://wiki.python.org/moin/PythonDecoratorLibrary
1700 | """
1701 |
1702 | def __init__(self, func):
1703 | self.func = func
1704 | self.cache = {}
1705 |
1706 | def __call__(self, *args):
1707 | try:
1708 | return self.cache[args]
1709 | except KeyError:
1710 | self.cache[args] = value = self.func(*args)
1711 | return value
1712 | except TypeError:
1713 | # uncachable -- for instance, passing a list as an argument.
1714 | # Better to not cache than to blow up entirely.
1715 | return self.func(*args)
1716 |
1717 | def __repr__(self):
1718 | """Return the function's docstring."""
1719 | return self.func.__doc__
1720 |
1721 |
def _xml_oneliner_re_from_tab_width(tab_width):
    """Standalone XML processing instruction regex.

    Built per tab width because the allowed leading indent is
    tab_width - 1 spaces (a full tab width would be a code block).
    """
    return re.compile(r"""
        (?:
            (?<=\n\n)       # Starting after a blank line
            |               # or
            \A\n?           # the beginning of the doc
        )
        (                           # save in $1
            [ ]{0,%d}
            (?:
                <\?\w+\b\s+.*?\?>   # XML processing instruction
                |
                <\w+:\w+\b\s+.*?/>  # namespaced single tag
            )
            [ \t]*
            (?=\n{2,}|\Z)           # followed by a blank line or end of document
        )
        """ % (tab_width - 1), re.X)
# Memoize: one compiled regex per tab width.
_xml_oneliner_re_from_tab_width = _memoized(_xml_oneliner_re_from_tab_width)
1742 |
1743 |
def _hr_tag_re_from_tab_width(tab_width):
    """Regex matching a standalone <hr> tag line for the given tab width."""
    return re.compile(r"""
        (?:
            (?<=\n\n)       # Starting after a blank line
            |               # or
            \A\n?           # the beginning of the doc
        )
        (                       # save in \1
            [ ]{0,%d}
            <(hr)               # start tag = \2
            \b                  # word break
            ([^<>])*?           #
            /?>                 # the matching end tag
            [ \t]*
            (?=\n{2,}|\Z)       # followed by a blank line or end of document
        )
        """ % (tab_width - 1), re.X)
# Memoize: one compiled regex per tab width.
_hr_tag_re_from_tab_width = _memoized(_hr_tag_re_from_tab_width)
1762 |
1763 |
1764 | def _xml_encode_email_char_at_random(ch):
1765 | r = random()
1766 | # Roughly 10% raw, 45% hex, 45% dec.
1767 | # '@' *must* be encoded. I [John Gruber] insist.
1768 | # Issue 26: '_' must be encoded.
1769 | if r > 0.9 and ch not in "@_":
1770 | return ch
1771 | elif r < 0.45:
1772 | # The [1:] is to drop leading '0': 0x63 -> x63
1773 | return '%s;' % hex(ord(ch))[1:]
1774 | else:
1775 | return '%s;' % ord(ch)
1776 |
1777 |
1778 | def _hash_text(text):
1779 | return 'md5:' + md5(text.encode("utf-8")).hexdigest()
1780 |
1781 |
1782 | #---- mainline
1783 |
class _NoReflowFormatter(optparse.IndentedHelpFormatter):
    """An optparse formatter that does NOT reflow the description."""

    def format_description(self, description):
        # Pass the description through verbatim (empty string if None).
        return description or ""
1789 |
1790 |
def _test():
    """Run this module's doctests (used by the --self-test option)."""
    import doctest
    doctest.testmod()
1794 |
1795 |
def main(argv=None):
    """Command-line driver: convert each given path to HTML on stdout."""
    if argv is None:
        argv = sys.argv
    # Only install a default logging handler if the host app hasn't.
    if not logging.root.handlers:
        logging.basicConfig()

    usage = "usage: %prog [PATHS...]"
    version = "%prog " + __version__
    parser = optparse.OptionParser(prog="markdown2", usage=usage,
                                   version=version, description=cmdln_desc,
                                   formatter=_NoReflowFormatter())
    parser.add_option("-v", "--verbose", dest="log_level",
                      action="store_const", const=logging.DEBUG,
                      help="more verbose output")
    parser.add_option("--encoding",
                      help="specify encoding of text content")
    parser.add_option("--html4tags", action="store_true", default=False,
                      help="use HTML 4 style for empty element tags")
    parser.add_option("-s", "--safe", metavar="MODE", dest="safe_mode",
                      help="sanitize literal HTML: 'escape' escapes "
                           "HTML meta chars, 'replace' replaces with an "
                           "[HTML_REMOVED] note")
    parser.add_option("-x", "--extras", action="append",
                      help="Turn on specific extra features (not part of "
                           "the core Markdown spec). Supported values: "
                           "'code-friendly' disables _/__ for emphasis; "
                           "'code-color' adds code-block syntax coloring; "
                           "'link-patterns' adds auto-linking based on patterns; "
                           "'footnotes' adds the footnotes syntax;"
                           "'xml' passes one-liner processing instructions and namespaced XML tags;"
                           "'pyshell' to put unindented Python interactive shell sessions in a block.")
    parser.add_option("--use-file-vars",
                      help="Look for and use Emacs-style 'markdown-extras' "
                           "file var to turn on extras. See "
                           # NOTE(review): a URL appears to have been
                           # stripped from this help string -- confirm
                           # against the upstream source.
                           ".")
    parser.add_option("--link-patterns-file",
                      help="path to a link pattern file")
    parser.add_option("--self-test", action="store_true",
                      help="run internal self-tests (some doctests)")
    parser.add_option("--compare", action="store_true",
                      help="run against Markdown.pl as well (for testing)")
    parser.set_defaults(log_level=logging.INFO, compare=False,
                        encoding="utf-8", safe_mode=None, use_file_vars=False)
    opts, paths = parser.parse_args()
    log.setLevel(opts.log_level)

    if opts.self_test:
        return _test()

    # Parse "-x name" / "-x name=arg" options into an extras dict
    # (value is int when possible, else the raw string, else None).
    if opts.extras:
        extras = {}
        for s in opts.extras:
            splitter = re.compile("[,;: ]+")
            for e in splitter.split(s):
                if '=' in e:
                    ename, earg = e.split('=', 1)
                    try:
                        earg = int(earg)
                    except ValueError:
                        pass
                else:
                    ename, earg = e, None
                extras[ename] = earg
    else:
        extras = None

    # Load "PATTERN HREF" pairs from the link-patterns file, skipping
    # blank lines and '#' comments.
    if opts.link_patterns_file:
        link_patterns = []
        f = open(opts.link_patterns_file)
        try:
            for i, line in enumerate(f.readlines()):
                if not line.strip():
                    continue
                if line.lstrip().startswith("#"):
                    continue
                try:
                    pat, href = line.rstrip().rsplit(None, 1)
                except ValueError:
                    raise MarkdownError("%s:%d: invalid link pattern line: %r"
                                        % (opts.link_patterns_file, i + 1, line))
                link_patterns.append(
                    (_regex_from_encoded_pattern(pat), href))
        finally:
            f.close()
    else:
        link_patterns = None

    from os.path import join, dirname, abspath, exists
    # Markdown.pl is expected in the sibling "test" dir (--compare mode).
    markdown_pl = join(dirname(dirname(abspath(__file__))), "test",
                       "Markdown.pl")
    for path in paths:
        if opts.compare:
            print "==== Markdown.pl ===="
            perl_cmd = 'perl %s "%s"' % (markdown_pl, path)
            o = os.popen(perl_cmd)
            perl_html = o.read()
            o.close()
            sys.stdout.write(perl_html)
            print "==== markdown2.py ===="
        html = markdown_path(path, encoding=opts.encoding,
                             html4tags=opts.html4tags,
                             safe_mode=opts.safe_mode,
                             extras=extras, link_patterns=link_patterns,
                             use_file_vars=opts.use_file_vars)
        sys.stdout.write(
            html.encode(sys.stdout.encoding or "utf-8", 'xmlcharrefreplace'))
        if opts.compare:
            # Normalize both outputs before comparing, if the test helper
            # is available; otherwise compare raw.
            test_dir = join(dirname(dirname(abspath(__file__))), "test")
            if exists(join(test_dir, "test_markdown2.py")):
                sys.path.insert(0, test_dir)
                from test_markdown2 import norm_html_from_html
                norm_html = norm_html_from_html(html)
                norm_perl_html = norm_html_from_html(perl_html)
            else:
                norm_html = html
                norm_perl_html = perl_html
            print "==== match? %r ====" % (norm_perl_html == norm_html)
1913 |
1914 |
if __name__ == "__main__":
    # Script entry point: exit status comes from main().
    sys.exit(main(sys.argv))
1917 |
--------------------------------------------------------------------------------
{{comment.content}} 17 |
18 | 19 |