├── .bumpversion.cfg
├── .flake8
├── .github
    └── workflows
    │   └── tests.yaml
├── .gitignore
├── .pre-commit-config.yaml
├── CHANGELOG.md
├── LICENSE
├── README.md
├── pyproject.toml
├── src
    └── mdformat_gfm
    │   ├── __init__.py
    │   ├── _gfm.py
    │   ├── _mdformat_plugin.py
    │   ├── _mdit_gfm_autolink_plugin.py
    │   └── _text_inline_rule.py
└── tests
    ├── __init__.py
    ├── data
        ├── default_style.md
        ├── generate_json_spec.sh
        ├── gfm_autolink.md
        ├── gfm_spec.commit-85d895289c5ab67f988ca659493a64abb5fec7b4.json
        └── wrap_width_50.md
    ├── requirements.txt
    ├── test_gfm_compliancy.py
    ├── test_markdown_it_plugin.py
    └── test_mdformat_gfm.py


/.bumpversion.cfg:
--------------------------------------------------------------------------------
 1 | [bumpversion]
 2 | commit = True
 3 | tag = True
 4 | tag_name = {new_version}
 5 | current_version = 0.4.1
 6 | 
 7 | [bumpversion:file:pyproject.toml]
 8 | search = version = "{current_version}"  # DO NOT EDIT THIS LINE MANUALLY. LET bump2version UTILITY DO IT
 9 | replace = version = "{new_version}"  # DO NOT EDIT THIS LINE MANUALLY. LET bump2version UTILITY DO IT
10 | 
11 | [bumpversion:file:src/mdformat_gfm/__init__.py]
12 | search = __version__ = "{current_version}"
13 | replace = __version__ = "{new_version}"
14 | 


--------------------------------------------------------------------------------
/.flake8:
--------------------------------------------------------------------------------
1 | [flake8]
2 | max-line-length = 88
3 | max-complexity = 10
4 | # These checks violate PEP8 so let's ignore them
5 | extend-ignore = E203
6 | extend-exclude = */site-packages/*
7 | 


--------------------------------------------------------------------------------
/.github/workflows/tests.yaml:
--------------------------------------------------------------------------------
 1 | name: Tests
 2 | 
 3 | on:
 4 |   push:
 5 |     branches: [ master ]
 6 |     tags: [ '[0-9]+.[0-9]+.[0-9]+*' ]
 7 |   pull_request:
 8 |     branches: [ master ]
 9 | 
10 | jobs:
11 | 
12 |   build:
13 |     runs-on: ${{ matrix.os }}
14 |     strategy:
15 |       matrix:
16 |         python-version: ['3.9', '3.10', '3.11', '3.12', '3.13']
17 |         os: [ubuntu-latest, macos-latest, windows-latest]
18 | 
19 |     steps:
20 |     - uses: actions/checkout@v4
21 | 
22 |     - name: Set up Python ${{ matrix.python-version }}
23 |       uses: actions/setup-python@v5
24 |       with:
25 |         python-version: ${{ matrix.python-version }}
26 | 
27 |     - name: Installation (deps and package)
28 |       run: |
29 |         pip install .
30 | 
31 |     - name: Test with pytest
32 |       run: |
33 |         pip install -r tests/requirements.txt
34 |         pytest
35 | 
36 |     - name: Linters
37 |       if: matrix.os == 'ubuntu-latest' && matrix.python-version == '3.11'
38 |       run: |
39 |         pip install pre-commit mypy==1.11.2
40 |         pre-commit run --all-files
41 |         mypy src/ tests/
42 | 
43 |   pypi-publish:
44 |     # Only publish if all other jobs succeed
45 |     needs: [ build ]
46 |     if: github.event_name == 'push' && startsWith(github.event.ref, 'refs/tags')
47 |     runs-on: ubuntu-latest
48 |     steps:
49 |     - uses: actions/checkout@v4
50 |     - uses: actions/setup-python@v5
51 |       with:
52 |         python-version: '3.x'
53 |     - name: Install build and publish tools
54 |       run: |
55 |         pip install build twine
56 |     - name: Build and check
57 |       run: |
58 |         rm -rf dist/ && python -m build
59 |         twine check --strict dist/*
60 |     - name: Publish
61 |       run: |
62 |         twine upload dist/*
63 |       env:
64 |         TWINE_USERNAME: __token__
65 |         TWINE_PASSWORD: ${{ secrets.PYPI_TOKEN }}
66 | 


--------------------------------------------------------------------------------
/.gitignore:
--------------------------------------------------------------------------------
  1 | # Byte-compiled / optimized / DLL files
  2 | __pycache__/
  3 | *.py[cod]
  4 | *$py.class
  5 | 
  6 | # C extensions
  7 | *.so
  8 | 
  9 | # Distribution / packaging
 10 | .Python
 11 | build/
 12 | develop-eggs/
 13 | dist/
 14 | downloads/
 15 | eggs/
 16 | .eggs/
 17 | lib/
 18 | lib64/
 19 | parts/
 20 | sdist/
 21 | var/
 22 | wheels/
 23 | pip-wheel-metadata/
 24 | share/python-wheels/
 25 | *.egg-info/
 26 | .installed.cfg
 27 | *.egg
 28 | MANIFEST
 29 | 
 30 | # PyInstaller
 31 | #  Usually these files are written by a python script from a template
 32 | #  before PyInstaller builds the exe, so as to inject date/other infos into it.
 33 | *.manifest
 34 | *.spec
 35 | 
 36 | # Installer logs
 37 | pip-log.txt
 38 | pip-delete-this-directory.txt
 39 | 
 40 | # Unit test / coverage reports
 41 | htmlcov/
 42 | .tox/
 43 | .nox/
 44 | .coverage
 45 | .coverage.*
 46 | .cache
 47 | nosetests.xml
 48 | coverage.xml
 49 | *.cover
 50 | *.py,cover
 51 | .hypothesis/
 52 | .pytest_cache/
 53 | 
 54 | # Translations
 55 | *.mo
 56 | *.pot
 57 | 
 58 | # Django stuff:
 59 | *.log
 60 | local_settings.py
 61 | db.sqlite3
 62 | db.sqlite3-journal
 63 | 
 64 | # Flask stuff:
 65 | instance/
 66 | .webassets-cache
 67 | 
 68 | # Scrapy stuff:
 69 | .scrapy
 70 | 
 71 | # Sphinx documentation
 72 | docs/_build/
 73 | 
 74 | # PyBuilder
 75 | target/
 76 | 
 77 | # Jupyter Notebook
 78 | .ipynb_checkpoints
 79 | 
 80 | # IPython
 81 | profile_default/
 82 | ipython_config.py
 83 | 
 84 | # pyenv
 85 | .python-version
 86 | 
 87 | # pipenv
 88 | #   According to pypa/pipenv#598, it is recommended to include Pipfile.lock in version control.
 89 | #   However, in case of collaboration, if having platform-specific dependencies or dependencies
 90 | #   having no cross-platform support, pipenv may install dependencies that don't work, or not
 91 | #   install all needed dependencies.
 92 | #Pipfile.lock
 93 | 
 94 | # PEP 582; used by e.g. github.com/David-OConnor/pyflow
 95 | __pypackages__/
 96 | 
 97 | # Celery stuff
 98 | celerybeat-schedule
 99 | celerybeat.pid
100 | 
101 | # SageMath parsed files
102 | *.sage.py
103 | 
104 | # Environments
105 | .env
106 | .venv
107 | env/
108 | venv/
109 | ENV/
110 | env.bak/
111 | venv.bak/
112 | 
113 | # Spyder project settings
114 | .spyderproject
115 | .spyproject
116 | 
117 | # Rope project settings
118 | .ropeproject
119 | 
120 | # mkdocs documentation
121 | /site
122 | 
123 | # mypy
124 | .mypy_cache/
125 | .dmypy.json
126 | dmypy.json
127 | 
128 | # Pyre type checker
129 | .pyre/
130 | 
131 | # IntelliJ
132 | .idea/
133 | 


--------------------------------------------------------------------------------
/.pre-commit-config.yaml:
--------------------------------------------------------------------------------
 1 | repos:
 2 | - repo: https://github.com/pre-commit/pre-commit-hooks
 3 |   rev: cef0300fd0fc4d2a87a85fa2093c6b283ea36f4b  # frozen: v5.0.0
 4 |   hooks:
 5 |   - id: check-yaml
 6 |   - id: check-toml
 7 | - repo: https://github.com/pre-commit/pygrep-hooks
 8 |   rev: 3a6eb0fadf60b3cccfd80bad9dbb6fae7e47b316  # frozen: v1.10.0
 9 |   hooks:
10 |   - id: python-use-type-annotations
11 |   - id: python-check-blanket-noqa
12 | - repo: https://github.com/PyCQA/isort
13 |   rev: c235f5e450b4b84e58d114ed4c589cbf454175a3  # frozen: 5.13.2
14 |   hooks:
15 |   - id: isort
16 | - repo: https://github.com/psf/black
17 |   rev: 1b2427a2b785cc4aac97c19bb4b9a0de063f9547  # frozen: 24.10.0
18 |   hooks:
19 |   - id: black
20 | - repo: https://github.com/PyCQA/flake8
21 |   rev: e43806be3607110919eff72939fda031776e885a  # frozen: 7.1.1
22 |   hooks:
23 |   - id: flake8
24 |     additional_dependencies:
25 |     - flake8-bugbear
26 |     - flake8-builtins
27 |     - flake8-comprehensions
28 | - repo: https://github.com/hukkin/docformatter
29 |   rev: ab802050e6e96aaaf7f917fcbc333bb74e2e57f7  # frozen: v1.4.2
30 |   hooks:
31 |   - id: docformatter
32 | - repo: https://github.com/executablebooks/mdformat
33 |   rev: 00812cd1850e41aae5c0916645b4b7404f538e8c  # frozen: 0.7.19
34 |   hooks:
35 |   - id: mdformat
36 |     files: 'README.md'
37 | 


--------------------------------------------------------------------------------
/CHANGELOG.md:
--------------------------------------------------------------------------------
 1 | # Changelog
 2 | 
 3 | ## 0.4.1
 4 | 
 5 | - Fixed
 6 |   - Stop new autolink parser from finding autolinks in link and image labels
 7 | 
 8 | ## 0.4.0 (yanked from PyPI)
 9 | 
10 | - Changed
11 |   - Replaced `linkify-it-py` dependency with a vendored GFM compatible markdown-it-py autolink plugin.
12 | - Fixed
13 |   - Error on angle bracketed `linkify-it-py` links that are not CommonMark autolinks
14 | 


--------------------------------------------------------------------------------
/LICENSE:
--------------------------------------------------------------------------------
 1 | MIT License
 2 | 
 3 | Copyright (c) 2020 Taneli Hukkinen
 4 | 
 5 | Permission is hereby granted, free of charge, to any person obtaining a copy
 6 | of this software and associated documentation files (the "Software"), to deal
 7 | in the Software without restriction, including without limitation the rights
 8 | to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
 9 | copies of the Software, and to permit persons to whom the Software is
10 | furnished to do so, subject to the following conditions:
11 | 
12 | The above copyright notice and this permission notice shall be included in all
13 | copies or substantial portions of the Software.
14 | 
15 | THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
16 | IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
17 | FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
18 | AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
19 | LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
20 | OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
21 | SOFTWARE.
22 | 


--------------------------------------------------------------------------------
/README.md:
--------------------------------------------------------------------------------
 1 | [![Build Status](https://github.com/hukkin/mdformat-gfm/actions/workflows/tests.yaml/badge.svg?branch=master)](https://github.com/hukkin/mdformat-gfm/actions?query=workflow%3ATests+branch%3Amaster+event%3Apush)
 2 | [![PyPI version](https://img.shields.io/pypi/v/mdformat-gfm)](https://pypi.org/project/mdformat-gfm)
 3 | 
 4 | # mdformat-gfm
 5 | 
 6 | > Mdformat plugin for GitHub Flavored Markdown compatibility
 7 | 
 8 | ## Description
 9 | 
10 | [Mdformat](https://github.com/executablebooks/mdformat) is a formatter for
11 | [CommonMark](https://spec.commonmark.org/current/)
12 | compliant Markdown.
13 | 
14 | Mdformat-gfm is an mdformat plugin that changes the target specification to
15 | [GitHub Flavored Markdown (GFM)](https://github.github.com/gfm/),
16 | making the tool able to format the following syntax extensions:
17 | 
18 | - [tables](https://github.github.com/gfm/#tables-extension-)
19 | - [task list items](https://github.github.com/gfm/#task-list-items-extension-)
20 | - [strikethroughs](https://github.github.com/gfm/#strikethrough-extension-)
21 | - [autolinks](https://github.github.com/gfm/#autolinks-extension-)
22 | - [disallowed raw HTML](https://github.github.com/gfm/#disallowed-raw-html-extension-)
23 |   (note that no changes are required from a formatter to support this extension)
24 | 
25 | ## Install
26 | 
27 | ```sh
28 | pipx install mdformat
29 | pipx inject mdformat mdformat-gfm
30 | ```
31 | 
32 | ## Usage
33 | 
34 | ```sh
35 | mdformat <filename>
36 | ```
37 | 


--------------------------------------------------------------------------------
/pyproject.toml:
--------------------------------------------------------------------------------
 1 | [build-system]
 2 | requires = ["flit_core>=3.2.0,<4"]
 3 | build-backend = "flit_core.buildapi"
 4 | 
 5 | 
 6 | [project]
 7 | name = "mdformat-gfm"
 8 | version = "0.4.1"  # DO NOT EDIT THIS LINE MANUALLY. LET bump2version UTILITY DO IT
 9 | authors = [
10 |     { name = "Taneli Hukkinen", email = "hukkin@users.noreply.github.com"},
11 | ]
12 | description = "Mdformat plugin for GitHub Flavored Markdown compatibility"
13 | readme = "README.md"
14 | license = { file = "LICENSE" }
15 | requires-python = ">=3.9"
16 | dependencies = [
17 |     'mdformat >=0.7.5,<0.8.0',
18 |     'markdown-it-py',  # Let `mdformat` choose version boundaries for `markdown-it-py`
19 |     'mdit-py-plugins >=0.2.0',
20 |     'mdformat-tables >=0.4.0',
21 | ]
22 | classifiers = [
23 |     "Topic :: Documentation",
24 |     "Topic :: Text Processing :: Markup",
25 | ]
26 | keywords = ["mdformat", "markdown", "formatter", "gfm"]
27 | 
28 | [project.urls]
29 | "Homepage" = "https://github.com/hukkin/mdformat-gfm"
30 | "Changelog" = "https://github.com/hukkin/mdformat-gfm/blob/master/CHANGELOG.md"
31 | 
32 | [project.entry-points."mdformat.parser_extension"]
33 | "gfm" = "mdformat_gfm._mdformat_plugin"
34 | 
35 | 
36 | [tool.tox]
37 | requires = ["tox>=4.21.1"]
38 | env_list = ["3.9", "3.10", "3.11", "3.12", "3.13"]
39 | 
40 | [tool.tox.env_run_base]
41 | description = "Run tests under {base_python}"
42 | deps = ["-r tests/requirements.txt"]
43 | commands = [["pytest", { replace = "posargs", extend = true }]]
44 | 
45 | [tool.tox.env."git-mdformat"]
46 | description = "Run tests against unreleased mdformat from git"
47 | deps = [
48 |     "-r tests/requirements.txt",
49 |     "git+https://github.com/executablebooks/mdformat.git@master",
50 |     "git+https://github.com/executablebooks/mdformat-tables.git@master",
51 | ]
52 | 
53 | [tool.tox.env."mypy"]
54 | base_python = ["python3.11"]
55 | description = "Run mypy under {base_python}"
56 | deps = [
57 |     "-r tests/requirements.txt",
58 |     "mypy ==1.11.2",
59 | ]
60 | commands = [["mypy", { replace = "posargs", default = ["src/", "tests/"], extend = true }]]
61 | 
62 | 
63 | [tool.isort]
64 | # Force imports to be sorted by module, independent of import type
65 | force_sort_within_sections = true
66 | # Group first party and local folder imports together
67 | no_lines_before = ["LOCALFOLDER"]
68 | 
69 | # Configure isort to work without access to site-packages
70 | known_first_party = ["mdformat_gfm", "tests"]
71 | 
72 | # Settings for Black compatibility
73 | profile = "black"
74 | 
75 | 
76 | [tool.pytest.ini_options]
77 | addopts = "--strict-markers --strict-config"
78 | xfail_strict = true
79 | 
80 | 
81 | [tool.mypy]
82 | show_error_codes = true
83 | warn_unreachable = true
84 | warn_unused_ignores = true
85 | warn_redundant_casts = true
86 | warn_unused_configs = true
87 | # Disabling incremental mode is required for `warn_unused_configs = true` to work
88 | incremental = false
89 | disallow_untyped_defs = true
90 | check_untyped_defs = true
91 | strict_equality = true
92 | implicit_reexport = false
93 | no_implicit_optional = true
94 | overrides = [
95 |     { module = "tests.*", disallow_untyped_defs = false },
96 | ]
97 | 


--------------------------------------------------------------------------------
/src/mdformat_gfm/__init__.py:
--------------------------------------------------------------------------------
1 | __version__ = "0.4.1"  # DO NOT EDIT THIS LINE MANUALLY. LET bump2version UTILITY DO IT
2 | 


--------------------------------------------------------------------------------
/src/mdformat_gfm/_gfm.py:
--------------------------------------------------------------------------------
# Whitespace characters, as specified in
# https://github.github.com/gfm/#whitespace-character
# (spec version 0.29-gfm, 2019-04-06)
WHITESPACE = frozenset(" \t\n\v\f\r")

# Characters that may directly precede a GFM autolink: any whitespace
# character, or one of the delimiting characters "*", "_", "~" and "(".
BEFORE_AUTOLINK_CHARS = WHITESPACE | {"*", "_", "~", "("}
7 | 


--------------------------------------------------------------------------------
/src/mdformat_gfm/_mdformat_plugin.py:
--------------------------------------------------------------------------------
  1 | import re
  2 | 
  3 | from markdown_it import MarkdownIt
  4 | import mdformat.plugins
  5 | from mdformat.renderer import DEFAULT_RENDERERS, RenderContext, RenderTreeNode
  6 | from mdit_py_plugins.tasklists import tasklists_plugin
  7 | 
  8 | from mdformat_gfm._mdit_gfm_autolink_plugin import gfm_autolink_plugin
  9 | 
 10 | 
 11 | def update_mdit(mdit: MarkdownIt) -> None:
 12 |     # Enable GFM autolink extension
 13 |     mdit.use(gfm_autolink_plugin)
 14 | 
 15 |     # Enable mdformat-tables plugin
 16 |     tables_plugin = mdformat.plugins.PARSER_EXTENSIONS["tables"]
 17 |     if tables_plugin not in mdit.options["parser_extension"]:
 18 |         mdit.options["parser_extension"].append(tables_plugin)
 19 |         tables_plugin.update_mdit(mdit)
 20 | 
 21 |     # Enable strikethrough markdown-it extension
 22 |     mdit.enable("strikethrough")
 23 | 
 24 |     # Enable tasklist markdown-it extension
 25 |     mdit.use(tasklists_plugin)
 26 | 
 27 | 
 28 | def _strikethrough_renderer(node: RenderTreeNode, context: RenderContext) -> str:
 29 |     content = "".join(child.render(context) for child in node.children)
 30 |     return "~~" + content + "~~"
 31 | 
 32 | 
 33 | def _render_with_default_renderer(node: RenderTreeNode, context: RenderContext) -> str:
 34 |     """Render the node using default renderer instead of the one in `context`.
 35 | 
 36 |     We don't use `RenderContext.with_default_renderer_for` because that
 37 |     changes the default renderer in context, where it's applied
 38 |     recursively to render functions of children.
 39 |     """
 40 |     syntax_type = node.type
 41 |     text = DEFAULT_RENDERERS[syntax_type](node, context)
 42 |     for postprocessor in context.postprocessors.get(syntax_type, ()):
 43 |         text = postprocessor(text, node, context)
 44 |     return text
 45 | 
 46 | 
 47 | def _is_task_list_item(node: RenderTreeNode) -> bool:
 48 |     assert node.type == "list_item"
 49 |     classes = node.attrs.get("class", "")
 50 |     assert isinstance(classes, str)
 51 |     return "task-list-item" in classes
 52 | 
 53 | 
 54 | def _list_item_renderer(node: RenderTreeNode, context: RenderContext) -> str:
 55 |     if not _is_task_list_item(node):
 56 |         return _render_with_default_renderer(node, context)
 57 | 
 58 |     # Tasklists extension makes a bit weird token stream where
 59 |     # tasks are annotated by html. We need to remove the HTML.
 60 |     paragraph_node = node.children[0]
 61 |     inline_node = paragraph_node.children[0]
 62 |     assert inline_node.type == "inline"
 63 |     assert inline_node.children, "inline token must have children"
 64 |     html_inline_node = inline_node.children[0]
 65 |     assert 'class="task-list-item-checkbox"' in html_inline_node.content
 66 | 
 67 |     # This is naughty, shouldn't mutate and rely on `.remove` here
 68 |     inline_node.children.remove(html_inline_node)
 69 | 
 70 |     checkmark = "x" if 'checked="checked"' in html_inline_node.content else " "
 71 | 
 72 |     text = _render_with_default_renderer(node, context)
 73 | 
 74 |     if context.do_wrap:
 75 |         wrap_mode = context.options["mdformat"]["wrap"]
 76 |         if isinstance(wrap_mode, int):
 77 |             text = text[4:]  # Remove the "xxxx" added in `_postprocess_inline`
 78 |     # Strip leading space chars (numeric representations)
 79 |     text = re.sub(r"^(&#32;)+", "", text)
 80 |     text = text.lstrip()
 81 |     return f"[{checkmark}] {text}"
 82 | 
 83 | 
 84 | def _postprocess_inline(text: str, node: RenderTreeNode, context: RenderContext) -> str:
 85 |     """Postprocess inline tokens.
 86 | 
 87 |     Fix word wrap of the first line in a task list item. It should be
 88 |     wrapped narrower than normal because of the "[ ] " prefix that
 89 |     indicates a task list item. We fool word wrap by prefixing an
 90 |     unwrappable dummy string of the same length. This prefix needs to be
 91 |     later removed (in `_list_item_renderer`).
 92 |     """
 93 |     if not context.do_wrap:
 94 |         return text
 95 |     wrap_mode = context.options["mdformat"]["wrap"]
 96 |     if not isinstance(wrap_mode, int):
 97 |         return text
 98 |     if (
 99 |         node.parent
100 |         and node.parent.type == "paragraph"
101 |         and not node.parent.previous_sibling
102 |         and node.parent.parent
103 |         and node.parent.parent.type == "list_item"
104 |         and _is_task_list_item(node.parent.parent)
105 |     ):
106 |         text = text.lstrip("\x00")
107 |         text = text.lstrip()
108 |         text = "xxxx" + text
109 |     return text
110 | 
111 | 
def _gfm_autolink_renderer(node: RenderTreeNode, context: RenderContext) -> str:
    """Render a GFM autolink as the source text stored in the node's meta."""
    source_text = node.meta["source_text"]
    return source_text
114 | 
115 | 
def _escape_text(text: str, node: RenderTreeNode, context: RenderContext) -> str:
    """Escape strikethrough markers: put a backslash before every `~~`."""
    escaped = text.replace("~~", r"\~~")
    return escaped
121 | 
122 | 
# A tick box ("[ ]", "[x]" or "[X]") at the beginning of a line
_RE_GFM_TICK_BOX = re.compile(r"^\[([ xX])]", flags=re.MULTILINE)


def _escape_paragraph(text: str, node: RenderTreeNode, context: RenderContext) -> str:
    """Escape tasklists: backslash-escape the brackets of line-leading tick boxes."""
    escaped_tick_box = r"\[\g<1>\]"
    return _RE_GFM_TICK_BOX.sub(escaped_tick_box, text)
131 | 
132 | 
# Render functions of this plugin, keyed by syntax tree node type.
# NOTE(review): presumably read by mdformat via the "mdformat.parser_extension"
# entry point that names this module -- confirm against mdformat plugin docs.
RENDERERS = {
    "s": _strikethrough_renderer,
    "list_item": _list_item_renderer,
    "gfm_autolink": _gfm_autolink_renderer,
}
# Postprocessors of this plugin, keyed by syntax tree node type. Each takes
# the rendered text, the node and the render context, and returns new text.
POSTPROCESSORS = {
    "text": _escape_text,
    "inline": _postprocess_inline,
    "paragraph": _escape_paragraph,
}
143 | 


--------------------------------------------------------------------------------
/src/mdformat_gfm/_mdit_gfm_autolink_plugin.py:
--------------------------------------------------------------------------------
  1 | import re
  2 | 
  3 | from markdown_it import MarkdownIt
  4 | from markdown_it.rules_inline import StateInline
  5 | 
  6 | from mdformat_gfm import _gfm
  7 | from mdformat_gfm._text_inline_rule import text_rule
  8 | 
  9 | 
 10 | def gfm_autolink_plugin(md: MarkdownIt) -> None:
 11 |     """Markdown-it plugin to parse GFM autolinks."""
 12 |     md.inline.ruler.before("linkify", "gfm_autolink", gfm_autolink)
 13 | 
 14 |     # The default "text" inline rule will skip starting characters of GFM
 15 |     # autolinks. It can be disabled, but that is disastrous for performance.
 16 |     # Instead, we replace it with a custom "text" inline rule that yields at
 17 |     # locations that can potentially be the beginning of a GFM autolink.
 18 |     md.inline.ruler.at("text", text_rule)
 19 | 
 20 | 
# Candidate email address of a GFM email autolink.
# A string that matches this must still be invalidated if it ends with "_" or "-"
RE_GFM_EMAIL = re.compile(r"[a-zA-Z0-9._+-]+@[a-zA-Z0-9_-]+(?:\.[a-zA-Z0-9_-]+)+")
# Candidate autolink domain: two or more segments separated by ".".
# A string that matches this must still be invalidated if last two segments contain "_"
RE_GFM_AUTOLINK_DOMAIN = re.compile(r"[a-zA-Z0-9_-]+(?:\.[a-zA-Z0-9_-]+)+")

# An entity reference, e.g. "&amp;", at the very end of the searched string
RE_ENDS_IN_ENTITY_REF = re.compile(r"&[a-zA-Z0-9]+;\Z")

# ASCII letters and digits
ASCII_ALPHANUMERICS = frozenset(
    "abcdefghijklmnopqrstuvwxyz" "ABCDEFGHIJKLMNOPQRSTUVWXYZ" "0123456789"
)
 31 | 
 32 | 
 33 | def gfm_autolink(state: StateInline, silent: bool) -> bool:  # noqa: C901
 34 |     """Markdown-it-py rule to parse GFM autolinks.
 35 | 
 36 |     This parser autolinks as specified here:
 37 |     https://github.github.com/gfm/#autolinks-extension-
 38 | 
 39 |     Args:
 40 |         state: Parse state object.
 41 |         silent: Disables token generation.
 42 |     Returns:
 43 |         bool: True if GFM autolink found.
 44 |     """
 45 |     # Prevents autolink parsing in link and image labels
 46 |     if state.level > 0:
 47 |         return False
 48 | 
 49 |     pos = state.pos
 50 |     src = state.src
 51 | 
 52 |     # Autolink can only be at the beginning of a line, after whitespace,
 53 |     # or any of the delimiting characters *, _, ~, and (.
 54 |     if pos:
 55 |         preceding_char = src[pos - 1]
 56 |         if preceding_char not in _gfm.BEFORE_AUTOLINK_CHARS:
 57 |             return False
 58 | 
 59 |     if src.startswith("www.", pos):
 60 |         pos += 4
 61 |         try:
 62 |             pos, domain, resource = read_domain_and_resource(src, pos)
 63 |         except NotFound:
 64 |             return False
 65 | 
 66 |         url = f"www.{domain}{resource}"
 67 |         full_url = "http://" + url
 68 |     elif src.startswith(("http://", "https://"), pos):
 69 |         scheme = "https://" if src[pos + 4] == "s" else "http://"
 70 |         pos += len(scheme)
 71 | 
 72 |         try:
 73 |             pos, domain, resource = read_domain_and_resource(src, pos)
 74 |         except NotFound:
 75 |             return False
 76 | 
 77 |         url = f"{scheme}{domain}{resource}"
 78 |         full_url = url
 79 |     elif src.startswith(("mailto:", "xmpp:"), pos):
 80 |         scheme = "xmpp:" if src[pos] == "x" else "mailto:"
 81 |         pos += len(scheme)
 82 | 
 83 |         try:
 84 |             pos, email = read_email(src, pos)
 85 |         except NotFound:
 86 |             return False
 87 | 
 88 |         if scheme == "xmpp:" and src[pos : pos + 1] == "/":
 89 |             pos += 1
 90 |             resource_start_pos = pos
 91 |             while pos < len(src) and src[pos] in ASCII_ALPHANUMERICS | {".", "@"}:
 92 |                 pos += 1
 93 |             resource = src[resource_start_pos:pos]
 94 |             if resource.endswith("."):
 95 |                 pos -= 1
 96 |                 resource = resource[:-1]
 97 |             if not resource:
 98 |                 return False
 99 |         else:
100 |             resource = ""
101 | 
102 |         source_autolink = scheme + email
103 |         if resource:
104 |             source_autolink += "/" + resource
105 | 
106 |         url = source_autolink
107 |         full_url = source_autolink
108 |     else:
109 |         try:
110 |             pos, email = read_email(src, pos)
111 |         except NotFound:
112 |             return False
113 | 
114 |         url = email
115 |         full_url = "mailto:" + email
116 | 
117 |     normalized_full_url = state.md.normalizeLink(full_url)
118 |     if not state.md.validateLink(normalized_full_url):
119 |         return False
120 | 
121 |     push_tokens(state, normalized_full_url, url, silent)
122 |     state.pos = pos
123 |     return True
124 | 
125 | 
def push_tokens(
    state: StateInline, full_url: str, source_url: str, silent: bool
) -> None:
    """Push the tokens of one GFM autolink to `state`.

    Three tokens are pushed: an opening token with `full_url` as its
    "href" attribute and `source_url` stored in its meta, a text token
    with the normalized link text, and a closing token. Nothing is
    pushed if `silent` is true.
    """
    if not silent:
        opening = state.push("gfm_autolink_open", "a", 1)
        opening.attrs = {"href": full_url}
        opening.meta = {"source_text": source_url}

        link_text = state.push("text", "", 0)
        link_text.content = state.md.normalizeLinkText(source_url)

        state.push("gfm_autolink_close", "a", -1)
139 | 
140 | 
def trim_resource(untrimmed: str) -> tuple[str, int]:
    """Trim illegal trailing chars from autolink resource.

    This is the "extended autolink path validation" of the GFM spec:
    trailing punctuation, unbalanced closing parentheses and entity
    references are cut off. Trailing backslashes are cut off too. The
    spec does not mention backslash, but I think it should.

    Return a tuple with the trimmed resource and the amount of
    characters removed.
    """
    # `end` is the exclusive end index of what is kept so far
    end = len(untrimmed)
    while end > 0:
        last_char = untrimmed[end - 1]
        if last_char == ";":
            # A ";" is only trimmed as part of a whole entity reference
            entity_match = RE_ENDS_IN_ENTITY_REF.search(untrimmed, endpos=end)
            if entity_match is None:
                break
            end = entity_match.start()
            continue
        if last_char == ")":
            # A ")" stays if the kept part has a "(" to pair it with
            if untrimmed.count("(", 0, end) >= untrimmed.count(")", 0, end):
                break
        elif last_char not in "?!.,:*_~\\":  # backslash: not in the spec
            break
        end -= 1

    return untrimmed[:end], len(untrimmed) - end
172 | 
173 | 
class NotFound(Exception):
    """Raised if a function didn't find what it was looking for.

    The `read_*` functions of this module raise it when no valid
    autolink part starts at the given position.
    """
176 | 
177 | 
def read_domain_and_resource(src: str, pos: int) -> tuple[int, str, str]:
    """Read autolink domain and resource.

    The domain must start at `pos`. The resource is everything after
    the domain up to the next whitespace character or "<", with illegal
    trailing characters trimmed off.

    Raise NotFound if not found. Return a tuple (pos, domain, resource),
    where `pos` is the position right after the resource.
    """
    domain_match = RE_GFM_AUTOLINK_DOMAIN.match(src, pos)
    if not domain_match:
        raise NotFound
    domain = domain_match.group()
    pos = domain_match.end()
    # The regex requires at least one ".", so there are always at least
    # two segments. Underscores are not allowed in the last two.
    segments = domain.rsplit(".", 2)
    if "_" in segments[-2] or "_" in segments[-1]:
        raise NotFound

    resource_start_pos = pos
    # Build the terminator set once, not once per scanned character
    resource_terminators = _gfm.WHITESPACE | {"<"}
    src_len = len(src)
    while pos < src_len and src[pos] not in resource_terminators:
        pos += 1
    resource = src[resource_start_pos:pos]

    # Trimmed characters go back to the surrounding text
    resource, trim_count = trim_resource(resource)
    pos -= trim_count
    return pos, domain, resource
200 | 
201 | 
def read_email(src: str, pos: int) -> tuple[int, str]:
    """Read autolink email.

    The email address must start at `pos`.

    Raise NotFound if not found. Return a tuple (pos, email), where
    `pos` is the position right after the email address.
    """
    email_match = RE_GFM_EMAIL.match(src, pos)
    if email_match is None:
        raise NotFound
    email = email_match.group()
    # A trailing hyphen or underscore invalidates the autolink
    if email.endswith(("-", "_")):
        raise NotFound
    end_pos = email_match.end()

    # This isn't really part of the GFM spec, but an attempt to cover
    # up its flaws. If a trailing hyphen or underscore invalidates an
    # autolink, then an escaped hyphen or underscore should too.
    if src.startswith(("\\-", "\\_"), end_pos):
        raise NotFound

    return end_pos, email
221 | 


--------------------------------------------------------------------------------
/src/mdformat_gfm/_text_inline_rule.py:
--------------------------------------------------------------------------------
 1 | """A replacement for the "text" inline rule in markdown-it.
 2 | 
 3 | The default "text" rule will skip until the next character in
 4 | `_TerminatorChars` is found. This extends the set of termination points
 5 | to those that can potentially be the beginning of a GFM autolink. The
 6 | GFM autolink plugin also works with "text" inline rule disabled, but
 7 | this should (at least partially) bring back the performance boost that
 8 | "text" inline rule provides.
 9 | """
10 | 
11 | import re
12 | 
13 | from markdown_it.rules_inline import StateInline
14 | 
15 | from mdformat_gfm import _gfm
16 | 
# The default set of terminator characters
_TerminatorChars = set("\n!#$%&*+-:<=>@[\\]^_`{}~")

# Character class of the default terminator characters
_default_terminator = f"[{re.escape(''.join(_TerminatorChars))}]"
# The ways a GFM autolink can begin
_gfm_autolink_terminator = r"(?:www\.|http|mailto:|xmpp:|[a-zA-Z0-9._+-]+@)"
# Character class of the characters that may precede a GFM autolink
_before_autolink = f"[{re.escape(''.join(_gfm.BEFORE_AUTOLINK_CHARS))}]"

# For matching at position zero, where no preceding character exists
_RE_TERMINATOR_FIRST_CHAR = re.compile(
    f"{_default_terminator}|{_gfm_autolink_terminator}"
)
# For searching elsewhere. A match starts at the character BEFORE the
# termination point. `(?s:.)` matches any character (also newline).
_RE_TERMINATOR_NON_FIRST_CHAR = re.compile(
    f"(?s:.){_default_terminator}|{_before_autolink}{_gfm_autolink_terminator}"
)
60 | 
61 | 
def text_rule(state: StateInline, silent: bool) -> bool:
    """Consume plain text up to the next possible inline construct.

    Starting at ``state.pos``, skip forward until either a default
    terminator character or a potential beginning of a GFM autolink is
    found. The scan never goes past ``state.posMax``.

    If ``silent`` is false, the skipped text is appended to
    ``state.pending``. ``state.pos`` is advanced in both modes.

    Returns True if at least one character was consumed, False otherwise.
    """
    src = state.src
    start = state.pos
    end = state.posMax
    pos = start

    # Handle the special case where `pos` is zero: there is no preceding
    # character, so check the first character with a dedicated pattern.
    if not pos:
        if _RE_TERMINATOR_FIRST_CHAR.match(src, 0, end):
            return False
        pos = 1

    # Now `pos` cannot be zero, so we can search with a regex that looks at
    # preceding character too. Bounding the search with `end` keeps a
    # terminator located beyond `state.posMax` from moving `pos` past it.
    terminator_match = _RE_TERMINATOR_NON_FIRST_CHAR.search(src, pos - 1, end)
    # The match starts at the preceding character, hence the `+ 1`.
    pos = terminator_match.start() + 1 if terminator_match else end

    if pos == start:
        return False

    if not silent:
        state.pending += src[start:pos]

    state.pos = pos

    return True
88 | 


--------------------------------------------------------------------------------
/tests/__init__.py:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/hukkin/mdformat-gfm/3ab8cac2a17e0a4a439b771b79846973312a2ae3/tests/__init__.py


--------------------------------------------------------------------------------
/tests/data/default_style.md:
--------------------------------------------------------------------------------
 1 | Table
 2 | .
 3 | a | b | c
 4 | :- | -: | :-:
 5 | 1 | 2 | 3
 6 | xxxxxx | yyyyyy | zzzzzz
 7 | .
 8 | | a      |      b |   c    |
 9 | | :----- | -----: | :----: |
10 | | 1      |      2 |   3    |
11 | | xxxxxx | yyyyyy | zzzzzz |
12 | .
13 | 
14 | Simple strikethrough
15 | .
16 | ~~Hi~~ Hello, world!
17 | .
18 | ~~Hi~~ Hello, world!
19 | .
20 | 
21 | Escaped strikethrough
22 | .
23 | ~~Hi~\~ Hello, world!
24 | .
25 | \~~Hi\~~ Hello, world!
26 | .
27 | 
28 | Nested tasklists
29 | .
30 | - [x] foo
31 |   - [ ] bar
32 |   - [x] baz
33 | - [ ] bim
34 | .
35 | - [x] foo
36 |   - [ ] bar
37 |   - [x] baz
38 | - [ ] bim
39 | .
40 | 
41 | Mix tasks and other items
42 | .
43 | 1. [x] task done
44 | 2. not a task
45 | 3. [ ] task not done
46 | 4. not a task
47 | .
48 | 1. [x] task done
49 | 1. not a task
50 | 1. [ ] task not done
51 | 1. not a task
52 | .
53 | 
54 | Reduce tasklist whitespace
55 | .
56 | -   [x]    reduce spaces
57 | .
58 | - [x] reduce spaces
59 | .
60 | 
61 | Autolink with a backslash
62 | .
63 | http://www.python.org/autolink\extension
64 | .
65 | http://www.python.org/autolink\extension
66 | .
67 | 
68 | Autolink with percentage encoded space
69 | .
70 | https://mytest.com/files/word%20document.docx
71 | .
72 | https://mytest.com/files/word%20document.docx
73 | .
74 | 
75 | Autolink with port
76 | .
77 | test.com:443
78 | .
79 | test.com:443
80 | .
81 | 
82 | Tasklist escape
83 | .
84 | - [x] foo
85 | - \[ ] bim 
86 | .
87 | - [x] foo
88 | - \[ \] bim
89 | .
90 | 


--------------------------------------------------------------------------------
/tests/data/generate_json_spec.sh:
--------------------------------------------------------------------------------
 1 | #!/bin/bash
 2 | # A script to generate JSON file with all the examples in the
 3 | # GFM spec. Update `gfmcommit` commit hash value to generate an
 4 | # updated JSON. Run this script in `tests/data/` directory to write
 5 | # a file in `tests/data/gfm_spec.commit-{commit-hash}.json`.
 6 | 
 7 | git clone https://github.com/github/cmark-gfm.git
 8 | 
 9 | gfmcommit=85d895289c5ab67f988ca659493a64abb5fec7b4
10 | cd cmark-gfm/ \
11 | && git reset --hard $gfmcommit \
12 | && cd ..
13 | 
14 | python3 cmark-gfm/test/spec_tests.py --dump-tests --spec=cmark-gfm/test/spec.txt > gfm_spec.commit-${gfmcommit}.json
15 | 


--------------------------------------------------------------------------------
/tests/data/gfm_autolink.md:
--------------------------------------------------------------------------------
  1 | linkify
  2 | .
  3 | url http://www.youtube.com/watch?v=5Jt5GEr4AYg.
  4 | .
  5 | <p>url <a href="http://www.youtube.com/watch?v=5Jt5GEr4AYg">http://www.youtube.com/watch?v=5Jt5GEr4AYg</a>.</p>
  6 | .
  7 | 
  8 | 
  9 | don't touch text in links
 10 | .
 11 | [https://example.com](https://example.com)
 12 | .
 13 | <p><a href="https://example.com">https://example.com</a></p>
 14 | .
 15 | 
 16 | 
 17 | don't touch text in autolinks
 18 | .
 19 | <https://example.com>
 20 | .
 21 | <p><a href="https://example.com">https://example.com</a></p>
 22 | .
 23 | 
 24 | 
 25 | don't touch text in html <a> tags
 26 | .
 27 | <a href="https://example.com">https://example.com</a>
 28 | .
 29 | <p><a href="https://example.com">https://example.com</a></p>
 30 | .
 31 | 
 32 | 
 33 | space separated autolink in html <a> tags
 34 | .
 35 | <a href="https://example.fi"> https://example.com </a>
 36 | .
 37 | <p><a href="https://example.fi"> <a href="https://example.com">https://example.com</a> </a></p>
 38 | .
 39 | 
 40 | space separated autolink after html </a> tag
 41 | .
 42 | </a> https://example.com
 43 | .
 44 | <p></a> <a href="https://example.com">https://example.com</a></p>
 45 | .
 46 | 
 47 | autolink in link after </a> tag
 48 | .
 49 | </a> [t https://example.fi](https://example.com)
 50 | .
 51 | <p></a> <a href="https://example.com">t https://example.fi</a></p>
 52 | .
 53 | 
 54 | 
 55 | autolink in link after <a> tag
 56 | .
 57 | <a> [t https://example.fi](https://example.com)
 58 | .
 59 | <p><a> <a href="https://example.com">t https://example.fi</a></p>
 60 | .
 61 | 
 62 | 
 63 | entities inside raw links
 64 | .
 65 | https://example.com/foo&amp;bar
 66 | .
 67 | <p><a href="https://example.com/foo&amp;amp;bar">https://example.com/foo&amp;amp;bar</a></p>
 68 | .
 69 | 
 70 | 
 71 | emphasis inside raw links (asterisk, can happen in links with params)
 72 | .
 73 | https://example.com/foo*bar*baz
 74 | .
 75 | <p><a href="https://example.com/foo*bar*baz">https://example.com/foo*bar*baz</a></p>
 76 | .
 77 | 
 78 | 
 79 | emphasis inside raw links (underscore)
 80 | .
 81 | http://example.org/foo._bar_-_baz
 82 | .
 83 | <p><a href="http://example.org/foo._bar_-_baz">http://example.org/foo._bar_-_baz</a></p>
 84 | .
 85 | 
 86 | 
 87 | backticks inside raw links
 88 | .
 89 | https://example.com/foo`bar`baz
 90 | .
 91 | <p><a href="https://example.com/foo%60bar%60baz">https://example.com/foo`bar`baz</a></p>
 92 | .
 93 | 
 94 | 
 95 | links inside raw links
 96 | .
 97 | https://example.com/foo[123](456)bar
 98 | .
 99 | <p><a href="https://example.com/foo%5B123%5D(456)bar">https://example.com/foo[123](456)bar</a></p>
100 | .
101 | 
102 | escaped CommonMark autolink
103 | .
104 | \<https://python.org>
105 | .
106 | <p>&lt;https://python.org&gt;</p>
107 | .
108 | 
109 | escapes not allowed at the start
110 | .
111 | \https://example.com
112 | .
113 | <p>\https://example.com</p>
114 | .
115 | 
116 | 
117 | escapes not allowed at colon
118 | .
119 | https\://example.com
120 | .
121 | <p>https://example.com</p>
122 | .
123 | 
124 | 
125 | escapes not allowed at slashes
126 | .
127 | https:\//aa.org https://bb.org
128 | .
129 | <p>https://aa.org <a href="https://bb.org">https://bb.org</a></p>
130 | .
131 | 
132 | 
133 | fuzzy link shouldn't match cc.org
134 | .
135 | https:/\/cc.org
136 | .
137 | <p>https://cc.org</p>
138 | .
139 | 
140 | 
141 | bold links (exclude markup of pairs from link tail)
142 | .
143 | **http://example.com/foobar**
144 | .
145 | <p><strong><a href="http://example.com/foobar">http://example.com/foobar</a></strong></p>
146 | .
147 | 
148 | match links without protocol
149 | .
150 | www.example.org
151 | .
152 | <p><a href="http://www.example.org">www.example.org</a></p>
153 | .
154 | 
155 | match links without protocol, part 2
156 | .
157 | GFM autolink www.commonmark.org
158 | .
159 | <p>GFM autolink <a href="http://www.commonmark.org">www.commonmark.org</a></p>
160 | .
161 | 
162 | coverage, prefix not valid
163 | .
164 | http:/example.com/
165 | .
166 | <p>http:/example.com/</p>
167 | .
168 | 
169 | 
170 | coverage, negative link level
171 | .
172 | </a>[https://example.com](https://example.com)
173 | .
174 | <p></a><a href="https://example.com">https://example.com</a></p>
175 | .
176 | 
177 | 
178 | emphasis with '*', real link:
179 | .
180 | http://cdecl.ridiculousfish.com/?q=int+%28*f%29+%28float+*%29%3B
181 | .
182 | <p><a href="http://cdecl.ridiculousfish.com/?q=int+%28*f%29+%28float+*%29%3B">http://cdecl.ridiculousfish.com/?q=int+(*f)+(float+*)%3B</a></p>
183 | .
184 | 
185 | emphasis with '_', real link:
186 | .
187 | https://www.sell.fi/sites/default/files/elainlaakarilehti/tieteelliset_artikkelit/kahkonen_t._et_al.canine_pancreatitis-_review.pdf
188 | .
189 | <p><a href="https://www.sell.fi/sites/default/files/elainlaakarilehti/tieteelliset_artikkelit/kahkonen_t._et_al.canine_pancreatitis-_review.pdf">https://www.sell.fi/sites/default/files/elainlaakarilehti/tieteelliset_artikkelit/kahkonen_t._et_al.canine_pancreatitis-_review.pdf</a></p>
190 | .
191 | 
192 | emails
193 | .
194 | test@example.com
195 | 
196 | mailto:test@example.com
197 | 
198 | xmpp:foo@bar.baz/blaa@flii.buu.
199 | .
200 | <p><a href="mailto:test@example.com">test@example.com</a></p>
201 | <p><a href="mailto:test@example.com">mailto:test@example.com</a></p>
202 | <p><a href="xmpp:foo@bar.baz/blaa@flii.buu">xmpp:foo@bar.baz/blaa@flii.buu</a>.</p>
203 | .
204 | 
205 | 
206 | typographer should not break href
207 | .
208 | http://example.com/(c)
209 | .
210 | <p><a href="http://example.com/(c)">http://example.com/(c)</a></p>
211 | .
212 | 
213 | before line
214 | .
215 | before
216 | www.github.com
217 | .
218 | <p>before
219 | <a href="http://www.github.com">www.github.com</a></p>
220 | .
221 | 
222 | after line
223 | .
224 | github.com
225 | after
226 | .
227 | <p>github.com
228 | after</p>
229 | .
230 | 
231 | before after lines
232 | .
233 | before
234 | github.com
235 | after
236 | .
237 | <p>before
238 | github.com
239 | after</p>
240 | .
241 | 
242 | before after lines with blank line
243 | .
244 | before
245 | 
246 | github.com
247 | 
248 | after
249 | .
250 | <p>before</p>
251 | <p>github.com</p>
252 | <p>after</p>
253 | .
254 | 
255 | Don't match escaped
256 | .
257 | google\.com
258 | .
259 | <p>google.com</p>
260 | .
261 | 
262 | Issue [#300](https://github.com/executablebooks/markdown-it-py/issues/300) emphasis inside raw links (underscore) at beginning of line
263 | .
264 | http://example.org/foo._bar_-_baz This works
265 | .
266 | <p><a href="http://example.org/foo._bar_-_baz">http://example.org/foo._bar_-_baz</a> This works</p>
267 | .
268 | 
269 | Issue [#300](https://github.com/executablebooks/markdown-it-py/issues/300) emphasis inside raw links (underscore) at end of line
270 | .
271 | This doesnt http://example.org/foo._bar_-_baz
272 | .
273 | <p>This doesnt <a href="http://example.org/foo._bar_-_baz">http://example.org/foo._bar_-_baz</a></p>
274 | .
275 | 
276 | Issue [#300](https://github.com/executablebooks/markdown-it-py/issues/300) emphasis inside raw links (underscore) mix1
277 | .
278 | While this `does` http://example.org/foo._bar_-_baz, this doesnt http://example.org/foo._bar_-_baz and this **does** http://example.org/foo._bar_-_baz
279 | .
280 | <p>While this <code>does</code> <a href="http://example.org/foo._bar_-_baz">http://example.org/foo._bar_-_baz</a>, this doesnt <a href="http://example.org/foo._bar_-_baz">http://example.org/foo._bar_-_baz</a> and this <strong>does</strong> <a href="http://example.org/foo._bar_-_baz">http://example.org/foo._bar_-_baz</a></p>
281 | .
282 | 
283 | Issue [#300](https://github.com/executablebooks/markdown-it-py/issues/300) emphasis inside raw links (underscore) mix2
284 | .
285 | This applies to _series of URLs too_ http://example.org/foo._bar_-_baz http://example.org/foo._bar_-_baz, these dont http://example.org/foo._bar_-_baz http://example.org/foo._bar_-_baz and these **do** http://example.org/foo._bar_-_baz http://example.org/foo._bar_-_baz
286 | .
287 | <p>This applies to <em>series of URLs too</em> <a href="http://example.org/foo._bar_-_baz">http://example.org/foo._bar_-_baz</a> <a href="http://example.org/foo._bar_-_baz">http://example.org/foo._bar_-_baz</a>, these dont <a href="http://example.org/foo._bar_-_baz">http://example.org/foo._bar_-_baz</a> <a href="http://example.org/foo._bar_-_baz">http://example.org/foo._bar_-_baz</a> and these <strong>do</strong> <a href="http://example.org/foo._bar_-_baz">http://example.org/foo._bar_-_baz</a> <a href="http://example.org/foo._bar_-_baz">http://example.org/foo._bar_-_baz</a></p>
288 | .
289 | 
290 | emphasis inside raw links (asterisk) at end of line
291 | .
292 | This doesnt http://example.org/foo.*bar*-*baz
293 | .
294 | <p>This doesnt <a href="http://example.org/foo.*bar*-*baz">http://example.org/foo.*bar*-*baz</a></p>
295 | .
296 | 
297 | autolink inside link
298 | .
299 | [t https://blaa.org](https://www.gaah.fi)
300 | 
301 | [https://blaa.org](https://www.gaah.fi)
302 | .
303 | <p><a href="https://www.gaah.fi">t https://blaa.org</a></p>
304 | <p><a href="https://www.gaah.fi">https://blaa.org</a></p>
305 | .
306 | 
307 | autolink inside image
308 | .
309 | ![t https://blaa.org](https://www.gaah.fi)
310 | 
311 | ![https://blaa.org](https://www.gaah.fi)
312 | .
313 | <p><img src="https://www.gaah.fi" alt="t https://blaa.org" /></p>
314 | <p><img src="https://www.gaah.fi" alt="https://blaa.org" /></p>
315 | .
316 | 


--------------------------------------------------------------------------------
/tests/data/wrap_width_50.md:
--------------------------------------------------------------------------------
 1 | Wrap strikethrough, middle of paragraph
 2 | .
 3 | Lorem ipsum dolor sit amet, consectetur adipiscing elit. ~~Duis fermentum, tellus quis vulputate vehicula, metus ipsum dictum felis,~~ aliquam mattis purus sem luctus urna.
 4 | .
 5 | Lorem ipsum dolor sit amet, consectetur adipiscing
 6 | elit. ~~Duis fermentum, tellus quis vulputate
 7 | vehicula, metus ipsum dictum felis,~~ aliquam
 8 | mattis purus sem luctus urna.
 9 | .
10 | 
11 | 
12 | Wrap tasklist
13 | .
14 | - [ ] no wrap aaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaa a
15 |   - [ ] no wrap aaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaa a
16 | - [ ] do wrap aaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaa a
17 |   - [ ] do wrap aaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaa a
18 | .
19 | - [ ] no wrap aaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaa a
20 |   - [ ] no wrap aaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaa a
21 | - [ ] do wrap aaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaa
22 |   a
23 |   - [ ] do wrap aaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaa
24 |     a
25 | .
26 | 


--------------------------------------------------------------------------------
/tests/requirements.txt:
--------------------------------------------------------------------------------
1 | pytest
2 | pytest-randomly
3 | 


--------------------------------------------------------------------------------
/tests/test_gfm_compliancy.py:
--------------------------------------------------------------------------------
 1 | import json
 2 | from pathlib import Path
 3 | 
 4 | import mdformat
 5 | from mdformat._util import is_md_equal
 6 | import pytest
 7 | 
 8 | TEST_DATA_DIR = Path(__file__).parent / "data"
 9 | SPECTESTS_PATH = next(TEST_DATA_DIR.glob("gfm_spec.commit-*.json"))
10 | SPECTESTS_CASES = tuple(
11 |     {"name": str(entry["example"]), "md": entry["markdown"]}
12 |     for entry in json.loads(SPECTESTS_PATH.read_text(encoding="utf-8"))
13 | )
14 | 
15 | 
@pytest.mark.parametrize(
    "entry", SPECTESTS_CASES, ids=[case["name"] for case in SPECTESTS_CASES]
)
def test_gfm_spec(entry):
    """Check mdformat-gfm against one example of the GFM spec.

    Two properties are verified:
    1. Formatting once keeps the Markdown equal to the original, as judged
       by `is_md_equal`
    2. Formatting a second time changes nothing
    """
    gfm = {"gfm"}
    source = entry["md"]
    first_pass = mdformat.text(source, extensions=gfm)
    second_pass = mdformat.text(first_pass, extensions=gfm)
    assert is_md_equal(source, first_pass, extensions=gfm)
    assert first_pass == second_pass
31 | 


--------------------------------------------------------------------------------
/tests/test_markdown_it_plugin.py:
--------------------------------------------------------------------------------
 1 | from pathlib import Path
 2 | 
 3 | from markdown_it import MarkdownIt
 4 | from markdown_it.utils import read_fixture_file
 5 | import pytest
 6 | 
 7 | from mdformat_gfm._mdit_gfm_autolink_plugin import gfm_autolink_plugin
 8 | 
 9 | FIXTURE_PATH = Path(__file__).parent / "data" / "gfm_autolink.md"
10 | 
11 | 
12 | @pytest.mark.parametrize("line,title,md,expected_html", read_fixture_file(FIXTURE_PATH))
13 | def test_gfm_autolink(line, title, md, expected_html):
14 |     mdit = MarkdownIt().use(gfm_autolink_plugin)
15 |     text = mdit.render(md)
16 |     assert text.rstrip() == expected_html.rstrip()
17 | 


--------------------------------------------------------------------------------
/tests/test_mdformat_gfm.py:
--------------------------------------------------------------------------------
 1 | from pathlib import Path
 2 | 
 3 | from markdown_it.utils import read_fixture_file
 4 | import mdformat
 5 | import mdformat._cli
 6 | import pytest
 7 | 
 8 | DEFAULT_STYLE_CASES = read_fixture_file(
 9 |     Path(__file__).parent / "data" / "default_style.md"
10 | )
11 | WRAP_WIDTH_50_CASES = read_fixture_file(
12 |     Path(__file__).parent / "data" / "wrap_width_50.md"
13 | )
14 | 
15 | 
@pytest.mark.parametrize(
    "line,title,text,expected",
    DEFAULT_STYLE_CASES,
    ids=[case[1] for case in DEFAULT_STYLE_CASES],
)
def test_default_style__api(line, title, text, expected):
    """Format the fixtures in tests/data/default_style.md via the Python API."""
    formatted = mdformat.text(text, extensions={"gfm"})
    if formatted != expected:
        # Show the actual output to make a failure easier to debug
        print("Formatted (unexpected) Markdown below:", formatted, sep="\n")
    assert formatted == expected
28 | 
29 | 
@pytest.mark.parametrize(
    "line,title,text,expected",
    DEFAULT_STYLE_CASES,
    ids=[f[1] for f in DEFAULT_STYLE_CASES],
)
def test_default_style__cli(line, title, text, expected, tmp_path):
    """Test fixtures in tests/data/default_style.md using the CLI.

    The fixture text is written to a temporary file, formatted in place
    by `mdformat._cli.run` and read back. The file is written and read
    explicitly as UTF-8 so that the test does not depend on the default
    locale encoding of the platform.
    """
    file_path = tmp_path / "test_markdown.md"
    file_path.write_text(text, encoding="utf-8")
    assert mdformat._cli.run([str(file_path)]) == 0
    md_new = file_path.read_text(encoding="utf-8")
    if md_new != expected:
        # Show the actual output to make a failure easier to debug
        print("Formatted (unexpected) Markdown below:")
        print(md_new)
    assert md_new == expected
45 | 
46 | 
@pytest.mark.parametrize(
    "line,title,text,expected",
    WRAP_WIDTH_50_CASES,
    ids=[f[1] for f in WRAP_WIDTH_50_CASES],
)
def test_wrap_width_50__cli(line, title, text, expected, tmp_path):
    """Test fixtures in tests/data/wrap_width_50.md using the CLI.

    The fixture text is written to a temporary file, formatted in place
    by `mdformat._cli.run` with `--wrap=50` and read back. The file is
    written and read explicitly as UTF-8 so that the test does not depend
    on the default locale encoding of the platform.
    """
    file_path = tmp_path / "test_markdown.md"
    file_path.write_text(text, encoding="utf-8")
    assert mdformat._cli.run([str(file_path), "--wrap=50"]) == 0
    md_new = file_path.read_text(encoding="utf-8")
    if md_new != expected:
        # Show the actual output to make a failure easier to debug
        print("Formatted (unexpected) Markdown below:")
        print(md_new)
    assert md_new == expected
62 | 


--------------------------------------------------------------------------------