├── .bumpversion.cfg ├── .flake8 ├── .github └── workflows │ └── tests.yaml ├── .gitignore ├── .pre-commit-config.yaml ├── CHANGELOG.md ├── LICENSE ├── README.md ├── pyproject.toml ├── src └── mdformat_gfm │ ├── __init__.py │ ├── _gfm.py │ ├── _mdformat_plugin.py │ ├── _mdit_gfm_autolink_plugin.py │ └── _text_inline_rule.py └── tests ├── __init__.py ├── data ├── default_style.md ├── generate_json_spec.sh ├── gfm_autolink.md ├── gfm_spec.commit-85d895289c5ab67f988ca659493a64abb5fec7b4.json └── wrap_width_50.md ├── requirements.txt ├── test_gfm_compliancy.py ├── test_markdown_it_plugin.py └── test_mdformat_gfm.py /.bumpversion.cfg: -------------------------------------------------------------------------------- 1 | [bumpversion] 2 | commit = True 3 | tag = True 4 | tag_name = {new_version} 5 | current_version = 0.4.1 6 | 7 | [bumpversion:file:pyproject.toml] 8 | search = version = "{current_version}" # DO NOT EDIT THIS LINE MANUALLY. LET bump2version UTILITY DO IT 9 | replace = version = "{new_version}" # DO NOT EDIT THIS LINE MANUALLY. 
LET bump2version UTILITY DO IT 10 | 11 | [bumpversion:file:src/mdformat_gfm/__init__.py] 12 | search = __version__ = "{current_version}" 13 | replace = __version__ = "{new_version}" 14 | -------------------------------------------------------------------------------- /.flake8: -------------------------------------------------------------------------------- 1 | [flake8] 2 | max-line-length = 88 3 | max-complexity = 10 4 | # These checks violate PEP8 so let's ignore them 5 | extend-ignore = E203 6 | extend-exclude = */site-packages/* 7 | -------------------------------------------------------------------------------- /.github/workflows/tests.yaml: -------------------------------------------------------------------------------- 1 | name: Tests 2 | 3 | on: 4 | push: 5 | branches: [ master ] 6 | tags: [ '[0-9]+.[0-9]+.[0-9]+*' ] 7 | pull_request: 8 | branches: [ master ] 9 | 10 | jobs: 11 | 12 | build: 13 | runs-on: ${{ matrix.os }} 14 | strategy: 15 | matrix: 16 | python-version: ['3.9', '3.10', '3.11', '3.12', '3.13'] 17 | os: [ubuntu-latest, macos-latest, windows-latest] 18 | 19 | steps: 20 | - uses: actions/checkout@v4 21 | 22 | - name: Set up Python ${{ matrix.python-version }} 23 | uses: actions/setup-python@v5 24 | with: 25 | python-version: ${{ matrix.python-version }} 26 | 27 | - name: Installation (deps and package) 28 | run: | 29 | pip install . 
30 | 31 | - name: Test with pytest 32 | run: | 33 | pip install -r tests/requirements.txt 34 | pytest 35 | 36 | - name: Linters 37 | if: matrix.os == 'ubuntu-latest' && matrix.python-version == '3.11' 38 | run: | 39 | pip install pre-commit mypy==1.11.2 40 | pre-commit run --all-files 41 | mypy src/ tests/ 42 | 43 | pypi-publish: 44 | # Only publish if all other jobs succeed 45 | needs: [ build ] 46 | if: github.event_name == 'push' && startsWith(github.event.ref, 'refs/tags') 47 | runs-on: ubuntu-latest 48 | steps: 49 | - uses: actions/checkout@v4 50 | - uses: actions/setup-python@v5 51 | with: 52 | python-version: '3.x' 53 | - name: Install build and publish tools 54 | run: | 55 | pip install build twine 56 | - name: Build and check 57 | run: | 58 | rm -rf dist/ && python -m build 59 | twine check --strict dist/* 60 | - name: Publish 61 | run: | 62 | twine upload dist/* 63 | env: 64 | TWINE_USERNAME: __token__ 65 | TWINE_PASSWORD: ${{ secrets.PYPI_TOKEN }} 66 | -------------------------------------------------------------------------------- /.gitignore: -------------------------------------------------------------------------------- 1 | # Byte-compiled / optimized / DLL files 2 | __pycache__/ 3 | *.py[cod] 4 | *$py.class 5 | 6 | # C extensions 7 | *.so 8 | 9 | # Distribution / packaging 10 | .Python 11 | build/ 12 | develop-eggs/ 13 | dist/ 14 | downloads/ 15 | eggs/ 16 | .eggs/ 17 | lib/ 18 | lib64/ 19 | parts/ 20 | sdist/ 21 | var/ 22 | wheels/ 23 | pip-wheel-metadata/ 24 | share/python-wheels/ 25 | *.egg-info/ 26 | .installed.cfg 27 | *.egg 28 | MANIFEST 29 | 30 | # PyInstaller 31 | # Usually these files are written by a python script from a template 32 | # before PyInstaller builds the exe, so as to inject date/other infos into it. 
33 | *.manifest 34 | *.spec 35 | 36 | # Installer logs 37 | pip-log.txt 38 | pip-delete-this-directory.txt 39 | 40 | # Unit test / coverage reports 41 | htmlcov/ 42 | .tox/ 43 | .nox/ 44 | .coverage 45 | .coverage.* 46 | .cache 47 | nosetests.xml 48 | coverage.xml 49 | *.cover 50 | *.py,cover 51 | .hypothesis/ 52 | .pytest_cache/ 53 | 54 | # Translations 55 | *.mo 56 | *.pot 57 | 58 | # Django stuff: 59 | *.log 60 | local_settings.py 61 | db.sqlite3 62 | db.sqlite3-journal 63 | 64 | # Flask stuff: 65 | instance/ 66 | .webassets-cache 67 | 68 | # Scrapy stuff: 69 | .scrapy 70 | 71 | # Sphinx documentation 72 | docs/_build/ 73 | 74 | # PyBuilder 75 | target/ 76 | 77 | # Jupyter Notebook 78 | .ipynb_checkpoints 79 | 80 | # IPython 81 | profile_default/ 82 | ipython_config.py 83 | 84 | # pyenv 85 | .python-version 86 | 87 | # pipenv 88 | # According to pypa/pipenv#598, it is recommended to include Pipfile.lock in version control. 89 | # However, in case of collaboration, if having platform-specific dependencies or dependencies 90 | # having no cross-platform support, pipenv may install dependencies that don't work, or not 91 | # install all needed dependencies. 92 | #Pipfile.lock 93 | 94 | # PEP 582; used by e.g. 
github.com/David-OConnor/pyflow 95 | __pypackages__/ 96 | 97 | # Celery stuff 98 | celerybeat-schedule 99 | celerybeat.pid 100 | 101 | # SageMath parsed files 102 | *.sage.py 103 | 104 | # Environments 105 | .env 106 | .venv 107 | env/ 108 | venv/ 109 | ENV/ 110 | env.bak/ 111 | venv.bak/ 112 | 113 | # Spyder project settings 114 | .spyderproject 115 | .spyproject 116 | 117 | # Rope project settings 118 | .ropeproject 119 | 120 | # mkdocs documentation 121 | /site 122 | 123 | # mypy 124 | .mypy_cache/ 125 | .dmypy.json 126 | dmypy.json 127 | 128 | # Pyre type checker 129 | .pyre/ 130 | 131 | # IntelliJ 132 | .idea/ 133 | -------------------------------------------------------------------------------- /.pre-commit-config.yaml: -------------------------------------------------------------------------------- 1 | repos: 2 | - repo: https://github.com/pre-commit/pre-commit-hooks 3 | rev: cef0300fd0fc4d2a87a85fa2093c6b283ea36f4b # frozen: v5.0.0 4 | hooks: 5 | - id: check-yaml 6 | - id: check-toml 7 | - repo: https://github.com/pre-commit/pygrep-hooks 8 | rev: 3a6eb0fadf60b3cccfd80bad9dbb6fae7e47b316 # frozen: v1.10.0 9 | hooks: 10 | - id: python-use-type-annotations 11 | - id: python-check-blanket-noqa 12 | - repo: https://github.com/PyCQA/isort 13 | rev: c235f5e450b4b84e58d114ed4c589cbf454175a3 # frozen: 5.13.2 14 | hooks: 15 | - id: isort 16 | - repo: https://github.com/psf/black 17 | rev: 1b2427a2b785cc4aac97c19bb4b9a0de063f9547 # frozen: 24.10.0 18 | hooks: 19 | - id: black 20 | - repo: https://github.com/PyCQA/flake8 21 | rev: e43806be3607110919eff72939fda031776e885a # frozen: 7.1.1 22 | hooks: 23 | - id: flake8 24 | additional_dependencies: 25 | - flake8-bugbear 26 | - flake8-builtins 27 | - flake8-comprehensions 28 | - repo: https://github.com/hukkin/docformatter 29 | rev: ab802050e6e96aaaf7f917fcbc333bb74e2e57f7 # frozen: v1.4.2 30 | hooks: 31 | - id: docformatter 32 | - repo: https://github.com/executablebooks/mdformat 33 | rev: 
00812cd1850e41aae5c0916645b4b7404f538e8c # frozen: 0.7.19 34 | hooks: 35 | - id: mdformat 36 | files: 'README.md' 37 | -------------------------------------------------------------------------------- /CHANGELOG.md: -------------------------------------------------------------------------------- 1 | # Changelog 2 | 3 | ## 0.4.1 4 | 5 | - Fixed 6 | - Stop new autolink parser from finding autolinks in link and image labels 7 | 8 | ## 0.4.0 (yanked from PyPI) 9 | 10 | - Changed 11 | - Replaced `linkify-it-py` dependency with a vendored GFM compatible markdown-it-py autolink plugin. 12 | - Fixed 13 | - Error on angle bracketed `linkify-it-py` links that are not CommonMark autolinks 14 | -------------------------------------------------------------------------------- /LICENSE: -------------------------------------------------------------------------------- 1 | MIT License 2 | 3 | Copyright (c) 2020 Taneli Hukkinen 4 | 5 | Permission is hereby granted, free of charge, to any person obtaining a copy 6 | of this software and associated documentation files (the "Software"), to deal 7 | in the Software without restriction, including without limitation the rights 8 | to use, copy, modify, merge, publish, distribute, sublicense, and/or sell 9 | copies of the Software, and to permit persons to whom the Software is 10 | furnished to do so, subject to the following conditions: 11 | 12 | The above copyright notice and this permission notice shall be included in all 13 | copies or substantial portions of the Software. 14 | 15 | THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR 16 | IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, 17 | FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. 
IN NO EVENT SHALL THE 18 | AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER 19 | LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, 20 | OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE 21 | SOFTWARE. 22 | -------------------------------------------------------------------------------- /README.md: -------------------------------------------------------------------------------- 1 | [![Build Status](https://github.com/hukkin/mdformat-gfm/actions/workflows/tests.yaml/badge.svg?branch=master)](https://github.com/hukkin/mdformat-gfm/actions?query=workflow%3ATests+branch%3Amaster+event%3Apush) 2 | [![PyPI version](https://img.shields.io/pypi/v/mdformat-gfm)](https://pypi.org/project/mdformat-gfm) 3 | 4 | # mdformat-gfm 5 | 6 | > Mdformat plugin for GitHub Flavored Markdown compatibility 7 | 8 | ## Description 9 | 10 | [Mdformat](https://github.com/executablebooks/mdformat) is a formatter for 11 | [CommonMark](https://spec.commonmark.org/current/) 12 | compliant Markdown. 
13 | 14 | Mdformat-gfm is an mdformat plugin that changes the target specification to 15 | [GitHub Flavored Markdown (GFM)](https://github.github.com/gfm/), 16 | making the tool able to format the following syntax extensions: 17 | 18 | - [tables](https://github.github.com/gfm/#tables-extension-) 19 | - [task list items](https://github.github.com/gfm/#task-list-items-extension-) 20 | - [strikethroughs](https://github.github.com/gfm/#strikethrough-extension-) 21 | - [autolinks](https://github.github.com/gfm/#autolinks-extension-) 22 | - [disallowed raw HTML](https://github.github.com/gfm/#disallowed-raw-html-extension-) 23 | (note that no changes are required from a formatter to support this extension) 24 | 25 | ## Install 26 | 27 | ```sh 28 | pipx install mdformat 29 | pipx inject mdformat mdformat-gfm 30 | ``` 31 | 32 | ## Usage 33 | 34 | ```sh 35 | mdformat 36 | ``` 37 | -------------------------------------------------------------------------------- /pyproject.toml: -------------------------------------------------------------------------------- 1 | [build-system] 2 | requires = ["flit_core>=3.2.0,<4"] 3 | build-backend = "flit_core.buildapi" 4 | 5 | 6 | [project] 7 | name = "mdformat-gfm" 8 | version = "0.4.1" # DO NOT EDIT THIS LINE MANUALLY. 
LET bump2version UTILITY DO IT 9 | authors = [ 10 | { name = "Taneli Hukkinen", email = "hukkin@users.noreply.github.com"}, 11 | ] 12 | description = "Mdformat plugin for GitHub Flavored Markdown compatibility" 13 | readme = "README.md" 14 | license = { file = "LICENSE" } 15 | requires-python = ">=3.9" 16 | dependencies = [ 17 | 'mdformat >=0.7.5,<0.8.0', 18 | 'markdown-it-py', # Let `mdformat` choose version boundaries for `markdown-it-py` 19 | 'mdit-py-plugins >=0.2.0', 20 | 'mdformat-tables >=0.4.0', 21 | ] 22 | classifiers = [ 23 | "Topic :: Documentation", 24 | "Topic :: Text Processing :: Markup", 25 | ] 26 | keywords = ["mdformat", "markdown", "formatter", "gfm"] 27 | 28 | [project.urls] 29 | "Homepage" = "https://github.com/hukkin/mdformat-gfm" 30 | "Changelog" = "https://github.com/hukkin/mdformat-gfm/blob/master/CHANGELOG.md" 31 | 32 | [project.entry-points."mdformat.parser_extension"] 33 | "gfm" = "mdformat_gfm._mdformat_plugin" 34 | 35 | 36 | [tool.tox] 37 | requires = ["tox>=4.21.1"] 38 | env_list = ["3.9", "3.10", "3.11", "3.12", "3.13"] 39 | 40 | [tool.tox.env_run_base] 41 | description = "Run tests under {base_python}" 42 | deps = ["-r tests/requirements.txt"] 43 | commands = [["pytest", { replace = "posargs", extend = true }]] 44 | 45 | [tool.tox.env."git-mdformat"] 46 | description = "Run tests against unreleased mdformat from git" 47 | deps = [ 48 | "-r tests/requirements.txt", 49 | "git+https://github.com/executablebooks/mdformat.git@master", 50 | "git+https://github.com/executablebooks/mdformat-tables.git@master", 51 | ] 52 | 53 | [tool.tox.env."mypy"] 54 | base_python = ["python3.11"] 55 | description = "Run mypy under {base_python}" 56 | deps = [ 57 | "-r tests/requirements.txt", 58 | "mypy ==1.11.2", 59 | ] 60 | commands = [["mypy", { replace = "posargs", default = ["src/", "tests/"], extend = true }]] 61 | 62 | 63 | [tool.isort] 64 | # Force imports to be sorted by module, independent of import type 65 | force_sort_within_sections = true 66 
| # Group first party and local folder imports together 67 | no_lines_before = ["LOCALFOLDER"] 68 | 69 | # Configure isort to work without access to site-packages 70 | known_first_party = ["mdformat_gfm", "tests"] 71 | 72 | # Settings for Black compatibility 73 | profile = "black" 74 | 75 | 76 | [tool.pytest.ini_options] 77 | addopts = "--strict-markers --strict-config" 78 | xfail_strict = true 79 | 80 | 81 | [tool.mypy] 82 | show_error_codes = true 83 | warn_unreachable = true 84 | warn_unused_ignores = true 85 | warn_redundant_casts = true 86 | warn_unused_configs = true 87 | # Disabling incremental mode is required for `warn_unused_configs = true` to work 88 | incremental = false 89 | disallow_untyped_defs = true 90 | check_untyped_defs = true 91 | strict_equality = true 92 | implicit_reexport = false 93 | no_implicit_optional = true 94 | overrides = [ 95 | { module = "tests.*", disallow_untyped_defs = false }, 96 | ] 97 | -------------------------------------------------------------------------------- /src/mdformat_gfm/__init__.py: -------------------------------------------------------------------------------- 1 | __version__ = "0.4.1" # DO NOT EDIT THIS LINE MANUALLY. 
LET bump2version UTILITY DO IT 2 | -------------------------------------------------------------------------------- /src/mdformat_gfm/_gfm.py: -------------------------------------------------------------------------------- 1 | # Whitespace characters, as specified in 2 | # https://github.github.com/gfm/#whitespace-character 3 | # (spec version 0.29-gfm (2019-04-06)) 4 | WHITESPACE = frozenset(" \t\n\v\f\r") 5 | 6 | BEFORE_AUTOLINK_CHARS = WHITESPACE | {"*", "_", "~", "("} # Characters that may directly precede a GFM autolink 7 | -------------------------------------------------------------------------------- /src/mdformat_gfm/_mdformat_plugin.py: -------------------------------------------------------------------------------- 1 | import re 2 | 3 | from markdown_it import MarkdownIt 4 | import mdformat.plugins 5 | from mdformat.renderer import DEFAULT_RENDERERS, RenderContext, RenderTreeNode 6 | from mdit_py_plugins.tasklists import tasklists_plugin 7 | 8 | from mdformat_gfm._mdit_gfm_autolink_plugin import gfm_autolink_plugin 9 | 10 | 11 | def update_mdit(mdit: MarkdownIt) -> None: # Enable the GFM autolink, table, strikethrough and task list extensions on `mdit` 12 | # Enable GFM autolink extension 13 | mdit.use(gfm_autolink_plugin) 14 | 15 | # Enable mdformat-tables plugin 16 | tables_plugin = mdformat.plugins.PARSER_EXTENSIONS["tables"] 17 | if tables_plugin not in mdit.options["parser_extension"]: # Register and apply the tables plugin only if it is not already listed 18 | mdit.options["parser_extension"].append(tables_plugin) 19 | tables_plugin.update_mdit(mdit) 20 | 21 | # Enable strikethrough markdown-it extension 22 | mdit.enable("strikethrough") 23 | 24 | # Enable tasklist markdown-it extension 25 | mdit.use(tasklists_plugin) 26 | 27 | 28 | def _strikethrough_renderer(node: RenderTreeNode, context: RenderContext) -> str: # Render the children and wrap the result in "~~" delimiters 29 | content = "".join(child.render(context) for child in node.children) 30 | return "~~" + content + "~~" 31 | 32 | 33 | def _render_with_default_renderer(node: RenderTreeNode, context: RenderContext) -> str: 34 | """Render the node using default renderer instead of the one in `context`. 
35 | 36 | We don't use `RenderContext.with_default_renderer_for` because that 37 | changes the default renderer in context, where it's applied 38 | recursively to render functions of children. 39 | """ 40 | syntax_type = node.type 41 | text = DEFAULT_RENDERERS[syntax_type](node, context) 42 | for postprocessor in context.postprocessors.get(syntax_type, ()): 43 | text = postprocessor(text, node, context) 44 | return text 45 | 46 | 47 | def _is_task_list_item(node: RenderTreeNode) -> bool: 48 | assert node.type == "list_item" 49 | classes = node.attrs.get("class", "") 50 | assert isinstance(classes, str) 51 | return "task-list-item" in classes 52 | 53 | 54 | def _list_item_renderer(node: RenderTreeNode, context: RenderContext) -> str: 55 | if not _is_task_list_item(node): 56 | return _render_with_default_renderer(node, context) 57 | 58 | # Tasklists extension makes a bit weird token stream where 59 | # tasks are annotated by html. We need to remove the HTML. 60 | paragraph_node = node.children[0] 61 | inline_node = paragraph_node.children[0] 62 | assert inline_node.type == "inline" 63 | assert inline_node.children, "inline token must have children" 64 | html_inline_node = inline_node.children[0] 65 | assert 'class="task-list-item-checkbox"' in html_inline_node.content 66 | 67 | # This is naughty, shouldn't mutate and rely on `.remove` here 68 | inline_node.children.remove(html_inline_node) 69 | 70 | checkmark = "x" if 'checked="checked"' in html_inline_node.content else " " 71 | 72 | text = _render_with_default_renderer(node, context) 73 | 74 | if context.do_wrap: 75 | wrap_mode = context.options["mdformat"]["wrap"] 76 | if isinstance(wrap_mode, int): 77 | text = text[4:] # Remove the "xxxx" added in `_postprocess_inline` 78 | # Strip leading space chars (numeric representations) 79 | text = re.sub(r"^( )+", "", text) 80 | text = text.lstrip() 81 | return f"[{checkmark}] {text}" 82 | 83 | 84 | def _postprocess_inline(text: str, node: RenderTreeNode, context: 
RenderContext) -> str: 85 | """Postprocess inline tokens. 86 | 87 | Fix word wrap of the first line in a task list item. It should be 88 | wrapped narrower than normal because of the "[ ] " prefix that 89 | indicates a task list item. We fool word wrap by prefixing an 90 | unwrappable dummy string of the same length. This prefix needs to be 91 | later removed (in `_list_item_renderer`). 92 | """ 93 | if not context.do_wrap: 94 | return text 95 | wrap_mode = context.options["mdformat"]["wrap"] 96 | if not isinstance(wrap_mode, int): 97 | return text 98 | if ( 99 | node.parent 100 | and node.parent.type == "paragraph" 101 | and not node.parent.previous_sibling 102 | and node.parent.parent 103 | and node.parent.parent.type == "list_item" 104 | and _is_task_list_item(node.parent.parent) 105 | ): 106 | text = text.lstrip("\x00") 107 | text = text.lstrip() 108 | text = "xxxx" + text # Dummy prefix, four chars like the "[ ] " task list prefix 109 | return text 110 | 111 | 112 | def _gfm_autolink_renderer(node: RenderTreeNode, context: RenderContext) -> str: # Return the autolink text stored in the node's "source_text" meta entry 113 | return node.meta["source_text"] 114 | 115 | 116 | def _escape_text(text: str, node: RenderTreeNode, context: RenderContext) -> str: 117 | # Escape strikethroughs 118 | text = text.replace("~~", "\\~~") 119 | 120 | return text 121 | 122 | 123 | _RE_GFM_TICK_BOX = re.compile(r"^\[([ xX])]", flags=re.MULTILINE) # Matches "[ ]", "[x]" or "[X]" at the start of any line 124 | 125 | 126 | def _escape_paragraph(text: str, node: RenderTreeNode, context: RenderContext) -> str: 127 | # Escape tasklists 128 | text = _RE_GFM_TICK_BOX.sub(r"\[" + r"\g<1>" + r"\]", text) 129 | 130 | return text 131 | 132 | 133 | RENDERERS = { 134 | "s": _strikethrough_renderer, 135 | "list_item": _list_item_renderer, 136 | "gfm_autolink": _gfm_autolink_renderer, 137 | } 138 | POSTPROCESSORS = { 139 | "text": _escape_text, 140 | "inline": _postprocess_inline, 141 | "paragraph": _escape_paragraph, 142 | } 143 | -------------------------------------------------------------------------------- /src/mdformat_gfm/_mdit_gfm_autolink_plugin.py: 
-------------------------------------------------------------------------------- 1 | import re 2 | 3 | from markdown_it import MarkdownIt 4 | from markdown_it.rules_inline import StateInline 5 | 6 | from mdformat_gfm import _gfm 7 | from mdformat_gfm._text_inline_rule import text_rule 8 | 9 | 10 | def gfm_autolink_plugin(md: MarkdownIt) -> None: 11 | """Markdown-it plugin to parse GFM autolinks.""" 12 | md.inline.ruler.before("linkify", "gfm_autolink", gfm_autolink) # Register the rule ahead of the "linkify" rule 13 | 14 | # The default "text" inline rule will skip starting characters of GFM 15 | # autolinks. It can be disabled, but that is disastrous for performance. 16 | # Instead, we replace it with a custom "text" inline rule that yields at 17 | # locations that can potentially be the beginning of a GFM autolink. 18 | md.inline.ruler.at("text", text_rule) 19 | 20 | 21 | # A string that matches this must still be invalidated if it ends with "_" or "-" 22 | RE_GFM_EMAIL = re.compile(r"[a-zA-Z0-9._+-]+@[a-zA-Z0-9_-]+(?:\.[a-zA-Z0-9_-]+)+") 23 | # A string that matches this must still be invalidated if last two segments contain "_" 24 | RE_GFM_AUTOLINK_DOMAIN = re.compile(r"[a-zA-Z0-9_-]+(?:\.[a-zA-Z0-9_-]+)+") 25 | 26 | RE_ENDS_IN_ENTITY_REF = re.compile(r"&[a-zA-Z0-9]+;\Z") # Matches an entity reference such as "&amp;" that ends exactly at the end of the searched span 27 | 28 | ASCII_ALPHANUMERICS = frozenset( 29 | "abcdefghijklmnopqrstuvwxyz" "ABCDEFGHIJKLMNOPQRSTUVWXYZ" "0123456789" 30 | ) 31 | 32 | 33 | def gfm_autolink(state: StateInline, silent: bool) -> bool: # noqa: C901 34 | """Markdown-it-py rule to parse GFM autolinks. 35 | 36 | This rule parses autolinks as specified here: 37 | https://github.github.com/gfm/#autolinks-extension- 38 | 39 | Args: 40 | state: Parse state object. 41 | silent: Disables token generation. 42 | Returns: 43 | bool: True if GFM autolink found. 
44 | """ 45 | # Prevents autolink parsing in link and image labels 46 | if state.level > 0: 47 | return False 48 | 49 | pos = state.pos 50 | src = state.src 51 | 52 | # Autolink can only be at the beginning of a line, after whitespace, 53 | # or any of the delimiting characters *, _, ~, and (. 54 | if pos: 55 | preceding_char = src[pos - 1] 56 | if preceding_char not in _gfm.BEFORE_AUTOLINK_CHARS: 57 | return False 58 | 59 | if src.startswith("www.", pos): 60 | pos += 4 # Skip the "www." prefix 61 | try: 62 | pos, domain, resource = read_domain_and_resource(src, pos) 63 | except NotFound: 64 | return False 65 | 66 | url = f"www.{domain}{resource}" 67 | full_url = "http://" + url # The link target gets an "http://" scheme; `url` keeps the text as written 68 | elif src.startswith(("http://", "https://"), pos): 69 | scheme = "https://" if src[pos + 4] == "s" else "http://" # The fifth char tells "https" from "http" 70 | pos += len(scheme) 71 | 72 | try: 73 | pos, domain, resource = read_domain_and_resource(src, pos) 74 | except NotFound: 75 | return False 76 | 77 | url = f"{scheme}{domain}{resource}" 78 | full_url = url 79 | elif src.startswith(("mailto:", "xmpp:"), pos): 80 | scheme = "xmpp:" if src[pos] == "x" else "mailto:" # The first char tells the two schemes apart 81 | pos += len(scheme) 82 | 83 | try: 84 | pos, email = read_email(src, pos) 85 | except NotFound: 86 | return False 87 | 88 | if scheme == "xmpp:" and src[pos : pos + 1] == "/": # Slicing yields "" instead of raising at end of input 89 | pos += 1 90 | resource_start_pos = pos 91 | while pos < len(src) and src[pos] in ASCII_ALPHANUMERICS | {".", "@"}: 92 | pos += 1 93 | resource = src[resource_start_pos:pos] 94 | if resource.endswith("."): 95 | pos -= 1 # A single trailing "." is left out of the autolink 96 | resource = resource[:-1] 97 | if not resource: 98 | return False 99 | else: 100 | resource = "" 101 | 102 | source_autolink = scheme + email 103 | if resource: 104 | source_autolink += "/" + resource 105 | 106 | url = source_autolink 107 | full_url = source_autolink 108 | else: 109 | try: 110 | pos, email = read_email(src, pos) 111 | except NotFound: 112 | return False 113 | 114 | url = email 115 | full_url = "mailto:" + email # A bare email address links to a "mailto:" target 116 | 117 | normalized_full_url = 
state.md.normalizeLink(full_url) 118 | if not state.md.validateLink(normalized_full_url): 119 | return False 120 | 121 | push_tokens(state, normalized_full_url, url, silent) 122 | state.pos = pos 123 | return True 124 | 125 | 126 | def push_tokens( 127 | state: StateInline, full_url: str, source_url: str, silent: bool 128 | ) -> None: # Push the open/text/close tokens of one autolink; does nothing when `silent` is true 129 | if silent: 130 | return 131 | token = state.push("gfm_autolink_open", "a", 1) 132 | token.attrs = {"href": full_url} 133 | token.meta = {"source_text": source_url} # Keep the link text exactly as it was written in the source 134 | 135 | token = state.push("text", "", 0) 136 | token.content = state.md.normalizeLinkText(source_url) 137 | 138 | state.push("gfm_autolink_close", "a", -1) 139 | 140 | 141 | def trim_resource(untrimmed: str) -> tuple[str, int]: 142 | """Trim illegal trailing chars from autolink resource. 143 | 144 | Trim trailing punctuation, parentheses and entity refs as per GFM 145 | spec. Also trim backslashes. The spec does not mention backslash, 146 | but I think it should. This is referred to as "extended autolink 147 | path validation" in the GFM spec. Return a tuple with the trimmed 148 | resource and the number of characters removed. 
149 | """ 150 | i = len(untrimmed) - 1 # Index of the last character that is still kept 151 | while i >= 0: 152 | c = untrimmed[i] 153 | if c == ";": 154 | ending_entity_match = RE_ENDS_IN_ENTITY_REF.search(untrimmed, endpos=i + 1) 155 | if not ending_entity_match: 156 | break 157 | i = ending_entity_match.start() # Jump to the "&"; the decrement below then drops the whole entity reference 158 | elif c == ")": 159 | if untrimmed.count("(", 0, i + 1) >= untrimmed.count(")", 0, i + 1): # Keep the ")" if there are at least as many "(" as ")" up to this point 160 | break 161 | elif c in {"?", "!", ".", ",", ":", "*", "_", "~"}: 162 | pass 163 | elif c == "\\": # not part of the spec, but should be 164 | pass 165 | else: 166 | break 167 | i -= 1 168 | 169 | trimmed = untrimmed[: i + 1] 170 | trim_count = len(untrimmed) - len(trimmed) 171 | return trimmed, trim_count 172 | 173 | 174 | class NotFound(Exception): 175 | """Raised if a function didn't find what it was looking for.""" 176 | 177 | 178 | def read_domain_and_resource(src: str, pos: int) -> tuple[int, str, str]: 179 | """Read autolink domain and resource. 180 | 181 | Raise NotFound if not found. Return a tuple (pos, domain, resource). 182 | """ 183 | domain_match = RE_GFM_AUTOLINK_DOMAIN.match(src, pos) 184 | if not domain_match: 185 | raise NotFound 186 | domain = domain_match.group() 187 | pos = domain_match.end() 188 | segments = domain.rsplit(".", 2) # Only the last two segments are checked for "_" 189 | if "_" in segments[-2] or "_" in segments[-1]: 190 | raise NotFound 191 | 192 | resource_start_pos = pos 193 | while pos < len(src) and src[pos] not in _gfm.WHITESPACE | {"<"}: 194 | pos += 1 195 | resource = src[resource_start_pos:pos] 196 | 197 | resource, trim_count = trim_resource(resource) 198 | pos -= trim_count # Step back over the characters that were trimmed off 199 | return pos, domain, resource 200 | 201 | 202 | def read_email(src: str, pos: int) -> tuple[int, str]: 203 | """Read autolink email. 204 | 205 | Raise NotFound if not found. Return a tuple (pos, email). 
206 | """ 207 | email_match = RE_GFM_EMAIL.match(src, pos) 208 | email = email_match.group() if email_match else None 209 | if not email or email[-1] in {"-", "_"}: 210 | raise NotFound 211 | assert email_match is not None # Always true here: `email` is only non-empty when the regex matched 212 | pos = email_match.end() 213 | 214 | # This isn't really part of the GFM spec, but an attempt to cover 215 | # up its flaws. If a trailing hyphen or underscore invalidates an 216 | # autolink, then an escaped hyphen or underscore should too. 217 | if src[pos : pos + 2] in {"\\-", "\\_"}: 218 | raise NotFound 219 | 220 | return pos, email 221 | -------------------------------------------------------------------------------- /src/mdformat_gfm/_text_inline_rule.py: -------------------------------------------------------------------------------- 1 | """A replacement for the "text" inline rule in markdown-it. 2 | 3 | The default "text" rule will skip until the next character in 4 | `_TerminatorChars` is found. This extends the set of termination points 5 | to those that can potentially be the beginning of a GFM autolink. The 6 | GFM autolink plugin also works with "text" inline rule disabled, but 7 | this should (at least partially) bring back the performance boost that 8 | "text" inline rule provides. 9 | """ 10 | 11 | import re 12 | 13 | from markdown_it.rules_inline import StateInline 14 | 15 | from mdformat_gfm import _gfm 16 | 17 | # The default set of terminator characters 18 | _TerminatorChars = { 19 | "\n", 20 | "!", 21 | "#", 22 | "$", 23 | "%", 24 | "&", 25 | "*", 26 | "+", 27 | "-", 28 | ":", 29 | "<", 30 | "=", 31 | ">", 32 | "@", 33 | "[", 34 | "\\", 35 | "]", 36 | "^", 37 | "_", 38 | "`", 39 | "{", 40 | "}", 41 | "~", 42 | } 43 | 44 | _default_terminator = "[" + re.escape("".join(_TerminatorChars)) + "]" # Regex character class that matches any one default terminator 45 | _gfm_autolink_terminator = ( 46 | r"(?:" r"www\." 
"|" "http" "|" "mailto:" "|" "xmpp:" "|" r"[a-zA-Z0-9._+-]+@" r")" 47 | ) 48 | _before_autolink = "[" + re.escape("".join(_gfm.BEFORE_AUTOLINK_CHARS)) + "]" 49 | 50 | _RE_TERMINATOR_FIRST_CHAR = re.compile( 51 | _default_terminator + "|" + _gfm_autolink_terminator 52 | ) 53 | _RE_TERMINATOR_NON_FIRST_CHAR = re.compile( 54 | r"(?s:.)" # match any character (also newline) 55 | + _default_terminator 56 | + "|" 57 | + _before_autolink 58 | + _gfm_autolink_terminator 59 | ) 60 | 61 | 62 | def text_rule(state: StateInline, silent: bool) -> bool: # Consume plain text up to the next default terminator or potential GFM autolink start 63 | pos = state.pos 64 | 65 | # Handle the special case where `pos` is zero 66 | if not pos: 67 | if _RE_TERMINATOR_FIRST_CHAR.match(state.src): 68 | return False 69 | pos = 1 # Char 0 is plain text; the search below starts at pos - 1 and treats it as the preceding char 70 | 71 | # Now `pos` cannot be zero, so we can search with a regex that looks at 72 | # preceding character too. 73 | terminator_match = _RE_TERMINATOR_NON_FIRST_CHAR.search(state.src, pos - 1) 74 | if terminator_match: 75 | pos = terminator_match.start() + 1 # The match starts one char before the terminator 76 | else: 77 | pos = state.posMax 78 | 79 | if pos == state.pos: # Nothing to consume at this position 80 | return False 81 | 82 | if not silent: 83 | state.pending += state.src[state.pos : pos] 84 | 85 | state.pos = pos 86 | 87 | return True 88 | -------------------------------------------------------------------------------- /tests/__init__.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/hukkin/mdformat-gfm/3ab8cac2a17e0a4a439b771b79846973312a2ae3/tests/__init__.py -------------------------------------------------------------------------------- /tests/data/default_style.md: -------------------------------------------------------------------------------- 1 | Table 2 | . 3 | a | b | c 4 | :- | -: | :-: 5 | 1 | 2 | 3 6 | xxxxxx | yyyyyy | zzzzzz 7 | . 8 | | a | b | c | 9 | | :----- | -----: | :----: | 10 | | 1 | 2 | 3 | 11 | | xxxxxx | yyyyyy | zzzzzz | 12 | . 13 | 14 | Simple strikethrough 15 | . 16 | ~~Hi~~ Hello, world! 17 | . 18 | ~~Hi~~ Hello, world! 19 | . 
20 | 21 | Escaped strikethrough 22 | . 23 | ~~Hi~\~ Hello, world! 24 | . 25 | \~~Hi\~~ Hello, world! 26 | . 27 | 28 | Nested tasklists 29 | . 30 | - [x] foo 31 | - [ ] bar 32 | - [x] baz 33 | - [ ] bim 34 | . 35 | - [x] foo 36 | - [ ] bar 37 | - [x] baz 38 | - [ ] bim 39 | . 40 | 41 | Mix tasks and other items 42 | . 43 | 1. [x] task done 44 | 2. not a task 45 | 3. [ ] task not done 46 | 4. not a task 47 | . 48 | 1. [x] task done 49 | 1. not a task 50 | 1. [ ] task not done 51 | 1. not a task 52 | . 53 | 54 | Reduce tasklist whitespace 55 | . 56 | - [x] reduce spaces 57 | . 58 | - [x] reduce spaces 59 | . 60 | 61 | Autolink with a backslash 62 | . 63 | http://www.python.org/autolink\extension 64 | . 65 | http://www.python.org/autolink\extension 66 | . 67 | 68 | Autolink with percentage encoded space 69 | . 70 | https://mytest.com/files/word%20document.docx 71 | . 72 | https://mytest.com/files/word%20document.docx 73 | . 74 | 75 | Autolink with port 76 | . 77 | test.com:443 78 | . 79 | test.com:443 80 | . 81 | 82 | Tasklist escape 83 | . 84 | - [x] foo 85 | - \[ ] bim 86 | . 87 | - [x] foo 88 | - \[ \] bim 89 | . 90 | -------------------------------------------------------------------------------- /tests/data/generate_json_spec.sh: --------------------------------------------------------------------------------
#!/bin/bash
# Generate a JSON file with all the examples in the GFM spec.
#
# Update the `gfmcommit` commit hash below to generate an updated JSON.
# Run this script in the `tests/data/` directory to write the file
# `tests/data/gfm_spec.commit-{commit-hash}.json`.

# Abort on the first failing command (or unset variable) so that a failed
# clone or reset can never be followed by a dump written under the pinned
# commit's name.
set -euo pipefail

gfmcommit=85d895289c5ab67f988ca659493a64abb5fec7b4

# Reuse an existing checkout so the script can be re-run in place.
if [ ! -d cmark-gfm ]; then
    git clone https://github.com/github/cmark-gfm.git
fi

# Pin the checkout to the requested commit without changing directory.
git -C cmark-gfm reset --hard "$gfmcommit"

python3 cmark-gfm/test/spec_tests.py --dump-tests --spec=cmark-gfm/test/spec.txt > "gfm_spec.commit-${gfmcommit}.json"
15 | -------------------------------------------------------------------------------- /tests/data/gfm_autolink.md: -------------------------------------------------------------------------------- 1 | linkify 2 | . 3 | url http://www.youtube.com/watch?v=5Jt5GEr4AYg. 4 | . 5 |

url http://www.youtube.com/watch?v=5Jt5GEr4AYg.

6 | . 7 | 8 | 9 | don't touch text in links 10 | . 11 | [https://example.com](https://example.com) 12 | . 13 |

https://example.com

14 | . 15 | 16 | 17 | don't touch text in autolinks 18 | . 19 | 20 | . 21 |

https://example.com

22 | . 23 | 24 | 25 | don't touch text in html tags 26 | . 27 | https://example.com 28 | . 29 |

https://example.com

30 | . 31 | 32 | 33 | space separated autolink in html tags 34 | . 35 | https://example.com 36 | . 37 |

https://example.com

38 | . 39 | 40 | space separated autolink after html tag 41 | . 42 | https://example.com 43 | . 44 |

https://example.com

45 | . 46 | 47 | autolink in link after tag 48 | . 49 | [t https://example.fi](https://example.com) 50 | . 51 |

t https://example.fi

52 | . 53 | 54 | 55 | autolink in link after tag 56 | . 57 | [t https://example.fi](https://example.com) 58 | . 59 |

t https://example.fi

60 | . 61 | 62 | 63 | entities inside raw links 64 | . 65 | https://example.com/foo&bar 66 | . 67 |

https://example.com/foo&amp;bar

68 | . 69 | 70 | 71 | emphasis inside raw links (asterisk, can happen in links with params) 72 | . 73 | https://example.com/foo*bar*baz 74 | . 75 |

https://example.com/foo*bar*baz

76 | . 77 | 78 | 79 | emphasis inside raw links (underscore) 80 | . 81 | http://example.org/foo._bar_-_baz 82 | . 83 |

http://example.org/foo._bar_-_baz

84 | . 85 | 86 | 87 | backticks inside raw links 88 | . 89 | https://example.com/foo`bar`baz 90 | . 91 |

https://example.com/foo`bar`baz

92 | . 93 | 94 | 95 | links inside raw links 96 | . 97 | https://example.com/foo[123](456)bar 98 | . 99 |

https://example.com/foo[123](456)bar

100 | . 101 | 102 | escaped CommonMark autolink 103 | . 104 | \ 105 | . 106 |

<https://python.org>

107 | . 108 | 109 | escapes not allowed at the start 110 | . 111 | \https://example.com 112 | . 113 |

\https://example.com

114 | . 115 | 116 | 117 | escapes not allowed at colon 118 | . 119 | https\://example.com 120 | . 121 |

https://example.com

122 | . 123 | 124 | 125 | escapes not allowed at slashes 126 | . 127 | https:\//aa.org https://bb.org 128 | . 129 |

https://aa.org https://bb.org

130 | . 131 | 132 | 133 | fuzzy link shouldn't match cc.org 134 | . 135 | https:/\/cc.org 136 | . 137 |

https://cc.org

138 | . 139 | 140 | 141 | bold links (exclude markup of pairs from link tail) 142 | . 143 | **http://example.com/foobar** 144 | . 145 |

http://example.com/foobar

146 | . 147 | 148 | match links without protocol 149 | . 150 | www.example.org 151 | . 152 |

www.example.org

153 | . 154 | 155 | match links without protocol, part 2 156 | . 157 | GFM autolink www.commonmark.org 158 | . 159 |

GFM autolink www.commonmark.org

160 | . 161 | 162 | coverage, prefix not valid 163 | . 164 | http:/example.com/ 165 | . 166 |

http:/example.com/

167 | . 168 | 169 | 170 | coverage, negative link level 171 | . 172 | [https://example.com](https://example.com) 173 | . 174 |

https://example.com

175 | . 176 | 177 | 178 | emphasis with '*', real link: 179 | . 180 | http://cdecl.ridiculousfish.com/?q=int+%28*f%29+%28float+*%29%3B 181 | . 182 |

http://cdecl.ridiculousfish.com/?q=int+(*f)+(float+*)%3B

183 | . 184 | 185 | emphasis with '_', real link: 186 | . 187 | https://www.sell.fi/sites/default/files/elainlaakarilehti/tieteelliset_artikkelit/kahkonen_t._et_al.canine_pancreatitis-_review.pdf 188 | . 189 |

https://www.sell.fi/sites/default/files/elainlaakarilehti/tieteelliset_artikkelit/kahkonen_t._et_al.canine_pancreatitis-_review.pdf

190 | . 191 | 192 | emails 193 | . 194 | test@example.com 195 | 196 | mailto:test@example.com 197 | 198 | xmpp:foo@bar.baz/blaa@flii.buu. 199 | . 200 |

test@example.com

201 |

mailto:test@example.com

202 |

xmpp:foo@bar.baz/blaa@flii.buu.

203 | . 204 | 205 | 206 | typographer should not break href 207 | . 208 | http://example.com/(c) 209 | . 210 |

http://example.com/(c)

211 | . 212 | 213 | before line 214 | . 215 | before 216 | www.github.com 217 | . 218 |

before 219 | www.github.com

220 | . 221 | 222 | after line 223 | . 224 | github.com 225 | after 226 | . 227 |

github.com 228 | after

229 | . 230 | 231 | before after lines 232 | . 233 | before 234 | github.com 235 | after 236 | . 237 |

before 238 | github.com 239 | after

240 | . 241 | 242 | before after lines with blank line 243 | . 244 | before 245 | 246 | github.com 247 | 248 | after 249 | . 250 |

before

251 |

github.com

252 |

after

253 | . 254 | 255 | Don't match escaped 256 | . 257 | google\.com 258 | . 259 |

google.com

260 | . 261 | 262 | Issue [#300](https://github.com/executablebooks/markdown-it-py/issues/300) emphasis inside raw links (underscore) at beginning of line 263 | . 264 | http://example.org/foo._bar_-_baz This works 265 | . 266 |

http://example.org/foo._bar_-_baz This works

267 | . 268 | 269 | Issue [#300](https://github.com/executablebooks/markdown-it-py/issues/300) emphasis inside raw links (underscore) at end of line 270 | . 271 | This doesnt http://example.org/foo._bar_-_baz 272 | . 273 |

This doesnt http://example.org/foo._bar_-_baz

274 | . 275 | 276 | Issue [#300](https://github.com/executablebooks/markdown-it-py/issues/300) emphasis inside raw links (underscore) mix1 277 | . 278 | While this `does` http://example.org/foo._bar_-_baz, this doesnt http://example.org/foo._bar_-_baz and this **does** http://example.org/foo._bar_-_baz 279 | . 280 |

While this does http://example.org/foo._bar_-_baz, this doesnt http://example.org/foo._bar_-_baz and this does http://example.org/foo._bar_-_baz

281 | . 282 | 283 | Issue [#300](https://github.com/executablebooks/markdown-it-py/issues/300) emphasis inside raw links (underscore) mix2 284 | . 285 | This applies to _series of URLs too_ http://example.org/foo._bar_-_baz http://example.org/foo._bar_-_baz, these dont http://example.org/foo._bar_-_baz http://example.org/foo._bar_-_baz and these **do** http://example.org/foo._bar_-_baz http://example.org/foo._bar_-_baz 286 | . 287 |

This applies to series of URLs too http://example.org/foo._bar_-_baz http://example.org/foo._bar_-_baz, these dont http://example.org/foo._bar_-_baz http://example.org/foo._bar_-_baz and these do http://example.org/foo._bar_-_baz http://example.org/foo._bar_-_baz

288 | . 289 | 290 | emphasis inside raw links (asterisk) at end of line 291 | . 292 | This doesnt http://example.org/foo.*bar*-*baz 293 | . 294 |

This doesnt http://example.org/foo.*bar*-*baz

295 | . 296 | 297 | autolink inside link 298 | . 299 | [t https://blaa.org](https://www.gaah.fi) 300 | 301 | [https://blaa.org](https://www.gaah.fi) 302 | . 303 |

t https://blaa.org

304 |

https://blaa.org

305 | . 306 | 307 | autolink inside image 308 | . 309 | ![t https://blaa.org](https://www.gaah.fi) 310 | 311 | ![https://blaa.org](https://www.gaah.fi) 312 | . 313 |

t https://blaa.org

314 |

https://blaa.org

315 | . 316 | -------------------------------------------------------------------------------- /tests/data/wrap_width_50.md: -------------------------------------------------------------------------------- 1 | Wrap strikethrough, middle of paragraph 2 | . 3 | Lorem ipsum dolor sit amet, consectetur adipiscing elit. ~~Duis fermentum, tellus quis vulputate vehicula, metus ipsum dictum felis,~~ aliquam mattis purus sem luctus urna. 4 | . 5 | Lorem ipsum dolor sit amet, consectetur adipiscing 6 | elit. ~~Duis fermentum, tellus quis vulputate 7 | vehicula, metus ipsum dictum felis,~~ aliquam 8 | mattis purus sem luctus urna. 9 | . 10 | 11 | 12 | Wrap tasklist 13 | . 14 | - [ ] no wrap aaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaa a 15 | - [ ] no wrap aaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaa a 16 | - [ ] do wrap aaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaa a 17 | - [ ] do wrap aaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaa a 18 | . 19 | - [ ] no wrap aaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaa a 20 | - [ ] no wrap aaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaa a 21 | - [ ] do wrap aaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaa 22 | a 23 | - [ ] do wrap aaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaa 24 | a 25 | . 
26 | -------------------------------------------------------------------------------- /tests/requirements.txt: -------------------------------------------------------------------------------- 1 | pytest 2 | pytest-randomly 3 | -------------------------------------------------------------------------------- /tests/test_gfm_compliancy.py: -------------------------------------------------------------------------------- 1 | import json 2 | from pathlib import Path 3 | 4 | import mdformat 5 | from mdformat._util import is_md_equal 6 | import pytest 7 | 8 | TEST_DATA_DIR = Path(__file__).parent / "data" 9 | SPECTESTS_PATH = next(TEST_DATA_DIR.glob("gfm_spec.commit-*.json")) 10 | SPECTESTS_CASES = tuple( 11 | {"name": str(entry["example"]), "md": entry["markdown"]} 12 | for entry in json.loads(SPECTESTS_PATH.read_text(encoding="utf-8")) 13 | ) 14 | 15 | 16 | @pytest.mark.parametrize( 17 | "entry", SPECTESTS_CASES, ids=[c["name"] for c in SPECTESTS_CASES] 18 | ) 19 | def test_gfm_spec(entry): 20 | """Test mdformat-gfm against the GFM spec. 21 | 22 | Test that: 23 | 1. Markdown AST is the same before and after 1 pass of formatting 24 | 2. 
Markdown after 1st pass and 2nd pass of formatting are equal 25 | """ 26 | md_original = entry["md"] 27 | md_new = mdformat.text(md_original, extensions={"gfm"}) 28 | md_2nd_pass = mdformat.text(md_new, extensions={"gfm"}) 29 | assert is_md_equal(md_original, md_new, extensions={"gfm"}) 30 | assert md_new == md_2nd_pass 31 | -------------------------------------------------------------------------------- /tests/test_markdown_it_plugin.py: -------------------------------------------------------------------------------- 1 | from pathlib import Path 2 | 3 | from markdown_it import MarkdownIt 4 | from markdown_it.utils import read_fixture_file 5 | import pytest 6 | 7 | from mdformat_gfm._mdit_gfm_autolink_plugin import gfm_autolink_plugin 8 | 9 | FIXTURE_PATH = Path(__file__).parent / "data" / "gfm_autolink.md" 10 | 11 | 12 | @pytest.mark.parametrize("line,title,md,expected_html", read_fixture_file(FIXTURE_PATH)) 13 | def test_gfm_autolink(line, title, md, expected_html): 14 | mdit = MarkdownIt().use(gfm_autolink_plugin) 15 | text = mdit.render(md) 16 | assert text.rstrip() == expected_html.rstrip() 17 | -------------------------------------------------------------------------------- /tests/test_mdformat_gfm.py: -------------------------------------------------------------------------------- 1 | from pathlib import Path 2 | 3 | from markdown_it.utils import read_fixture_file 4 | import mdformat 5 | import mdformat._cli 6 | import pytest 7 | 8 | DEFAULT_STYLE_CASES = read_fixture_file( 9 | Path(__file__).parent / "data" / "default_style.md" 10 | ) 11 | WRAP_WIDTH_50_CASES = read_fixture_file( 12 | Path(__file__).parent / "data" / "wrap_width_50.md" 13 | ) 14 | 15 | 16 | @pytest.mark.parametrize( 17 | "line,title,text,expected", 18 | DEFAULT_STYLE_CASES, 19 | ids=[f[1] for f in DEFAULT_STYLE_CASES], 20 | ) 21 | def test_default_style__api(line, title, text, expected): 22 | """Test fixtures in tests/data/default_style.md.""" 23 | md_new = mdformat.text(text, 
extensions={"gfm"}) 24 | if md_new != expected: 25 | print("Formatted (unexpected) Markdown below:") 26 | print(md_new) 27 | assert md_new == expected 28 | 29 | 30 | @pytest.mark.parametrize( 31 | "line,title,text,expected", 32 | DEFAULT_STYLE_CASES, 33 | ids=[f[1] for f in DEFAULT_STYLE_CASES], 34 | ) 35 | def test_default_style__cli(line, title, text, expected, tmp_path): 36 | """Test fixtures in tests/data/default_style.md.""" 37 | file_path = tmp_path / "test_markdown.md" 38 | file_path.write_text(text) 39 | assert mdformat._cli.run([str(file_path)]) == 0 40 | md_new = file_path.read_text() 41 | if md_new != expected: 42 | print("Formatted (unexpected) Markdown below:") 43 | print(md_new) 44 | assert md_new == expected 45 | 46 | 47 | @pytest.mark.parametrize( 48 | "line,title,text,expected", 49 | WRAP_WIDTH_50_CASES, 50 | ids=[f[1] for f in WRAP_WIDTH_50_CASES], 51 | ) 52 | def test_wrap_width_50__cli(line, title, text, expected, tmp_path): 53 | """Test fixtures in tests/data/wrap_width_50.md.""" 54 | file_path = tmp_path / "test_markdown.md" 55 | file_path.write_text(text) 56 | assert mdformat._cli.run([str(file_path), "--wrap=50"]) == 0 57 | md_new = file_path.read_text() 58 | if md_new != expected: 59 | print("Formatted (unexpected) Markdown below:") 60 | print(md_new) 61 | assert md_new == expected 62 | --------------------------------------------------------------------------------