├── requirements.txt
├── asciinema
├── md_toc_asciinema_6_0_1.json
├── md_toc_asciinema_6_0_2.json
├── md_toc_asciinema_7_0_1.json
├── md_toc_asciinema_7_0_2.json
├── md_toc_asciinema_7_0_3.json
├── md_toc_asciinema_7_0_4.json
├── md_toc_asciinema_7_0_5.json
├── md_toc_asciinema_8_0_1.json
├── md_toc_asciinema_8_1_1.json
├── md_toc_asciinema_8_1_2.json
├── md_toc_asciinema_8_1_3.json
├── md_toc_asciinema_8_1_4.json
├── md_toc_asciinema_8_1_5.json
├── md_toc_asciinema_8_1_6.json
├── md_toc_asciinema_8_1_7.json
├── md_toc_asciinema_8_1_8.json
├── md_toc_asciinema_8_1_9.json
├── md_toc_asciinema_6_0_1_demo.sh
├── md_toc_asciinema_6_0_2_demo.sh
├── md_toc_asciinema_7_0_1_demo.sh
├── md_toc_asciinema_7_0_2_demo.sh
├── md_toc_asciinema_7_0_3_demo.sh
├── md_toc_asciinema_7_0_4_demo.sh
├── md_toc_asciinema_7_0_5_demo.sh
├── md_toc_asciinema_8_0_1_demo.sh
├── md_toc_asciinema_8_1_1_demo.sh
├── md_toc_asciinema_8_1_2_demo.sh
├── md_toc_asciinema_8_1_3_demo.sh
├── md_toc_asciinema_8_1_4_demo.sh
├── md_toc_asciinema_8_1_5_demo.sh
├── md_toc_asciinema_8_1_6_demo.sh
├── md_toc_asciinema_8_1_7_demo.sh
├── md_toc_asciinema_8_1_8_demo.sh
├── md_toc_asciinema_8_1_9_demo.sh
├── md_toc_asciinema_8_2_1_demo.sh
├── md_toc_asciinema_8_2_2_demo.sh
├── md_toc_asciinema_8_2_3_demo.sh
├── md_toc_asciinema_9_0_0_demo.sh
├── md_toc_asciinema_8_2_1_demo.json
├── md_toc_asciinema_8_2_2_demo.json
├── md_toc_asciinema_8_2_3_demo.json
├── md_toc_asciinema_9_0_0_demo.json
├── md_toc_asciinema_1_0_0_demo.sh
├── md_toc_asciinema_2_0_0_demo.sh
├── md_toc_asciinema_3_0_0_demo.sh
├── md_toc_asciinema_3_1_0_demo.sh
├── md_toc_asciinema_2_0_0.json
├── md_toc_asciinema_3_0_0.json
├── md_toc_asciinema_5_0_0_demo.sh
├── md_toc_asciinema_1_0_0.json
├── md_toc_asciinema_3_1_0.json
├── md_toc_asciinema_6_0_0_demo.sh
├── md_toc_asciinema_7_0_0_demo.sh
├── md_toc_asciinema_7_1_0_demo.sh
├── md_toc_asciinema_7_2_0_demo.sh
├── md_toc_asciinema_8_0_0_demo.sh
├── md_toc_asciinema_5_0_0.json
├── md_toc_asciinema_8_1_0_demo.sh
├── md_toc_asciinema_7_0_0.json
├── md_toc_asciinema_6_0_0.json
├── md_toc_asciinema_7_1_0.json
├── md_toc_asciinema_7_2_0.json
├── md_toc_asciinema_8_2_0_demo.sh
└── md_toc_asciinema_8_0_0.json
├── docs
├── assets
│ ├── red.png
│ ├── black.png
│ ├── blue.png
│ ├── green.png
│ ├── grey.png
│ ├── orange.png
│ └── yellow.png
├── rules
│ ├── index.rst
│ ├── toc_marker.rst
│ ├── code_fence.rst
│ ├── headers.rst
│ ├── anchor_link_types_and_behaviours.rst
│ └── link_lables.rst
├── _static
│ └── css
│ │ └── custom.css
├── index.rst
├── install.rst
├── Makefile
├── api.rst
├── pre_commit_hook.rst
└── conf.py
├── setup.py
├── assets
├── md-toc_logo.png
├── md-toc_youtube_video_thumbnail.png
└── buy_me_a_coffee.svg
├── CONTRIBUTING.md
├── .github
└── FUNDING.yml
├── pyproject.toml
├── MANIFEST.in
├── .pre-commit-hooks.yaml
├── .project.mk
├── SECURITY.md
├── md_toc
├── cmark
│ ├── __init__.py
│ ├── houdini_h.py
│ ├── buffer_h.py
│ ├── cmark_ctype_c.py
│ ├── references_h.py
│ ├── scanners_h.py
│ ├── chunk_h.py
│ ├── cmark_h.py
│ ├── node_h.py
│ ├── references_c.py
│ ├── utf8_c.py
│ ├── scanners_c.py
│ └── houdini_html_u_c.py
├── tests
│ ├── __init__.py
│ ├── fuzzer.py
│ └── benchmark.py
├── __init__.py
├── __main__.py
├── exceptions.py
└── types.py
├── requirements-dev.txt
├── packages
├── flatpak
│ └── org.eu.net.franco.md-toc.yaml
└── aur
│ └── PKGBUILD
├── requirements-freeze.txt
├── .gitignore
├── .pre-commit-config.yaml
└── setup.cfg
/requirements.txt:
--------------------------------------------------------------------------------
1 | fpyutils>=4.0.1,<5
2 |
--------------------------------------------------------------------------------
/asciinema/md_toc_asciinema_6_0_1.json:
--------------------------------------------------------------------------------
1 | md_toc_asciinema_6_0_0.json
--------------------------------------------------------------------------------
/asciinema/md_toc_asciinema_6_0_2.json:
--------------------------------------------------------------------------------
1 | md_toc_asciinema_6_0_0.json
--------------------------------------------------------------------------------
/asciinema/md_toc_asciinema_7_0_1.json:
--------------------------------------------------------------------------------
1 | md_toc_asciinema_7_0_0.json
--------------------------------------------------------------------------------
/asciinema/md_toc_asciinema_7_0_2.json:
--------------------------------------------------------------------------------
1 | md_toc_asciinema_7_0_0.json
--------------------------------------------------------------------------------
/asciinema/md_toc_asciinema_7_0_3.json:
--------------------------------------------------------------------------------
1 | md_toc_asciinema_7_0_0.json
--------------------------------------------------------------------------------
/asciinema/md_toc_asciinema_7_0_4.json:
--------------------------------------------------------------------------------
1 | md_toc_asciinema_7_0_0.json
--------------------------------------------------------------------------------
/asciinema/md_toc_asciinema_7_0_5.json:
--------------------------------------------------------------------------------
1 | md_toc_asciinema_7_0_0.json
--------------------------------------------------------------------------------
/asciinema/md_toc_asciinema_8_0_1.json:
--------------------------------------------------------------------------------
1 | md_toc_asciinema_8_0_0.json
--------------------------------------------------------------------------------
/asciinema/md_toc_asciinema_8_1_1.json:
--------------------------------------------------------------------------------
1 | md_toc_asciinema_8_1_0.json
--------------------------------------------------------------------------------
/asciinema/md_toc_asciinema_8_1_2.json:
--------------------------------------------------------------------------------
1 | md_toc_asciinema_8_1_0.json
--------------------------------------------------------------------------------
/asciinema/md_toc_asciinema_8_1_3.json:
--------------------------------------------------------------------------------
1 | md_toc_asciinema_8_1_0.json
--------------------------------------------------------------------------------
/asciinema/md_toc_asciinema_8_1_4.json:
--------------------------------------------------------------------------------
1 | md_toc_asciinema_8_1_0.json
--------------------------------------------------------------------------------
/asciinema/md_toc_asciinema_8_1_5.json:
--------------------------------------------------------------------------------
1 | md_toc_asciinema_8_1_0.json
--------------------------------------------------------------------------------
/asciinema/md_toc_asciinema_8_1_6.json:
--------------------------------------------------------------------------------
1 | md_toc_asciinema_8_1_0.json
--------------------------------------------------------------------------------
/asciinema/md_toc_asciinema_8_1_7.json:
--------------------------------------------------------------------------------
1 | md_toc_asciinema_8_1_0.json
--------------------------------------------------------------------------------
/asciinema/md_toc_asciinema_8_1_8.json:
--------------------------------------------------------------------------------
1 | md_toc_asciinema_8_1_0.json
--------------------------------------------------------------------------------
/asciinema/md_toc_asciinema_8_1_9.json:
--------------------------------------------------------------------------------
1 | md_toc_asciinema_8_1_0.json
--------------------------------------------------------------------------------
/asciinema/md_toc_asciinema_6_0_1_demo.sh:
--------------------------------------------------------------------------------
1 | md_toc_asciinema_6_0_0_demo.sh
--------------------------------------------------------------------------------
/asciinema/md_toc_asciinema_6_0_2_demo.sh:
--------------------------------------------------------------------------------
1 | md_toc_asciinema_6_0_0_demo.sh
--------------------------------------------------------------------------------
/asciinema/md_toc_asciinema_7_0_1_demo.sh:
--------------------------------------------------------------------------------
1 | md_toc_asciinema_7_0_0_demo.sh
--------------------------------------------------------------------------------
/asciinema/md_toc_asciinema_7_0_2_demo.sh:
--------------------------------------------------------------------------------
1 | md_toc_asciinema_7_0_0_demo.sh
--------------------------------------------------------------------------------
/asciinema/md_toc_asciinema_7_0_3_demo.sh:
--------------------------------------------------------------------------------
1 | md_toc_asciinema_7_0_0_demo.sh
--------------------------------------------------------------------------------
/asciinema/md_toc_asciinema_7_0_4_demo.sh:
--------------------------------------------------------------------------------
1 | md_toc_asciinema_7_0_0_demo.sh
--------------------------------------------------------------------------------
/asciinema/md_toc_asciinema_7_0_5_demo.sh:
--------------------------------------------------------------------------------
1 | md_toc_asciinema_7_0_0_demo.sh
--------------------------------------------------------------------------------
/asciinema/md_toc_asciinema_8_0_1_demo.sh:
--------------------------------------------------------------------------------
1 | md_toc_asciinema_8_0_0_demo.sh
--------------------------------------------------------------------------------
/asciinema/md_toc_asciinema_8_1_1_demo.sh:
--------------------------------------------------------------------------------
1 | md_toc_asciinema_8_1_0_demo.sh
--------------------------------------------------------------------------------
/asciinema/md_toc_asciinema_8_1_2_demo.sh:
--------------------------------------------------------------------------------
1 | md_toc_asciinema_8_1_0_demo.sh
--------------------------------------------------------------------------------
/asciinema/md_toc_asciinema_8_1_3_demo.sh:
--------------------------------------------------------------------------------
1 | md_toc_asciinema_8_1_0_demo.sh
--------------------------------------------------------------------------------
/asciinema/md_toc_asciinema_8_1_4_demo.sh:
--------------------------------------------------------------------------------
1 | md_toc_asciinema_8_1_0_demo.sh
--------------------------------------------------------------------------------
/asciinema/md_toc_asciinema_8_1_5_demo.sh:
--------------------------------------------------------------------------------
1 | md_toc_asciinema_8_1_0_demo.sh
--------------------------------------------------------------------------------
/asciinema/md_toc_asciinema_8_1_6_demo.sh:
--------------------------------------------------------------------------------
1 | md_toc_asciinema_8_1_0_demo.sh
--------------------------------------------------------------------------------
/asciinema/md_toc_asciinema_8_1_7_demo.sh:
--------------------------------------------------------------------------------
1 | md_toc_asciinema_8_1_0_demo.sh
--------------------------------------------------------------------------------
/asciinema/md_toc_asciinema_8_1_8_demo.sh:
--------------------------------------------------------------------------------
1 | md_toc_asciinema_8_1_0_demo.sh
--------------------------------------------------------------------------------
/asciinema/md_toc_asciinema_8_1_9_demo.sh:
--------------------------------------------------------------------------------
1 | md_toc_asciinema_8_1_0_demo.sh
--------------------------------------------------------------------------------
/asciinema/md_toc_asciinema_8_2_1_demo.sh:
--------------------------------------------------------------------------------
1 | md_toc_asciinema_8_2_0_demo.sh
--------------------------------------------------------------------------------
/asciinema/md_toc_asciinema_8_2_2_demo.sh:
--------------------------------------------------------------------------------
1 | md_toc_asciinema_8_2_0_demo.sh
--------------------------------------------------------------------------------
/asciinema/md_toc_asciinema_8_2_3_demo.sh:
--------------------------------------------------------------------------------
1 | md_toc_asciinema_8_2_0_demo.sh
--------------------------------------------------------------------------------
/asciinema/md_toc_asciinema_9_0_0_demo.sh:
--------------------------------------------------------------------------------
1 | md_toc_asciinema_8_2_0_demo.sh
--------------------------------------------------------------------------------
/asciinema/md_toc_asciinema_8_2_1_demo.json:
--------------------------------------------------------------------------------
1 | md_toc_asciinema_8_2_0_demo.json
--------------------------------------------------------------------------------
/asciinema/md_toc_asciinema_8_2_2_demo.json:
--------------------------------------------------------------------------------
1 | md_toc_asciinema_8_2_0_demo.json
--------------------------------------------------------------------------------
/asciinema/md_toc_asciinema_8_2_3_demo.json:
--------------------------------------------------------------------------------
1 | md_toc_asciinema_8_2_0_demo.json
--------------------------------------------------------------------------------
/asciinema/md_toc_asciinema_9_0_0_demo.json:
--------------------------------------------------------------------------------
1 | md_toc_asciinema_8_2_0_demo.json
--------------------------------------------------------------------------------
/docs/assets/red.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/frnmst/md-toc/HEAD/docs/assets/red.png
--------------------------------------------------------------------------------
/setup.py:
--------------------------------------------------------------------------------
1 | r"""setup.py."""
2 |
3 | import setuptools
4 |
5 | setuptools.setup()
6 |
--------------------------------------------------------------------------------
/assets/md-toc_logo.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/frnmst/md-toc/HEAD/assets/md-toc_logo.png
--------------------------------------------------------------------------------
/docs/assets/black.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/frnmst/md-toc/HEAD/docs/assets/black.png
--------------------------------------------------------------------------------
/docs/assets/blue.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/frnmst/md-toc/HEAD/docs/assets/blue.png
--------------------------------------------------------------------------------
/docs/assets/green.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/frnmst/md-toc/HEAD/docs/assets/green.png
--------------------------------------------------------------------------------
/docs/assets/grey.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/frnmst/md-toc/HEAD/docs/assets/grey.png
--------------------------------------------------------------------------------
/docs/assets/orange.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/frnmst/md-toc/HEAD/docs/assets/orange.png
--------------------------------------------------------------------------------
/docs/assets/yellow.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/frnmst/md-toc/HEAD/docs/assets/yellow.png
--------------------------------------------------------------------------------
/CONTRIBUTING.md:
--------------------------------------------------------------------------------
1 | # Contributing
2 |
3 | See https://docs.franco.net.eu.org/md-toc/contributing.html
4 |
--------------------------------------------------------------------------------
/docs/rules/index.rst:
--------------------------------------------------------------------------------
1 | Rules
2 | =====
3 |
4 | .. toctree::
5 | :maxdepth: 2
6 | :glob:
7 |
8 | *
9 |
--------------------------------------------------------------------------------
/.github/FUNDING.yml:
--------------------------------------------------------------------------------
1 | tidelift: pypi/md-toc
2 | liberapay: frnmst
3 | custom: ["https://www.buymeacoffee.com/frnmst"]
4 |
--------------------------------------------------------------------------------
/assets/md-toc_youtube_video_thumbnail.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/frnmst/md-toc/HEAD/assets/md-toc_youtube_video_thumbnail.png
--------------------------------------------------------------------------------
/pyproject.toml:
--------------------------------------------------------------------------------
1 | [build-system]
2 | requires = ["setuptools>=39.2.0"]
3 | build-backend = "setuptools.build_meta"
4 |
5 | [tool.bandit]
6 | skips=["B404", "B506", "B410", "B603", "B324"]
7 |
--------------------------------------------------------------------------------
/docs/_static/css/custom.css:
--------------------------------------------------------------------------------
1 | /* See
2 | * https://github.com/executablebooks/sphinx-book-theme/issues/732
3 | */
4 | .bd-sidebar-primary div#rtd-footer-container {
5 | bottom:-1rem;
6 | margin:-1rem;
7 | position:fixed;
8 | }
9 |
--------------------------------------------------------------------------------
/docs/index.rst:
--------------------------------------------------------------------------------
1 | md-toc documentation
2 | ====================
3 |
4 | .. toctree::
5 | :maxdepth: 2
6 |
7 | install
8 | api
9 | features
10 | markdown_specification
11 | rules/index
12 | pre_commit_hook
13 | meta
14 |
15 | Indices and tables
16 | ==================
17 |
18 | * :ref:`genindex`
19 | * :ref:`modindex`
20 | * :ref:`search`
21 |
--------------------------------------------------------------------------------
/MANIFEST.in:
--------------------------------------------------------------------------------
1 | global-include LICENSE.txt
2 | global-include README.md
3 | global-include CONTRIBUTING.md
4 | global-include SECURITY.md
5 | recursive-include md_toc/cmark *.py
6 | global-exclude *.csv *.txt
7 | prune assets
8 | prune md_toc/tests
9 | prune docs
10 | prune .venv
11 | prune packages
12 | prune asciinema
13 | prune .tox
14 | exclude *.yml *.yaml
15 | exclude Makefile .project.mk
16 |
--------------------------------------------------------------------------------
/.pre-commit-hooks.yaml:
--------------------------------------------------------------------------------
1 | ---
2 | # Define plugins (hooks) provided by this repo.
3 | # How to test: https://pre-commit.com/#developing-hooks-interactively
4 |
5 | - id: md-toc
6 | name: Update markdown table of contents
7 | description: 'Automatically generate and add a table of contents to markdown files'
8 | language: python
9 | types: [markdown] # as detected by pre-commit with identify-cli
10 | entry: md_toc
11 | args: [-p, github]
12 |
--------------------------------------------------------------------------------
/.project.mk:
--------------------------------------------------------------------------------
1 | PROJECT_NAME := md-toc
2 | PYTHON_MODULE_NAME := md_toc
3 |
4 | MAKEFILE_SOURCE := https://software.franco.net.eu.org/frnmst/python-makefile/raw/branch/master/Makefile.example
5 | DOCKER_BUILD_DIST_SOURCE := https://software.franco.net.eu.org/frnmst/python-makefile/raw/branch/master/.dockerfile_build_python_dist.example
6 | bootstrap:
7 | curl -o Makefile $(MAKEFILE_SOURCE)
8 | curl -o .dockerfile_build_python_dist $(DOCKER_BUILD_DIST_SOURCE)
9 |
--------------------------------------------------------------------------------
/SECURITY.md:
--------------------------------------------------------------------------------
1 | # Security Policy
2 |
3 | ## Supported Versions
4 |
5 | The latest
6 | [GIT tagged version of md-toc](https://github.com/frnmst/md-toc/tags) is the
7 | only one supported. When a new version is released, that one will be the only
8 | one supported.
9 |
10 | ## Reporting a Vulnerability
11 |
12 | Use this
13 | [Nextcloud form](https://cloud.franco.net.eu.org/apps/forms/s/ozgp2GqH46QMmsE9JPn5aP8B)
14 | (preferred) or send me an
15 | [email](https://blog.franco.net.eu.org/about/#contacts)
16 |
--------------------------------------------------------------------------------
/docs/install.rst:
--------------------------------------------------------------------------------
1 | Installation
2 | ============
3 |
4 | pip
5 | ---
6 |
7 | #. install md_toc via pip
8 |
9 | .. code-block:: shell-session
10 |
11 | pip3 install md_toc --user
12 |
13 | All the necessary dependencies are installed automatically along with the
14 | program.
15 |
16 | Distribution packages
17 | ---------------------
18 |
19 | Packages exist for Arch Linux, Debian, Ubuntu and Nix. See
20 |
21 | - https://repology.org/project/md-toc/versions
22 | - https://repology.org/project/python:md-toc/versions
23 |
--------------------------------------------------------------------------------
/docs/Makefile:
--------------------------------------------------------------------------------
1 | # Minimal makefile for Sphinx documentation
2 | #
3 |
4 | # You can set these variables from the command line.
5 | SPHINXOPTS = -W
6 | SPHINXBUILD = sphinx-build
7 | SPHINXPROJ = md-toc
8 | SOURCEDIR = .
9 | BUILDDIR = _build
10 |
11 | # Put it first so that "make" without argument is like "make help".
12 | help:
13 | @$(SPHINXBUILD) -M help "$(SOURCEDIR)" "$(BUILDDIR)" $(SPHINXOPTS) $(O)
14 |
15 | .PHONY: help Makefile
16 |
17 | # Catch-all target: route all unknown targets to Sphinx using the new
18 | # "make mode" option. $(O) is meant as a shortcut for $(SPHINXOPTS).
19 | %: Makefile
20 | @$(SPHINXBUILD) -M $@ "$(SOURCEDIR)" "$(BUILDDIR)" $(SPHINXOPTS) $(O)
21 |
--------------------------------------------------------------------------------
/docs/rules/toc_marker.rst:
--------------------------------------------------------------------------------
1 | TOC marker
2 | ==========
3 |
4 | A TOC marker is a string that marks the start and the end of the table
5 | of contents in a markdown file.
6 |
7 | It was decided to use ``[](TOC)`` as the default TOC marker because it is
8 | invisible when rendered by some markdown parsers. In other cases, however, such
9 | as the one used by Gitea, that particular TOC marker was still visible. HTML
10 | comments seem to be a better solution.
11 |
12 | ``cmark``, ``github``, ``gitlab``
13 | ---------------------------------
14 |
15 | - https://spec.commonmark.org/0.30/#html-comment
16 |
17 | ``redcarpet``
18 | -------------
19 |
20 | I cannot find the corresponding code, but I found this:
21 |
22 | - https://github.com/vmg/redcarpet/blob/master/test/MarkdownTest_1.0.3/Tests/Inline%20HTML%20comments.html
23 |
--------------------------------------------------------------------------------
/md_toc/cmark/__init__.py:
--------------------------------------------------------------------------------
1 | #
2 | # __init__.py
3 | #
4 | # Copyright (C) 2017-2022 Franco Masotti (see /README.md)
5 | #
6 | # This file is part of md-toc.
7 | #
8 | # md-toc is free software: you can redistribute it and/or modify
9 | # it under the terms of the GNU General Public License as published by
10 | # the Free Software Foundation, either version 3 of the License, or
11 | # (at your option) any later version.
12 | #
13 | # md-toc is distributed in the hope that it will be useful,
14 | # but WITHOUT ANY WARRANTY; without even the implied warranty of
15 | # MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
16 | # GNU General Public License for more details.
17 | #
18 | # You should have received a copy of the GNU General Public License
19 | # along with md-toc. If not, see <https://www.gnu.org/licenses/>.
20 | #
21 | """Python discovery file."""
22 |
--------------------------------------------------------------------------------
/md_toc/tests/__init__.py:
--------------------------------------------------------------------------------
1 | #
2 | # __init__.py
3 | #
4 | # Copyright (C) 2017-2020 Franco Masotti
5 | #
6 | # This file is part of md-toc.
7 | #
8 | # md-toc is free software: you can redistribute it and/or modify
9 | # it under the terms of the GNU General Public License as published by
10 | # the Free Software Foundation, either version 3 of the License, or
11 | # (at your option) any later version.
12 | #
13 | # md-toc is distributed in the hope that it will be useful,
14 | # but WITHOUT ANY WARRANTY; without even the implied warranty of
15 | # MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
16 | # GNU General Public License for more details.
17 | #
18 | # You should have received a copy of the GNU General Public License
19 | # along with md-toc. If not, see <https://www.gnu.org/licenses/>.
20 | #
21 | """Python discovery file."""
22 |
--------------------------------------------------------------------------------
/md_toc/__init__.py:
--------------------------------------------------------------------------------
1 | #
2 | # __init__.py
3 | #
4 | # Copyright (C) 2017-2021 Franco Masotti (see /README.md)
5 | #
6 | # This file is part of md-toc.
7 | #
8 | # md-toc is free software: you can redistribute it and/or modify
9 | # it under the terms of the GNU General Public License as published by
10 | # the Free Software Foundation, either version 3 of the License, or
11 | # (at your option) any later version.
12 | #
13 | # md-toc is distributed in the hope that it will be useful,
14 | # but WITHOUT ANY WARRANTY; without even the implied warranty of
15 | # MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
16 | # GNU General Public License for more details.
17 | #
18 | # You should have received a copy of the GNU General Public License
19 | # along with md-toc. If not, see <https://www.gnu.org/licenses/>.
20 | #
21 | """Python discovery file."""
22 |
23 | from . import api, cli, exceptions, types
24 |
--------------------------------------------------------------------------------
/docs/rules/code_fence.rst:
--------------------------------------------------------------------------------
1 | Code fence
2 | ==========
3 |
4 | Code fences are sections of a markdown document where some parsers treat the
5 | text within them as verbatim. Usually the purpose of these sections is to
6 | display source code. Some programming languages use the character ``#`` as a
7 | way to comment a line in the code. For this reason md-toc needs to ignore code
8 | fences so that a ``#`` character inside them is not treated as an ATX-style
9 | heading and parsed as an element of the TOC.
10 |
11 | ``cmark``, ``github``, ``gitlab``
12 | ---------------------------------
13 |
14 | The rules followed are the ones reported in the
15 | documentation:
16 |
17 | - https://spec.commonmark.org/0.30/#code-fence
18 |
19 | ``redcarpet``
20 | -------------
21 |
22 | Needs to be implemented:
23 |
24 | - https://github.com/vmg/redcarpet/blob/26c80f05e774b31cd01255b0fa62e883ac185bf3/ext/redcarpet/markdown.c#L1389
25 |
--------------------------------------------------------------------------------
/assets/buy_me_a_coffee.svg:
--------------------------------------------------------------------------------
1 |
2 |
--------------------------------------------------------------------------------
/requirements-dev.txt:
--------------------------------------------------------------------------------
1 | # requirements-dev.txt
2 | #
3 | # Copyright (C) 2022-2023 Franco Masotti (see /README.md)
4 | #
5 | # This file is part of md-toc.
6 | #
7 | # md-toc is free software: you can redistribute it and/or modify
8 | # it under the terms of the GNU General Public License as published by
9 | # the Free Software Foundation, either version 3 of the License, or
10 | # (at your option) any later version.
11 | #
12 | # md-toc is distributed in the hope that it will be useful,
13 | # but WITHOUT ANY WARRANTY; without even the implied warranty of
14 | # MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
15 | # GNU General Public License for more details.
16 | #
17 | # You should have received a copy of the GNU General Public License
18 | # along with md-toc. If not, see <https://www.gnu.org/licenses/>.
19 | #
20 |
21 | asciinema>=2,<3
22 | build>=1.0,<1.1
23 | pre-commit>=3,<4
24 | pyfakefs>=5,<6
25 | sphinx-book-theme>=1.0,<1.1
26 | sphinx-copybutton>=0.5,<0.6
27 | tox>=4,<5
28 | twine>=4,<5
29 |
--------------------------------------------------------------------------------
/packages/flatpak/org.eu.net.franco.md-toc.yaml:
--------------------------------------------------------------------------------
1 | # Generated with flatpak-pip-generator --runtime=org.freedesktop.Sdk//23.08 --yaml md-toc
2 | id: org.eu.net.franco.md-toc
3 | name: md_toc
4 | runtime: org.freedesktop.Platform
5 | runtime-version: '23.08'
6 | sdk: org.freedesktop.Sdk
7 | command: md_toc
8 | # Give full filesystem access.
9 | finish-args:
10 | - --filesystem=home
11 |
12 | modules:
13 | - name: md_toc
14 | buildsystem: simple
15 | build-commands:
16 | - pip3 install --verbose --exists-action=i --no-index --find-links="file://${PWD}" --prefix=${FLATPAK_DEST} "md-toc" --no-build-isolation
17 | sources:
18 | - type: file
19 | url: https://files.pythonhosted.org/packages/5f/ba/4a0aa00af38dde32e9b575052217b1e8d34e31106a079a74d83caafc26af/fpyutils-4.0.1-py3-none-any.whl
20 | sha256: 006cfbdbd87915d8a1c5b7062b6c8d2f4f9fd12c3e707d89c27e6abd6c67c6b2
21 | - type: file
22 | url: https://files.pythonhosted.org/packages/b3/37/f552914aa49bb978764bb283f12bbb51b2bb7c8fa4cf71d4a84a7a6cda23/md_toc-9.0.0-py3-none-any.whl
23 | sha256: b4361ca283f602336c9220f6319fc9c5131c734d0c853557bd5c96c1170738df
24 |
--------------------------------------------------------------------------------
/md_toc/cmark/houdini_h.py:
--------------------------------------------------------------------------------
1 | #
2 | # houdini_h.py
3 | #
4 | # Copyright (C) 2017-2022 Franco Masotti (see /README.md)
5 | #
6 | # This file is part of md-toc.
7 | #
8 | # md-toc is free software: you can redistribute it and/or modify
9 | # it under the terms of the GNU General Public License as published by
10 | # the Free Software Foundation, either version 3 of the License, or
11 | # (at your option) any later version.
12 | #
13 | # md-toc is distributed in the hope that it will be useful,
14 | # but WITHOUT ANY WARRANTY; without even the implied warranty of
15 | # MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
16 | # GNU General Public License for more details.
17 | #
18 | # You should have received a copy of the GNU General Public License
19 | # along with md-toc. If not, see <https://www.gnu.org/licenses/>.
20 | #
21 | r"""A cmark implementation file."""
22 |
23 | # License F applies to this file except for non derivative code:
24 | # in that case the license header at the top of the file applies.
25 | # See docs/copyright_license.rst
26 |
27 |
28 | def _cmark_HOUDINI_ESCAPED_SIZE(x: int) -> float:
29 | return (x * 12) / 10
30 |
31 |
32 | def _cmark_HOUDINI_UNESCAPED_SIZE(x: int) -> int:
33 | return x
34 |
--------------------------------------------------------------------------------
/packages/aur/PKGBUILD:
--------------------------------------------------------------------------------
1 | # Maintainer: Franco Masotti (See /README.md in project source)
2 | # Contributor: Franco Masotti (See /README.md in project source)
3 | pkgname=python-md_toc
4 | pkgver=9.0.0
5 | pkgrel=1
6 | pkgdesc="Automatically generate and add an accurate table of contents to markdown files"
7 | arch=('any')
8 | url="https://blog.franco.net.eu.org/software/#md-toc"
9 | license=('GPL3')
10 | depends=('python'
11 | 'python-fpyutils=4.0.1')
12 | makedepends=('python-pyfakefs'
13 | 'python-build'
14 | 'python-installer'
15 | 'python-wheel'
16 | 'python-setuptools')
17 | options=(!emptydirs)
18 | source=("https://blog.franco.net.eu.org/software/md-toc-${pkgver}/md-toc-${pkgver}.tar.gz.sig" "https://blog.franco.net.eu.org/software/md-toc-${pkgver}/md-toc-${pkgver}.tar.gz")
19 | sha512sums=('SKIP' 'SKIP')
20 |
# Run the project's unit tests from the unpacked source tree.
check() {
    # Enter the md-toc-<pkgver> directory found under srcdir.
    cd "${srcdir}"/md-toc-"${pkgver}"
    # Discover the tests and run them verbosely, stopping at the first
    # failure (--failfast) and showing local variables in tracebacks
    # (--locals).
    python -m unittest discover --failfast --locals --verbose
}
25 |
# Build a wheel from the unpacked source tree.
build() {
    # Enter the md-toc-<pkgver> directory found under srcdir.
    cd "${srcdir}"/md-toc-"${pkgver}"
    # --no-isolation makes python-build use the build dependencies that are
    # already installed instead of creating an isolated environment.
    python -m build --wheel --no-isolation
}
30 |
# Install the wheel found in dist/ into the package directory.
package() {
    # Enter the md-toc-<pkgver> directory found under srcdir.
    cd "${srcdir}"/md-toc-"${pkgver}"
    # Install every wheel in dist/ using pkgdir as the destination root.
    python -m installer --destdir="${pkgdir}" dist/*.whl
}
35 |
--------------------------------------------------------------------------------
/docs/api.rst:
--------------------------------------------------------------------------------
1 | Developer Interface
2 | ===================
3 |
4 | Functions
5 | ---------
6 |
7 | .. important:: If you are a developer and you need a quick way to generate
8 | a TOC, the function you may want to use is `build_toc <#md_toc.api.build_toc>`_
9 |
10 | .. autosummary::
11 | :nosignatures:
12 | :recursive:
13 |
14 | md_toc.api.get_atx_heading
15 | md_toc.api.get_md_header
16 | md_toc.api.build_toc_line
17 | md_toc.api.increase_index_ordered_list
18 | md_toc.api.anchor_link_punctuation_filter
19 | md_toc.api.build_anchor_link
20 | md_toc.api.build_toc
21 | md_toc.api.build_multiple_tocs
22 | md_toc.api.write_string_on_file_between_markers
23 | md_toc.api.write_strings_on_files_between_markers
24 | md_toc.api.init_indentation_log
25 | md_toc.api.compute_toc_line_indentation_spaces
26 | md_toc.api.build_toc_line_without_indentation
27 | md_toc.api.is_valid_code_fence_indent
28 | md_toc.api.is_opening_code_fence
29 | md_toc.api.is_closing_code_fence
30 | md_toc.api.tocs_equal
31 | md_toc.api.remove_html_tags
32 | md_toc.api.remove_emphasis
33 | md_toc.api.replace_and_split_newlines
34 | md_toc.api.filter_indices_from_line
35 |
36 | .. automodule:: md_toc.api
37 | :members:
38 |
39 | Exceptions
40 | ----------
41 |
42 | .. automodule:: md_toc.exceptions
43 | :members:
44 |
45 | Types
46 | -----
47 |
48 | .. automodule:: md_toc.types
49 | :members:
50 |
--------------------------------------------------------------------------------
/md_toc/__main__.py:
--------------------------------------------------------------------------------
1 | #
2 | # __main__.py
3 | #
4 | # Copyright (C) 2017-2020 Franco Masotti (see /README.md)
5 | #
6 | # This file is part of md-toc.
7 | #
8 | # md-toc is free software: you can redistribute it and/or modify
9 | # it under the terms of the GNU General Public License as published by
10 | # the Free Software Foundation, either version 3 of the License, or
11 | # (at your option) any later version.
12 | #
13 | # md-toc is distributed in the hope that it will be useful,
14 | # but WITHOUT ANY WARRANTY; without even the implied warranty of
15 | # MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
16 | # GNU General Public License for more details.
17 | #
18 | # You should have received a copy of the GNU General Public License
19 | # along with md-toc. If not, see <https://www.gnu.org/licenses/>.
20 | #
21 | """Call the CLI parser."""
22 |
23 | import sys
24 | import traceback
25 |
26 | from .cli import CliInterface
27 |
28 |
def main(args=None):
    """Call the CLI interface and wait for the result.

    :parameter args: the command line arguments to parse. With ``None``,
        the default, the parser is left to read the process' command line.
    :type args: list or None
    :returns: nothing: the function always terminates through
        ``sys.exit``. The exit status is ``0`` on success, ``128`` when the
        selected command returns a truthy result (the TOC differs) and ``1``
        when an exception is raised.
    :raises: SystemExit.
    """
    retcode = 0
    try:
        ci = CliInterface()
        # Forward the caller's argument list: previously the parameter was
        # overwritten and the explicit arguments were silently ignored.
        parsed_args = ci.parser.parse_args(args)
        result = parsed_args.func(parsed_args)
        # Boolean results are status flags, anything else is printed.
        if result is not None and not isinstance(result, bool):
            print(result)

        # TOC differs.
        if result:
            retcode = 128

    except Exception:
        # Report the failure but still exit with a well defined status.
        retcode = 1
        traceback.print_exc()
    sys.exit(retcode)
48 |
49 |
50 | if __name__ == '__main__':
51 | main()
52 |
--------------------------------------------------------------------------------
/docs/pre_commit_hook.rst:
--------------------------------------------------------------------------------
1 | Pre-commit hook
2 | ---------------
3 |
4 | This repo provides the following :download:`plugin <../.pre-commit-hooks.yaml>` to be used with the `Pre-commit framework <https://pre-commit.com/>`_
5 |
6 | .. literalinclude:: ../.pre-commit-hooks.yaml
7 | :language: yaml
8 | :caption: The .pre-commit-hooks.yaml file
9 | :name: .pre-commit-hooks.yaml
10 |
11 | Add a ``.pre-commit-config.yaml`` file in the root of your GIT repo.
12 | Have a look at the ``/.pre-commit-hooks.yaml`` file of this repository for a
13 | full example.
14 |
15 | These are the default plugin settings
16 |
17 | .. code-block:: yaml
18 | :caption: A simple example of a .pre-commit-config.yaml file
19 | :name: .pre-commit-config.yaml simple
20 |
21 | repos:
22 | - repo: https://codeberg.org/frnmst/md-toc
23 | # Release updates (ATOM) https://codeberg.org/frnmst/md-toc/tags.atom
24 | rev: master # set a GIT tag
25 | hooks:
26 | - id: md-toc
27 |
28 | You can override the defaults via the ``args`` parameter, such as
29 |
30 | .. code-block:: yaml
31 | :caption: Example of arguments passed to the pre-commit hook
32 | :name: .pre-commit-config.yaml args
33 |
34 | repos:
35 | - repo: https://codeberg.org/frnmst/md-toc
36 | # Release updates (ATOM) https://codeberg.org/frnmst/md-toc/tags.atom
37 | rev: master # set a GIT tag
38 | hooks:
39 | - id: md-toc
40 | args: [-p, --skip-lines, '1', redcarpet] # CLI options
41 |
42 | Finally, run ``pre-commit install`` to enable the hook.
43 |
--------------------------------------------------------------------------------
/md_toc/exceptions.py:
--------------------------------------------------------------------------------
1 | #
2 | # exceptions.py
3 | #
4 | # Copyright (C) 2017-2020 Franco Masotti (see /README.md)
5 | #
6 | # This file is part of md-toc.
7 | #
8 | # md-toc is free software: you can redistribute it and/or modify
9 | # it under the terms of the GNU General Public License as published by
10 | # the Free Software Foundation, either version 3 of the License, or
11 | # (at your option) any later version.
12 | #
13 | # md-toc is distributed in the hope that it will be useful,
14 | # but WITHOUT ANY WARRANTY; without even the implied warranty of
15 | # MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
16 | # GNU General Public License for more details.
17 | #
18 | # You should have received a copy of the GNU General Public License
19 | # along with md-toc. If not, see <https://www.gnu.org/licenses/>.
20 | #
21 | """Exceptions file."""
22 |
23 |
class GithubOverflowCharsLinkLabel(Exception):
    """Cannot parse the link label because it has too many characters."""
26 |
27 |
class GithubEmptyLinkLabel(Exception):
    """The link label contains only whitespace characters or is empty."""
30 |
31 |
class GithubOverflowOrderedListMarker(Exception):
    """The number used as ordered list marker is too big."""
34 |
35 |
class StdinIsNotAFileToBeWritten(Exception):
    """Standard input (stdin) is not a file that can be written onto."""
38 |
39 |
class TocDoesNotRenderAsCoherentList(Exception):
    """The TOC list indentations are wrong or not what the user intended."""
42 |
43 |
class StringCannotContainNewlines(Exception):
    """The specified string contains newlines, which are not allowed."""
46 |
--------------------------------------------------------------------------------
/requirements-freeze.txt:
--------------------------------------------------------------------------------
1 | accessible-pygments==0.0.4
2 | alabaster==0.7.16
3 | asciinema==2.4.0
4 | Babel==2.14.0
5 | beautifulsoup4==4.12.3
6 | build==1.0.3
7 | cachetools==5.3.3
8 | certifi==2024.2.2
9 | cffi==1.16.0
10 | cfgv==3.4.0
11 | chardet==5.2.0
12 | charset-normalizer==3.3.2
13 | colorama==0.4.6
14 | cryptography==42.0.5
15 | distlib==0.3.8
16 | docutils==0.19
17 | filelock==3.13.2
18 | fpyutils==4.0.1
19 | identify==2.5.35
20 | idna==3.6
21 | imagesize==1.4.1
22 | importlib_metadata==7.1.0
23 | jaraco.classes==3.3.1
24 | jaraco.context==4.3.0
25 | jaraco.functools==4.0.0
26 | jeepney==0.8.0
27 | Jinja2==3.1.3
28 | keyring==25.0.0
29 | markdown-it-py==3.0.0
30 | MarkupSafe==2.1.5
31 | mdurl==0.1.2
32 | more-itertools==10.2.0
33 | nh3==0.2.15
34 | nodeenv==1.8.0
35 | packaging==24.0
36 | pkginfo==1.10.0
37 | platformdirs==4.2.0
38 | pluggy==1.4.0
39 | pre-commit==3.7.0
40 | pycparser==2.21
41 | pydata-sphinx-theme==0.15.2
42 | pyfakefs==5.3.5
43 | Pygments==2.17.2
44 | pyproject-api==1.6.1
45 | pyproject_hooks==1.0.0
46 | PyYAML==6.0.1
47 | readme_renderer==43.0
48 | requests==2.31.0
49 | requests-toolbelt==1.0.0
50 | rfc3986==2.0.0
51 | rich==13.7.1
52 | SecretStorage==3.3.3
53 | snowballstemmer==2.2.0
54 | soupsieve==2.5
55 | Sphinx==6.2.1
56 | sphinx-book-theme==1.0.1
57 | sphinx-copybutton==0.5.2
58 | sphinxcontrib-applehelp==1.0.8
59 | sphinxcontrib-devhelp==1.0.6
60 | sphinxcontrib-htmlhelp==2.0.5
61 | sphinxcontrib-jsmath==1.0.1
62 | sphinxcontrib-qthelp==1.0.7
63 | sphinxcontrib-serializinghtml==1.1.10
64 | tox==4.14.2
65 | twine==4.0.2
66 | typing_extensions==4.10.0
67 | urllib3==2.2.1
68 | virtualenv==20.25.1
69 | zipp==3.18.1
70 |
--------------------------------------------------------------------------------
/md_toc/cmark/buffer_h.py:
--------------------------------------------------------------------------------
1 | #
2 | # buffer_h.py
3 | #
4 | # Copyright (C) 2017-2022 Franco Masotti (see /README.md)
5 | #
6 | # This file is part of md-toc.
7 | #
8 | # md-toc is free software: you can redistribute it and/or modify
9 | # it under the terms of the GNU General Public License as published by
10 | # the Free Software Foundation, either version 3 of the License, or
11 | # (at your option) any later version.
12 | #
13 | # md-toc is distributed in the hope that it will be useful,
14 | # but WITHOUT ANY WARRANTY; without even the implied warranty of
15 | # MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
16 | # GNU General Public License for more details.
17 | #
18 | # You should have received a copy of the GNU General Public License
19 | # along with md-toc. If not, see <https://www.gnu.org/licenses/>.
20 | #
21 | r"""A cmark implementation file."""
22 |
23 | from dataclasses import dataclass
24 | from typing import Optional
25 |
26 | from .cmark_h import _cmarkCmarkMem
27 |
28 | # License E applies to this file except for non derivative code:
29 | # in that case the license header at the top of the file applies.
30 | # See docs/copyright_license.rst
31 |
32 |
33 | # 0.29, 0.30
@dataclass
class _cmarkCmarkStrbuf:
    r"""A string buffer record.

    Every field has a default value, so an instance can be created without
    passing any argument.
    """

    # Memory handler bound to the buffer; ``None`` when there is none.
    mem: Optional[_cmarkCmarkMem] = None
    # Content of the buffer: it starts as the empty string.
    ptr: str = ''
    # NOTE(review): presumably the allocated size of the buffer -- confirm.
    asize: int = 0
    # NOTE(review): presumably the size currently in use -- confirm.
    size: int = 0
40 |
41 |
42 | # Should be equivalent to
43 | # #define CMARK_BUF_INIT(mem) \
44 | # { mem, cmark_strbuf__initbuf, 0, 0 }
45 | # 0.29, 0.30
def _cmark_CMARK_BUF_INIT(mem: _cmarkCmarkMem):
    r"""Create an empty string buffer bound to ``mem``.

    :parameter mem: the memory handler to store in the new buffer.
    :type mem: _cmarkCmarkMem
    :returns: a new ``_cmarkCmarkStrbuf`` whose ``mem`` field is ``mem``
        while all the other fields keep their default values.
    :rtype: _cmarkCmarkStrbuf
    """
    return _cmarkCmarkStrbuf(mem=mem)
51 |
52 |
53 | if __name__ == '__main__':
54 | pass
55 |
--------------------------------------------------------------------------------
/docs/rules/headers.rst:
--------------------------------------------------------------------------------
1 | Headers
2 | =======
3 |
4 | Only ATX-style headings are supported in md-toc.
5 |
6 | ``cmark``, ``github``, ``gitlab``
7 | ---------------------------------
8 |
9 | The code used in md-toc is a reverse engineering of the
10 | behaviour described in the following:
11 |
12 | - https://spec.commonmark.org/0.30/#atx-heading
13 |
14 | The escape character ``\`` is left as-is since it is already parsed by
15 | Github's markdown parser:
16 |
17 | - https://spec.commonmark.org/0.30/#backslash-escapes
18 |
19 | A line ending character is ``U+000A`` or the ``U+000D`` character,
20 | respectively ``\n`` and ``\r`` (or ``\r\n`` if combined).
21 | Everything following those characters is ignored.
22 | This also has the benefit of automatically removing
23 | the trailing newline or carriage return at the end of each line. This also
24 | includes ATX headers with line endings only as main content, such as
25 | ``#\n`` or ``#\r``. See also:
26 |
27 | - https://spec.commonmark.org/0.30/#line
28 | - https://spec.commonmark.org/0.30/#line-ending
29 |
30 | Every other rule for ATX headings is applied.
31 |
32 | ``redcarpet``
33 | -------------
34 |
35 | - https://github.com/vmg/redcarpet/blob/6270d6b4ab6b46ee6bb57a6c0e4b2377c01780ae/ext/redcarpet/markdown.c#L1444
36 | - https://github.com/vmg/redcarpet/blob/6270d6b4ab6b46ee6bb57a6c0e4b2377c01780ae/ext/redcarpet/markdown.c#L1981
37 |
38 | Line endings are generically ``\n`` or ``\r`` characters. See:
39 |
40 | - https://github.com/vmg/redcarpet/blob/6270d6b4ab6b46ee6bb57a6c0e4b2377c01780ae/ext/redcarpet/markdown.c#L2845
41 | - https://github.com/vmg/redcarpet/blob/6270d6b4ab6b46ee6bb57a6c0e4b2377c01780ae/ext/redcarpet/markdown.c#L2854
42 |
--------------------------------------------------------------------------------
/asciinema/md_toc_asciinema_1_0_0_demo.sh:
--------------------------------------------------------------------------------
1 | #!/bin/bash
2 |
3 | #
4 | # md_toc_asciinema_1_0_0_demo.sh
5 | #
6 | # Copyright (C) 2017-2018 Franco Masotti (see /README.md)
7 | #
8 | # This file is part of md-toc.
9 | #
10 | # md-toc is free software: you can redistribute it and/or modify
11 | # it under the terms of the GNU General Public License as published by
12 | # the Free Software Foundation, either version 3 of the License, or
13 | # (at your option) any later version.
14 | #
15 | # md-toc is distributed in the hope that it will be useful,
16 | # but WITHOUT ANY WARRANTY; without even the implied warranty of
17 | # MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
18 | # GNU General Public License for more details.
19 | #
20 | # You should have received a copy of the GNU General Public License
21 | # along with md-toc. If not, see <https://www.gnu.org/licenses/>.
22 | #
23 |
24 | printf "Running a demo to show some of md_toc's capabilities...\n"
25 | printf "\n"
26 | sleep 2
27 |
28 | printf "$ md_toc -h\n"
29 | md_toc -h
30 | printf "\n"
31 | sleep 5
32 |
33 | cat <<-EOF > foo.md
34 | # Hi
35 |
36 | [](TOC)
37 |
38 | hey
39 |
40 | ## How are you? !!!
41 |
42 | ## fine, thanks
43 |
44 | ### Bye
45 |
46 | ## Bye bye
47 | EOF
48 |
49 | printf "$ cat foo.md\n"
50 | cat foo.md
51 | printf "\n"
52 | sleep 5
53 |
54 | printf "$ md_toc -p github foo.md\n"
55 | md_toc -p github foo.md
56 | printf "\n"
57 | sleep 5
58 |
59 | printf "$ md_toc -o -p gitlab foo.md\n"
60 | md_toc -o -p gitlab foo.md
61 | printf "\n"
62 | sleep 5
63 |
64 | printf "$ md_toc -n foo.md\n"
65 | md_toc -n foo.md
66 | printf "\n"
67 | sleep 5
68 |
69 | printf "$ Editing the file in-place...\n"
70 | printf "$ md_toc -i -p redcarpet foo.md\n"
71 | md_toc -i -p redcarpet foo.md
72 | printf "$ cat foo.md\n"
73 | cat foo.md
74 |
75 | rm foo.md
76 |
--------------------------------------------------------------------------------
/asciinema/md_toc_asciinema_2_0_0_demo.sh:
--------------------------------------------------------------------------------
1 | #!/bin/bash
2 |
3 | #
4 | # md_toc_asciinema_2_0_0_demo.sh
5 | #
6 | # Copyright (C) 2017-2018 Franco Masotti (see /README.md)
7 | #
8 | # This file is part of md-toc.
9 | #
10 | # md-toc is free software: you can redistribute it and/or modify
11 | # it under the terms of the GNU General Public License as published by
12 | # the Free Software Foundation, either version 3 of the License, or
13 | # (at your option) any later version.
14 | #
15 | # md-toc is distributed in the hope that it will be useful,
16 | # but WITHOUT ANY WARRANTY; without even the implied warranty of
17 | # MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
18 | # GNU General Public License for more details.
19 | #
20 | # You should have received a copy of the GNU General Public License
21 | # along with md-toc. If not, see <https://www.gnu.org/licenses/>.
22 | #
23 |
24 | printf "Running a demo to show some of md_toc's capabilities...\n"
25 | printf "\n"
26 | sleep 2
27 |
28 | printf "$ md_toc -h\n"
29 | md_toc -h
30 | printf "\n"
31 | sleep 5
32 |
33 | cat <<-EOF > foo.md
34 | # Hi
35 |
36 | [](TOC)
37 |
38 | hey
39 |
40 | ## How are you? !!!
41 |
42 | ## fine, thanks
43 |
44 | ### Bye
45 |
46 | ## Bye bye
47 | EOF
48 |
49 | printf "$ cat foo.md\n"
50 | cat foo.md
51 | printf "\n"
52 | sleep 5
53 |
54 | printf "$ md_toc github foo.md\n"
55 | md_toc github foo.md
56 | printf "\n"
57 | sleep 5
58 |
59 | printf "$ md_toc gitlab -o foo.md\n"
60 | md_toc gitlab -o foo.md
61 | printf "\n"
62 | sleep 5
63 |
64 | printf "$ md_toc -n github foo.md\n"
65 | md_toc -n github foo.md
66 | printf "\n"
67 | sleep 5
68 |
69 | printf "$ Editing the file in-place...\n"
70 | printf "$ md_toc -i redcarpet foo.md\n"
71 | md_toc -i redcarpet foo.md
72 | printf "$ cat foo.md\n"
73 | cat foo.md
74 |
75 | rm foo.md
76 |
--------------------------------------------------------------------------------
/md_toc/cmark/cmark_ctype_c.py:
--------------------------------------------------------------------------------
1 | #
2 | # cmark_ctype_c.py
3 | #
4 | # Copyright (C) 2017-2022 Franco Masotti (see /README.md)
5 | #
6 | # This file is part of md-toc.
7 | #
8 | # md-toc is free software: you can redistribute it and/or modify
9 | # it under the terms of the GNU General Public License as published by
10 | # the Free Software Foundation, either version 3 of the License, or
11 | # (at your option) any later version.
12 | #
13 | # md-toc is distributed in the hope that it will be useful,
14 | # but WITHOUT ANY WARRANTY; without even the implied warranty of
15 | # MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
16 | # GNU General Public License for more details.
17 | #
18 | # You should have received a copy of the GNU General Public License
19 | # along with md-toc. If not, see <https://www.gnu.org/licenses/>.
20 | #
21 | r"""The cmark implementation file."""
22 |
23 | import string
24 | import unicodedata
25 |
26 | from ..constants import parser as md_parser
27 |
28 | # License C applies to this file except for non derivative code:
29 | # in that case the license header at the top of the file applies.
30 | # See docs/copyright_license.rst
31 |
32 |
33 | # Return True if c is a "whitespace" character as defined by the spec.
34 | # 0.30
35 | def _cmark_cmark_isspace(char: int) -> bool:
36 | # A Unicode whitespace character is any code point in the Unicode Zs
37 | # general category, or a tab (U+0009), line feed (U+000A), form feed
38 | # (U+000C), or carriage return (U+000D).
39 | return (unicodedata.category(chr(char)) == 'Zs'
40 | or chr(char) in ['\u0009', '\u000A', '\u000C', '\u000D'])
41 |
42 |
43 | # Return True if c is an ascii punctuation character.
44 | # 0.29, 0.30
45 | def _cmark_cmark_ispunct(char: int) -> bool:
46 | return chr(char) in string.punctuation
47 |
48 |
49 | if __name__ == '__main__':
50 | pass
51 |
--------------------------------------------------------------------------------
/.gitignore:
--------------------------------------------------------------------------------
1 | # atheris crash logs
2 | crash-*
3 |
4 | *.md
5 | *.MD
6 | !README.md
7 | !CONTRIBUTING.md
8 | !SECURITY.md
9 | benchmark.csv
10 | Makefile
11 | .dockerfile_build_python_dist
12 | .requirements-freeze-hashes.txt
13 |
14 | # Byte-compiled / optimized / DLL files
15 | __pycache__/
16 | *.py[cod]
17 | *$py.class
18 |
19 | # C extensions
20 | *.so
21 |
22 | # Distribution / packaging
23 | .Python
24 | build/
25 | develop-eggs/
26 | dist/
27 | downloads/
28 | eggs/
29 | .eggs/
30 | lib/
31 | lib64/
32 | parts/
33 | sdist/
34 | var/
35 | wheels/
36 | *.egg-info/
37 | .installed.cfg
38 | *.egg
39 | MANIFEST
40 |
41 | # PyInstaller
42 | # Usually these files are written by a python script from a template
43 | # before PyInstaller builds the exe, so as to inject date/other infos into it.
44 | *.manifest
45 | *.spec
46 |
47 | # Installer logs
48 | pip-log.txt
49 | pip-delete-this-directory.txt
50 |
51 | # Unit test / coverage reports
52 | htmlcov/
53 | .tox/
54 | .coverage
55 | .coverage.*
56 | .cache
57 | nosetests.xml
58 | coverage.xml
59 | *.cover
60 | .hypothesis/
61 |
62 | # Translations
63 | *.mo
64 | *.pot
65 |
66 | # Django stuff:
67 | *.log
68 | .static_storage/
69 | .media/
70 | local_settings.py
71 |
72 | # Flask stuff:
73 | instance/
74 | .webassets-cache
75 |
76 | # Scrapy stuff:
77 | .scrapy
78 |
79 | # Sphinx documentation
80 | docs/_build/
81 |
82 | # PyBuilder
83 | target/
84 |
85 | # Jupyter Notebook
86 | .ipynb_checkpoints
87 |
88 | # pyenv
89 | .python-version
90 |
91 | # celery beat schedule file
92 | celerybeat-schedule
93 |
94 | # SageMath parsed files
95 | *.sage.py
96 |
97 | # Environments
98 | .env
99 | .venv
100 | env/
101 | venv/
102 | ENV/
103 | env.bak/
104 | venv.bak/
105 |
106 | # Spyder project settings
107 | .spyderproject
108 | .spyproject
109 |
110 | # Rope project settings
111 | .ropeproject
112 |
113 | # mkdocs documentation
114 | /site
115 |
116 | # mypy
117 | .mypy_cache/
118 |
--------------------------------------------------------------------------------
/md_toc/cmark/references_h.py:
--------------------------------------------------------------------------------
1 | #
2 | # references_h.py
3 | #
4 | # Copyright (C) 2017-2022 Franco Masotti (see /README.md)
5 | #
6 | # This file is part of md-toc.
7 | #
8 | # md-toc is free software: you can redistribute it and/or modify
9 | # it under the terms of the GNU General Public License as published by
10 | # the Free Software Foundation, either version 3 of the License, or
11 | # (at your option) any later version.
12 | #
13 | # md-toc is distributed in the hope that it will be useful,
14 | # but WITHOUT ANY WARRANTY; without even the implied warranty of
15 | # MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
16 | # GNU General Public License for more details.
17 | #
18 | # You should have received a copy of the GNU General Public License
19 | # along with md-toc. If not, see <https://www.gnu.org/licenses/>.
20 | #
21 | r"""A cmark implementation file."""
22 |
23 | from .cmark_h import _cmarkCmarkMem
24 |
25 | # License C applies to this file except for non derivative code:
26 | # in that case the license header at the top of the file applies.
27 | # See docs/copyright_license.rst
28 |
29 |
30 | # 0.30
31 | class _cmarkCmarkReference:
32 | __slots__ = [
33 | 'next',
34 | 'label',
35 | 'url',
36 | 'title',
37 | 'age',
38 | 'size',
39 | ]
40 |
41 | def __init__(self):
42 | self.next: _cmarkCmarkReference = None
43 | self.label: str = None
44 | self.url: str = None
45 | self.title: str = None
46 | self.age: int = 0
47 | self.size: int = 0
48 |
49 |
50 | # 0.30
51 | class _cmarkCmarkReferenceMap:
52 | __slots__ = [
53 | 'mem',
54 | 'refs',
55 | 'sorted',
56 | 'size',
57 | 'ref_size',
58 | 'max_ref_size',
59 | ]
60 |
61 | def __init__(self):
62 | self.mem: _cmarkCmarkMem = None
63 | self.refs: _cmarkCmarkReference = None
64 | # A list of _cmarkCmarkReference
65 | self.sorted: list = None
66 | self.size: int = 0
67 | self.ref_size: int = 0
68 | self.max_ref_size: int = 0
69 |
70 |
71 | if __name__ == '__main__':
72 | pass
73 |
--------------------------------------------------------------------------------
/asciinema/md_toc_asciinema_3_0_0_demo.sh:
--------------------------------------------------------------------------------
1 | #!/bin/bash
2 |
3 | #
4 | # md_toc_asciinema_3_0_0_demo.sh
5 | #
6 | # Copyright (C) 2019 Franco Masotti (see /README.md)
7 | #
8 | # This file is part of md-toc.
9 | #
10 | # md-toc is free software: you can redistribute it and/or modify
11 | # it under the terms of the GNU General Public License as published by
12 | # the Free Software Foundation, either version 3 of the License, or
13 | # (at your option) any later version.
14 | #
15 | # md-toc is distributed in the hope that it will be useful,
16 | # but WITHOUT ANY WARRANTY; without even the implied warranty of
17 | # MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
18 | # GNU General Public License for more details.
19 | #
20 | # You should have received a copy of the GNU General Public License
21 | # along with md-toc. If not, see <https://www.gnu.org/licenses/>.
22 | #
23 |
24 | # Discover md_toc of this repository not the one installed on the system.
25 | export PYTHONPATH='..'
26 |
27 | printf "Running a demo to show some of md_toc's capabilities...\n"
28 | printf "\n"
29 | sleep 2
30 |
31 | printf "$ md_toc -h\n"
32 | md_toc -h
33 | printf "\n"
34 | sleep 5
35 |
36 | cat <<-EOF > foo.md
37 | # Hi
38 |
39 | [](TOC)
40 |
41 | hey
42 |
43 | ## How are you? !!!
44 |
45 | ## fine, thanks
46 |
47 | ### Bye
48 |
49 | ## Bye bye
50 | EOF
51 |
52 | printf "Inspecting the file...\n"
53 | printf "$ cat foo.md\n"
54 | cat foo.md
55 | printf "\n"
56 | sleep 5
57 |
58 | printf "Run with default options...\n"
59 | printf "$ md_toc foo.md github\n"
60 | md_toc foo.md github
61 | printf "\n"
62 | sleep 5
63 |
64 | printf "Ordered list...\n"
65 | printf "$ md_toc foo.md gitlab -o\n"
66 | md_toc foo.md gitlab -o
67 | printf "\n"
68 | sleep 5
69 |
70 | printf "No links...\n"
71 | printf "$ md_toc -l foo.md github\n"
72 | md_toc -l foo.md github
73 | printf "\n"
74 | sleep 5
75 |
76 | printf "No links and no indentation...\n"
77 | printf "$ md_toc -l -i foo.md github\n"
78 | md_toc -l -i foo.md github
79 | printf "\n"
80 | sleep 5
81 |
82 | printf "Editing the file in-place...\n"
83 | printf "$ md_toc -p foo.md redcarpet\n"
84 | md_toc -p foo.md redcarpet
85 | printf "$ cat foo.md\n"
86 | cat foo.md
87 |
88 | rm foo.md
89 |
--------------------------------------------------------------------------------
/md_toc/tests/fuzzer.py:
--------------------------------------------------------------------------------
1 | #
2 | # fuzzer.py
3 | #
4 | # Copyright (C) 2024 Franco Masotti (see /README.md)
5 | #
6 | # This file is part of md-toc.
7 | #
8 | # md-toc is free software: you can redistribute it and/or modify
9 | # it under the terms of the GNU General Public License as published by
10 | # the Free Software Foundation, either version 3 of the License, or
11 | # (at your option) any later version.
12 | #
13 | # md-toc is distributed in the hope that it will be useful,
14 | # but WITHOUT ANY WARRANTY; without even the implied warranty of
15 | # MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
16 | # GNU General Public License for more details.
17 | #
18 | # You should have received a copy of the GNU General Public License
19 | # along with md-toc. If not, see <https://www.gnu.org/licenses/>.
20 | #
21 | r"""A basic fuzzer for the build_toc function."""
22 | import atheris
23 |
24 | with atheris.instrument_imports():
25 | import secrets
26 | import sys
27 | import tempfile
28 |
29 | from .. import api, exceptions
30 |
31 |
def TestBuildToc(data):
    r"""Test the md_toc.api.build_toc function.

    The fuzzer input is written to a temporary file as the content of an
    ATX-style heading line, then a TOC is built from that file once for
    each parser.

    :parameter data: the bytes generated by the fuzzer.
    :type data: bytes
    :returns: None
    """
    with tempfile.NamedTemporaryFile() as tmp:

        # A random run of 1 to 6 '#' characters followed by a space and
        # by the fuzzer input.
        heading_marker: bytes = bytes('#' * (secrets.randbelow(6) + 1), 'UTF-8')
        content: bytes = heading_marker + b' ' + data
        tmp.write(content)

        # Move pointer to the start of the file.
        tmp.seek(0)

        try:
            for parser_name in ('cmark', 'github', 'gitlab', 'redcarpet'):
                api.build_toc(filename=tmp.name,
                              parser=parser_name,
                              keep_header_levels=6)
        except (exceptions.GithubEmptyLinkLabel,
                exceptions.TocDoesNotRenderAsCoherentList,
                exceptions.GithubOverflowCharsLinkLabel) as expected:
            # These errors are expected, not crashes:
            # - GithubEmptyLinkLabel: the input string cannot be guaranteed
            #   to have a non-empty label
            # - TocDoesNotRenderAsCoherentList: a newline with '#' sequences
            #   can be inserted
            # - GithubOverflowCharsLinkLabel: the header generates a link
            #   label which is too long
            # The first one raised also skips the remaining parsers.
            print(expected, end='')
61 |
62 |
63 | atheris.Setup(sys.argv, TestBuildToc)
64 | atheris.Fuzz()
65 |
--------------------------------------------------------------------------------
/md_toc/cmark/scanners_h.py:
--------------------------------------------------------------------------------
1 | #
2 | # scanners_h.py
3 | #
4 | # Copyright (C) 2017-2022 Franco Masotti (see /README.md)
5 | #
6 | # This file is part of md-toc.
7 | #
8 | # md-toc is free software: you can redistribute it and/or modify
9 | # it under the terms of the GNU General Public License as published by
10 | # the Free Software Foundation, either version 3 of the License, or
11 | # (at your option) any later version.
12 | #
13 | # md-toc is distributed in the hope that it will be useful,
14 | # but WITHOUT ANY WARRANTY; without even the implied warranty of
15 | # MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
16 | # GNU General Public License for more details.
17 | #
18 | # You should have received a copy of the GNU General Public License
19 | # along with md-toc. If not, see <https://www.gnu.org/licenses/>.
20 | #
21 | r"""A cmark implementation file."""
22 |
23 | from .chunk_h import _cmarkCmarkChunk
24 | from .scanners_c import _cmark__scan_at
25 |
26 | # License C applies to this file except for non derivative code:
27 | # in that case the license header at the top of the file applies.
28 | # See docs/copyright_license.rst
29 |
30 |
def _cmark_scan_spacechars(c: _cmarkCmarkChunk, n: int) -> int:
    r"""Call ``_cmark__scan_at`` selecting the ``_cmark__scan_spacechars`` scanner.

    :parameter c: the chunk, forwarded unchanged.
    :parameter n: the integer argument, forwarded unchanged.
    :type c: _cmarkCmarkChunk
    :type n: int
    :returns: the value returned by ``_cmark__scan_at``.
    :rtype: int
    """
    scanner_name = '_cmark__scan_spacechars'
    return _cmark__scan_at(scanner_name, c, n)
33 |
34 |
def _cmark_scan_link_title(c: _cmarkCmarkChunk, n: int) -> int:
    r"""Call ``_cmark__scan_at`` selecting the ``_cmark__scan_link_title`` scanner.

    :parameter c: the chunk, forwarded unchanged.
    :parameter n: the integer argument, forwarded unchanged.
    :type c: _cmarkCmarkChunk
    :type n: int
    :returns: the value returned by ``_cmark__scan_at``.
    :rtype: int
    """
    scanner_name = '_cmark__scan_link_title'
    return _cmark__scan_at(scanner_name, c, n)
37 |
38 |
def _cmark_scan_autolink_uri(c: _cmarkCmarkChunk, n: int) -> int:
    r"""Call ``_cmark__scan_at`` selecting the ``_cmark__scan_autolink_uri`` scanner.

    :parameter c: the chunk, forwarded unchanged.
    :parameter n: the integer argument, forwarded unchanged.
    :type c: _cmarkCmarkChunk
    :type n: int
    :returns: the value returned by ``_cmark__scan_at``.
    :rtype: int
    """
    scanner_name = '_cmark__scan_autolink_uri'
    return _cmark__scan_at(scanner_name, c, n)
41 |
42 |
def _cmark_scan_autolink_email(c: _cmarkCmarkChunk, n: int) -> int:
    r"""Call ``_cmark__scan_at`` selecting the ``_cmark__scan_autolink_email`` scanner.

    :parameter c: the chunk, forwarded unchanged.
    :parameter n: the integer argument, forwarded unchanged.
    :type c: _cmarkCmarkChunk
    :type n: int
    :returns: the value returned by ``_cmark__scan_at``.
    :rtype: int
    """
    scanner_name = '_cmark__scan_autolink_email'
    return _cmark__scan_at(scanner_name, c, n)
45 |
46 |
def _cmark_scan_html_comment(c: _cmarkCmarkChunk, n: int) -> int:
    r"""Call ``_cmark__scan_at`` selecting the ``_cmark__scan_html_comment`` scanner.

    :parameter c: the chunk, forwarded unchanged.
    :parameter n: the integer argument, forwarded unchanged.
    :type c: _cmarkCmarkChunk
    :type n: int
    :returns: the value returned by ``_cmark__scan_at``.
    :rtype: int
    """
    scanner_name = '_cmark__scan_html_comment'
    return _cmark__scan_at(scanner_name, c, n)
49 |
50 |
def _cmark_scan_html_cdata(c: _cmarkCmarkChunk, n: int) -> int:
    r"""Call ``_cmark__scan_at`` selecting the ``_cmark__scan_cdata`` scanner.

    :parameter c: the chunk, forwarded unchanged.
    :parameter n: the integer argument, forwarded unchanged.
    :type c: _cmarkCmarkChunk
    :type n: int
    :returns: the value returned by ``_cmark__scan_at``.
    :rtype: int
    """
    # NOTE(review): the other HTML wrappers in this module use names of the
    # form '_cmark__scan_html_*' while this one uses '_cmark__scan_cdata':
    # confirm that this is the name expected by _cmark__scan_at.
    return _cmark__scan_at('_cmark__scan_cdata', c, n)
53 |
54 |
def _cmark_scan_html_tag(c: _cmarkCmarkChunk, n: int) -> int:
    r"""Call ``_cmark__scan_at`` selecting the ``_cmark__scan_html_tag`` scanner.

    :parameter c: the chunk, forwarded unchanged.
    :parameter n: the integer argument, forwarded unchanged.
    :type c: _cmarkCmarkChunk
    :type n: int
    :returns: the value returned by ``_cmark__scan_at``.
    :rtype: int
    """
    scanner_name = '_cmark__scan_html_tag'
    return _cmark__scan_at(scanner_name, c, n)
57 |
58 |
def _cmark_scan_html_declaration(c: _cmarkCmarkChunk, n: int) -> int:
    r"""Call ``_cmark__scan_at`` selecting the ``_cmark__scan_html_declaration`` scanner.

    :parameter c: the chunk, forwarded unchanged.
    :parameter n: the integer argument, forwarded unchanged.
    :type c: _cmarkCmarkChunk
    :type n: int
    :returns: the value returned by ``_cmark__scan_at``.
    :rtype: int
    """
    scanner_name = '_cmark__scan_html_declaration'
    return _cmark__scan_at(scanner_name, c, n)
61 |
62 |
def _cmark_scan_html_pi(c: _cmarkCmarkChunk, n: int) -> int:
    r"""Call ``_cmark__scan_at`` selecting the ``_cmark__scan_html_pi`` scanner.

    :parameter c: the chunk, forwarded unchanged.
    :parameter n: the integer argument, forwarded unchanged.
    :type c: _cmarkCmarkChunk
    :type n: int
    :returns: the value returned by ``_cmark__scan_at``.
    :rtype: int
    """
    scanner_name = '_cmark__scan_html_pi'
    return _cmark__scan_at(scanner_name, c, n)
65 |
--------------------------------------------------------------------------------
/asciinema/md_toc_asciinema_3_1_0_demo.sh:
--------------------------------------------------------------------------------
1 | #!/bin/bash
2 |
3 | #
4 | # md_toc_asciinema_3_1_0_demo.sh
5 | #
6 | # Copyright (C) 2019 Franco Masotti (see /README.md)
7 | #
8 | # This file is part of md-toc.
9 | #
10 | # md-toc is free software: you can redistribute it and/or modify
11 | # it under the terms of the GNU General Public License as published by
12 | # the Free Software Foundation, either version 3 of the License, or
13 | # (at your option) any later version.
14 | #
15 | # md-toc is distributed in the hope that it will be useful,
16 | # but WITHOUT ANY WARRANTY; without even the implied warranty of
17 | # MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
18 | # GNU General Public License for more details.
19 | #
20 | # You should have received a copy of the GNU General Public License
21 | # along with md-toc. If not, see <https://www.gnu.org/licenses/>.
22 | #
23 |
24 | # Discover md_toc of this repository not the one installed on the system.
25 | export PYTHONPATH='..'
26 |
27 | printf "Running a demo to show some of md_toc's capabilities...\n"
28 | printf "\n"
29 | sleep 2
30 |
31 | printf "$ md_toc -h\n"
32 | md_toc -h
33 | printf "\n"
34 | sleep 5
35 |
36 | cat <<-EOF > foo.md
37 | # Hi
38 |
39 | [](TOC)
40 |
41 | hey
42 |
43 | ## How are you? !!!
44 |
45 | ## fine, thanks
46 |
47 | ### Bye
48 |
49 | ## Bye bye
50 |
51 | \`\`\`python
52 | # This is a code
53 | # fence with comments that might represent ATX-style headings
54 | # if not properly parsed
55 | \`\`\`
56 |
57 | bye
58 |
59 | # boo
60 | EOF
61 |
62 | printf "Inspecting the file...\n"
63 | printf "$ cat foo.md\n"
64 | cat foo.md
65 | printf "\n"
66 | sleep 5
67 |
68 | printf "Run with default options...\n"
69 | printf "$ md_toc foo.md github\n"
70 | md_toc foo.md github
71 | printf "\n"
72 | sleep 5
73 |
74 | printf "Ordered list...\n"
75 | printf "$ md_toc foo.md gitlab -o\n"
76 | md_toc foo.md gitlab -o
77 | printf "\n"
78 | sleep 5
79 |
80 | printf "No links...\n"
81 | printf "$ md_toc -l foo.md github\n"
82 | md_toc -l foo.md github
83 | printf "\n"
84 | sleep 5
85 |
86 | printf "No links and no indentation...\n"
87 | printf "$ md_toc -l -i foo.md github\n"
88 | md_toc -l -i foo.md github
89 | printf "\n"
90 | sleep 5
91 |
92 | printf "Use stdin and ...\n"
93 | printf "$ cat foo.md | md_toc -l -i cmark -u '*'\n"
94 | cat foo.md | md_toc -l -i cmark -u '*'
95 | printf "\n"
96 | sleep 5
97 |
98 | printf "Editing the file in-place. As you can see, code fence \
99 | detection still needs to be implemented for redcarpet..\n"
100 | printf "$ md_toc -p foo.md redcarpet\n"
101 | md_toc -p foo.md redcarpet
102 | printf "$ cat foo.md\n"
103 | cat foo.md
104 |
105 | rm foo.md
106 |
--------------------------------------------------------------------------------
/md_toc/cmark/chunk_h.py:
--------------------------------------------------------------------------------
1 | #
2 | # chunk_h.py
3 | #
4 | # Copyright (C) 2017-2022 Franco Masotti (see /README.md)
5 | #
6 | # This file is part of md-toc.
7 | #
8 | # md-toc is free software: you can redistribute it and/or modify
9 | # it under the terms of the GNU General Public License as published by
10 | # the Free Software Foundation, either version 3 of the License, or
11 | # (at your option) any later version.
12 | #
13 | # md-toc is distributed in the hope that it will be useful,
14 | # but WITHOUT ANY WARRANTY; without even the implied warranty of
15 | # MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
16 | # GNU General Public License for more details.
17 | #
18 | # You should have received a copy of the GNU General Public License
19 | # along with md-toc. If not, see <https://www.gnu.org/licenses/>.
20 | #
21 | r"""The cmark implementation file."""
22 |
23 | import copy
24 | from dataclasses import dataclass
25 |
26 | from ..constants import parser as md_parser
27 | from .cmark_ctype_c import _cmark_cmark_isspace
28 |
29 | # License E applies to this file except for non derivative code:
30 | # in that case the license header at the top of the file applies.
31 | # See docs/copyright_license.rst
32 |
33 | # Returns 1 if c is a "whitespace" character as defined by the spec.
34 | # int cmark_isspace(char c) { return cmark_ctype_class[(uint8_t)c] == 1; }
35 | # The only defined whitespaces in the spec are Unicode whitespaces.
36 |
37 |
38 | # 0.30
39 | @dataclass
40 | class _cmarkCmarkChunk:
41 | data: str = None
42 | length: int = 0
43 |
44 |
45 | # 0.30
46 | def _cmark_cmark_chunk_free(c: _cmarkCmarkChunk):
47 | c.data = None
48 | c.length = 0
49 |
50 |
51 | # 0.30
52 | def _cmark_cmark_chunk_ltrim(c: _cmarkCmarkChunk):
53 | while c.length > 0 and _cmark_cmark_isspace(ord(c.data[0])):
54 | c.data = c.data[1:]
55 | c.length -= 1
56 |
57 |
58 | # 0.30
59 | def _cmark_cmark_chunk_rtrim(c: _cmarkCmarkChunk):
60 | while c.length > 0:
61 | if not _cmark_cmark_isspace(ord(c.data[c.length - 1])):
62 | break
63 |
64 | c.length -= 1
65 |
66 |
67 | # 0.30
68 | def _cmark_cmark_chunk_trim(c: _cmarkCmarkChunk):
69 | _cmark_cmark_chunk_ltrim(c)
70 | _cmark_cmark_chunk_rtrim(c)
71 |
72 |
73 | # 0.30
74 | def _cmark_cmark_chunk_literal(data: str) -> _cmarkCmarkChunk:
75 | length: int
76 | c: _cmarkCmarkChunk
77 |
78 | if data is not None:
79 | length = len(data)
80 | else:
81 | length = 0
82 |
83 | c = _cmarkCmarkChunk(data, length)
84 | return c
85 |
86 |
87 | # 0.29, 0.30
88 | def _cmark_cmark_chunk_dup(ch: _cmarkCmarkChunk, pos: int,
89 | length: int) -> _cmarkCmarkChunk:
90 | c = _cmarkCmarkChunk(copy.deepcopy(ch.data[pos:]), length)
91 | return c
92 |
93 |
94 | if __name__ == '__main__':
95 | pass
96 |
--------------------------------------------------------------------------------
/asciinema/md_toc_asciinema_2_0_0.json:
--------------------------------------------------------------------------------
1 | {"version": 2, "width": 174, "height": 46, "timestamp": 1521395243, "env": {"SHELL": "/bin/bash", "TERM": "st-256color"}}
2 | [0.009865, "o", "Running a demo to show some of md_toc's capabilities...\r\n"]
3 | [0.010509, "o", "\r\n"]
4 | [2.011863, "o", "$ md_toc -h\r\n"]
5 | [2.21847, "o", "usage: md_toc [-h] [-i] [-n] [-t TOC_MARKER] [-v]\r\n {github,cmark,redcarpet,gitlab} ... FILE_NAME\r\n\r\nMarkdown Table Of Contents: Automatically generate a compliant table\r\nof contents for a markdown file to improve document readability.\r\n\r\npositional arguments:\r\n FILE_NAME the I/O file name\r\n\r\noptional arguments:\r\n -h, --help show this help message and exit\r\n -i, --in-place overwrite the input file\r\n -n, --no-links avoids adding links to the corresponding content\r\n -t TOC_MARKER, --toc-marker TOC_MARKER\r\n set the string to be used as the marker for\r\n positioning the table of contents. Defaults to [](TOC)\r\n -v, --version show program's version number and exit\r\n\r\nmarkdown parser:\r\n {github,cmark,redcarpet,gitlab}\r\n\r\nReturn values: 0 OK, 1 Error, 2 Invalid command\r\n\r\nCopyright (C) 2018 Franco Masotti, frnmst\r\nLicense GPLv3+: GNU GPL version 3 or later \r\nThis is free soft"]
6 | [2.218538, "o", "ware: you are free to change and redistribute it.\r\nThere is NO WARRANTY, to the extent permitted by law.\r\n"]
7 | [2.233581, "o", "\r\n"]
8 | [7.236907, "o", "$ cat foo.md\r\n"]
9 | [7.239637, "o", "# Hi\r\n\r\n[](TOC)\r\n\r\nhey\r\n\r\n## How are you? !!!\r\n\r\n## fine, thanks\r\n\r\n### Bye\r\n\r\n## Bye bye\r\n"]
10 | [7.241085, "o", "\r\n"]
11 | [12.244051, "o", "$ md_toc github foo.md\r\n"]
12 | [12.449938, "o", "- [Hi](#hi)\r\n - [How are you? !!!](#how-are-you-----------)\r\n - [fine, thanks](#fine-thanks)\r\n - [Bye](#bye)\r\n - [Bye bye](#bye-bye)\r\n"]
13 | [12.46659, "o", "\r\n"]
14 | [17.468522, "o", "$ md_toc gitlab -o foo.md\r\n"]
15 | [17.673403, "o", "1. [Hi](#hi)\r\n 1. [How are you? !!!](#how-are-you)\r\n 2. [fine, thanks](#fine-thanks)\r\n 1. [Bye](#bye)\r\n 3. [Bye bye](#bye-bye)\r\n"]
16 | [17.689824, "o", "\r\n"]
17 | [22.691577, "o", "$ md_toc -n github foo.md\r\n"]
18 | [22.898801, "o", "- Hi\r\n - How are you? !!!\r\n - fine, thanks\r\n - Bye\r\n - Bye bye\r\n"]
19 | [22.913503, "o", "\r\n"]
20 | [27.915307, "o", "$ Editing the file in-place..."]
21 | [27.91691, "o", "\r\n"]
22 | [27.917858, "o", "$ md_toc -i redcarpet foo.md\r\n"]
23 | [28.137716, "o", "$ cat foo.md\r\n"]
24 | [28.139199, "o", "# Hi\r\n\r\n[](TOC)\r\n\r\n- [Hi](#hi)\r\n - [How are you? !!!](#how-are-you)\r\n - [fine, thanks](#fine-thanks)\r\n - [Bye](#bye)\r\n - [Bye bye](#bye-bye)\r\n\r\n[](TOC)\r\n\r\nhey\r\n\r\n## How are you? !!!\r\n\r\n## fine, thanks\r\n\r\n### Bye\r\n\r\n## Bye bye\r\n"]
25 |
--------------------------------------------------------------------------------
/md_toc/cmark/cmark_h.py:
--------------------------------------------------------------------------------
1 | #
2 | # cmark_h.py
3 | #
4 | # Copyright (C) 2017-2022 Franco Masotti (see /README.md)
5 | #
6 | # This file is part of md-toc.
7 | #
8 | # md-toc is free software: you can redistribute it and/or modify
9 | # it under the terms of the GNU General Public License as published by
10 | # the Free Software Foundation, either version 3 of the License, or
11 | # (at your option) any later version.
12 | #
13 | # md-toc is distributed in the hope that it will be useful,
14 | # but WITHOUT ANY WARRANTY; without even the implied warranty of
15 | # MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
16 | # GNU General Public License for more details.
17 | #
18 | # You should have received a copy of the GNU General Public License
19 | # along with md-toc. If not, see <https://www.gnu.org/licenses/>.
20 | #
21 | r"""A cmark implementation file."""
22 |
23 | from enum import Enum
24 |
25 | # License C applies to this file except for non derivative code:
26 | # in that case the license header at the top of the file applies.
27 | # See docs/copyright_license.rst
28 |
29 |
30 | # Defines the memory allocation functions to be used by CMark
31 | # when parsing and allocating a document tree
32 | # typedef struct cmark_mem {
33 | # void *(*calloc)(size_t, size_t);
34 | # void *(*realloc)(void *, size_t);
35 | # void (*free)(void *);
36 | # } cmark_mem;
37 | class _cmarkCmarkMem:
38 | pass
39 |
40 |
41 | # C enum
42 | # typedef enum { ... } cmark_node_type;
43 | # Undefined values in the C source code get their value
44 | # according to their position in the sequence, like an array.
45 | class _cmarkCmarkNodeType(Enum):
46 | # Error status.
47 | CMARK_NODE_NONE = 0
48 |
49 | # Block.
50 | CMARK_NODE_DOCUMENT = 1
51 | CMARK_NODE_BLOCK_QUOTE = 2
52 | CMARK_NODE_LIST = 3
53 | CMARK_NODE_ITEM = 4
54 | CMARK_NODE_CODE_BLOCK = 5
55 | CMARK_NODE_HTML_BLOCK = 6
56 | CMARK_NODE_CUSTOM_BLOCK = 7
57 | CMARK_NODE_PARAGRAPH = 8
58 | CMARK_NODE_HEADING = 9
59 | CMARK_NODE_THEMATIC_BREAK = 10
60 |
61 | CMARK_NODE_FIRST_BLOCK = CMARK_NODE_DOCUMENT
62 | CMARK_NODE_LAST_BLOCK = CMARK_NODE_THEMATIC_BREAK
63 |
64 | # Inline
65 | CMARK_NODE_TEXT = 11
66 | CMARK_NODE_SOFTBREAK = 12
67 | CMARK_NODE_LINEBREAK = 13
68 | CMARK_NODE_CODE = 14
69 | CMARK_NODE_HTML_INLINE = 15
70 | CMARK_NODE_CUSTOM_INLINE = 16
71 | CMARK_NODE_EMPH = 17
72 | CMARK_NODE_STRONG = 18
73 | CMARK_NODE_LINK = 19
74 | CMARK_NODE_IMAGE = 20
75 |
76 | CMARK_NODE_FIRST_INLINE = CMARK_NODE_TEXT
77 | CMARK_NODE_LAST_INLINE = CMARK_NODE_IMAGE
78 |
79 | # For backwards compatibility:
80 | CMARK_NODE_HEADER = CMARK_NODE_HEADING
81 | CMARK_NODE_HRULE = CMARK_NODE_THEMATIC_BREAK
82 | CMARK_NODE_HTML = CMARK_NODE_HTML_BLOCK
83 | CMARK_NODE_INLINE_HTML = CMARK_NODE_HTML_INLINE
84 |
85 |
86 | CMARK_OPT_SOURCEPOS = 1 << 1
87 | CMARK_OPT_SMART = 1 << 10
88 |
89 | if __name__ == '__main__':
90 | pass
91 |
--------------------------------------------------------------------------------
/.pre-commit-config.yaml:
--------------------------------------------------------------------------------
1 | # See https://pre-commit.com for more information
2 | # See https://pre-commit.com/hooks.html for more hooks
3 | repos:
4 | - repo: https://github.com/pre-commit/pre-commit-hooks
5 | rev: 'v4.5.0'
6 | hooks:
7 | - id: trailing-whitespace
8 | - id: end-of-file-fixer
9 | - id: check-yaml
10 | - id: check-added-large-files
11 | - id: destroyed-symlinks
12 | - id: detect-private-key
13 | - id: check-ast
14 | - id: check-case-conflict
15 | - id: debug-statements
16 | # Remove <# -*- coding: utf-8 -*-> from the top of python files (see the '--remove' argument below).
17 | - id: fix-encoding-pragma
18 | # https://github.com/asottile/pyupgrade/issues/748
19 | # https://github.com/asottile/pyupgrade/issues/89
20 | args: ['--remove']
21 | - id: forbid-submodules
22 | - id: check-symlinks
23 | - id: check-shebang-scripts-are-executable
24 | - id: check-case-conflict
25 | - id: check-added-large-files
26 | args: ['--maxkb=4096']
27 | - id: destroyed-symlinks
28 | - id: double-quote-string-fixer
29 | - id: mixed-line-ending
30 | args: ['--fix=lf']
31 | - id: requirements-txt-fixer
32 |
33 | - repo: https://github.com/asottile/pyupgrade
34 | rev: 'v3.15.0'
35 | hooks:
36 | - id: pyupgrade
37 |
38 | # YAPF and flake8 need to remain pinned.
39 | - repo: https://github.com/pre-commit/mirrors-yapf
40 | rev: 'v0.32.0' # frozen: v0.32.0
41 | hooks:
42 | - id: yapf
43 | additional_dependencies: ['toml']
44 |
45 | - repo: https://github.com/pycqa/flake8
46 | rev: '6.0.0' # frozen: 6.0.0
47 | hooks:
48 | - id: flake8
49 | additional_dependencies: ['flake8-docstrings']
50 |
51 | - repo: https://github.com/PyCQA/bandit
52 | rev: '1.7.7'
53 | hooks:
54 | - id: bandit
55 | args: ['-c', 'pyproject.toml', '--level', 'LOW']
56 | additional_dependencies: ['bandit[toml]']
57 |
58 | - repo: https://github.com/pycqa/isort
59 | rev: '5.13.2'
60 | hooks:
61 | - id: isort
62 |
63 | - repo: https://codeberg.org/frnmst/md-toc
64 | rev: '8.2.3'
65 | hooks:
66 | - id: md-toc
67 | args: ['-p', 'cmark', '-l6']
68 |
69 | - repo: https://github.com/pypa/pip-audit
70 | rev: 'v2.7.2'
71 | hooks:
72 | - id: pip-audit
73 | args: ['--requirement', '.requirements-freeze-hashes.txt', '--local', '--require-hashes', '--desc', '--aliases', '--no-deps', '--disable-pip', '--strict']
74 |
75 | - repo: https://github.com/mgedmin/check-manifest
76 | rev: '0.49'
77 | hooks:
78 | - id: check-manifest
79 |
80 | - repo: local
81 | hooks:
82 | - id: unit-tests
83 | name: unit tests
84 | language: system
85 | entry: make test
86 | verbose: true
87 | always_run: true
88 | pass_filenames: false
89 | - id: build-docs
90 | name: build docs
91 | language: system
92 | entry: make doc
93 | verbose: true
94 | always_run: true
95 | pass_filenames: false
96 |
97 | - repo: https://github.com/jorisroovers/gitlint
98 | rev: 'v0.19.1'
99 | hooks:
100 | - id: gitlint
101 |
--------------------------------------------------------------------------------
/asciinema/md_toc_asciinema_3_0_0.json:
--------------------------------------------------------------------------------
1 | {"version": 2, "width": 83, "height": 46, "timestamp": 1552905547, "env": {"SHELL": "/bin/bash", "TERM": "rxvt-unicode-256color"}}
2 | [0.010489, "o", "Running a demo to show some of md_toc's capabilities...\r\n\r\n"]
3 | [2.014595, "o", "$ md_toc -h\r\n"]
4 | [2.191121, "o", "usage: md_toc [-h] [-p] [-l] [-i] [-m TOC_MARKER] [-v]\r\n [FILE_NAME [FILE_NAME ...]]\r\n {github,cmark,gitlab,commonmarker,redcarpet} ...\r\n\r\nMarkdown Table Of Contents: Automatically generate a compliant table\r\nof contents for a markdown file to improve document readability.\r\n\r\npositional arguments:\r\n FILE_NAME the I/O file name\r\n\r\noptional arguments:\r\n -h, --help show this help message and exit\r\n -p, --in-place overwrite the input file\r\n -l, --no-links avoids adding links to the corresponding content\r\n -i, --no-indentation avoids adding indentations to the corresponding\r\n content\r\n -m TOC_MARKER, --toc-marker TOC_MARKER\r\n set the string to be used as the marker for\r\n positioning the table of contents. Defaults to [](TOC)\r\n -v, --version show program's version number and exit\r\n\r\nmarkdown parser:\r\n {github,cmark,gitlab,commonmarker,redcarpet}\r\n\r\nReturn values: 0 OK,"]
5 | [2.191225, "o", " 1 Error, 2 Invalid command\r\n\r\nCopyright (C) 2018-2019 Franco Masotti, frnmst\r\nLicense GPLv3+: GNU GPL version 3 or later \r\nThis is free software: you are free to change and redistribute it.\r\nThere is NO WARRANTY, to the extent permitted by law.\r\n"]
6 | [2.20289, "o", "\r\n"]
7 | [7.20649, "o", "Inspecting the file...\r\n$ cat foo.md\r\n"]
8 | [7.207679, "o", "# Hi\r\n\r\n[](TOC)\r\n\r\nhey\r\n\r\n## How are you? !!!\r\n\r\n## fine, thanks\r\n\r\n### Bye\r\n\r\n## Bye bye\r\n"]
9 | [7.20814, "o", "\r\n"]
10 | [12.210146, "o", "Run with default options...\r\n$ md_toc foo.md github\r\n"]
11 | [12.385408, "o", "- [Hi](#hi)\r\n - [How are you? !!!](#how-are-you-----------)\r\n - [fine, thanks](#fine-thanks)\r\n - [Bye](#bye)\r\n - [Bye bye](#bye-bye)\r\n"]
12 | [12.396683, "o", "\r\n"]
13 | [17.398421, "o", "Ordered list...\r\n$ md_toc foo.md gitlab -o\r\n"]
14 | [17.575691, "o", "1. [Hi](#hi)\r\n 1. [How are you? !!!](#how-are-you-----------)\r\n 2. [fine, thanks](#fine-thanks)\r\n 1. [Bye](#bye)\r\n 3. [Bye bye](#bye-bye)\r\n"]
15 | [17.587251, "o", "\r\n"]
16 | [22.589181, "o", "No links...\r\n$ md_toc -l foo.md github\r\n"]
17 | [22.767776, "o", "- Hi\r\n - How are you? !!!\r\n - fine, thanks\r\n - Bye\r\n - Bye bye\r\n"]
18 | [22.779368, "o", "\r\n"]
19 | [27.780919, "o", "No links and no indentation...\r\n$ md_toc -l -i foo.md github\r\n"]
20 | [27.95595, "o", "- Hi\r\n- How are you? !!!\r\n- fine, thanks\r\n- Bye\r\n- Bye bye\r\n"]
21 | [27.967396, "o", "\r\n"]
22 | [32.969054, "o", "Editing the file in-place...\r\n$ md_toc -p foo.md redcarpet\r\n"]
23 | [33.160029, "o", "$ cat foo.md\r\n"]
24 | [33.160866, "o", "# Hi\r\n\r\n[](TOC)\r\n\r\n- [Hi](#hi)\r\n - [How are you? !!!](#how-are-you)\r\n - [fine, thanks](#fine-thanks)\r\n - [Bye](#bye)\r\n - [Bye bye](#bye-bye)\r\n\r\n[](TOC)\r\n\r\nhey\r\n\r\n## How are you? !!!\r\n\r\n## fine, thanks\r\n\r\n### Bye\r\n\r\n## Bye bye\r\n"]
25 |
--------------------------------------------------------------------------------
/asciinema/md_toc_asciinema_5_0_0_demo.sh:
--------------------------------------------------------------------------------
1 | #!/bin/bash
2 |
3 | #
4 | # md_toc_asciinema_5_0_0_demo.sh
5 | #
6 | # Copyright (C) 2019 Franco Masotti (see /README.md)
7 | #
8 | # This file is part of md-toc.
9 | #
10 | # md-toc is free software: you can redistribute it and/or modify
11 | # it under the terms of the GNU General Public License as published by
12 | # the Free Software Foundation, either version 3 of the License, or
13 | # (at your option) any later version.
14 | #
15 | # md-toc is distributed in the hope that it will be useful,
16 | # but WITHOUT ANY WARRANTY; without even the implied warranty of
17 | # MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
18 | # GNU General Public License for more details.
19 | #
20 | # You should have received a copy of the GNU General Public License
21 | # along with md-toc. If not, see <https://www.gnu.org/licenses/>.
22 | #
23 |
24 | # Use the md_toc of this repository, not the one installed on the system.
25 | export PYTHONPATH='..'
26 | TIMEOUT=1
27 |
28 | printf "Running a demo to show some of md_toc's capabilities...\n"
29 | printf "\n"
30 | sleep ${TIMEOUT}
31 |
32 | printf "$ md_toc -h\n"
33 | md_toc -h
34 | printf "\n"
35 | sleep ${TIMEOUT}
36 |
37 | cat <<-EOF > foo.md
38 | # Hi
39 |
40 | [](TOC)
41 |
42 | hey
43 |
44 | ## How are you? !!!
45 |
46 | ## fine, thanks
47 |
48 | ### Bye
49 |
50 | ## Bye bye
51 |
52 | \`\`\`python
53 | # This is a code
54 | # fence with comments that might represent ATX-style headings
55 | # if not properly parsed
56 | \`\`\`
57 |
58 | bye
59 |
60 | # boo
61 | EOF
62 |
63 | cat <<-EOF > foo_noncoherent.md
64 | # Hi
65 | ### boo
66 | EOF
67 |
68 | printf "Inspecting the file...\n"
69 | printf "$ cat foo.md\n"
70 | cat foo.md
71 | printf "\n"
72 | sleep ${TIMEOUT}
73 |
74 | printf "Run with default options...\n"
75 | printf "$ md_toc foo.md github\n"
76 | md_toc foo.md github
77 | printf "\n"
78 | sleep ${TIMEOUT}
79 |
80 | printf "Ordered list...\n"
81 | printf "$ md_toc foo.md gitlab -o\n"
82 | md_toc foo.md gitlab -o
83 | printf "\n"
84 | sleep ${TIMEOUT}
85 |
86 | printf "No links...\n"
87 | printf "$ md_toc -l foo.md github\n"
88 | md_toc -l foo.md github
89 | printf "\n"
90 | sleep ${TIMEOUT}
91 |
92 | printf "No links and no indentation...\n"
93 | printf "$ md_toc -l -i foo.md github\n"
94 | md_toc -l -i foo.md github
95 | printf "\n"
96 | sleep ${TIMEOUT}
97 |
98 | printf "Inspecting the non-coherent file...\n"
99 | printf "$ cat foo_noncoherent.md\n"
100 | cat foo_noncoherent.md
101 | printf "\n"
102 | sleep ${TIMEOUT}
103 |
104 | printf "Trying to parse a non coherent markdown file will raise an exception...\n"
105 | printf "$ md_toc foo_noncoherent.md github\n"
106 | md_toc foo_noncoherent.md github
107 | printf "\n"
108 | sleep ${TIMEOUT}
109 |
110 | printf "Try to parse a non coherent markdown file without checking for coherence...\n"
111 | printf "$ md_toc -c foo_noncoherent.md github\n"
112 | md_toc -c foo_noncoherent.md github
113 | printf "\n"
114 | sleep ${TIMEOUT}
115 |
116 | printf "Use stdin, no links and no indentation...\n"
117 | printf "$ cat foo.md | md_toc -l -i cmark -u '*'\n"
118 | cat foo.md | md_toc -l -i cmark -u '*'
119 | printf "\n"
120 | sleep ${TIMEOUT}
121 |
122 | printf "Editing the file in-place. As you can see, code fence \
123 | detection still needs to be implemented for redcarpet..\n"
124 | printf "$ md_toc -p foo.md redcarpet\n"
125 | md_toc -p foo.md redcarpet
126 | printf "$ cat foo.md\n"
127 | cat foo.md
128 |
129 | rm foo.md foo_noncoherent.md
130 |
--------------------------------------------------------------------------------
/md_toc/types.py:
--------------------------------------------------------------------------------
1 | #
2 | # types.py
3 | #
4 | # Copyright (C) 2023 Franco Masotti (see /README.md)
5 | #
6 | # This file is part of md-toc.
7 | #
8 | # md-toc is free software: you can redistribute it and/or modify
9 | # it under the terms of the GNU General Public License as published by
10 | # the Free Software Foundation, either version 3 of the License, or
11 | # (at your option) any later version.
12 | #
13 | # md-toc is distributed in the hope that it will be useful,
14 | # but WITHOUT ANY WARRANTY; without even the implied warranty of
15 | # MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
16 | # GNU General Public License for more details.
17 | #
18 | # You should have received a copy of the GNU General Public License
19 | # along with md-toc. If not, see <https://www.gnu.org/licenses/>.
20 | #
21 | r"""Complex ``dict`` type definitions."""
22 | from typing import TypedDict
23 |
24 |
25 | class IndentationLogElement(TypedDict, total=False):
26 | r"""An ``indentation_log_element`` object.
27 |
28 | :parameter index: values: 1 -> md_parser['github']['header']['max levels'] + 1.
29 | :parameter list_marker: ordered or unordered list marker.
30 | :parameter indentation_spaces: number of indentation spaces.
31 | :type index: int
32 | :type list_marker: str
33 | :type indentation_spaces: int
34 | """
35 |
36 | # index: 1 -> md_parser['github']['header']['max_levels'] + 1
37 | index: int
38 | list_marker: str
39 | indentation_spaces: int
40 |
41 |
42 | class Header(TypedDict):
43 | r"""A ``header`` object.
44 |
45 | :parameter header_type: h1 to h6 (``1`` -> ``6``).
46 | :parameter text_original: Raw text.
47 | :parameter text_anchor_link: Transformed text so it works as an anchor
48 | link.
49 | :parameter visible: if ``True`` the header needs to be visible, if
50 | ``False`` it will not.
51 | :type header_type: int
52 | :type text_original: str
53 | :type text_anchor_link: str
54 | :type visible: bool
55 | """
56 |
57 | header_type: int
58 | text_original: str
59 | text_anchor_link: str
60 | visible: bool
61 |
62 |
63 | class HeaderTypeCounter(TypedDict, total=False):
64 | r"""The number of headers for each type, from ``h1`` to ``h6``."""
65 |
66 | h1: int
67 | h2: int
68 | h3: int
69 | h4: int
70 | h5: int
71 | h6: int
72 |
73 |
74 | class HeaderDuplicateCounter(TypedDict, total=False):
75 | r"""A ``header_duplicate_counter`` object.
76 |
77 | :parameter ``key``: a generic string corresponding to header links. Its
78 | value is the number of times ``key`` appears during the execution of
79 | md-toc.
80 |
81 | .. note:: This dict can be empty.
82 | """
83 |
84 | key: int
85 |
86 |
87 | class AtxHeadingStructElement(TypedDict, total=False):
88 | """A single element of the list returned by the ``get_atx_heading`` function.
89 |
90 | :parameter header_type: h1 to h6 (``1`` -> ``6``).
91 | :parameter header_text_trimmed: the link label.
92 | :parameter visible: if the line has a smaller header than
93 | ``keep_header_levels``, then ``visible`` is set to ``False``.
94 | :type header_type: int
95 | :type header_text_trimmed: str
96 | :type visible: bool
97 |
98 | .. note:: ``header_type`` and ``header_text_trimmed`` are
99 | set to ``None`` if the line does not contain header elements according
100 | to the rules of the selected markdown parser.
101 | ``visible`` is set to ``True`` if the line needs to be saved, ``False``
102 | if it is just needed for duplicate counting.
103 | """
104 |
105 | header_type: int
106 | header_text_trimmed: str
107 | visible: bool
108 |
--------------------------------------------------------------------------------
/asciinema/md_toc_asciinema_1_0_0.json:
--------------------------------------------------------------------------------
1 | {
2 | "version": 1,
3 | "width": 83,
4 | "height": 46,
5 | "duration": 28.378759,
6 | "command": "./md_toc_asciinema_1_0_0_demo.sh",
7 | "title": null,
8 | "env": {
9 | "TERM": "st-256color",
10 | "SHELL": "/bin/bash"
11 | },
12 | "stdout": [
13 | [
14 | 0.013629,
15 | "Running a demo to show some of md_toc's capabilities...\r\n"
16 | ],
17 | [
18 | 0.000432,
19 | "\r\n"
20 | ],
21 | [
22 | 2.001283,
23 | "$ md_toc -h\r\n"
24 | ],
25 | [
26 | 0.261431,
27 | "usage: md_toc [-h] [-i] [-n] [-o] [-p {standard,github,redcarpet,gitlab}]\r\n [-t TOC_MARKER] [-l HEADER_LEVELS] [-v]\r\n FILE_NAME\r\n\r\nMarkdown Table Of Contents\r\n\r\npositional arguments:\r\n FILE_NAME the I/O file name\r\n\r\noptional arguments:\r\n -h, --help show this help message and exit\r\n -i, --in-place overwrite the input file\r\n -n, --no-links avoids adding links to corresponding content\r\n -o, --ordered write as an ordered list\r\n -p {standard,github,redcarpet,gitlab}, --parser {standard,github,redcarpet,gitlab}\r\n decide what markdown parser will be used to generate\r\n the links. Defaults to standard\r\n -t TOC_MARKER, --toc-marker TOC_MARKER\r\n set the string to be used as the marker for\r\n positioning the table of contents\r\n -l HEADER_LEVELS, --header-levels HEADER_LEVELS\r\n set the maximum level of headers to be considered as\r\n "
28 | ],
29 | [
30 | 3.1e-05,
31 | " part of the TOC\r\n -v, --version show program's version number and exit\r\n\r\nReturn values: 0 OK, 1 Error, 2 Invalid command\r\n"
32 | ],
33 | [
34 | 0.014976,
35 | "\r\n"
36 | ],
37 | [
38 | 5.003232,
39 | "$ cat foo.md\r\n"
40 | ],
41 | [
42 | 0.001457,
43 | "# Hi\r\n\r\n[](TOC)\r\n\r\nhey\r\n\r\n## How are you? !!!\r\n\r\n## fine, thanks\r\n\r\n### Bye\r\n\r\n## Bye bye\r\n"
44 | ],
45 | [
46 | 0.000547,
47 | "\r\n"
48 | ],
49 | [
50 | 5.001721,
51 | "$ md_toc -p github foo.md\r\n"
52 | ],
53 | [
54 | 0.251735,
55 | "- [Hi](#hi)\r\n - [How are you? !!!](#how-are-you-----------)\r\n - [fine, thanks](#fine-thanks)\r\n - [Bye](#bye)\r\n - [Bye bye](#bye-bye)\r\n"
56 | ],
57 | [
58 | 0.015469,
59 | "\r\n"
60 | ],
61 | [
62 | 5.001438,
63 | "$ md_toc -o -p gitlab foo.md\r\n"
64 | ],
65 | [
66 | 0.252434,
67 | "1. [Hi](#hi)\r\n 1. [How are you? !!!](#how-are-you)\r\n 2. [fine, thanks](#fine-thanks)\r\n 1. [Bye](#bye)\r\n 3. [Bye bye](#bye-bye)\r\n"
68 | ],
69 | [
70 | 0.01472,
71 | "\r\n"
72 | ],
73 | [
74 | 5.001544,
75 | "$ md_toc -n foo.md\r\n"
76 | ],
77 | [
78 | 0.245354,
79 | "- Hi\r\n - How are you? !!!\r\n - fine, thanks\r\n - Bye\r\n - Bye bye\r\n"
80 | ],
81 | [
82 | 0.014583,
83 | "\r\n"
84 | ],
85 | [
86 | 5.001507,
87 | "$ Editing the file in-place...\r\n"
88 | ],
89 | [
90 | 0.000604,
91 | "$ md_toc -i -p redcarpet foo.md\r\n"
92 | ],
93 | [
94 | 0.276076,
95 | "$ cat foo.md\r\n"
96 | ],
97 | [
98 | 0.001687,
99 | "# Hi\r\n\r\n[](TOC)\r\n\r\n- [Hi](#hi)\r\n - [How are you? !!!](#how-are-you)\r\n - [fine, thanks](#fine-thanks)\r\n - [Bye](#bye)\r\n - [Bye bye](#bye-bye)\r\n\r\n[](TOC)\r\n\r\nhey\r\n\r\n## How are you? !!!\r\n\r\n## fine, thanks\r\n\r\n### Bye\r\n\r\n## Bye bye\r\n"
100 | ]
101 | ]
102 | }
103 |
--------------------------------------------------------------------------------
/md_toc/cmark/node_h.py:
--------------------------------------------------------------------------------
1 | #
2 | # node_h.py
3 | #
4 | # Copyright (C) 2017-2023 Franco Masotti (see /README.md)
5 | #
6 | # This file is part of md-toc.
7 | #
8 | # md-toc is free software: you can redistribute it and/or modify
9 | # it under the terms of the GNU General Public License as published by
10 | # the Free Software Foundation, either version 3 of the License, or
11 | # (at your option) any later version.
12 | #
13 | # md-toc is distributed in the hope that it will be useful,
14 | # but WITHOUT ANY WARRANTY; without even the implied warranty of
15 | # MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
16 | # GNU General Public License for more details.
17 | #
18 | # You should have received a copy of the GNU General Public License
19 | # along with md-toc. If not, see <https://www.gnu.org/licenses/>.
20 | #
21 | r"""A cmark implementation file."""
22 |
23 | from dataclasses import dataclass
24 |
25 | from .cmark_h import _cmarkCmarkMem, _cmarkCmarkNodeType
26 |
27 | # License C applies to this file except for non derivative code:
28 | # in that case the license header at the top of the file applies.
29 | # See docs/copyright_license.rst
30 |
31 |
32 | # Slots have less overhead but have been introduced in Python 3.10.
33 | # @dataclass(slots=True)
34 | @dataclass
35 | class _cmarkCmarkList:
36 | marker_offset: int = -1
37 | padding: int = -1
38 | start: int = -1
39 | list_type: int = -1
40 | delimiter: int = -1
41 | bullet_char: int = -1
42 | tight: bool = False
43 |
44 |
45 | @dataclass
46 | class _cmarkCmarkCode:
47 | info: str = -1
48 | fence_length: int = -1
49 | fence_offset: int = -1
50 | fence_char: int = -1
51 | fenced: int = -1
52 |
53 |
54 | @dataclass
55 | class _cmarkCmarkHeading:
56 | level: int = -1
57 | setext: bool = False
58 |
59 |
60 | @dataclass
61 | class _cmarkCmarkLink:
62 | url: str = ''
63 | title: str = ''
64 |
65 |
66 | @dataclass
67 | class _cmarkCmarkCustom:
68 | on_enter: str = ''
69 | on_exit: str = ''
70 |
71 |
72 | class _cmarkCmarkNode:
73 | __slots__ = [
74 | 'mem',
75 | 'next',
76 | 'prev',
77 | 'parent',
78 | 'first_child',
79 | 'last_child',
80 | 'user_data',
81 | 'data',
82 | 'length',
83 | 'start_line',
84 | 'start_column',
85 | 'end_line',
86 | 'end_column',
87 | 'internal_offset',
88 | 'type',
89 | 'flags',
90 | 'as_list',
91 | 'as_code',
92 | 'as_heading',
93 | 'as_link',
94 | 'as_custom',
95 | 'as_html_block_type',
96 | 'numdelims',
97 | ]
98 |
99 | def __init__(self):
100 | self.mem: _cmarkCmarkMem = None
101 |
102 | self.next: _cmarkCmarkNode = None
103 | self.prev: _cmarkCmarkNode = None
104 | self.parent: _cmarkCmarkNode = None
105 | self.first_child: _cmarkCmarkNode = None
106 | self.last_child: _cmarkCmarkNode = None
107 |
108 | self.user_data = None
109 |
110 | self.data: str = None
111 | self.length: int = 0
112 |
113 | self.start_line: int = 0
114 | self.start_column: int = 0
115 | self.end_line: int = 0
116 | self.end_column: int = 0
117 | self.internal_offset: int = 0
118 | self.type: int = 0
119 | self.flags: int = 0
120 |
121 | # "as" union.
122 | self.as_list: _cmarkCmarkList = _cmarkCmarkList()
123 | self.as_code: _cmarkCmarkCode = _cmarkCmarkCode()
124 | self.as_heading: _cmarkCmarkHeading = _cmarkCmarkHeading()
125 | self.as_link: _cmarkCmarkLink = _cmarkCmarkLink()
126 | self.as_custom: _cmarkCmarkCustom = _cmarkCmarkCustom()
127 | self.as_html_block_type: int = 0
128 |
129 | # Add a new variable.
130 | self.numdelims: int = 0
131 |
132 |
133 | if __name__ == '__main__':
134 | pass
135 |
--------------------------------------------------------------------------------
/setup.cfg:
--------------------------------------------------------------------------------
1 | # setup.cfg
2 | #
3 | # Copyright (C) 2022-2024 Franco Masotti (see /README.md)
4 | #
5 | # This file is part of md-toc.
6 | #
7 | # md-toc is free software: you can redistribute it and/or modify
8 | # it under the terms of the GNU General Public License as published by
9 | # the Free Software Foundation, either version 3 of the License, or
10 | # (at your option) any later version.
11 | #
12 | # md-toc is distributed in the hope that it will be useful,
13 | # but WITHOUT ANY WARRANTY; without even the implied warranty of
14 | # MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
15 | # GNU General Public License for more details.
16 | #
17 | # You should have received a copy of the GNU General Public License
18 | # along with md-toc. If not, see <https://www.gnu.org/licenses/>.
19 | #
20 |
21 | [metadata]
22 | name = md_toc
23 | # 'version' needs setuptools >= 39.2.0.
24 | version = 9.0.0
25 | license = GPLv3+,
26 | description = Automatically generate and add an accurate table of contents to markdown files
27 | long_description = file: README.md
28 | long_description_content_type = text/markdown
29 | author = Franco Masotti
30 | author_email = franco.masotti@tutanota.com
31 | keywords =
32 | markdown
33 | toc
34 | text
35 | table-of-contents
36 | documentation
37 | url = https://blog.franco.net.eu.org/software/#md-toc
38 | project_urls =
39 | Bug Tracker = https://github.com/frnmst/md-toc/issues
40 | Documentation = https://docs.franco.net.eu.org/md-toc/
41 | API Reference = https://docs.franco.net.eu.org/md-toc/api.html
42 | Source Code = https://github.com/frnmst/md-toc
43 | Changelog = https://blog.franco.net.eu.org/software/CHANGELOG-md-toc.html
44 | Funding = https://github.com/frnmst/md-toc#support-this-project
45 | classifiers =
46 | Development Status :: 5 - Production/Stable
47 | Topic :: Documentation
48 | Topic :: Software Development
49 | Topic :: Text Processing
50 | Topic :: Text Processing :: Markup :: Markdown
51 | Topic :: Utilities
52 | Intended Audience :: Developers
53 | Intended Audience :: End Users/Desktop
54 | Intended Audience :: Information Technology
55 | Environment :: Console
56 | License :: OSI Approved :: GNU General Public License v3 or later (GPLv3+)
57 | Programming Language :: Python :: 3
58 | Programming Language :: Python :: 3.8
59 | Programming Language :: Python :: 3.9
60 | Programming Language :: Python :: 3.10
61 | Programming Language :: Python :: 3.11
62 | Programming Language :: Python :: 3.12
63 | Programming Language :: Python :: 3.13
64 |
65 | [options]
66 | # https://iscinumpy.dev/post/bound-version-constraints/#pinning-the-python-version-is-special
67 | python_requires = >=3.8
68 | install_requires = file: requirements.txt
69 | packages = find:
70 |
71 | [options.entry_points]
72 | console_scripts =
73 | md_toc = md_toc.__main__:main
74 |
75 | [options.packages.find]
76 | exclude=
77 | *tests*
78 |
79 | [options.package_data]
80 | * = *.txt, *.rst
81 |
82 | [yapf]
83 | based_on_style = pep8
84 | indent_width = 4
85 |
86 | [flake8]
87 | ignore =
88 | E125
89 | E131
90 | E501
91 | W503
92 | W504
93 | F401
94 |
95 | [isort]
96 | # See
97 | # https://github.com/ESMValGroup/ESMValCore/issues/777
98 | multi_line_output = 3
99 | include_trailing_comma = true
100 |
101 | [tox:tox]
102 | requires =
103 | tox>=4
104 | env_list = py{38,39,310,311,312,313}
105 |
106 | [testenv]
107 | description = run the tests with unittest
108 | package = wheel
109 | wheel_build_env = .pkg
110 | deps =
111 | -r requirements.txt
112 | -r requirements-dev.txt
113 | commands =
114 | python3 -m unittest md_toc.tests.tests --failfast --locals --verbose
115 |
116 | [testenv:fuzzer]
117 | description = run the fuzzer
118 | package = wheel
119 | wheel_build_env = .pkg
120 | deps =
121 | atheris>=2.3,<2.4
122 | commands =
123 | python3 -m md_toc.tests.fuzzer
124 |
--------------------------------------------------------------------------------
/asciinema/md_toc_asciinema_3_1_0.json:
--------------------------------------------------------------------------------
1 | {"version": 2, "width": 83, "height": 46, "timestamp": 1553863945, "env": {"SHELL": "/bin/bash", "TERM": "rxvt-unicode-256color"}}
2 | [0.009147, "o", "Running a demo to show some of md_toc's capabilities...\r\n\r\n"]
3 | [2.010888, "o", "$ md_toc -h\r\n"]
4 | [2.194677, "o", "usage: md_toc [-h] [-p] [-l] [-i] [-m TOC_MARKER] [-v]\r\n [FILE_NAME [FILE_NAME ...]]\r\n {github,cmark,gitlab,commonmarker,redcarpet} ...\r\n\r\nMarkdown Table Of Contents: Automatically generate a compliant table\r\nof contents for a markdown file to improve document readability.\r\n\r\npositional arguments:\r\n FILE_NAME the I/O file name\r\n\r\noptional arguments:\r\n -h, --help show this help message and exit\r\n -p, --in-place overwrite the input file\r\n -l, --no-links avoids adding links to the corresponding content\r\n -i, --no-indentation avoids adding indentations to the corresponding\r\n content\r\n -m TOC_MARKER, --toc-marker TOC_MARKER\r\n set the string to be used as the marker for\r\n positioning the table of contents. Defaults to [](TOC)\r\n -v, --version show program's version number and exit\r\n\r\nmarkdown parser:\r\n {github,cmark,gitlab,commonmarker,redcarpet}\r\n\r\nPlease read the docu"]
5 | [2.194781, "o", "mentation to understand how each parser works\r\n\r\nReturn values: 0 ok, 1 error, 2 invalid command\r\n\r\nCopyright (C) 2018-2019 Franco Masotti, frnmst\r\nLicense GPLv3+: GNU GPL version 3 or later \r\nThis is free software: you are free to change and redistribute it.\r\nThere is NO WARRANTY, to the extent permitted by law.\r\n"]
6 | [2.206379, "o", "\r\n"]
7 | [7.209793, "o", "Inspecting the file...\r\n$ cat foo.md\r\n"]
8 | [7.211251, "o", "# Hi\r\n\r\n[](TOC)\r\n\r\nhey\r\n\r\n## How are you? !!!\r\n\r\n## fine, thanks\r\n\r\n### Bye\r\n\r\n## Bye bye\r\n\r\n```python\r\n# This is a code\r\n# fence with comments that might represent ATX-style headings\r\n# if not properly parsed\r\n```\r\n\r\nbye\r\n\r\n# boo\r\n"]
9 | [7.211669, "o", "\r\n"]
10 | [12.213709, "o", "Run with default options...\r\n$ md_toc foo.md github\r\n"]
11 | [12.398653, "o", "- [Hi](#hi)\r\n - [How are you? !!!](#how-are-you-----------)\r\n - [fine, thanks](#fine-thanks)\r\n - [Bye](#bye)\r\n - [Bye bye](#bye-bye)\r\n- [boo](#boo)\r\n"]
12 | [12.410272, "o", "\r\n"]
13 | [17.411973, "o", "Ordered list...\r\n$ md_toc foo.md gitlab -o\r\n"]
14 | [17.599974, "o", "1. [Hi](#hi)\r\n 1. [How are you? !!!](#how-are-you-----------)\r\n 2. [fine, thanks](#fine-thanks)\r\n 1. [Bye](#bye)\r\n 3. [Bye bye](#bye-bye)\r\n2. [boo](#boo)\r\n"]
15 | [17.611696, "o", "\r\n"]
16 | [22.613308, "o", "No links...\r\n$ md_toc -l foo.md github\r\n"]
17 | [22.796106, "o", "- Hi\r\n - How are you? !!!\r\n - fine, thanks\r\n - Bye\r\n - Bye bye\r\n- boo\r\n"]
18 | [22.808055, "o", "\r\n"]
19 | [27.809539, "o", "No links and no indentation...\r\n$ md_toc -l -i foo.md github\r\n"]
20 | [27.996843, "o", "- Hi\r\n- How are you? !!!\r\n- fine, thanks\r\n- Bye\r\n- Bye bye\r\n- boo\r\n"]
21 | [28.008569, "o", "\r\n"]
22 | [33.011449, "o", "Use stdin and ...\r\n$ cat foo.md | md_toc -l -i cmark -u '*'\r\n"]
23 | [33.1974, "o", "* Hi\r\n* How are you? !!!\r\n* fine, thanks\r\n* Bye\r\n* Bye bye\r\n* boo\r\n"]
24 | [33.209085, "o", "\r\n"]
25 | [38.210776, "o", "Editing the file in-place. As you can see, code fence detection still needs to be implemented for redcarpet..\r\n$ md_toc -p foo.md redcarpet\r\n"]
26 | [38.407375, "o", "$ cat foo.md\r\n"]
27 | [38.408353, "o", "# Hi\r\n\r\n[](TOC)\r\n\r\n- [Hi](#hi)\r\n - [How are you? !!!](#how-are-you)\r\n - [fine, thanks](#fine-thanks)\r\n - [Bye](#bye)\r\n - [Bye bye](#bye-bye)\r\n- [This is a code](#this-is-a-code)\r\n- [fence with comments that might represent ATX-style headings](#fence-with-comments-that-might-represent-atx-style-headings)\r\n- [if not properly parsed](#if-not-properly-parsed)\r\n- [boo](#boo)\r\n\r\n[](TOC)\r\n\r\nhey\r\n\r\n## How are you? !!!\r\n\r\n## fine, thanks\r\n\r\n### Bye\r\n\r\n## Bye bye\r\n\r\n```python\r\n# This is a code\r\n# fence with comments that might represent ATX-style headings\r\n# if not properly parsed\r\n```\r\n\r\nbye\r\n\r\n# boo\r\n"]
28 |
--------------------------------------------------------------------------------
/asciinema/md_toc_asciinema_6_0_0_demo.sh:
--------------------------------------------------------------------------------
1 | #!/bin/bash
2 |
3 | #
4 | # md_toc_asciinema_6_0_0_demo.sh
5 | #
6 | # Copyright (C) 2019 Franco Masotti (see /README.md)
7 | #
8 | # This file is part of md-toc.
9 | #
10 | # md-toc is free software: you can redistribute it and/or modify
11 | # it under the terms of the GNU General Public License as published by
12 | # the Free Software Foundation, either version 3 of the License, or
13 | # (at your option) any later version.
14 | #
15 | # md-toc is distributed in the hope that it will be useful,
16 | # but WITHOUT ANY WARRANTY; without even the implied warranty of
17 | # MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
18 | # GNU General Public License for more details.
19 | #
20 | # You should have received a copy of the GNU General Public License
21 | # along with md-toc. If not, see <https://www.gnu.org/licenses/>.
22 | #
23 |
24 | # Make Python import md_toc from this repository, not the copy installed on the system.
25 | export PYTHONPATH='..'
26 | TIMEOUT=1
27 |
28 | printf "Running a demo to show some of python -m md_toc's capabilities...\n"
29 | printf "\n"
30 | sleep ${TIMEOUT}
31 |
32 | printf "$ python -m md_toc -h\n"
33 | python -m md_toc -h
34 | printf "\n"
35 | sleep ${TIMEOUT}
36 | # Sample input. The marker line is where "md_toc -p" writes the TOC. NOTE(review): assumes <!--TOC--> is this release's default marker - confirm.
37 | cat <<-EOF > foo.md
38 | # Hi
39 |
40 | <!--TOC-->
41 |
42 | hey
43 |
44 | ## How are you? !!!
45 |
46 | ## fine, thanks
47 |
48 | ### Bye
49 |
50 | ## Bye bye
51 |
52 | \`\`\`python
53 | # This is a code
54 | # fence with comments that might represent ATX-style headings
55 | # if not properly parsed
56 | \`\`\`
57 |
58 | bye
59 |
60 | # boo
61 | EOF
62 |
63 | cat <<-EOF > foo_noncoherent.md
64 | # Hi
65 | ### boo
66 | EOF
67 |
68 | cat <<-EOF > foo_skiplines.md
69 | # I want this line to be a comment
70 | #### And this as well
71 | ## And this
72 | ###### ByeBye
73 |
74 | # Hi
75 | ## How
76 | ### Are
77 | ## You
78 | # Today ?
79 | EOF
80 |
81 | printf "Inspecting the file...\n"
82 | printf "$ cat foo.md\n"
83 | cat foo.md
84 | printf "\n"
85 | sleep ${TIMEOUT}
86 |
87 | printf "Run with default options...\n"
88 | printf "$ python -m md_toc foo.md github\n"
89 | python -m md_toc foo.md github
90 | printf "\n"
91 | sleep ${TIMEOUT}
92 |
93 | printf "Ordered list...\n"
94 | printf "$ python -m md_toc foo.md gitlab -o\n"
95 | python -m md_toc foo.md gitlab -o
96 | printf "\n"
97 | sleep ${TIMEOUT}
98 |
99 | printf "No links...\n"
100 | printf "$ python -m md_toc -l foo.md github\n"
101 | python -m md_toc -l foo.md github
102 | printf "\n"
103 | sleep ${TIMEOUT}
104 |
105 | printf "No links and no indentation...\n"
106 | printf "$ python -m md_toc -l -i foo.md github\n"
107 | python -m md_toc -l -i foo.md github
108 | printf "\n"
109 | sleep ${TIMEOUT}
110 |
111 | printf "Inspecting the non-coherent file...\n"
112 | printf "$ cat foo_noncoherent.md\n"
113 | cat foo_noncoherent.md
114 | printf "\n"
115 | sleep ${TIMEOUT}
116 |
117 | printf "Trying to parse a non coherent markdown file will raise an exception...\n"
118 | printf "$ python -m md_toc foo_noncoherent.md github\n"
119 | python -m md_toc foo_noncoherent.md github
120 | printf "\n"
121 | sleep ${TIMEOUT}
122 |
123 | printf "Try to parse a non coherent markdown file without checking for coherence...\n"
124 | printf "$ python -m md_toc -c foo_noncoherent.md github\n"
125 | python -m md_toc -c foo_noncoherent.md github
126 | printf "\n"
127 | sleep ${TIMEOUT}
128 |
129 | printf "Use stdin, no links and no indentation...\n"
130 | printf "$ cat foo.md | python -m md_toc -l -i cmark -u '*'\n"
131 | cat foo.md | python -m md_toc -l -i cmark -u '*'
132 | printf "\n"
133 | sleep ${TIMEOUT}
134 |
135 | printf "Inspecting a file where the first 5 lines need to be skipped...\n"
136 | printf "$ cat foo_skiplines.md\n"
137 | cat foo_skiplines.md
138 | printf "\n"
139 | sleep ${TIMEOUT}
140 |
141 | printf "Using the skip lines option...\n"
142 | printf "$ python -m md_toc -s 5 foo_skiplines.md github\n"
143 | python -m md_toc -s 5 foo_skiplines.md github
144 | printf "\n"
145 | sleep ${TIMEOUT}
146 |
147 | printf "Editing the file in-place. As you can see, code fence \
148 | detection still needs to be implemented for redcarpet...\n"
149 | printf "$ python -m md_toc -p foo.md redcarpet\n"
150 | python -m md_toc -p foo.md redcarpet
151 | printf "$ cat foo.md\n"
152 | cat foo.md
153 |
154 | rm foo.md foo_noncoherent.md foo_skiplines.md
155 |
--------------------------------------------------------------------------------
/docs/rules/anchor_link_types_and_behaviours.rst:
--------------------------------------------------------------------------------
1 | Anchor link types and behaviours
2 | ================================
3 |
4 | Generic
5 | -------
6 |
7 | ``cmark``, ``github``
8 | `````````````````````
9 |
10 | A translated version of the Ruby algorithm is used in md-toc.
11 | The original one is reported here:
12 |
13 | - https://github.com/jch/html-pipeline/blob/master/lib/html/pipeline/toc_filter.rb
14 |
15 | I could not find the code directly responsible for the anchor link generation.
16 | See also:
17 |
18 | - https://github.github.com/gfm/
19 | - https://githubengineering.com/a-formal-spec-for-github-markdown/
20 | - https://github.com/github/cmark/issues/65#issuecomment-343433978
21 |
22 | Apparently GitHub (and possibly others) filter HTML tags in the anchor links.
23 | This is an undocumented feature (?) so the ``remove_html_tags`` function was
24 | added to address this problem. Instead of designing an algorithm to detect HTML tags,
25 | regular expressions came in handy. All the rules
26 | present in https://spec.commonmark.org/0.28/#raw-html have been followed to the
27 | letter. Regular expressions are divided by type and are composed at the end
28 | by concatenating all the strings. For example:
29 |
30 | .. code-block:: python
31 | :linenos:
32 |
33 | # Comment start.
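34 | COS = '<!--'
35 | # Comment text.
36 | COT = '((?!>|->)(?:(?!--).))+(?!-).?'
37 | # Comment end.
38 | COE = '-->'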
39 | # Comment.
40 | CO = COS + COT + COE
41 |
42 | HTML tags are stripped using the ``re.sub`` replace function, for example:
43 |
44 | .. code-block:: python
45 |
46 | line = re.sub(CO, str(), line, flags=re.DOTALL)
47 |
48 | GitHub added an extension in GFM to ignore certain HTML tags, valid at least from versions `0.27.1.gfm.3` to `0.29.0.gfm.0`:
49 |
50 | - https://github.github.com/gfm/#disallowed-raw-html-extension-
51 | - https://github.com/github/cmark-gfm/blob/fca380ca85c046233c39523717073153e2458c1e/extensions/tagfilter.c
52 |
53 | ``gitlab``
54 | ``````````
55 |
56 | New rules have been written:
57 |
58 | - https://docs.gitlab.com/ee/user/markdown.html#header-ids-and-links
59 |
60 | ``redcarpet``
61 | `````````````
62 |
63 | Treats consecutive dash characters by transforming them
64 | into a single dash character. A translated version of the C algorithm
65 | is used in md-toc. The original version is here:
66 |
67 | - https://github.com/vmg/redcarpet/blob/6270d6b4ab6b46ee6bb57a6c0e4b2377c01780ae/ext/redcarpet/html.c#L274
68 |
69 | See also:
70 |
71 | - https://github.com/vmg/redcarpet/issues/618#issuecomment-306476184
72 | - https://github.com/vmg/redcarpet/issues/307#issuecomment-261793668
73 |
74 | Emphasis
75 | --------
76 |
77 | To be able to have working anchor links, emphasis must also be removed from the
78 | link destination.
79 |
80 | ``cmark``, ``github``, ``gitlab``
81 | ``````````````````````````````````
82 |
83 | At the moment the implementation of emphasis removal is incomplete
84 | because of its complexity. See:
85 |
86 | - https://spec.commonmark.org/0.30/#emphasis-and-strong-emphasis
87 |
88 | The core functions for this feature have been ported directly
89 | from the original cmark source with some differences:
90 |
91 | #. things such as string manipulation, mallocs, etc are different in Python
92 |
93 | #. the ``cmark_utf8proc_charlen`` uses ``length = 1``
94 | instead of ``length = utf8proc_utf8class[ord(line[0])]``
95 | (causes list overflow).
96 |
97 | The ``cmark_utf8proc_charlen`` function is related to
98 | the ``cmark_utf8proc_encode_char`` function. Have a look at that function to
99 | know character lengths in cmark.
100 |
101 | In Python 3 a ``str`` is a sequence of Unicode code points, so every character
102 | is represented with length 1, regardless of how many bytes its UTF-8 encoding needs. See:
103 |
104 | - https://rosettacode.org/wiki/String_length#Python
105 | - https://docs.python.org/3/howto/unicode.html#comparing-strings
106 |
107 | As of the release md-toc 8.1.2, cmark-gfm is still at version 0.29. Moreover, certain
108 | code sections used in the emphasis processing are not the same as in cmark 0.29. See
109 | this one for example:
110 |
111 | - https://github.com/github/cmark-gfm/blob/0.29.0.gfm.3/src/inlines.c#L639-L654
112 | - https://github.com/commonmark/cmark/blob/0.29.0/src/inlines.c#L615-L621
113 |
114 | For the moment md-toc uses the original cmark source only as reference for emphasis processing.
115 |
--------------------------------------------------------------------------------
/asciinema/md_toc_asciinema_7_0_0_demo.sh:
--------------------------------------------------------------------------------
1 | #!/bin/bash
2 |
3 | #
4 | # md_toc_asciinema_7_0_0_demo.sh
5 | #
6 | # Copyright (C) 2020 Franco Masotti (see /README.md)
7 | #
8 | # This file is part of md-toc.
9 | #
10 | # md-toc is free software: you can redistribute it and/or modify
11 | # it under the terms of the GNU General Public License as published by
12 | # the Free Software Foundation, either version 3 of the License, or
13 | # (at your option) any later version.
14 | #
15 | # md-toc is distributed in the hope that it will be useful,
16 | # but WITHOUT ANY WARRANTY; without even the implied warranty of
17 | # MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
18 | # GNU General Public License for more details.
19 | #
20 | # You should have received a copy of the GNU General Public License
21 | # along with md-toc. If not, see <https://www.gnu.org/licenses/>.
22 | #
23 |
24 | # Make Python import md_toc from this repository, not the copy installed on the system.
25 | export PYTHONPATH='..'
26 | TIMEOUT=1  # seconds to pause between demo steps
27 | shopt -s expand_aliases  # bash ignores aliases in non-interactive scripts unless this option is set
28 | alias python='pipenv run python'
29 | ###
30 | ###
31 |
32 | printf "Running a demo to show some of python -m md_toc's capabilities...\n"
33 | printf "\n"
34 | sleep ${TIMEOUT}
35 |
36 | printf "$ python -m md_toc -h\n"
37 | python -m md_toc -h
38 | printf "\n"
39 | sleep ${TIMEOUT}
40 | # Sample input. The marker line is where "md_toc -p" writes the TOC. NOTE(review): assumes <!--TOC--> is this release's default marker - confirm.
41 | cat <<-EOF > foo.md
42 | # Hi
43 |
44 | <!--TOC-->
45 |
46 | hey
47 |
48 | ## How are you? !!!
49 |
50 | ## fine, thanks
51 |
52 | ### Bye
53 |
54 | ## Bye bye
55 |
56 | \`\`\`python
57 | # This is a code
58 | # fence with comments that might represent ATX-style headings
59 | # if not properly parsed
60 | \`\`\`
61 |
62 | bye
63 |
64 | # boo
65 | EOF
66 |
67 | cat <<-EOF > foo_noncoherent.md
68 | # Hi
69 | ### boo
70 | EOF
71 |
72 | cat <<-EOF > foo_skiplines.md
73 | # I want this line to be a comment
74 | #### And this as well
75 | ## And this
76 | ###### ByeBye
77 |
78 | # Hi
79 | ## How
80 | ### Are
81 | ## You
82 | # Today ?
83 | EOF
84 |
85 | printf "Inspecting the file...\n"
86 | printf "$ cat foo.md\n"
87 | cat foo.md
88 | printf "\n"
89 | sleep ${TIMEOUT}
90 |
91 | printf "Run with default options...\n"
92 | printf "$ python -m md_toc github foo.md\n"
93 | python -m md_toc github foo.md
94 | printf "\n"
95 | sleep ${TIMEOUT}
96 |
97 | printf "Ordered list...\n"
98 | printf "$ python -m md_toc gitlab -o '.' foo.md\n"
99 | python -m md_toc gitlab -o '.' foo.md
100 | printf "\n"
101 | sleep ${TIMEOUT}
102 |
103 | printf "No links...\n"
104 | printf "$ python -m md_toc -l github foo.md\n"
105 | python -m md_toc -l github foo.md
106 | printf "\n"
107 | sleep ${TIMEOUT}
108 |
109 | printf "No links and no indentation...\n"
110 | printf "$ python -m md_toc -l -i github foo.md\n"
111 | python -m md_toc -l -i github foo.md
112 | printf "\n"
113 | sleep ${TIMEOUT}
114 |
115 | printf "Inspecting the non-coherent file...\n"
116 | printf "$ cat foo_noncoherent.md\n"
117 | cat foo_noncoherent.md
118 | printf "\n"
119 | sleep ${TIMEOUT}
120 |
121 | printf "Trying to parse a non coherent markdown file will raise an exception...\n"
122 | printf "$ python -m md_toc github foo_noncoherent.md\n"
123 | python -m md_toc github foo_noncoherent.md
124 | printf "\n"
125 | sleep ${TIMEOUT}
126 |
127 | printf "Try to parse a non coherent markdown file without checking for coherence...\n"
128 | printf "$ python -m md_toc -c github foo_noncoherent.md\n"
129 | python -m md_toc -c github foo_noncoherent.md
130 | printf "\n"
131 | sleep ${TIMEOUT}
132 |
133 | printf "Use stdin, no links and no indentation...\n"
134 | printf "$ cat foo.md | python -m md_toc -l -i - cmark -u '*'\n"
135 | cat foo.md | python -m md_toc -l -i cmark -u '*'
136 | printf "\n"
137 | sleep ${TIMEOUT}
138 |
139 | printf "Inspecting a file where the first 5 lines need to be skipped...\n"
140 | printf "$ cat foo_skiplines.md\n"
141 | cat foo_skiplines.md
142 | printf "\n"
143 | sleep ${TIMEOUT}
144 |
145 | printf "Using the skip lines option...\n"
146 | printf "$ python -m md_toc -s 5 github foo_skiplines.md\n"
147 | python -m md_toc -s 5 github foo_skiplines.md
148 | printf "\n"
149 | sleep ${TIMEOUT}
150 |
151 | printf "Editing the file in-place. As you can see, code fence \
152 | detection still needs to be implemented for redcarpet...\n"
153 | printf "$ python -m md_toc -p redcarpet foo.md\n"
154 | python -m md_toc -p redcarpet foo.md
155 | printf "$ cat foo.md\n"
156 | cat foo.md
157 |
158 | rm foo.md foo_noncoherent.md foo_skiplines.md
159 |
--------------------------------------------------------------------------------
/asciinema/md_toc_asciinema_7_1_0_demo.sh:
--------------------------------------------------------------------------------
1 | #!/bin/bash
2 |
3 | #
4 | # md_toc_asciinema_7_1_0_demo.sh
5 | #
6 | # Copyright (C) 2021 Franco Masotti (see /README.md)
7 | #
8 | # This file is part of md-toc.
9 | #
10 | # md-toc is free software: you can redistribute it and/or modify
11 | # it under the terms of the GNU General Public License as published by
12 | # the Free Software Foundation, either version 3 of the License, or
13 | # (at your option) any later version.
14 | #
15 | # md-toc is distributed in the hope that it will be useful,
16 | # but WITHOUT ANY WARRANTY; without even the implied warranty of
17 | # MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
18 | # GNU General Public License for more details.
19 | #
20 | # You should have received a copy of the GNU General Public License
21 | # along with md-toc. If not, see <https://www.gnu.org/licenses/>.
22 | #
23 |
24 | # Make Python import md_toc from this repository, not the copy installed on the system.
25 | export PYTHONPATH='..'
26 | TIMEOUT=1  # seconds to pause between demo steps
27 | shopt -s expand_aliases  # bash ignores aliases in non-interactive scripts unless this option is set
28 | alias python='pipenv run python'
29 | ###
30 | ###
31 |
32 | printf "Running a demo to show some of python -m md_toc's capabilities...\n"
33 | printf "\n"
34 | sleep ${TIMEOUT}
35 |
36 | printf "$ python -m md_toc -h\n"
37 | python -m md_toc -h
38 | printf "\n"
39 | sleep ${TIMEOUT}
40 | # Sample input. The marker line is where "md_toc -p" writes the TOC. NOTE(review): assumes <!--TOC--> is this release's default marker - confirm.
41 | cat <<-EOF > foo.md
42 | # Hi
43 |
44 | <!--TOC-->
45 |
46 | hey
47 |
48 | ## How are you? !!!
49 |
50 | ## fine, thanks
51 |
52 | ### Bye
53 |
54 | ## Bye bye
55 |
56 | \`\`\`python
57 | # This is a code
58 | # fence with comments that might represent ATX-style headings
59 | # if not properly parsed
60 | \`\`\`
61 |
62 | bye
63 |
64 | # boo
65 | EOF
66 |
67 | cat <<-EOF > foo_noncoherent.md
68 | # Hi
69 | ### boo
70 | EOF
71 |
72 | cat <<-EOF > foo_skiplines.md
73 | # I want this line to be a comment
74 | #### And this as well
75 | ## And this
76 | ###### ByeBye
77 |
78 | # Hi
79 | ## How
80 | ### Are
81 | ## You
82 | # Today ?
83 | EOF
84 |
85 | printf "Inspecting the file...\n"
86 | printf "$ cat foo.md\n"
87 | cat foo.md
88 | printf "\n"
89 | sleep ${TIMEOUT}
90 |
91 | printf "Run with default options...\n"
92 | printf "$ python -m md_toc github foo.md\n"
93 | python -m md_toc github foo.md
94 | printf "\n"
95 | sleep ${TIMEOUT}
96 |
97 | printf "Ordered list...\n"
98 | printf "$ python -m md_toc gitlab -o '.' foo.md\n"
99 | python -m md_toc gitlab -o '.' foo.md
100 | printf "\n"
101 | sleep ${TIMEOUT}
102 |
103 | printf "Constant ordered list...\n"
104 | printf "$ python -m md_toc github -c -o '.' foo.md\n"
105 | python -m md_toc github -c -o '.' foo.md
106 | printf "\n"
107 | sleep ${TIMEOUT}
108 |
109 | printf "No links...\n"
110 | printf "$ python -m md_toc -l github foo.md\n"
111 | python -m md_toc -l github foo.md
112 | printf "\n"
113 | sleep ${TIMEOUT}
114 |
115 | printf "No links and no indentation...\n"
116 | printf "$ python -m md_toc -l -i github foo.md\n"
117 | python -m md_toc -l -i github foo.md
118 | printf "\n"
119 | sleep ${TIMEOUT}
120 |
121 | printf "Inspecting the non-coherent file...\n"
122 | printf "$ cat foo_noncoherent.md\n"
123 | cat foo_noncoherent.md
124 | printf "\n"
125 | sleep ${TIMEOUT}
126 |
127 | printf "Trying to parse a non coherent markdown file will raise an exception...\n"
128 | printf "$ python -m md_toc github foo_noncoherent.md\n"
129 | python -m md_toc github foo_noncoherent.md
130 | printf "\n"
131 | sleep ${TIMEOUT}
132 |
133 | printf "Try to parse a non coherent markdown file without checking for coherence...\n"
134 | printf "$ python -m md_toc -c github foo_noncoherent.md\n"
135 | python -m md_toc -c github foo_noncoherent.md
136 | printf "\n"
137 | sleep ${TIMEOUT}
138 |
139 | printf "Use stdin, no links and no indentation...\n"
140 | printf "$ cat foo.md | python -m md_toc -l -i - cmark -u '*'\n"
141 | cat foo.md | python -m md_toc -l -i cmark -u '*'
142 | printf "\n"
143 | sleep ${TIMEOUT}
144 |
145 | printf "Inspecting a file where the first 5 lines need to be skipped...\n"
146 | printf "$ cat foo_skiplines.md\n"
147 | cat foo_skiplines.md
148 | printf "\n"
149 | sleep ${TIMEOUT}
150 |
151 | printf "Using the skip lines option...\n"
152 | printf "$ python -m md_toc -s 5 github foo_skiplines.md\n"
153 | python -m md_toc -s 5 github foo_skiplines.md
154 | printf "\n"
155 | sleep ${TIMEOUT}
156 |
157 | printf "Editing the file in-place. As you can see, code fence \
158 | detection still needs to be implemented for redcarpet...\n"
159 | printf "$ python -m md_toc -p redcarpet foo.md\n"
160 | python -m md_toc -p redcarpet foo.md
161 | printf "$ cat foo.md\n"
162 | cat foo.md
163 |
164 | rm foo.md foo_noncoherent.md foo_skiplines.md
165 |
--------------------------------------------------------------------------------
/asciinema/md_toc_asciinema_7_2_0_demo.sh:
--------------------------------------------------------------------------------
1 | #!/bin/bash
2 |
3 | #
4 | # md_toc_asciinema_7_2_0_demo.sh
5 | #
6 | # Copyright (C) 2021 Franco Masotti (see /README.md)
7 | #
8 | # This file is part of md-toc.
9 | #
10 | # md-toc is free software: you can redistribute it and/or modify
11 | # it under the terms of the GNU General Public License as published by
12 | # the Free Software Foundation, either version 3 of the License, or
13 | # (at your option) any later version.
14 | #
15 | # md-toc is distributed in the hope that it will be useful,
16 | # but WITHOUT ANY WARRANTY; without even the implied warranty of
17 | # MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
18 | # GNU General Public License for more details.
19 | #
20 | # You should have received a copy of the GNU General Public License
21 | # along with md-toc. If not, see <https://www.gnu.org/licenses/>.
22 | #
23 |
24 | # Make Python import md_toc from this repository, not the copy installed on the system.
25 | export PYTHONPATH='..'
26 | TIMEOUT=1  # seconds to pause between demo steps
27 | shopt -s expand_aliases  # bash ignores aliases in non-interactive scripts unless this option is set
28 | alias python='pipenv run python'
29 | ###
30 | ###
31 |
32 | printf "Running a demo to show some of python -m md_toc's capabilities...\n"
33 | printf "\n"
34 | sleep ${TIMEOUT}
35 |
36 | printf "$ python -m md_toc -h\n"
37 | python -m md_toc -h
38 | printf "\n"
39 | sleep ${TIMEOUT}
40 | # Sample input. The marker line is where "md_toc -p" writes the TOC. NOTE(review): assumes <!--TOC--> is this release's default marker - confirm.
41 | cat <<-EOF > foo.md
42 | # Hi
43 |
44 | <!--TOC-->
45 |
46 | hey
47 |
48 | ## How are you? !!!
49 |
50 | ## fine, thanks
51 |
52 | ### Bye
53 |
54 | ## __Bye bye__ *bye*
55 |
56 | \`\`\`python
57 | # This is a code
58 | # fence with comments that might represent ATX-style headings
59 | # if not properly parsed
60 | \`\`\`
61 |
62 | bye
63 |
64 | # boo
65 | EOF
66 |
67 | cat <<-EOF > foo_noncoherent.md
68 | # Hi
69 | ### boo
70 | EOF
71 |
72 | cat <<-EOF > foo_skiplines.md
73 | # I want this line to be a comment
74 | #### And this as well
75 | ## And this
76 | ###### ByeBye
77 |
78 | # Hi
79 | ## How
80 | ### Are
81 | ## You
82 | # Today ?
83 | EOF
84 |
85 | printf "Inspecting the file...\n"
86 | printf "$ cat foo.md\n"
87 | cat foo.md
88 | printf "\n"
89 | sleep ${TIMEOUT}
90 |
91 | printf "Run with default options...\n"
92 | printf "$ python -m md_toc github foo.md\n"
93 | python -m md_toc github foo.md
94 | printf "\n"
95 | sleep ${TIMEOUT}
96 |
97 | printf "Ordered list...\n"
98 | printf "$ python -m md_toc gitlab -o '.' foo.md\n"
99 | python -m md_toc gitlab -o '.' foo.md
100 | printf "\n"
101 | sleep ${TIMEOUT}
102 |
103 | printf "Constant ordered list...\n"
104 | printf "$ python -m md_toc github -c -o '.' foo.md\n"
105 | python -m md_toc github -c -o '.' foo.md
106 | printf "\n"
107 | sleep ${TIMEOUT}
108 |
109 | printf "No links...\n"
110 | printf "$ python -m md_toc -l github foo.md\n"
111 | python -m md_toc -l github foo.md
112 | printf "\n"
113 | sleep ${TIMEOUT}
114 |
115 | printf "No links and no indentation...\n"
116 | printf "$ python -m md_toc -l -i github foo.md\n"
117 | python -m md_toc -l -i github foo.md
118 | printf "\n"
119 | sleep ${TIMEOUT}
120 |
121 | printf "Inspecting the non-coherent file...\n"
122 | printf "$ cat foo_noncoherent.md\n"
123 | cat foo_noncoherent.md
124 | printf "\n"
125 | sleep ${TIMEOUT}
126 |
127 | printf "Trying to parse a non coherent markdown file will raise an exception...\n"
128 | printf "$ python -m md_toc github foo_noncoherent.md\n"
129 | python -m md_toc github foo_noncoherent.md
130 | printf "\n"
131 | sleep ${TIMEOUT}
132 |
133 | printf "Try to parse a non coherent markdown file without checking for coherence...\n"
134 | printf "$ python -m md_toc -c github foo_noncoherent.md\n"
135 | python -m md_toc -c github foo_noncoherent.md
136 | printf "\n"
137 | sleep ${TIMEOUT}
138 |
139 | printf "Use stdin, no links and no indentation...\n"
140 | printf "$ cat foo.md | python -m md_toc -l -i - cmark -u '*'\n"
141 | cat foo.md | python -m md_toc -l -i cmark -u '*'
142 | printf "\n"
143 | sleep ${TIMEOUT}
144 |
145 | printf "Inspecting a file where the first 5 lines need to be skipped...\n"
146 | printf "$ cat foo_skiplines.md\n"
147 | cat foo_skiplines.md
148 | printf "\n"
149 | sleep ${TIMEOUT}
150 |
151 | printf "Using the skip lines option...\n"
152 | printf "$ python -m md_toc -s 5 github foo_skiplines.md\n"
153 | python -m md_toc -s 5 github foo_skiplines.md
154 | printf "\n"
155 | sleep ${TIMEOUT}
156 |
157 | printf "Editing the file in-place. As you can see, code fence \
158 | detection still needs to be implemented for redcarpet...\n"
159 | printf "$ python -m md_toc -p redcarpet foo.md\n"
160 | python -m md_toc -p redcarpet foo.md
161 | printf "$ cat foo.md\n"
162 | cat foo.md
163 |
164 | rm foo.md foo_noncoherent.md foo_skiplines.md
165 |
--------------------------------------------------------------------------------
/asciinema/md_toc_asciinema_8_0_0_demo.sh:
--------------------------------------------------------------------------------
1 | #!/bin/bash
2 |
3 | #
4 | # md_toc_asciinema_8_0_0_demo.sh
5 | #
6 | # Copyright (C) 2021 Franco Masotti (see /README.md)
7 | #
8 | # This file is part of md-toc.
9 | #
10 | # md-toc is free software: you can redistribute it and/or modify
11 | # it under the terms of the GNU General Public License as published by
12 | # the Free Software Foundation, either version 3 of the License, or
13 | # (at your option) any later version.
14 | #
15 | # md-toc is distributed in the hope that it will be useful,
16 | # but WITHOUT ANY WARRANTY; without even the implied warranty of
17 | # MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
18 | # GNU General Public License for more details.
19 | #
20 | # You should have received a copy of the GNU General Public License
21 | # along with md-toc. If not, see <https://www.gnu.org/licenses/>.
22 | #
23 |
24 | # Make Python import md_toc from this repository, not the copy installed on the system.
25 | export PYTHONPATH='..'
26 | TIMEOUT=1  # seconds to pause between demo steps
27 | shopt -s expand_aliases  # bash ignores aliases in non-interactive scripts unless this option is set
28 | alias python='pipenv run python'
29 | ###
30 | ###
31 |
32 | printf "Running a demo to show some of python -m md_toc's capabilities...\n"
33 | printf "\n"
34 | sleep ${TIMEOUT}
35 |
36 | printf "$ python -m md_toc -h\n"
37 | python -m md_toc -h
38 | printf "\n"
39 | sleep ${TIMEOUT}
40 | # Sample input. The marker line is where "md_toc -p" writes the TOC. NOTE(review): assumes <!--TOC--> is this release's default marker - confirm.
41 | cat <<-EOF > foo.md
42 | # Hi
43 |
44 | <!--TOC-->
45 |
46 | hey
47 |
48 | ## How are you? !!!
49 |
50 | ## fine, thanks
51 |
52 | ### Bye
53 |
54 | ## __Bye bye__ **bye***
55 |
56 | \`\`\`python
57 | # This is a code
58 | # fence with comments that might represent ATX-style headings
59 | # if not properly parsed
60 | \`\`\`
61 |
62 | bye
63 |
64 | # boo
65 |
66 | # a string with lots of spaces.
67 | EOF
68 |
69 | cat <<-EOF > foo_noncoherent.md
70 | # Hi
71 | ### boo
72 | EOF
73 |
74 | cat <<-EOF > foo_skiplines.md
75 | # I want this line to be a comment
76 | #### And this as well
77 | ## And this
78 | ###### ByeBye
79 |
80 | # Hi
81 | ## How
82 | ### Are
83 | ## You
84 | # Today ?
85 | EOF
86 |
87 | printf "Inspecting the file...\n"
88 | printf "$ cat foo.md\n"
89 | cat foo.md
90 | printf "\n"
91 | sleep ${TIMEOUT}
92 |
93 | printf "Run with default options...\n"
94 | printf "$ python -m md_toc github foo.md\n"
95 | python -m md_toc github foo.md
96 | printf "\n"
97 | sleep ${TIMEOUT}
98 |
99 | printf "Ordered list...\n"
100 | printf "$ python -m md_toc gitlab -o '.' foo.md\n"
101 | python -m md_toc gitlab -o '.' foo.md
102 | printf "\n"
103 | sleep ${TIMEOUT}
104 |
105 | printf "Constant ordered list...\n"
106 | printf "$ python -m md_toc github -c -o '.' foo.md\n"
107 | python -m md_toc github -c -o '.' foo.md
108 | printf "\n"
109 | sleep ${TIMEOUT}
110 |
111 | printf "No links...\n"
112 | printf "$ python -m md_toc -l github foo.md\n"
113 | python -m md_toc -l github foo.md
114 | printf "\n"
115 | sleep ${TIMEOUT}
116 |
117 | printf "No links and no indentation...\n"
118 | printf "$ python -m md_toc -l -i github foo.md\n"
119 | python -m md_toc -l -i github foo.md
120 | printf "\n"
121 | sleep ${TIMEOUT}
122 |
123 | printf "Inspecting the non-coherent file...\n"
124 | printf "$ cat foo_noncoherent.md\n"
125 | cat foo_noncoherent.md
126 | printf "\n"
127 | sleep ${TIMEOUT}
128 |
129 | printf "Trying to parse a non coherent markdown file will raise an exception...\n"
130 | printf "$ python -m md_toc github foo_noncoherent.md\n"
131 | python -m md_toc github foo_noncoherent.md
132 | printf "\n"
133 | sleep ${TIMEOUT}
134 |
135 | printf "Try to parse a non coherent markdown file without checking for coherence...\n"
136 | printf "$ python -m md_toc -c github foo_noncoherent.md\n"
137 | python -m md_toc -c github foo_noncoherent.md
138 | printf "\n"
139 | sleep ${TIMEOUT}
140 |
141 | printf "Use stdin, no links and no indentation...\n"
142 | printf "$ cat foo.md | python -m md_toc -l -i - cmark -u '*'\n"
143 | cat foo.md | python -m md_toc -l -i cmark -u '*'
144 | printf "\n"
145 | sleep ${TIMEOUT}
146 |
147 | printf "Inspecting a file where the first 5 lines need to be skipped...\n"
148 | printf "$ cat foo_skiplines.md\n"
149 | cat foo_skiplines.md
150 | printf "\n"
151 | sleep ${TIMEOUT}
152 |
153 | printf "Using the skip lines option...\n"
154 | printf "$ python -m md_toc -s 5 github foo_skiplines.md\n"
155 | python -m md_toc -s 5 github foo_skiplines.md
156 | printf "\n"
157 | sleep ${TIMEOUT}
158 |
159 | printf "Showing Gitlab's removal of consecutive dashes in the link destination...\n"
160 | printf "$ python -m md_toc gitlab -l 6 foo.md\n"
161 | python -m md_toc gitlab -l 6 foo.md
162 | printf "\n"
163 | sleep ${TIMEOUT}
164 |
165 | printf "Editing the file in-place. As you can see, code fence \
166 | detection still needs to be implemented for redcarpet...\n"
167 | printf "$ python -m md_toc -p redcarpet foo.md\n"
168 | python -m md_toc -p redcarpet foo.md
169 | printf "$ cat foo.md\n"
170 | cat foo.md
171 |
172 | rm foo.md foo_noncoherent.md foo_skiplines.md
173 |
--------------------------------------------------------------------------------
/md_toc/cmark/references_c.py:
--------------------------------------------------------------------------------
1 | #
# references_c.py
3 | #
4 | # Copyright (C) 2017-2022 Franco Masotti (see /README.md)
5 | #
6 | # This file is part of md-toc.
7 | #
8 | # md-toc is free software: you can redistribute it and/or modify
9 | # it under the terms of the GNU General Public License as published by
10 | # the Free Software Foundation, either version 3 of the License, or
11 | # (at your option) any later version.
12 | #
13 | # md-toc is distributed in the hope that it will be useful,
14 | # but WITHOUT ANY WARRANTY; without even the implied warranty of
15 | # MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
16 | # GNU General Public License for more details.
17 | #
18 | # You should have received a copy of the GNU General Public License
# along with md-toc. If not, see <https://www.gnu.org/licenses/>.
20 | #
21 | r"""A cmark implementation file."""
22 |
23 | import functools
24 |
25 | from ..constants import parser as md_parser
26 | from .buffer_c import (
27 | _cmark_cmark_strbuf_detach,
28 | _cmark_cmark_strbuf_normalize_whitespace,
29 | _cmark_cmark_strbuf_trim,
30 | )
31 | from .buffer_h import _cmark_CMARK_BUF_INIT, _cmarkCmarkStrbuf
32 | from .chunk_h import _cmarkCmarkChunk
33 | from .cmark_h import _cmarkCmarkMem
34 | from .references_h import _cmarkCmarkReference, _cmarkCmarkReferenceMap
35 | from .utf8_c import _cmark_cmark_utf8proc_case_fold
36 |
37 | # License C applies to this file except for non derivative code:
38 | # in that case the license header at the top of the file applies.
39 | # See docs/copyright_license.rst
40 |
41 |
42 | # normalize reference: collapse internal whitespace to single space,
43 | # remove leading/trailing whitespace, case fold
44 | # Return NULL if the reference name is actually empty (i.e. composed
45 | # solely from whitespace)
46 | # 0.30
47 | def _cmark_normalize_reference(mem: _cmarkCmarkMem,
48 | ref: _cmarkCmarkChunk) -> str:
49 | normalized: _cmarkCmarkStrbuf = _cmark_CMARK_BUF_INIT(mem)
50 | result: str
51 |
52 | if ref is None:
53 | return None
54 |
55 | if ref.length == 0:
56 | return None
57 |
58 | _cmark_cmark_utf8proc_case_fold(normalized, ref.data, ref.length)
59 | _cmark_cmark_strbuf_trim(normalized)
60 | _cmark_cmark_strbuf_normalize_whitespace(normalized)
61 |
62 | result = _cmark_cmark_strbuf_detach(normalized)
63 | if not result:
64 | raise ValueError
65 |
66 | # if result[0] == '\0':
67 | if result == '':
68 | # mem.free(result)
69 | del result
70 | return None
71 |
72 | return result
73 |
74 |
75 | def _cmark_labelcmp(a: str, b: str) -> bool:
76 | return a == b
77 |
78 |
79 | def _cmark_refcmp(p1: _cmarkCmarkReference, p2: _cmarkCmarkReference) -> int:
80 | res: bool = _cmark_labelcmp(p1.label, p2.label)
81 |
82 | if res:
83 | return res
84 | else:
85 | return p1.age - p2.age
86 |
87 |
88 | def _cmark_sort_references(maps: _cmarkCmarkReferenceMap):
89 | i: int
90 | last: int = 0
91 | size: int = maps.size
92 | r: _cmarkCmarkReference = maps.refs
93 |
94 | # **sorted = NULL;
95 | # A list of _cmarkCmarkReference
96 | srt: list = list()
97 |
98 | # sorted = (cmark_reference **)maps->mem->calloc(size, sizeof(cmark_reference *));
99 | while (r):
100 | srt.append(r)
101 | r = r.next
102 |
103 | # qsort(sorted, size, sizeof(cmark_reference *), refcmp);
104 | srt = sorted(srt, key=functools.cmp_to_key(_cmark_refcmp))
105 |
106 | for i in range(1, size):
107 | if _cmark_labelcmp(srt[i].label, srt[last].label) != 0:
108 | last += 1
109 | srt[last] = srt[i]
110 |
111 | maps.sorted = srt
112 | maps.size = last + 1
113 |
114 |
115 | # Returns reference if refmaps contains a reference with matching
116 | # label, otherwise NULL.
117 | # 0.30
118 | def _cmark_cmark_reference_lookup(
119 | maps: _cmarkCmarkReferenceMap,
120 | label: _cmarkCmarkChunk,
121 | ) -> _cmarkCmarkReference:
122 | # A list of _cmarkCmarkReference
123 | ref: list = None
124 | r: _cmarkCmarkReference = None
125 | norm: str
126 |
127 | # MAX_LINK_LABEL_LENGTH
128 | if label.length < 1 or label.length > md_parser['cmark']['link'][
129 | 'max_chars_label'] + 1:
130 | return None
131 |
132 | if maps is None or not maps.size:
133 | return None
134 |
135 | norm = _cmark_normalize_reference(maps.mem, label)
136 | if norm is None:
137 | return None
138 |
139 | if not maps.sorted:
140 | _cmark_sort_references(maps)
141 |
142 | # TODO
143 | # ref = (cmark_reference **)bsearch(norm, maps->sorted, maps->size, sizeof(cmark_reference *),
144 | # refsearch);
145 | # FIXME
146 |
147 | # maps->mem->free(norm);
148 | del norm
149 |
150 | if ref is not None:
151 | r = ref[0]
152 | # Check for expansion limit
153 | if maps.max_ref_size and r.size > maps.max_ref_size - maps.ref_size:
154 | return None
155 | maps.ref_size += r.size
156 |
157 | return r
158 |
--------------------------------------------------------------------------------
/md_toc/cmark/utf8_c.py:
--------------------------------------------------------------------------------
1 | #
2 | # utf8_c.py
3 | #
4 | # Copyright (C) 2017-2022 Franco Masotti (see /README.md)
5 | #
6 | # This file is part of md-toc.
7 | #
8 | # md-toc is free software: you can redistribute it and/or modify
9 | # it under the terms of the GNU General Public License as published by
10 | # the Free Software Foundation, either version 3 of the License, or
11 | # (at your option) any later version.
12 | #
13 | # md-toc is distributed in the hope that it will be useful,
14 | # but WITHOUT ANY WARRANTY; without even the implied warranty of
15 | # MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
16 | # GNU General Public License for more details.
17 | #
18 | # You should have received a copy of the GNU General Public License
# along with md-toc. If not, see <https://www.gnu.org/licenses/>.
20 | #
21 | r"""A cmark implementation file."""
22 |
23 | import unicodedata
24 |
25 | from ..constants import parser as md_parser
26 | from .buffer_c import _cmark_cmark_strbuf_put
27 | from .buffer_h import _cmarkCmarkStrbuf
28 | from .cmark_ctype_c import _cmark_cmark_ispunct
29 |
30 | # License D applies to this file except for non derivative code:
31 | # in that case the license header at the top of the file applies.
32 | # See docs/copyright_license.rst
33 |
34 |
def _cmark_encode_unknown(buf: _cmarkCmarkStrbuf):
    r"""Append the Unicode replacement character to a buffer.

    :parameter buf: the destination string buffer.
    """
    # C: static const uint8_t repl[] = {239, 191, 189};
    #
    # The three bytes 0xEF 0xBF 0xBD are the UTF-8 encoding of
    # U+FFFD REPLACEMENT CHARACTER:
    #
    # >>> bytes([239, 191, 189]).decode('utf-8')
    # '\ufffd'
    #
    # Python strings are sequences of code points, not of UTF-8 bytes, so
    # the replacement is a single character and its length is 1, not 3.
    repl = '\uFFFD'
    _cmark_cmark_strbuf_put(buf, repl, len(repl))
60 |
61 |
62 | # 0.29, 0.30
63 | def _cmark_cmark_utf8proc_charlen(line: str, line_length: int) -> int:
64 | length: int
65 | i: int
66 |
67 | if not line_length:
68 | return 0
69 |
70 | # Use length = 1 instead of the utf8proc_utf8class[256]
71 | # list.
72 | # Python:
73 | # length = utf8proc_utf8class[ord(line[0])]
74 | # C:
75 | # length = utf8proc_utf8class[str[0]];
76 | # For example:
77 | # len('ł') == 2 # in Python 2
78 | # len('ł') == 1 # in Python 3
79 | # See the documentation.
80 | # In Python 3 all strings are unicode by default
81 | # and they all have length of 1.
82 | length = 1
83 |
84 | # if (!length)
85 | # return -1;
86 |
87 | if line_length >= 0 and length > line_length:
88 | return -line_length
89 |
90 | for i in range(1, length):
91 | if (ord(line[i]) & 0xC0) != 0x80:
92 | return -i
93 |
94 | return length
95 |
96 |
97 | # 0.29, 0.30
def _cmark_cmark_utf8proc_iterate(line: str, line_len: int) -> tuple:
    r"""Read the first character of a string.

    :parameter line: the string to read from.
    :parameter line_len: the number of characters available.
    :returns: a tuple with the character length and its code point, or
         ``(-1, -1)`` if no character could be read.
    """
    failure: tuple = (-1, -1)
    code_point: int = -1

    width: int = _cmark_cmark_utf8proc_charlen(line, line_len)
    if width < 0:
        return failure

    # In Python 3 strings are unicode and every character has a length
    # of 1: the multi-byte decoding of the C code is not needed.
    if width == 1:
        code_point = ord(line[0])

    if code_point < 0:
        return failure

    return width, code_point
120 |
121 |
122 | # 0.30
123 | def _cmark_cmark_utf8proc_encode_char(uc: int, buf: _cmarkCmarkStrbuf):
124 | dst: str
125 | length: int = 0
126 |
127 | if uc < 0:
128 | raise ValueError
129 |
130 | # In Python 3 all strings are unicode by default
131 | # and they all have length of 1.
132 | # Omitted code.
133 | length = 1
134 | if uc > 1 or uc >= 0x110000:
135 | _cmark_encode_unknown(buf)
136 | return
137 |
138 | dst = str(uc)
139 | _cmark_cmark_strbuf_put(buf, dst, length)
140 |
141 |
142 | # 0.30
def _cmark_cmark_utf8proc_case_fold(
    dest: _cmarkCmarkStrbuf,
    string: str,
    length: int,
):
    r"""Append the case folded version of a string to a buffer.

    :parameter dest: the destination string buffer.
    :parameter string: the string to fold.
    :parameter length: the number of characters of ``string`` to process.

    Characters that cannot be read are replaced through
    ``_cmark_encode_unknown``.
    """
    c: int

    while length > 0:
        char_len, c = _cmark_cmark_utf8proc_iterate(string, length)

        if char_len >= 0:
            # FIXME: unsure about this. See original C source code.
            # Folding one character may give more than one character, for
            # example 'ß'.casefold() == 'ss'. Each resulting character is
            # pushed on its own: calling ord() on the whole result raised
            # TypeError in those cases.
            for folded in unicodedata.normalize('NFC', chr(c)).casefold():
                _cmark_cmark_utf8proc_encode_char(ord(folded), dest)
        else:
            _cmark_encode_unknown(dest)
            char_len = -char_len

        # Advance pointer.
        # str += char_len;
        string = string[char_len:]
        # Reduce string length.
        length -= char_len
166 |
167 |
168 | # 0.29, 0.30
169 | def _cmark_cmark_utf8proc_is_space(char: int) -> bool:
170 | r"""Match anything in the Zs class, plus LF, CR, TAB, FF."""
171 | return (unicodedata.category(chr(char)) == 'Zs'
172 | or chr(char) in ['\u0009', '\u000A', '\u000C', '\u000D'])
173 |
174 |
175 | # 0.29, 0.30
176 | def _cmark_cmark_utf8proc_is_punctuation(char: int) -> bool:
177 | r"""Match anything in the P[cdefios] classes."""
178 | return ((char < 128 and _cmark_cmark_ispunct(char))
179 | or unicodedata.category(chr(char)).startswith('P'))
180 |
181 |
# Nothing is done when this module is executed directly.
if __name__ == '__main__':
    pass
184 |
--------------------------------------------------------------------------------
/asciinema/md_toc_asciinema_5_0_0.json:
--------------------------------------------------------------------------------
1 | {"version": 2, "width": 174, "height": 46, "timestamp": 1555259634, "env": {"SHELL": "/bin/bash", "TERM": "rxvt-unicode-256color"}}
2 | [0.010556, "o", "Running a demo to show some of md_toc's capabilities...\r\n\r\n"]
3 | [1.012344, "o", "$ md_toc -h\r\n"]
4 | [1.197964, "o", "usage: md_toc [-h] [-c | -i] [-l] [-m TOC_MARKER] [-p] [-v]\r\n [FILE_NAME [FILE_NAME ...]]\r\n {github,cmark,gitlab,commonmarker,redcarpet} ...\r\n\r\nMarkdown Table Of Contents: Automatically generate a compliant table\r\nof contents for a markdown file to improve document readability.\r\n\r\npositional arguments:\r\n FILE_NAME the I/O file name\r\n\r\noptional arguments:\r\n -h, --help show this help message and exit\r\n -c, --no-list-coherence\r\n avoids checking for TOC list coherence\r\n -i, --no-indentation avoids adding indentations to the TOC\r\n -l, --no-links avoids adding links to the TOC\r\n -m TOC_MARKER, --toc-marker TOC_MARKER\r\n set the string to be used as the marker for\r\n positioning the table of contents. Defaults to [](TOC)\r\n -p, --in-place overwrite the input file\r\n -v, --version show program's version number and exit\r\n\r\nmarkdown parser:\r\n {github,cmark,gitlab,commonmarker"]
5 | [1.198643, "o", ",redcarpet}\r\n\r\nPlease read the documentation to understand how each parser works\r\n\r\nReturn values: 0 ok, 1 error, 2 invalid command\r\n\r\nCopyright (C) 2018-2019 Franco Masotti, frnmst\r\nLicense GPLv3+: GNU GPL version 3 or later \r\nThis is free software: you are free to change and redistribute it.\r\nThere is NO WARRANTY, to the extent permitted by law.\r\n"]
6 | [1.210429, "o", "\r\n"]
7 | [2.215499, "o", "Inspecting the file...\r\n$ cat foo.md\r\n"]
8 | [2.216698, "o", "# Hi\r\n\r\n[](TOC)\r\n\r\nhey\r\n\r\n## How are you? !!!\r\n\r\n## fine, thanks\r\n\r\n### Bye\r\n\r\n## Bye bye\r\n\r\n```python\r\n# This is a code\r\n# fence with comments that might represent ATX-style headings\r\n# if not properly parsed\r\n```\r\n\r\nbye\r\n\r\n# boo\r\n"]
9 | [2.21728, "o", "\r\n"]
10 | [3.218615, "o", "Run with default options...\r\n$ md_toc foo.md github\r\n"]
11 | [3.401344, "o", "- [Hi](#hi)\r\n - [How are you? !!!](#how-are-you-----------)\r\n - [fine, thanks](#fine-thanks)\r\n - [Bye](#bye)\r\n - [Bye bye](#bye-bye)\r\n- [boo](#boo)\r\n"]
12 | [3.413, "o", "\r\n"]
13 | [4.41438, "o", "Ordered list...\r\n$ md_toc foo.md gitlab -o\r\n"]
14 | [4.605895, "o", "1. [Hi](#hi)\r\n 1. [How are you? !!!](#how-are-you-----------)\r\n 2. [fine, thanks](#fine-thanks)\r\n 1. [Bye](#bye)\r\n 3. [Bye bye](#bye-bye)\r\n2. [boo](#boo)\r\n"]
15 | [4.617561, "o", "\r\n"]
16 | [5.619173, "o", "No links...\r\n$ md_toc -l foo.md github\r\n"]
17 | [5.801036, "o", "- Hi\r\n - How are you? !!!\r\n - fine, thanks\r\n - Bye\r\n - Bye bye\r\n- boo\r\n"]
18 | [5.812618, "o", "\r\n"]
19 | [6.814147, "o", "No links and no indentation...\r\n$ md_toc -l -i foo.md github\r\n"]
20 | [6.998235, "o", "- Hi"]
21 | [6.998534, "o", "\r\n"]
22 | [6.9986, "o", "- How are you? !!!"]
23 | [6.998829, "o", "\r\n"]
24 | [6.998944, "o", "- fine, thanks\r\n- Bye\r\n- Bye bye\r\n- boo\r\n"]
25 | [7.010849, "o", "\r\n"]
26 | [8.012481, "o", "Inspecting the non-coherent file...\r\n$ cat foo_noncoherent.md\r\n"]
27 | [8.014395, "o", "# Hi\r\n### boo\r\n"]
28 | [8.014576, "o", "\r\n"]
29 | [9.01643, "o", "Trying to parse a non coherent markdown file will raise an exception...\r\n$ md_toc foo_noncoherent.md github\r\n"]
30 | [9.200414, "o", "Traceback (most recent call last):\r\n File \"/home/vm/md-toc/md_toc/__main__.py\", line 34, in main\r\n result = args.func(args)\r\n File \"/home/vm/md-toc/md_toc/cli.py\", line 67, in write_toc\r\n list_marker=list_marker)\r\n File \"/home/vm/md-toc/md_toc/api.py\", line 288, in build_multiple_tocs\r\n list_marker))\r\n File \"/home/vm/md-toc/md_toc/api.py\", line 213, in build_toc\r\n raise TocDoesNotRenderAsCoherentList\r\nmd_toc.exceptions.TocDoesNotRenderAsCoherentList\r\n"]
31 | [9.212276, "o", "\r\n"]
32 | [10.213782, "o", "Try to parse a non coherent markdown file without checking for coherence...\r\n$ md_toc -c foo_noncoherent.md github\r\n"]
33 | [10.399526, "o", "- [Hi](#hi)\r\n - [boo](#boo)\r\n"]
34 | [10.411949, "o", "\r\n"]
35 | [11.413447, "o", "Use stdin, no links and no indentation...\r\n$ cat foo.md | md_toc -l -i cmark -u '*'\r\n"]
36 | [11.599078, "o", "* Hi\r\n* How are you? !!!\r\n* fine, thanks\r\n* Bye\r\n* Bye bye\r\n* boo\r\n"]
37 | [11.611748, "o", "\r\n"]
38 | [12.61327, "o", "Editing the file in-place. As you can see, code fence detection still needs to be implemented for redcarpet..\r\n$ md_toc -p foo.md redcarpet\r\n"]
39 | [12.867298, "o", "$ cat foo.md\r\n"]
40 | [12.867999, "o", "# Hi\r\n\r\n[](TOC)\r\n\r\n- [Hi](#hi)\r\n - [How are you? !!!](#how-are-you)\r\n - [fine, thanks](#fine-thanks)\r\n - [Bye](#bye)\r\n - [Bye bye](#bye-bye)\r\n- [This is a code](#this-is-a-code)\r\n- [fence with comments that might represent ATX-style headings](#fence-with-comments-that-might-represent-atx-style-headings)\r\n- [if not properly parsed](#if-not-properly-parsed)\r\n- [boo](#boo)\r\n\r\n[](TOC)\r\n\r\nhey\r\n\r\n## How are you? !!!\r\n\r\n## fine, thanks\r\n\r\n### Bye\r\n\r\n## Bye bye\r\n\r\n```python\r\n# This is a code\r\n# fence with comments that might represent ATX-style headings\r\n# if not properly parsed\r\n```\r\n\r\nbye\r\n\r\n# boo\r\n"]
41 |
--------------------------------------------------------------------------------
/asciinema/md_toc_asciinema_8_1_0_demo.sh:
--------------------------------------------------------------------------------
1 | #!/bin/bash
2 |
3 | #
# md_toc_asciinema_8_1_0_demo.sh
5 | #
6 | # Copyright (C) 2021 Franco Masotti (see /README.md)
7 | #
8 | # This file is part of md-toc.
9 | #
10 | # md-toc is free software: you can redistribute it and/or modify
11 | # it under the terms of the GNU General Public License as published by
12 | # the Free Software Foundation, either version 3 of the License, or
13 | # (at your option) any later version.
14 | #
15 | # md-toc is distributed in the hope that it will be useful,
16 | # but WITHOUT ANY WARRANTY; without even the implied warranty of
17 | # MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
18 | # GNU General Public License for more details.
19 | #
20 | # You should have received a copy of the GNU General Public License
# along with md-toc. If not, see <https://www.gnu.org/licenses/>.
22 | #
23 |
24 | # Discover python -m md_toc of this repository not the one installed on the system.
25 | export PYTHONPATH='..'
26 | TIMEOUT=1
27 |
# Write the main fixture to foo.md, overwriting any existing copy.
# The TOC marker line tells md_toc where to write the table of contents
# when the file is edited in place; without it the in-place steps have no
# position to fill.
# The backticks are escaped because the here-document is unquoted.
gen_foo()
{
cat <<-EOF > foo.md
# Hi

<!--TOC-->

hey

## How are you? !!!

## fine, thanks

### Bye

## __Bye bye__ **bye***

\`\`\`python
# This is a code
# fence with comments that might represent ATX-style headings
# if not properly parsed
\`\`\`

bye

# boo

# a string with lots of spaces.
EOF
}
58 |
# Write the non-coherent fixture (a level 1 heading directly followed by a
# level 3 one) to foo_noncoherent.md, overwriting any existing copy.
gen_foo_noncoherent()
{
    printf '%s\n' \
        '# Hi' \
        '### boo' \
        > foo_noncoherent.md
}
66 |
# Write the skip-lines fixture to foo_skiplines.md, overwriting any existing
# copy. Its first 5 lines are the ones the demo skips with the -s option.
gen_foo_skiplines()
{
    printf '%s\n' \
        '# I want this line to be a comment' \
        '#### And this as well' \
        '## And this' \
        '###### ByeBye' \
        '' \
        '# Hi' \
        '## How' \
        '### Are' \
        '## You' \
        '# Today ?' \
        > foo_skiplines.md
}
82 |
83 | ###
84 | ###
85 |
86 | printf "Running a demo to show some of python -m md_toc's capabilities...\n"
87 | printf "\n"
88 | sleep ${TIMEOUT}
89 |
90 | printf "$ python -m md_toc -h\n"
91 | python -m md_toc -h
92 | printf "\n~~~~\n"
93 | sleep ${TIMEOUT}
94 |
95 | gen_foo
96 | printf "1. inspecting the file...\n"
97 | printf "$ cat foo.md\n"
98 | cat foo.md
99 | printf "\n~~~~\n"
100 | sleep ${TIMEOUT}
101 |
102 | gen_foo
103 | printf "2. run with default options...\n"
104 | printf "$ python -m md_toc github foo.md\n"
105 | python -m md_toc github foo.md
106 | printf "\n~~~~\n"
107 | sleep ${TIMEOUT}
108 |
109 | gen_foo
110 | printf "3. ordered list...\n"
111 | printf "$ python -m md_toc gitlab -o '.' foo.md\n"
112 | python -m md_toc gitlab -o '.' foo.md
113 | printf "\n~~~~\n"
114 | sleep ${TIMEOUT}
115 |
116 | gen_foo
117 | printf "4. constant ordered list...\n"
118 | printf "$ python -m md_toc github -c -o '.' foo.md\n"
119 | python -m md_toc github -c -o '.' foo.md
120 | printf "\n~~~~\n"
121 | sleep ${TIMEOUT}
122 |
123 | gen_foo
124 | printf "5. no links...\n"
125 | printf "$ python -m md_toc -l github foo.md\n"
126 | python -m md_toc -l github foo.md
127 | printf "\n~~~~\n"
128 | sleep ${TIMEOUT}
129 |
130 | gen_foo
131 | printf "6. no links and no indentation...\n"
132 | printf "$ python -m md_toc -l -i github foo.md\n"
133 | python -m md_toc -l -i github foo.md
134 | printf "\n~~~~\n"
135 | sleep ${TIMEOUT}
136 |
137 | gen_foo_noncoherent
138 | printf "7. inspecting the non-coherent file...\n"
139 | printf "$ cat foo_noncoherent.md\n"
140 | cat foo_noncoherent.md
141 | printf "\n~~~~\n"
142 | sleep ${TIMEOUT}
143 |
144 | gen_foo_noncoherent
145 | printf "8. trying to parse a non coherent markdown file will raise an exception...\n"
146 | printf "$ python -m md_toc github foo_noncoherent.md\n"
147 | python -m md_toc github foo_noncoherent.md
148 | printf "\n~~~~\n"
149 | sleep ${TIMEOUT}
150 |
151 | gen_foo_noncoherent
152 | printf "9. try to parse a non coherent markdown file without checking for coherence...\n"
153 | printf "$ python -m md_toc -c github foo_noncoherent.md\n"
154 | python -m md_toc -c github foo_noncoherent.md
155 | printf "\n~~~~\n"
156 | sleep ${TIMEOUT}
157 |
158 | printf "10. use stdin, no links and no indentation...\n"
159 | printf "$ cat foo.md | python -m md_toc -l -i - cmark -u '*'\n"
160 | cat foo.md | python -m md_toc -l -i cmark -u '*'
161 | printf "\n~~~~\n"
162 | sleep ${TIMEOUT}
163 |
164 | gen_foo_skiplines
165 | printf "11. inspecting a file where the first 5 lines need to be skipped...\n"
166 | printf "$ cat foo_skiplines.md\n"
167 | cat foo_skiplines.md
168 | printf "\n~~~~\n"
169 | sleep ${TIMEOUT}
170 |
171 | gen_foo_skiplines
172 | printf "12. using the skip lines option...\n"
173 | printf "$ python -m md_toc -s 5 github foo_skiplines.md\n"
174 | python -m md_toc -s 5 github foo_skiplines.md
175 | printf "\n~~~~\n"
176 | sleep ${TIMEOUT}
177 |
178 | gen_foo
179 | printf "13. showing GitLab's removal of consecutive dashes in the link destination...\n"
180 | printf "$ python -m md_toc gitlab -l 6 foo.md\n"
181 | python -m md_toc gitlab -l 6 foo.md
182 | printf "\n~~~~\n"
183 | sleep ${TIMEOUT}
184 |
185 | gen_foo
186 | printf "14. editing file in place with a custom newline...\n"
187 | printf "$ python -m md_toc --newline '\\\r\\\n' -p gitlab -l 6 foo.md\n"
188 | python -m md_toc --newline '\r\n' -p gitlab -l 6 foo.md
189 | printf "\n"
190 | printf "$ cat --show-nonprinting --show-ends foo.md\n"
191 | cat --show-nonprinting --show-ends foo.md
192 | printf "\n~~~~\n"
193 | sleep ${TIMEOUT}
194 |
195 | gen_foo
196 | printf "15. editing the file in-place. As you can see, code fence \
197 | detection still needs to be implemented for redcarpet...\n"
198 | printf "$ python -m md_toc -p redcarpet foo.md\n"
199 | python -m md_toc -p redcarpet foo.md
200 | printf "$ cat foo.md\n"
201 | cat foo.md
202 |
203 | rm foo.md foo_noncoherent.md foo_skiplines.md
204 |
--------------------------------------------------------------------------------
/md_toc/cmark/scanners_c.py:
--------------------------------------------------------------------------------
1 | #
2 | # scanners_c.py
3 | #
4 | # Copyright (C) 2017-2022 Franco Masotti (see /README.md)
5 | #
6 | # This file is part of md-toc.
7 | #
8 | # md-toc is free software: you can redistribute it and/or modify
9 | # it under the terms of the GNU General Public License as published by
10 | # the Free Software Foundation, either version 3 of the License, or
11 | # (at your option) any later version.
12 | #
13 | # md-toc is distributed in the hope that it will be useful,
14 | # but WITHOUT ANY WARRANTY; without even the implied warranty of
15 | # MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
16 | # GNU General Public License for more details.
17 | #
18 | # You should have received a copy of the GNU General Public License
# along with md-toc. If not, see <https://www.gnu.org/licenses/>.
20 | #
21 | r"""A cmark implementation file."""
22 |
23 | import copy
24 | import re
25 |
26 | from ..constants import parser as md_parser
27 | from .chunk_h import _cmarkCmarkChunk
28 |
29 | # License C applies to this file except for non derivative code:
30 | # in that case the license header at the top of the file applies.
31 | # See docs/copyright_license.rst
32 |
33 | # These functions have been re-written to avoid GOTO jumps,
34 | # also using the scanners.re source file.
35 | # The original C source states:
36 | # /* Generated by re2c 1.3 */
37 |
38 |
39 | def _cmark__scan_at(
40 | scanner_function_name: str,
41 | c: _cmarkCmarkChunk,
42 | offset: int,
43 | ) -> int:
44 | res: int
45 | ptr: str = c.data
46 |
47 | if ptr is None or offset > c.length:
48 | return 0
49 | else:
50 | # lim: str = ptr[c.length]
51 |
52 | # ptr[c.length] = '\0'
53 | if scanner_function_name == '_cmark__scan_spacechars':
54 | res = _cmark__scan_spacechars(ptr, offset)
55 | if scanner_function_name == '_cmark__scan_link_title':
56 | res = _cmark__scan_link_title(ptr, offset)
57 | if scanner_function_name == '_cmark__scan_autolink_uri':
58 | res = _cmark__scan_autolink_uri(ptr, offset)
59 | if scanner_function_name == '_cmark__scan_autolink_email':
60 | res = _cmark__scan_autolink_email(ptr, offset)
61 | if scanner_function_name == '_cmark__scan_html_comment':
62 | res = _cmark__scan_html_comment(ptr, offset)
63 | if scanner_function_name == '_cmark__scan_cdata':
64 | res = _cmark__scan_cdata(ptr, offset)
65 | if scanner_function_name == '_cmark__scan_html_tag':
66 | res = _cmark__scan_html_tag(ptr, offset)
67 | if scanner_function_name == '_cmark__scan_html_declaration':
68 | res = _cmark__scan_html_declaration(ptr, offset)
69 | if scanner_function_name == '_cmark__scan_html_pi':
70 | res = _cmark__scan_html_pi(ptr, offset)
71 |
72 | # ptr[c.length] = lim
73 |
74 | return res
75 |
76 |
77 | def _common_scan(regex: str, ptr: str, p: int) -> int:
78 | start_match: int = 0
79 | end_match: int = 0
80 | retval: int
81 |
82 | span = re.match(regex, ptr[p:])
83 | if span:
84 | ll = list(span.span())
85 | start_match = ll[0]
86 | end_match = ll[1]
87 | retval = end_match - start_match
88 | else:
89 | retval = 0
90 |
91 | return retval
92 |
93 |
94 | # Try to match a link title (in single quotes, in double quotes, or
95 | # in parentheses), returning number of chars matched. Allow one
96 | # level of internal nesting (quotes within quotes).
def _cmark__scan_link_title(ptr: str, p: int) -> int:
    r"""Match a link title at a given position of a string.

    A title is enclosed in double quotes, in single quotes or in
    parentheses.

    :parameter ptr: the string to scan.
    :parameter p: the position in ``ptr`` where the match must start.
    :returns: the number of characters matched, or 0 if there is no match.
    """
    escaped_char: str = md_parser['cmark']['_scanners.re']['escaped_char']

    double_quoted: str = '["](' + escaped_char + '|[^"\u0000])*["]'
    single_quoted: str = "['](" + escaped_char + "|[^'\u0000])*[']"
    # scanners.re: [(] (escaped_char|[^()\x00])* [)]
    # The title must end with a closing parenthesis: the expression ended
    # with a single quote, so "(title)" never matched.
    parenthesized: str = (r'[\(](' + escaped_char
                          + r'|[^\(\)\u0000])*[\)]')

    regex: str = ('(' + double_quoted + '|' + single_quoted + '|'
                  + parenthesized + ')')
    return _common_scan(regex, ptr, p)
106 |
107 |
108 | # Match SOME space characters, including newlines.
def _cmark__scan_spacechars(ptr: str, p: int) -> int:
    r"""Match one or more space characters at a given position.

    :parameter ptr: the string to scan.
    :parameter p: the position in ``ptr`` where the match must start.
    :returns: the number of characters matched, or 0 if there is no match.
    """
    one_or_more_spaces: str = (
        md_parser['cmark']['_scanners.re']['spacechar'] + '+')
    return _common_scan(one_or_more_spaces, ptr, p)
112 |
113 |
114 | # Try to match URI autolink after first <, returning number of chars matched.
def _cmark__scan_autolink_uri(ptr: str, p: int) -> int:
    r"""Match a URI autolink at a given position of a string.

    The match starts right after the opening ``<`` and includes the
    closing ``>``.

    :parameter ptr: the string to scan.
    :parameter p: the position in ``ptr`` where the match must start.
    :returns: the number of characters matched, or 0 if there is no match.
    """
    # scanners.re:
    #   scheme = [A-Za-z][A-Za-z0-9.+-]{1,31};
    #   scheme [:][^\x00-\x20<>]*[>]
    # The scheme was missing: the expression started with the colon, so
    # something like "https://example.com>" could never match.
    scheme: str = '[A-Za-z][A-Za-z0-9.+-]{1,31}'
    return _common_scan(scheme + '[:][^\x00-\x20<>]*[>]', ptr, p)
117 |
118 |
119 | # Try to match email autolink after first <, returning num of chars matched.
def _cmark__scan_autolink_email(ptr: str, p: int) -> int:
    r"""Match an email autolink at a given position of a string.

    The match starts right after the opening ``<`` and includes the
    closing ``>``.

    :parameter ptr: the string to scan.
    :parameter p: the position in ``ptr`` where the match must start.
    :returns: the number of characters matched, or 0 if there is no match.
    """
    # What comes before the @.
    local_part: str = '[a-zA-Z0-9.!#$%&\'*+/=?^_`{|}~-]+'
    # One dot-separated component of the domain.
    domain_label: str = '[a-zA-Z0-9]([a-zA-Z0-9-]{0,61}[a-zA-Z0-9])?'

    regex: str = (local_part + '[@]' + domain_label
                  + '([.]' + domain_label + ')*[>]')
    return _common_scan(regex, ptr, p)
124 |
125 |
def _cmark__scan_html_comment(ptr: str, p: int) -> int:
    r"""Match the ``htmlcomment`` expression at a given position.

    :parameter ptr: the string to scan.
    :parameter p: the position in ``ptr`` where the match must start.
    :returns: the number of characters matched, or 0 if there is no match.
    """
    regex: str = md_parser['cmark']['_scanners.re']['htmlcomment']
    return _common_scan(regex, ptr, p)
129 |
130 |
def _cmark__scan_cdata(ptr: str, p: int) -> int:
    r"""Match the ``cdata`` expression at a given position.

    :parameter ptr: the string to scan.
    :parameter p: the position in ``ptr`` where the match must start.
    :returns: the number of characters matched, or 0 if there is no match.
    """
    regex: str = md_parser['cmark']['_scanners.re']['cdata']
    return _common_scan(regex, ptr, p)
133 |
134 |
135 | # Try to match an HTML tag after first <, returning num of chars matched.
def _cmark__scan_html_tag(ptr: str, p: int) -> int:
    r"""Match the ``htmltag`` expression at a given position.

    The match starts right after the opening ``<``.

    :parameter ptr: the string to scan.
    :parameter p: the position in ``ptr`` where the match must start.
    :returns: the number of characters matched, or 0 if there is no match.
    """
    regex: str = md_parser['cmark']['_scanners.re']['htmltag']
    return _common_scan(regex, ptr, p)
138 |
139 |
def _cmark__scan_html_declaration(ptr: str, p: int) -> int:
    r"""Match the ``declaration`` expression at a given position.

    :parameter ptr: the string to scan.
    :parameter p: the position in ``ptr`` where the match must start.
    :returns: the number of characters matched, or 0 if there is no match.
    """
    regex: str = md_parser['cmark']['_scanners.re']['declaration']
    return _common_scan(regex, ptr, p)
143 |
144 |
def _cmark__scan_html_pi(ptr: str, p: int) -> int:
    r"""Match the ``processinginstruction`` expression at a given position.

    :parameter ptr: the string to scan.
    :parameter p: the position in ``ptr`` where the match must start.
    :returns: the number of characters matched, or 0 if there is no match.
    """
    regex: str = md_parser['cmark']['_scanners.re']['processinginstruction']
    return _common_scan(regex, ptr, p)
148 |
--------------------------------------------------------------------------------
/docs/rules/link_lables.rst:
--------------------------------------------------------------------------------
1 | Link label
2 | ==========
3 |
4 | If the user decides to generate the table of contents with the anchor links,
5 | then link label rules will be applied.
6 |
7 | ``cmark``, ``github``, ``gitlab``
8 | ---------------------------------
9 |
10 | - https://spec.commonmark.org/0.30/#link-label
11 |
If a line ends in 1 or more '\' characters, this disrupts the anchor
title. For example ``- [xdmdmsdm\](#xdmdmsdm)`` becomes
``<ul><li>[xdmdmsdm](#xdmdmsdm)</li></ul>`` instead of
``<ul><li><a href="#xdmdmsdm">xdmdmsdm\</a></li></ul>``.
The workaround used in md-toc is to add a space character at the end of the
string, so it becomes: ``<ul><li><a href="#xdmdmsdm">xdmdmsdm\ </a></li></ul>``
18 |
19 | If the link label contains only whitespace characters a ``GithubEmptyLinkLabel``
20 | exception is raised.
21 |
22 | If the number of characters inside the link label is over 999 a
23 | ``GithubOverflowCharsLinkLabel`` is raised.
24 |
If the header contains ``[`` or ``]``, these characters
are treated with the following rules.
27 |
28 | - https://spec.commonmark.org/0.30/#link-text
29 | - https://spec.commonmark.org/0.30/#link-destination
30 |
According to a function in the source code, balanced square brackets do not
work, however they do when interpreted by the web interface. It is however
possible that they are supported within the ``handle_close_bracket``
function.
35 |
36 | - https://github.com/github/cmark/blob/6b101e33ba1637e294076c46c69cd6a262c7539f/src/inlines.c#L881
37 | - https://github.com/github/cmark/blob/6b101e33ba1637e294076c46c69cd6a262c7539f/src/inlines.c#L994
38 |
39 | Here is the original C function with some more comments added:
40 |
41 | .. code-block:: c
42 | :linenos:
43 |
44 | // Parse a link label. Returns 1 if successful.
45 | // Note: unescaped brackets are not allowed in labels.
46 | // The label begins with `[` and ends with the first `]` character
47 | // encountered. Backticks in labels do not start code spans.
48 | static int link_label(subject *subj, cmark_chunk *raw_label) {
49 | bufsize_t startpos = subj->pos;
50 | int length = 0;
51 | unsigned char c;
52 |
53 | // advance past [
54 | //
55 | // Ignore the open link label identifier
56 | // peek_char simply returns the current char if we are
57 | // in range of the string, 0 otherwise.
58 | if (peek_char(subj) == '[') {
59 | advance(subj);
60 | } else {
61 | return 0;
62 | }
63 |
64 | while ((c = peek_char(subj)) && c != '[' && c != ']') {
65 | // If there is an escape and the next character is (for example)
66 | // '[' or ']' then,
67 | // ignore the loop conditions.
68 | // If there are nested balanced square brackets this loop ends.
69 | if (c == '\\') {
70 | advance(subj);
71 | length++;
72 |
73 | // Punctuation characters are the ones defined at:
74 | // https://github.github.com/gfm/#ascii-punctuation-character
75 | if (cmark_ispunct(peek_char(subj))) {
76 | advance(subj);
77 | length++;
78 | }
79 | } else {
80 | advance(subj);
81 | length++;
82 | }
83 | // MAX_LINK_LABEL_LENGTH is a constant defined at
84 | // https://github.com/github/cmark/blob/master/src/parser.h#L13
85 | if (length > MAX_LINK_LABEL_LENGTH) {
86 | goto noMatch;
87 | }
88 | }
89 |
90 | // If the loop terminates when the current character is ']' then
91 | // everything between '[' and ']' is the link label...
92 | if (c == ']') { // match found
93 | *raw_label =
94 | cmark_chunk_dup(&subj->input, startpos + 1, subj->pos - (startpos + 1));
95 | cmark_chunk_trim(raw_label);
96 | advance(subj); // advance past ]
97 | return 1;
98 | }
99 |
100 | // ...otherwise return error.
101 | // This label always gets executed according to C rules.
102 | noMatch:
103 | subj->pos = startpos; // rewind
104 | return 0;
105 | }
106 |
107 |
108 | For simplicity the escape ``[`` and ``]`` rule is used.
109 |
110 | ``redcarpet``
111 | -------------
112 |
113 | - https://github.com/vmg/redcarpet/blob/6270d6b4ab6b46ee6bb57a6c0e4b2377c01780ae/ext/redcarpet/markdown.c#L998
114 |
115 | Let's inspect this loop:
116 |
117 | - https://github.com/vmg/redcarpet/blob/6270d6b4ab6b46ee6bb57a6c0e4b2377c01780ae/ext/redcarpet/markdown.c#L1017
118 |
119 | .. code-block:: c
120 | :linenos:
121 |
122 | /* looking for the matching closing bracket */
123 | for (level = 1; i < size; i++) {
124 | if (data[i] == '\n')
125 | text_has_nl = 1;
126 |
127 | else if (data[i - 1] == '\\')
128 | continue;
129 |
130 | else if (data[i] == '[')
131 | level++;
132 |
133 | else if (data[i] == ']') {
134 | level--;
135 | if (level <= 0)
136 | break;
137 | }
138 | }
139 |
140 | if (i >= size)
141 | goto cleanup;
142 |
143 |
144 | The cleanup label looks like this:
145 |
146 | .. code-block:: c
147 | :linenos:
148 |
149 | /* cleanup */
150 | cleanup:
151 | rndr->work_bufs[BUFFER_SPAN].size = (int)org_work_size;
152 | return ret ? i : 0;
153 |
154 | An example: ``[test \](test \)`` becomes ``[test ](test )`` instead of
155 | ``test \``
156 |
157 | In fact, you can see that if the previous character is ``\\`` then the
158 | current iteration is skipped. If the current character happens to be ``]``
159 | then the inline link closing bracket detection is ignored. ``i`` becomes
160 | equal to ``size`` eventually and so we jump to the ``cleanup`` label.
161 | That label contains a return statement so that string is not treated as
162 | an inline link anymore. Similar code is also implemented for
163 | detecting ``(`` and ``)``. See:
164 |
165 | - https://github.com/vmg/redcarpet/blob/6270d6b4ab6b46ee6bb57a6c0e4b2377c01780ae/ext/redcarpet/markdown.c#L1088
166 | - https://github.com/vmg/redcarpet/blob/6270d6b4ab6b46ee6bb57a6c0e4b2377c01780ae/ext/redcarpet/markdown.c#L1099
167 |
168 | To solve this we use the same workaround used for ``cmark``, ``github``, ``gitlab``.
169 |
--------------------------------------------------------------------------------
/asciinema/md_toc_asciinema_7_0_0.json:
--------------------------------------------------------------------------------
1 | {"version": 2, "width": 89, "height": 46, "timestamp": 1585155760, "env": {"SHELL": "/bin/bash", "TERM": "rxvt-unicode-256color"}}
2 | [0.010153, "o", "Running a demo to show some of python -m md_toc's capabilities...\r\n\r\n"]
3 | [1.011397, "o", "$ python -m md_toc -h\r\n"]
4 | [1.144886, "o", "usage: __main__.py [-h] [-c | -i] [-l] [-m TOC_MARKER] [-p] [-s SKIP_LINES] [-v]\r\n {github,cmark,gitlab,commonmarker,redcarpet} ...\r\n\r\nMarkdown Table Of Contents: Automatically generate a compliant table\r\nof contents for a markdown file to improve document readability.\r\n\r\noptional arguments:\r\n -h, --help show this help message and exit\r\n -c, --no-list-coherence\r\n avoids checking for TOC list coherence\r\n -i, --no-indentation avoids adding indentations to the TOC\r\n -l, --no-links avoids adding links to the TOC\r\n -m TOC_MARKER, --toc-marker TOC_MARKER\r\n set the string to be used as the marker for positioning the\r\n table of contents. Defaults to \r\n -p, --in-place overwrite the input file\r\n -s SKIP_LINES, --skip-lines SKIP_LINES\r\n skip parsing of the first selected number of lines. Defaults to\r\n 0, i.e. do not skip any lines\r\n -v, --version"]
5 | [1.144939, "o", " show program's version number and exit\r\n\r\nmarkdown parser:\r\n {github,cmark,gitlab,commonmarker,redcarpet}\r\n --help\r\n\r\nPlease read the documentation to understand how each parser works\r\n\r\nReturn values: 0 ok, 1 error, 2 invalid command\r\n\r\nCopyright (C) 2017-2020 Franco Masotti, frnmst\r\nLicense GPLv3+: GNU GPL version 3 or later \r\nThis is free software: you are free to change and redistribute it.\r\nThere is NO WARRANTY, to the extent permitted by law.\r\n"]
6 | [1.153268, "o", "\r\n"]
7 | [2.157095, "o", "Inspecting the file...\r\n$ cat foo.md\r\n"]
8 | [2.157653, "o", "# Hi\r\n\r\n\r\n\r\nhey\r\n\r\n## How are you? !!!\r\n\r\n## fine, thanks\r\n\r\n### Bye\r\n\r\n## Bye bye\r\n\r\n```python\r\n# This is a code\r\n# fence with comments that might represent ATX-style headings\r\n# if not properly parsed\r\n```\r\n\r\nbye\r\n\r\n# boo\r\n"]
9 | [2.157873, "o", "\r\n"]
10 | [3.158819, "o", "Run with default options...\r\n$ python -m md_toc github foo.md\r\n"]
11 | [3.291031, "o", "- [Hi](#hi)\r\n - [How are you? !!!](#how-are-you-----------)\r\n - [fine, thanks](#fine-thanks)\r\n - [Bye](#bye)\r\n - [Bye bye](#bye-bye)\r\n- [boo](#boo)\r\n"]
12 | [3.298924, "o", "\r\n"]
13 | [4.299849, "o", "Ordered list...\r\n$ python -m md_toc gitlab -o '.' foo.md\r\n"]
14 | [4.440878, "o", "1. [Hi](#hi)\r\n 1. [How are you? !!!](#how-are-you-----------)\r\n 2. [fine, thanks](#fine-thanks)\r\n 1. [Bye](#bye)\r\n 3. [Bye bye](#bye-bye)\r\n2. [boo](#boo)\r\n"]
15 | [4.449244, "o", "\r\n"]
16 | [5.450176, "o", "No links...\r\n$ python -m md_toc -l github foo.md\r\n"]
17 | [5.587205, "o", "- Hi\r\n - How are you? !!!\r\n - fine, thanks\r\n - Bye\r\n - Bye bye\r\n- boo\r\n"]
18 | [5.595605, "o", "\r\n"]
19 | [6.596582, "o", "No links and no indentation...\r\n$ python -m md_toc -l -i github foo.md\r\n"]
20 | [6.729254, "o", "- Hi\r\n- How are you? !!!\r\n- fine, thanks\r\n- Bye\r\n- Bye bye\r\n- boo\r\n"]
21 | [6.737559, "o", "\r\n"]
22 | [7.738477, "o", "Inspecting the non-coherent file...\r\n$ cat foo_noncoherent.md\r\n"]
23 | [7.739502, "o", "# Hi\r\n### boo\r\n"]
24 | [7.739825, "o", "\r\n"]
25 | [8.74086, "o", "Trying to parse a non coherent markdown file will raise an exception...\r\n$ python -m md_toc github foo_noncoherent.md\r\n"]
26 | [8.877359, "o", "Traceback (most recent call last):\r\n File \"/home/vm/dev/personal/stable/md-toc/md_toc/__main__.py\", line 34, in main\r\n result = args.func(args)\r\n File \"/home/vm/dev/personal/stable/md-toc/md_toc/cli.py\", line 54, in write_toc\r\n toc_struct = build_multiple_tocs(\r\n File \"/home/vm/dev/personal/stable/md-toc/md_toc/api.py\", line 335, in build_multiple_tocs\r\n build_toc(filenames[file_id], ordered, no_links, no_indentation,\r\n File \"/home/vm/dev/personal/stable/md-toc/md_toc/api.py\", line 257, in build_toc\r\n raise TocDoesNotRenderAsCoherentList\r\nmd_toc.exceptions.TocDoesNotRenderAsCoherentList\r\n"]
27 | [8.886285, "o", "\r\n"]
28 | [9.887329, "o", "Try to parse a non coherent markdown file without checking for coherence...\r\n$ python -m md_toc -c github foo_noncoherent.md\r\n"]
29 | [10.027086, "o", "- [Hi](#hi)\r\n - [boo](#boo)\r\n"]
30 | [10.035279, "o", "\r\n"]
31 | [11.036226, "o", "Use stdin, no links and no indentation...\r\n$ cat foo.md | python -m md_toc -l -i - cmark -u '*'\r\n"]
32 | [11.163924, "o", "* Hi\r\n* How are you? !!!\r\n* fine, thanks\r\n* Bye\r\n* Bye bye\r\n* boo\r\n"]
33 | [11.172106, "o", "\r\n"]
34 | [12.173201, "o", "Inspecting a file where the first 5 lines need to be skipped...\r\n$ cat foo_skiplines.md\r\n"]
35 | [12.174048, "o", "# I want this line to be a comment\r\n#### And this as well\r\n## And this\r\n###### ByeBye\r\n\r\n# Hi\r\n## How\r\n### Are\r\n## You\r\n# Today ?\r\n"]
36 | [12.174162, "o", "\r\n"]
37 | [13.175239, "o", "Using the skip lines option...\r\n$ python -m md_toc -s 5 github foo_skiplines.md\r\n"]
38 | [13.312227, "o", "- [Hi](#hi)\r\n - [How](#how)\r\n - [Are](#are)\r\n - [You](#you)\r\n- [Today ?](#today-)\r\n"]
39 | [13.32088, "o", "\r\n"]
40 | [14.322071, "o", "Editing the file in-place. As you can see, code fence detection still needs to be implemented for redcarpet...\r\n$ python -m md_toc -p redcarpet foo.md\r\n"]
41 | [14.491408, "o", "$ cat foo.md\r\n"]
42 | [14.492049, "o", "# Hi\r\n\r\n\r\n\r\n- [Hi](#hi)\r\n - [How are you? !!!](#how-are-you)\r\n - [fine, thanks](#fine-thanks)\r\n - [Bye](#bye)\r\n - [Bye bye](#bye-bye)\r\n- [This is a code](#this-is-a-code)\r\n- [fence with comments that might represent ATX-style headings](#fence-with-comments-that-might-represent-atx-style-headings)\r\n- [if not properly parsed](#if-not-properly-parsed)\r\n- [boo](#boo)\r\n\r\n\r\n\r\nhey\r\n\r\n## How are you? !!!\r\n\r\n## fine, thanks\r\n\r\n### Bye\r\n\r\n## Bye bye\r\n\r\n```python\r\n# This is a code\r\n# fence with comments that might represent ATX-style headings\r\n# if not properly parsed\r\n```\r\n\r\nbye\r\n\r\n# boo\r\n"]
43 |
--------------------------------------------------------------------------------
/asciinema/md_toc_asciinema_6_0_0.json:
--------------------------------------------------------------------------------
1 | {"version": 2, "width": 83, "height": 46, "timestamp": 1560350339, "env": {"SHELL": "/bin/bash", "TERM": "rxvt-unicode-256color"}}
2 | [0.012098, "o", "Running a demo to show some of python -m md_toc's capabilities...\r\n\r\n"]
3 | [1.013844, "o", "$ python -m md_toc -h\r\n"]
4 | [1.202271, "o", "usage: __main__.py [-h] [-c | -i] [-l] [-m TOC_MARKER] [-p] [-s SKIP_LINES]\r\n [-v]\r\n [FILE_NAME [FILE_NAME ...]]\r\n {github,cmark,gitlab,commonmarker,redcarpet} ...\r\n\r\nMarkdown Table Of Contents: Automatically generate a compliant table\r\nof contents for a markdown file to improve document readability.\r\n\r\npositional arguments:\r\n FILE_NAME the I/O file name\r\n\r\noptional arguments:\r\n -h, --help show this help message and exit\r\n -c, --no-list-coherence\r\n avoids checking for TOC list coherence\r\n -i, --no-indentation avoids adding indentations to the TOC\r\n -l, --no-links avoids adding links to the TOC\r\n -m TOC_MARKER, --toc-marker TOC_MARKER\r\n set the string to be used as the marker for\r\n positioning the table of contents. Defaults to \r\n -p, --in-place overwrite the input file\r\n -s SKIP_LINES, --skip-lines SKIP_LINE"]
5 | [1.202757, "o", "S\r\n skip parsing of the first selected number of lines.\r\n Defaults to 0, i.e. do not skip any lines\r\n -v, --version show program's version number and exit\r\n\r\nmarkdown parser:\r\n {github,cmark,gitlab,commonmarker,redcarpet}\r\n\r\nPlease read the documentation to understand how each parser works\r\n\r\nReturn values: 0 ok, 1 error, 2 invalid command\r\n\r\nCopyright (C) 2018-2019 Franco Masotti, frnmst\r\nLicense GPLv3+: GNU GPL version 3 or later \r\nThis is free software: you are free to change and redistribute it.\r\nThere is NO WARRANTY, to the extent permitted by law.\r\n"]
6 | [1.215144, "o", "\r\n"]
7 | [2.2221, "o", "Inspecting the file...\r\n$ cat foo.md\r\n"]
8 | [2.22298, "o", "# Hi\r\n\r\n\r\n\r\nhey\r\n\r\n## How are you? !!!\r\n\r\n## fine, thanks\r\n\r\n### Bye\r\n\r\n## Bye bye\r\n\r\n```python\r\n# This is a code\r\n# fence with comments that might represent ATX-style headings\r\n# if not properly parsed\r\n```\r\n\r\nbye\r\n\r\n# boo\r\n"]
9 | [2.223663, "o", "\r\n"]
10 | [3.224875, "o", "Run with default options...\r\n$ python -m md_toc foo.md github\r\n"]
11 | [3.413952, "o", "- [Hi](#hi)\r\n - [How are you? !!!](#how-are-you-----------)\r\n - [fine, thanks](#fine-thanks)\r\n - [Bye](#bye)\r\n - [Bye bye](#bye-bye)\r\n- [boo](#boo)\r\n"]
12 | [3.426435, "o", "\r\n"]
13 | [4.427878, "o", "Ordered list...\r\n$ python -m md_toc foo.md gitlab -o\r\n"]
14 | [4.61836, "o", "1. [Hi](#hi)\r\n 1. [How are you? !!!](#how-are-you-----------)\r\n 2. [fine, thanks](#fine-thanks)\r\n 1. [Bye](#bye)\r\n 3. [Bye bye](#bye-bye)\r\n2. [boo](#boo)\r\n"]
15 | [4.631221, "o", "\r\n"]
16 | [5.632689, "o", "No links...\r\n$ python -m md_toc -l foo.md github\r\n"]
17 | [5.820955, "o", "- Hi\r\n - How are you? !!!\r\n - fine, thanks\r\n - Bye\r\n - Bye bye\r\n- boo\r\n"]
18 | [5.833366, "o", "\r\n"]
19 | [6.834851, "o", "No links and no indentation...\r\n$ python -m md_toc -l -i foo.md github\r\n"]
20 | [7.017477, "o", "- Hi\r\n- How are you? !!!\r\n- fine, thanks\r\n- Bye\r\n- Bye bye\r\n- boo\r\n"]
21 | [7.030133, "o", "\r\n"]
22 | [8.031626, "o", "Inspecting the non-coherent file...\r\n$ cat foo_noncoherent.md\r\n"]
23 | [8.032911, "o", "# Hi\r\n### boo\r\n"]
24 | [8.033147, "o", "\r\n"]
25 | [9.034898, "o", "Trying to parse a non coherent markdown file will raise an exception...\r\n$ python -m md_toc foo_noncoherent.md github\r\n"]
26 | [9.228868, "o", "Traceback (most recent call last):\r\n File \"/home/vm/md-toc/md_toc/__main__.py\", line 34, in main\r\n result = args.func(args)\r\n File \"/home/vm/md-toc/md_toc/cli.py\", line 63, in write_toc\r\n skip_lines=args.skip_lines)\r\n File \"/home/vm/md-toc/md_toc/api.py\", line 303, in build_multiple_tocs\r\n list_marker, skip_lines))\r\n File \"/home/vm/md-toc/md_toc/api.py\", line 226, in build_toc"]
27 | [9.229464, "o", "\r\n raise TocDoesNotRenderAsCoherentList\r\nmd_toc.exceptions.TocDoesNotRenderAsCoherentList\r\n"]
28 | [9.242013, "o", "\r\n"]
29 | [10.243606, "o", "Try to parse a non coherent markdown file without checking for coherence...\r\n$ python -m md_toc -c foo_noncoherent.md github\r\n"]
30 | [10.428045, "o", "- [Hi](#hi)\r\n - [boo](#boo)\r\n"]
31 | [10.440762, "o", "\r\n"]
32 | [11.442176, "o", "Use stdin, no links and no indentation...\r\n$ cat foo.md | python -m md_toc -l -i cmark -u '*'\r\n"]
33 | [11.634328, "o", "* Hi\r\n* How are you? !!!\r\n* fine, thanks\r\n* Bye\r\n* Bye bye\r\n* boo\r\n"]
34 | [11.64981, "o", "\r\n"]
35 | [12.651345, "o", "Inspecting a file where the first 5 lines need to be skipped...\r\n$ cat foo_skiplines.md\r\n"]
36 | [12.65276, "o", "# I want this line to be a comment\r\n#### And this as well\r\n## And this\r\n###### ByeBye\r\n\r\n# Hi\r\n## How\r\n### Are\r\n## You\r\n# Today ?\r\n"]
37 | [12.653268, "o", "\r\n"]
38 | [13.654713, "o", "Using the skip lines option...\r\n$ python -m md_toc -s 5 foo_skiplines.md github\r\n"]
39 | [13.848681, "o", "- [Hi](#hi)\r\n - [How](#how)\r\n - [Are](#are)\r\n - [You](#you)\r\n- [Today ?](#today-)\r\n"]
40 | [13.860635, "o", "\r\n"]
41 | [14.862582, "o", "Editing the file in-place. As you can see, code fence detection still needs to be implemented for redcarpet...\r\n$ python -m md_toc -p foo.md redcarpet\r\n"]
42 | [15.150645, "o", "$ cat foo.md\r\n"]
43 | [15.151666, "o", "# Hi\r\n\r\n\r\n\r\n- [Hi](#hi)\r\n - [How are you? !!!](#how-are-you)\r\n - [fine, thanks](#fine-thanks)\r\n - [Bye](#bye)\r\n - [Bye bye](#bye-bye)\r\n- [This is a code](#this-is-a-code)\r\n- [fence with comments that might represent ATX-style headings](#fence-with-comments-that-might-represent-atx-style-headings)\r\n- [if not properly parsed](#if-not-properly-parsed)\r\n- [boo](#boo)\r\n\r\n\r\n\r\nhey\r\n\r\n## How are you? !!!\r\n\r\n## fine, thanks\r\n\r\n### Bye\r\n\r\n## Bye bye\r\n\r\n```python\r\n# This is a code\r\n# fence with comments that might represent ATX-style headings\r\n# if not properly parsed\r\n```\r\n\r\nbye\r\n\r\n# boo\r\n"]
44 |
--------------------------------------------------------------------------------
/md_toc/cmark/houdini_html_u_c.py:
--------------------------------------------------------------------------------
1 | #
2 | # houdini_html_u_c.py
3 | #
4 | # Copyright (C) 2017-2022 Franco Masotti (see /README.md)
5 | #
6 | # This file is part of md-toc.
7 | #
8 | # md-toc is free software: you can redistribute it and/or modify
9 | # it under the terms of the GNU General Public License as published by
10 | # the Free Software Foundation, either version 3 of the License, or
11 | # (at your option) any later version.
12 | #
13 | # md-toc is distributed in the hope that it will be useful,
14 | # but WITHOUT ANY WARRANTY; without even the implied warranty of
15 | # MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
16 | # GNU General Public License for more details.
17 | #
18 | # You should have received a copy of the GNU General Public License
19 | # along with md-toc. If not, see <https://www.gnu.org/licenses/>.
20 | #
21 | r"""A cmark implementation file."""
22 |
23 | import re
24 |
25 | from ..constants import parser as md_parser
26 | from ..generic import _strncmp
27 | from .buffer_c import (
28 | _cmark_cmark_strbuf_grow,
29 | _cmark_cmark_strbuf_put,
30 | _cmark_cmark_strbuf_putc,
31 | _cmark_cmark_strbuf_puts,
32 | )
33 | from .buffer_h import _cmarkCmarkStrbuf
34 | from .houdini_h import _cmark_HOUDINI_UNESCAPED_SIZE
35 | from .utf8_c import _cmark_cmark_utf8proc_encode_char
36 |
37 | # License F applies to this file except for non derivative code:
38 | # in that case the license header at the top of the file applies.
39 | # See docs/copyright_license.rst
40 |
41 |
42 | # Recursive function of a binary search.
43 | # 0.30.
def _cmark_S_lookup(i: int, low: int, hi: int, s: str, length: int) -> str:
    r"""Binary-search the cmark entity table for the first ``length`` chars of ``s``.

    :parameter i: index of the table entry probed first.
    :parameter low: lowest index of the current search window.
    :parameter hi: highest index of the current search window.
    :parameter s: the string holding the candidate entity name.
    :parameter length: number of leading characters of ``s`` to compare.
    :returns: the ``bytes`` field of the matching table entry, or ``None``
        when no entry matches.
    """
    entities = md_parser['cmark']['re']['ENTITIES']['entities']

    # Iterative form of the search: every pass narrows [low, hi] and moves
    # the probe index, exactly like one recursive call would.
    while True:
        name = entities[i]['entity']
        cmp: int = _strncmp(s, name, length)

        # A prefix match only counts when the table entry has no extra chars.
        if cmp == 0 and length == len(name):
            return entities[i]['bytes']

        if cmp == -1 and i > low:
            # Continue in the lower half.
            j: int = i - int((i - low) / 2)
            if j == i:
                j -= 1
            hi = i - 1
        elif cmp == 1 and i < hi:
            # Continue in the upper half.
            j = i + int((hi - i) / 2)
            if j == i:
                j += 1
            low = i + 1
        else:
            # Window exhausted (or prefix matched an entry of another length).
            return None

        i = j
65 |
66 |
67 | # 0.30.
def _cmark_S_lookup_entity(s: str, length: int):
    r"""Look up the first ``length`` characters of ``s`` in the entity table.

    Starts ``_cmark_S_lookup`` at the middle of the table with a search
    window spanning every entry, and returns whatever it returns.
    """
    entity_count = md_parser['cmark']['re']['ENTITIES']['CMARK_NUM_ENTITIES']
    first_probe = int(entity_count / 2)
    last_index = entity_count - 1
    return _cmark_S_lookup(first_probe, 0, last_index, s, length)
73 |
74 |
75 | # 0.30.
76 | def _cmark_houdini_unescape_ent(ob: _cmarkCmarkStrbuf, src: str,
77 | size: int) -> int:
78 | i: int = 0
79 |
80 | if size >= 3 and src[0] == '#':
81 | codepoint: int = 0
82 | num_digits: int = 0
83 | max_digits: int = 7
84 |
85 | if re.match(r'\d', src[1]):
86 |
87 | i = 1
88 | while i < size and re.match(r'\d', src[i]):
89 | codepoint = (codepoint * 10) + (ord(src[i]) - ord('0'))
90 |
91 | if codepoint >= 0x110000:
92 | # Keep counting digits but
93 | # avoid integer overflow.
94 | codepoint = 0x110000
95 |
96 | i += 1
97 |
98 | num_digits = i - 1
99 | max_digits = 7
100 |
101 | elif src[1] == 'x' or src[1] == 'X':
102 | i = 2
103 | while i < size and re.match(r'[\dA-Fa-f]', src[i]):
104 | codepoint = (codepoint * 16) + ((ord(src[i]) | 32) % 39 - 9)
105 |
106 | if codepoint >= 0x110000:
107 | # Keep counting digits but
108 | # avoid integer overflow.
109 | codepoint = 0x110000
110 |
111 | i += 1
112 |
113 | num_digits = i - 2
114 | max_digits = 6
115 |
116 | if (num_digits >= 1 and num_digits <= max_digits and i < size
117 | and src[i] == ';'):
118 | if (codepoint == 0 or (codepoint >= 0xD800 and codepoint < 0xE000)
119 | or codepoint >= 0x110000):
120 | codepoint = 0xFFFD
121 |
122 | _cmark_cmark_utf8proc_encode_char(codepoint, ob)
123 | return i + 1
124 |
125 | else:
126 | if size > md_parser['cmark']['re']['ENTITIES'][
127 | 'CMARK_ENTITY_MAX_LENGTH']:
128 | size = md_parser['cmark']['re']['ENTITIES'][
129 | 'CMARK_ENTITY_MAX_LENGTH']
130 |
131 | for i in range(
132 | md_parser['cmark']['re']['ENTITIES']
133 | ['CMARK_ENTITY_MIN_LENGTH'], size):
134 | if src[i] == ' ':
135 | break
136 |
137 | if src[i] == ';':
138 | entity: str = _cmark_S_lookup_entity(src, i)
139 |
140 | if entity is not None:
141 | _cmark_cmark_strbuf_puts(ob, entity)
142 | return i + 1
143 |
144 | break
145 |
146 | return 0
147 |
148 |
149 | # 0.30.
def _cmark_houdini_unescape_html(
    ob: _cmarkCmarkStrbuf,
    src: str,
    size: int,
) -> int:
    r"""Copy ``src`` into ``ob`` while decoding the HTML entities it contains.

    :parameter ob: the buffer receiving the unescaped text.
    :parameter src: the text to unescape.
    :parameter size: the number of characters of ``src`` to process.
    :returns: ``0`` when ``src`` is non-empty and holds no ``&`` within
        ``size`` characters (nothing is written to ``ob``), ``1`` otherwise.
    """
    pos: int = 0

    while pos < size:
        chunk_start: int = pos

        # Skip ahead to the next '&' (or to the end of the input).
        while pos < size and src[pos] != '&':
            pos += 1

        has_plain_text: bool = pos > chunk_start

        if has_plain_text and chunk_start == 0:
            # First chunk ran to the end: there is nothing to unescape.
            if pos >= size:
                return 0

            _cmark_cmark_strbuf_grow(ob, _cmark_HOUDINI_UNESCAPED_SIZE(size))

        if has_plain_text:
            _cmark_cmark_strbuf_put(ob, src[chunk_start:], pos - chunk_start)

        # Input exhausted: same outcome as leaving the loop.
        if pos >= size:
            return 1

        # Step over the '&' and try to decode what follows it.
        pos += 1
        consumed: int = _cmark_houdini_unescape_ent(ob, src[pos:], size - pos)
        pos += consumed

        # Not really an entity: keep the ampersand verbatim.
        if consumed == 0:
            _cmark_cmark_strbuf_putc(ob, ord('&'))

    return 1
188 |
189 |
def _cmark_houdini_unescape_html_f(
    ob: _cmarkCmarkStrbuf,
    src: str,
    size: int,
):
    r"""Unescape ``src`` into ``ob``, falling back to a verbatim copy.

    When ``_cmark_houdini_unescape_html`` returns a false value, the first
    ``size`` characters of ``src`` are put into ``ob`` unchanged.
    """
    unescaped = _cmark_houdini_unescape_html(ob, src, size)
    if unescaped:
        return

    _cmark_cmark_strbuf_put(ob, src, size)
197 |
--------------------------------------------------------------------------------
/docs/conf.py:
--------------------------------------------------------------------------------
1 | #
2 | # md-toc documentation build configuration file, created by
3 | # sphinx-quickstart on Wed Dec 27 17:32:50 2017.
4 | #
5 | # This file is execfile()d with the current directory set to its
6 | # containing dir.
7 | #
8 | # Note that not all possible configuration values are present in this
9 | # autogenerated file.
10 | #
11 | # All configuration values have a default; values that are commented out
12 | # serve to show the default.
13 |
14 | # If extensions (or modules to document with autodoc) are in another directory,
15 | # add these directories to sys.path here. If the directory is relative to the
16 | # documentation root, use os.path.abspath to make it absolute, like shown here.
17 | #
18 | r"""conf.py."""
19 |
20 | import os
21 | import sys
22 |
23 | sys.path.insert(0, os.path.abspath('..'))
24 |
25 | # -- General configuration ------------------------------------------------
26 |
27 | # General information about the project.
28 | project = 'md-toc'
29 | copyright = '2017-2024, Franco Masotti'
30 | author = 'Franco Masotti'
31 |
32 | # The version info for the project you're documenting, acts as replacement for
33 | # |version| and |release|, also used in various other places throughout the
34 | # built documents.
35 | #
36 | # The short X.Y version.
37 | version = '9.0.0'
38 | # The full version, including alpha/beta/rc tags.
39 | release = '9.0.0'
40 |
41 | # If your documentation needs a minimal Sphinx version, state it here.
42 | #
43 | # needs_sphinx = '1.0'
44 |
45 | # Add any Sphinx extension module names here, as strings. They can be
46 | # extensions coming with Sphinx (named 'sphinx.ext.*') or your custom
47 | # ones.
48 | extensions = [
49 | 'sphinx.ext.autodoc', 'sphinx.ext.autosummary', 'sphinx.ext.coverage',
50 | 'sphinx.ext.graphviz', 'sphinx_copybutton'
51 | ]
52 |
53 | # Add any paths that contain templates here, relative to this directory.
54 | templates_path = ['_templates']
55 |
56 | # The suffix(es) of source filenames.
57 | # You can specify multiple suffix as a list of string:
58 | #
59 | # source_suffix = ['.rst', '.md']
60 | source_suffix = '.rst'
61 |
62 | # The master toctree document.
63 | master_doc = 'index'
64 |
65 | # The language for content autogenerated by Sphinx. Refer to documentation
66 | # for a list of supported languages.
67 | #
68 | # This is also used if you do content translation via gettext catalogs.
69 | # Usually you set "language" from the command line for these cases.
70 | language = 'en'
71 |
72 | # List of patterns, relative to source directory, that match files and
73 | # directories to ignore when looking for source files.
74 | # This patterns also effect to html_static_path and html_extra_path
75 | exclude_patterns = ['_build', 'Thumbs.db', '.DS_Store']
76 |
77 | # The name of the Pygments (syntax highlighting) style to use.
78 | pygments_style = 'default'
79 |
80 | # If true, `todo` and `todoList` produce output, else they produce nothing.
81 | todo_include_todos = False
82 |
83 | # -- Options for HTML output ----------------------------------------------
84 |
85 | # The theme to use for HTML and HTML Help pages. See the documentation for
86 | # a list of builtin themes.
87 | #
88 | html_theme = 'sphinx_book_theme'
89 |
90 | # Add any paths that contain custom static files (such as style sheets) here,
91 | # relative to this directory. They are copied after the builtin static files,
92 | # so a file named "default.css" will overwrite the builtin "default.css".
93 | html_static_path = ['_static']
94 |
95 | # These paths are either relative to html_static_path
96 | # or fully qualified paths (eg. https://...)
97 | html_css_files = ['css/custom.css']
98 |
99 | # -- Options for HTMLHelp output ------------------------------------------
100 |
101 | # Output file base name for HTML help builder.
102 | htmlhelp_basename = 'md-toc-doc'
103 |
104 | # -- Options for LaTeX output ---------------------------------------------
105 |
106 | latex_engine = 'xelatex'
107 | latex_elements = {
108 |
109 | # The paper size ('letterpaper' or 'a4paper').
110 | #
111 | 'papersize': 'a4paper',
112 |
113 | # The font size ('10pt', '11pt' or '12pt').
114 | #
115 | # 'pointsize': '10pt',
116 |
117 | # Additional stuff for the LaTeX preamble.
118 | #
119 | # 'preamble': '',
120 |
121 | # Latex figure (float) alignment
122 | #
123 | # 'figure_align': 'htbp',
124 | }
125 |
126 | # Grouping the document tree into LaTeX files. List of tuples
127 | # (source start file, target name, title,
128 | # author, documentclass [howto, manual, or own class]).
129 | latex_documents = [
130 | (
131 | master_doc,
132 | 'md-toc.tex',
133 | 'md-toc Documentation',
134 | 'Franco Masotti',
135 | 'manual',
136 | ),
137 | ]
138 |
139 | # -- Options for manual page output ---------------------------------------
140 |
141 | # One entry per manual page. List of tuples
142 | # (source start file, name, description, authors, manual section).
143 | man_pages = [
144 | (
145 | master_doc,
146 | 'md-toc',
147 | 'md-toc Documentation',
148 | [author],
149 | 1,
150 | ),
151 | ]
152 |
153 | # -- Options for Texinfo output -------------------------------------------
154 |
155 | # Grouping the document tree into Texinfo files. List of tuples
156 | # (source start file, target name, title, author,
157 | # dir menu entry, description, category)
158 | texinfo_documents = [
159 | (
160 | master_doc,
161 | 'md-toc',
162 | 'md-toc Documentation',
163 | author,
164 | 'md-toc',
165 | 'Generate table of contents for markdown files.',
166 | 'Miscellaneous',
167 | ),
168 | ]
169 |
# -- Options for the sphinx_book_theme HTML theme --------------------------

html_theme_options = {
    'repository_provider': 'github',
    'repository_url': 'https://software.franco.net.eu.org/frnmst/md-toc',
    'use_repository_button': True,
    'use_download_button': True,
    'use_issues_button': True,
    # Banner text passed to the theme's 'announcement' option.
    'announcement':
    '⚠️ starting from version 9 all the functions are only accessible via the full module path. For example: md_toc.build_toc(...) is now md_toc.api.build_toc(...) ⚠️'
}

html_baseurl = 'https://docs.franco.net.eu.org/md-toc/'

# ``pygments_style`` is already set to 'default' in the general
# configuration section of this file, so it is not assigned again here.
html_last_updated_fmt = '%Y-%m-%d %H:%M:%S %z'

# sphinx_copybutton: character marking a continued line in copied snippets.
copybutton_line_continuation_character = '\\'

# -- Options for Epub output ----------------------------------------------

epub_theme = 'epub'
epub_author = 'Franco Masotti'
epub_theme_options = {
    'relbar1': False,
    'footer': False,
}
epub_css_style = [
    'css/epub.css',
]
201 |
--------------------------------------------------------------------------------
/asciinema/md_toc_asciinema_7_1_0.json:
--------------------------------------------------------------------------------
1 | {"version": 2, "width": 83, "height": 46, "timestamp": 1611160459, "env": {"SHELL": "/bin/bash", "TERM": "rxvt-unicode-256color"}}
2 | [0.006135, "o", "Running a demo to show some of python -m md_toc's capabilities...\r\n\r\n"]
3 | [1.007122, "o", "$ python -m md_toc -h\r\n"]
4 | [1.205091, "o", "usage: __main__.py [-h] [-c | -i] [-l] [-m TOC_MARKER] [-p] [-s SKIP_LINES] [-v]\r\n {github,cmark,gitlab,commonmarker,redcarpet} ...\r\n\r\nMarkdown Table Of Contents: Automatically generate a compliant table\r\nof contents for a markdown file to improve document readability.\r\n\r\noptional arguments:\r\n -h, --help show this help message and exit\r\n -c, --no-list-coherence\r\n avoids checking for TOC list coherence\r\n -i, --no-indentation avoids adding indentations to the TOC\r\n -l, --no-links avoids adding links to the TOC\r\n -m TOC_MARKER, --toc-marker TOC_MARKER\r\n set the string to be used as the marker for positioning\r\n the table of contents. Defaults to \r\n -p, --in-place overwrite the input file\r\n -s SKIP_LINES, --skip-lines SKIP_LINES\r\n skip parsing of the first selected number of lines.\r\n Defaults to 0, i.e. do not skip any lines\r\n -v, --version"]
5 | [1.205153, "o", " show program's version number and exit\r\n\r\nmarkdown parser:\r\n {github,cmark,gitlab,commonmarker,redcarpet}\r\n --help\r\n\r\nPlease read the documentation to understand how each parser works\r\n\r\nReturn values: 0 ok, 1 error, 2 invalid command\r\n\r\nCopyright (C) 2017-2021 Franco Masotti, frnmst\r\nLicense GPLv3+: GNU GPL version 3 or later \r\nThis is free software: you are free to change and redistribute it.\r\nThere is NO WARRANTY, to the extent permitted by law.\r\n"]
6 | [1.22023, "o", "\r\n"]
7 | [2.223355, "o", "Inspecting the file...\r\n$ cat foo.md\r\n"]
8 | [2.223963, "o", "# Hi\r\n\r\n\r\n\r\nhey\r\n\r\n## How are you? !!!\r\n\r\n## fine, thanks\r\n\r\n### Bye\r\n\r\n## Bye bye\r\n\r\n```python\r\n# This is a code\r\n# fence with comments that might represent ATX-style headings\r\n# if not properly parsed\r\n```\r\n\r\nbye\r\n\r\n# boo\r\n"]
9 | [2.224005, "o", "\r\n"]
10 | [3.224999, "o", "Run with default options...\r\n$ python -m md_toc github foo.md\r\n"]
11 | [3.439828, "o", "- [Hi](#hi)\r\n - [How are you? !!!](#how-are-you-----------)\r\n - [fine, thanks](#fine-thanks)\r\n - [Bye](#bye)\r\n - [Bye bye](#bye-bye)\r\n- [boo](#boo)\r\n"]
12 | [3.457865, "o", "\r\n"]
13 | [4.458982, "o", "Ordered list...\r\n$ python -m md_toc gitlab -o '.' foo.md\r\n"]
14 | [4.648557, "o", "1. [Hi](#hi)\r\n 1. [How are you? !!!](#how-are-you-----------)\r\n 2. [fine, thanks](#fine-thanks)\r\n 1. [Bye](#bye)\r\n 3. [Bye bye](#bye-bye)\r\n2. [boo](#boo)\r\n"]
15 | [4.664706, "o", "\r\n"]
16 | [5.665772, "o", "Constant ordered list...\r\n$ python -m md_toc github -c -o '.' foo.md\r\n"]
17 | [5.87788, "o", "1. [Hi](#hi)\r\n 1. [How are you? !!!](#how-are-you-----------)\r\n 1. [fine, thanks](#fine-thanks)\r\n 1. [Bye](#bye)\r\n 1. [Bye bye](#bye-bye)\r\n1. [boo](#boo)\r\n"]
18 | [5.894791, "o", "\r\n"]
19 | [6.895756, "o", "No links...\r\n$ python -m md_toc -l github foo.md\r\n"]
20 | [7.10896, "o", "- Hi\r\n - How are you? !!!\r\n - fine, thanks\r\n - Bye\r\n - Bye bye\r\n- boo\r\n"]
21 | [7.123939, "o", "\r\n"]
22 | [8.125197, "o", "No links and no indentation...\r\n$ python -m md_toc -l -i github foo.md\r\n"]
23 | [8.331182, "o", "- Hi\r\n- How are you? !!!\r\n- fine, thanks\r\n- Bye\r\n- Bye bye\r\n- boo\r\n"]
24 | [8.346253, "o", "\r\n"]
25 | [9.347239, "o", "Inspecting the non-coherent file...\r\n$ cat foo_noncoherent.md\r\n"]
26 | [9.347893, "o", "# Hi\r\n### boo\r\n"]
27 | [9.348052, "o", "\r\n"]
28 | [10.350585, "o", "Trying to parse a non coherent markdown file will raise an exception...\r\n$ python -m md_toc github foo_noncoherent.md\r\n"]
29 | [10.560376, "o", "Traceback (most recent call last):\r\n File \"/home/vm/dev/personal/stable/md-toc/md_toc/__main__.py\", line 34, in main\r\n result = args.func(args)\r\n File \"/home/vm/dev/personal/stable/md-toc/md_toc/cli.py\", line 55, in write_toc\r\n toc_struct = build_multiple_tocs(\r\n File \"/home/vm/dev/personal/stable/md-toc/md_toc/api.py\", line 367, in build_multiple_tocs\r\n build_toc(filenames[file_id], ordered, no_links, no_indentation,\r\n File \"/home/vm/dev/personal/stable/md-toc/md_toc/api.py\", line 276, in build_toc\r\n raise TocDoesNotRenderAsCoherentList\r\nmd_toc.exceptions.TocDoesNotRenderAsCoherentList\r\n"]
30 | [10.576715, "o", "\r\n"]
31 | [11.578079, "o", "Try to parse a non coherent markdown file without checking for coherence...\r\n$ python -m md_toc -c github foo_noncoherent.md\r\n"]
32 | [11.801014, "o", "- [Hi](#hi)\r\n - [boo](#boo)\r\n"]
33 | [11.81771, "o", "\r\n"]
34 | [12.818955, "o", "Use stdin, no links and no indentation...\r\n$ cat foo.md | python -m md_toc -l -i - cmark -u '*'\r\n"]
35 | [13.029036, "o", "* Hi\r\n* How are you? !!!\r\n* fine, thanks\r\n* Bye\r\n* Bye bye\r\n* boo\r\n"]
36 | [13.044662, "o", "\r\n"]
37 | [14.045578, "o", "Inspecting a file where the first 5 lines need to be skipped...\r\n$ cat foo_skiplines.md\r\n"]
38 | [14.046162, "o", "# I want this line to be a comment\r\n#### And this as well\r\n## And this\r\n###### ByeBye\r\n\r\n# Hi\r\n## How\r\n### Are\r\n## You\r\n# Today ?\r\n"]
39 | [14.046329, "o", "\r\n"]
40 | [15.04761, "o", "Using the skip lines option...\r\n$ python -m md_toc -s 5 github foo_skiplines.md\r\n"]
41 | [15.244163, "o", "- [Hi](#hi)\r\n - [How](#how)\r\n - [Are](#are)\r\n - [You](#you)\r\n- [Today ?](#today-)\r\n"]
42 | [15.259783, "o", "\r\n"]
43 | [16.260795, "o", "Editing the file in-place. As you can see, code fence detection still needs to be implemented for redcarpet...\r\n$ python -m md_toc -p redcarpet foo.md\r\n"]
44 | [16.488959, "o", "$ cat foo.md\r\n"]
45 | [16.489472, "o", "# Hi\r\n\r\n\r\n\r\n- [Hi](#hi)\r\n - [How are you? !!!](#how-are-you)\r\n - [fine, thanks](#fine-thanks)\r\n - [Bye](#bye)\r\n - [Bye bye](#bye-bye)\r\n- [This is a code](#this-is-a-code)\r\n- [fence with comments that might represent ATX-style headings](#fence-with-comments-that-might-represent-atx-style-headings)\r\n- [if not properly parsed](#if-not-properly-parsed)\r\n- [boo](#boo)\r\n\r\n\r\n\r\nhey\r\n\r\n## How are you? !!!\r\n\r\n## fine, thanks\r\n\r\n### Bye\r\n\r\n## Bye bye\r\n\r\n```python\r\n# This is a code\r\n# fence with comments that might represent ATX-style headings\r\n# if not properly parsed\r\n```\r\n\r\nbye\r\n\r\n# boo\r\n"]
46 |
--------------------------------------------------------------------------------
/asciinema/md_toc_asciinema_7_2_0.json:
--------------------------------------------------------------------------------
1 | {"version": 2, "width": 83, "height": 46, "timestamp": 1618065590, "env": {"SHELL": "/bin/bash", "TERM": "rxvt-unicode-256color"}}
2 | [0.006035, "o", "Running a demo to show some of python -m md_toc's capabilities...\r\n\r\n"]
3 | [1.007006, "o", "$ python -m md_toc -h\r\n"]
4 | [1.221794, "o", "usage: __main__.py [-h] [-c | -i] [-l] [-m TOC_MARKER] [-p] [-s SKIP_LINES] [-v]\r\n {github,cmark,gitlab,commonmarker,redcarpet} ...\r\n\r\nMarkdown Table Of Contents: Automatically generate a compliant table\r\nof contents for a markdown file to improve document readability.\r\n\r\noptional arguments:\r\n -h, --help show this help message and exit\r\n -c, --no-list-coherence\r\n avoids checking for TOC list coherence\r\n -i, --no-indentation avoids adding indentations to the TOC\r\n -l, --no-links avoids adding links to the TOC\r\n -m TOC_MARKER, --toc-marker TOC_MARKER\r\n set the string to be used as the marker for positioning\r\n the table of contents. Defaults to \r\n -p, --in-place overwrite the input file\r\n -s SKIP_LINES, --skip-lines SKIP_LINES\r\n skip parsing of the first selected number of lines.\r\n Defaults to 0, i.e. do not skip any lines\r\n -v, --version"]
5 | [1.22192, "o", " show program's version number and exit\r\n\r\nmarkdown parser:\r\n {github,cmark,gitlab,commonmarker,redcarpet}\r\n --help\r\n\r\nPlease read the documentation to understand how each parser works\r\n\r\nReturn values: 0 ok, 1 error, 2 invalid command\r\n\r\nCopyright (C) 2017-2021 Franco Masotti, frnmst\r\nLicense GPLv3+: GNU GPL version 3 or later \r\nThis is free software: you are free to change and redistribute it.\r\nThere is NO WARRANTY, to the extent permitted by law.\r\n"]
6 | [1.24011, "o", "\r\n"]
7 | [2.247683, "o", "Inspecting the file...\r\n$ cat foo.md\r\n"]
8 | [2.249411, "o", "# Hi\r\n\r\n\r\n\r\nhey\r\n\r\n## How are you? !!!\r\n\r\n## fine, thanks\r\n\r\n### Bye\r\n\r\n## __Bye bye__ *bye*\r\n\r\n```python\r\n# This is a code\r\n# fence with comments that might represent ATX-style headings\r\n# if not properly parsed\r\n```\r\n\r\nbye\r\n\r\n# boo\r\n"]
9 | [2.250153, "o", "\r\n"]
10 | [3.252191, "o", "Run with default options...\r\n$ python -m md_toc github foo.md\r\n"]
11 | [3.489595, "o", "- [Hi](#hi)\r\n - [How are you? !!!](#how-are-you-----------)\r\n - [fine, thanks](#fine-thanks)\r\n - [Bye](#bye)\r\n - [__Bye bye__ *bye*](#bye-bye-bye)\r\n- [boo](#boo)\r\n"]
12 | [3.50685, "o", "\r\n"]
13 | [4.507947, "o", "Ordered list...\r\n$ python -m md_toc gitlab -o '.' foo.md\r\n"]
14 | [4.735371, "o", "1. [Hi](#hi)\r\n 1. [How are you? !!!](#how-are-you-----------)\r\n 2. [fine, thanks](#fine-thanks)\r\n 1. [Bye](#bye)\r\n 3. [__Bye bye__ *bye*](#bye-bye-bye)\r\n2. [boo](#boo)\r\n"]
15 | [4.752882, "o", "\r\n"]
16 | [5.753784, "o", "Constant ordered list...\r\n$ python -m md_toc github -c -o '.' foo.md\r\n"]
17 | [5.964008, "o", "1. [Hi](#hi)\r\n 1. [How are you? !!!](#how-are-you-----------)\r\n 1. [fine, thanks](#fine-thanks)\r\n 1. [Bye](#bye)\r\n 1. [__Bye bye__ *bye*](#bye-bye-bye)\r\n1. [boo](#boo)\r\n"]
18 | [5.979297, "o", "\r\n"]
19 | [6.980371, "o", "No links...\r\n$ python -m md_toc -l github foo.md\r\n"]
20 | [7.199988, "o", "- Hi\r\n - How are you? !!!\r\n - fine, thanks\r\n - Bye\r\n - __Bye bye__ *bye*\r\n- boo\r\n"]
21 | [7.217292, "o", "\r\n"]
22 | [8.218866, "o", "No links and no indentation...\r\n$ python -m md_toc -l -i github foo.md\r\n"]
23 | [8.438493, "o", "- Hi\r\n- How are you? !!!\r\n- fine, thanks\r\n- Bye\r\n- __Bye bye__ *bye*\r\n- boo\r\n"]
24 | [8.455101, "o", "\r\n"]
25 | [9.456421, "o", "Inspecting the non-coherent file...\r\n$ cat foo_noncoherent.md\r\n"]
26 | [9.458095, "o", "# Hi\r\n### boo\r\n"]
27 | [9.458364, "o", "\r\n"]
28 | [10.46083, "o", "Trying to parse a non coherent markdown file will raise an exception...\r\n$ python -m md_toc github foo_noncoherent.md\r\n"]
29 | [10.681219, "o", "Traceback (most recent call last):\r\n File \"/home/vm/dev/personal/stable/md-toc/md_toc/__main__.py\", line 34, in main\r\n result = args.func(args)\r\n File \"/home/vm/dev/personal/stable/md-toc/md_toc/cli.py\", line 55, in write_toc\r\n toc_struct = build_multiple_tocs(\r\n File \"/home/vm/dev/personal/stable/md-toc/md_toc/api.py\", line 367, in build_multiple_tocs\r\n build_toc(filenames[file_id], ordered, no_links, no_indentation,\r\n File \"/home/vm/dev/personal/stable/md-toc/md_toc/api.py\", line 276, in build_toc\r\n raise TocDoesNotRenderAsCoherentList\r\nmd_toc.exceptions.TocDoesNotRenderAsCoherentList\r\n"]
30 | [10.702213, "o", "\r\n"]
31 | [11.703501, "o", "Try to parse a non coherent markdown file without checking for coherence...\r\n$ python -m md_toc -c github foo_noncoherent.md\r\n"]
32 | [11.923728, "o", "- [Hi](#hi)\r\n - [boo](#boo)\r\n"]
33 | [11.944064, "o", "\r\n"]
34 | [12.945164, "o", "Use stdin, no links and no indentation...\r\n$ cat foo.md | python -m md_toc -l -i - cmark -u '*'\r\n"]
35 | [13.15915, "o", "* Hi\r\n* How are you? !!!\r\n* fine, thanks\r\n* Bye\r\n* __Bye bye__ *bye*\r\n* boo\r\n"]
36 | [13.175019, "o", "\r\n"]
37 | [14.176371, "o", "Inspecting a file where the first 5 lines need to be skipped...\r\n$ cat foo_skiplines.md\r\n"]
38 | [14.177968, "o", "# I want this line to be a comment\r\n#### And this as well\r\n## And this\r\n###### ByeBye\r\n\r\n# Hi\r\n## How\r\n### Are\r\n## You\r\n# Today ?\r\n"]
39 | [14.178236, "o", "\r\n"]
40 | [15.180343, "o", "Using the skip lines option...\r\n$ python -m md_toc -s 5 github foo_skiplines.md\r\n"]
41 | [15.406022, "o", "- [Hi](#hi)\r\n - [How](#how)\r\n - [Are](#are)\r\n - [You](#you)\r\n- [Today ?](#today-)\r\n"]
42 | [15.422074, "o", "\r\n"]
43 | [16.422969, "o", "Editing the file in-place. As you can see, code fence detection still needs to be implemented for redcarpet...\r\n$ python -m md_toc -p redcarpet foo.md\r\n"]
44 | [16.640376, "o", "$ cat foo.md\r\n"]
45 | [16.640929, "o", "# Hi\r\n\r\n\r\n\r\n- [Hi](#hi)\r\n - [How are you? !!!](#how-are-you)\r\n - [fine, thanks](#fine-thanks)\r\n - [Bye](#bye)\r\n - [__Bye bye__ *bye*](#__bye-bye__-bye)\r\n- [This is a code](#this-is-a-code)\r\n- [fence with comments that might represent ATX-style headings](#fence-with-comments-that-might-represent-atx-style-headings)\r\n- [if not properly parsed](#if-not-properly-parsed)\r\n- [boo](#boo)\r\n\r\n\r\n\r\nhey\r\n\r\n## How are you? !!!\r\n\r\n## fine, thanks\r\n\r\n### Bye\r\n\r\n## __Bye bye__ *bye*\r\n\r\n```python\r\n# This is a code\r\n# fence with comments that might represent ATX-style headings\r\n# if not properly parsed\r\n```\r\n\r\nbye\r\n\r\n# boo\r\n"]
46 |
--------------------------------------------------------------------------------
/asciinema/md_toc_asciinema_8_2_0_demo.sh:
--------------------------------------------------------------------------------
1 | #!/bin/bash
2 |
3 | #
4 | # python -m md_toc_asciinema_8_2_0_demo.sh
5 | #
6 | # Copyright (C) 2023 Franco Masotti (see /README.md)
7 | #
8 | # This file is part of md-toc.
9 | #
10 | # md-toc is free software: you can redistribute it and/or modify
11 | # it under the terms of the GNU General Public License as published by
12 | # the Free Software Foundation, either version 3 of the License, or
13 | # (at your option) any later version.
14 | #
15 | # md-toc is distributed in the hope that it will be useful,
16 | # but WITHOUT ANY WARRANTY; without even the implied warranty of
17 | # MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
18 | # GNU General Public License for more details.
19 | #
20 | # You should have received a copy of the GNU General Public License
21 | # along with md-toc. If not, see <https://www.gnu.org/licenses/>.
22 | #
23 |
24 | # Discover python -m md_toc of this repository not the one installed on the system.
25 | export PYTHONPATH='..'
26 | TIMEOUT=1
27 |
28 | gen_foo()
29 | { # (Re)create foo.md, the demo input; the <!--TOC--> line marks the TOC position.
30 |     cat <<-EOF > foo.md
31 | # Hi
32 |
33 | <!--TOC-->
34 |
35 | hey
36 |
37 | ## How are you? !!!
38 |
39 | ## fine, thanks
40 |
41 | ### Bye
42 |
43 | ## __Bye bye__ **bye***
44 |
45 | \`\`\`python
46 | # This is a code
47 | # fence with comments that might represent ATX-style headings
48 | # if not properly parsed
49 | \`\`\`
50 |
51 | bye
52 |
53 | # boo
54 |
55 | # a string with lots of spaces.
56 | EOF
57 | }
58 |
59 | gen_foo_noncoherent()
60 | {
61 |     # Write foo_noncoherent.md: a level-1 heading followed directly by a
62 |     # level-3 one.
63 |     printf '%s\n' \
64 |         '# Hi' '### boo' > foo_noncoherent.md
65 | }
66 |
67 | gen_foo_skiplines()
68 | {
69 |     # Write foo_skiplines.md: four heading-like lines and a blank line,
70 |     # followed by the headings proper.
71 |     printf '%s\n' \
72 |         '# I want this line to be a comment' \
73 |         '#### And this as well' \
74 |         '## And this' \
75 |         '###### ByeBye' \
76 |         '' \
77 |         '# Hi' \
78 |         '## How' \
79 |         '### Are' \
80 |         '## You' '# Today ?' > foo_skiplines.md
81 | }
82 |
83 |
84 | gen_foo_with_toc()
85 | { # foo.md content plus an out-of-date TOC between two <!--TOC--> markers. NOTE(review): the anchors hold runs of dashes, so the headings presumably had runs of spaces - confirm.
86 |     cat <<-EOF > foo_with_toc.md
87 | # Hi
88 |
89 | <!--TOC-->
90 |
91 | - [Hi](#hi)
92 |   - [How are you? !!!](#how-are-you-----------)
93 |   - [fine, thanks](#fine-thanks)
94 |     - [A DIFFERING LINE SO WE CAN CHECK THE DIFF FEATURE](#bye)
95 |   - [__Bye bye__ **bye***](#bye-bye-bye)
96 | - [boo](#boo)
97 | - [a string with lots of spaces.](#a-string--------------with----------------lots-of-spaces)
98 |
99 | <!--TOC-->
100 |
101 | hey
102 |
103 | ## How are you? !!!
104 |
105 | ## fine, thanks
106 |
107 | ### Bye
108 |
109 | ## __Bye bye__ **bye***
110 |
111 | \`\`\`python
112 | # This is a code
113 | # fence with comments that might represent ATX-style headings
114 | # if not properly parsed
115 | \`\`\`
116 |
117 | bye
118 |
119 | # boo
120 |
121 | # a string with lots of spaces.
122 | EOF
123 | }
124 |
125 | ###
126 | ###
127 |
128 | # Demo proper. Every step prints a title and the command as a user would type
129 | # it, runs the command, then prints a separator and pauses for TIMEOUT seconds.
130 | printf "Running a demo to show some of python -m md_toc's capabilities...\n"
131 | printf "\n"
132 | sleep "${TIMEOUT}"
133 |
134 | printf "$ python -m md_toc -h\n"
135 | python -m md_toc -h
136 | printf "\n~~~~\n"
137 | sleep "${TIMEOUT}"
138 |
139 | gen_foo
140 | printf "1. inspecting the file...\n"
141 | printf "$ cat foo.md\n"
142 | cat foo.md
143 | printf "\n~~~~\n"
144 | sleep "${TIMEOUT}"
145 |
146 | gen_foo
147 | printf "2. run with default options...\n"
148 | printf "$ python -m md_toc github foo.md\n"
149 | python -m md_toc github foo.md
150 | printf "\n~~~~\n"
151 | sleep "${TIMEOUT}"
152 |
153 | gen_foo
154 | printf "3. ordered list...\n"
155 | printf "$ python -m md_toc gitlab -o '.' foo.md\n"
156 | python -m md_toc gitlab -o '.' foo.md
157 | printf "\n~~~~\n"
158 | sleep "${TIMEOUT}"
159 |
160 | gen_foo
161 | printf "4. constant ordered list...\n"
162 | printf "$ python -m md_toc github -c -o '.' foo.md\n"
163 | python -m md_toc github -c -o '.' foo.md
164 | printf "\n~~~~\n"
165 | sleep "${TIMEOUT}"
166 |
167 | gen_foo
168 | printf "5. no links...\n"
169 | printf "$ python -m md_toc -l github foo.md\n"
170 | python -m md_toc -l github foo.md
171 | printf "\n~~~~\n"
172 | sleep "${TIMEOUT}"
173 |
174 | gen_foo
175 | printf "6. no links and no indentation...\n"
176 | printf "$ python -m md_toc -l -i github foo.md\n"
177 | python -m md_toc -l -i github foo.md
178 | printf "\n~~~~\n"
179 | sleep "${TIMEOUT}"
180 |
181 | gen_foo_noncoherent
182 | printf "7. inspecting the non-coherent file...\n"
183 | printf "$ cat foo_noncoherent.md\n"
184 | cat foo_noncoherent.md
185 | printf "\n~~~~\n"
186 | sleep "${TIMEOUT}"
187 |
188 | gen_foo_noncoherent
189 | printf "8. trying to parse a non coherent markdown file will raise an exception...\n"
190 | printf "$ python -m md_toc github foo_noncoherent.md\n"
191 | python -m md_toc github foo_noncoherent.md
192 | printf "\n~~~~\n"
193 | sleep "${TIMEOUT}"
194 |
195 | gen_foo_noncoherent
196 | printf "9. try to parse a non coherent markdown file without checking for coherence...\n"
197 | printf "$ python -m md_toc -c github foo_noncoherent.md\n"
198 | python -m md_toc -c github foo_noncoherent.md
199 | printf "\n~~~~\n"
200 | sleep "${TIMEOUT}"
201 |
202 | # NOTE(review): the command shown to the viewer has a '-' argument that the
203 | # command actually run does not have - confirm which form is the intended one.
204 | gen_foo
205 | printf "10. use stdin, no links and no indentation...\n"
206 | printf "$ cat foo.md | python -m md_toc -l -i - cmark -u '*'\n"
207 | cat foo.md | python -m md_toc -l -i cmark -u '*'
208 | printf "\n~~~~\n"
209 | sleep "${TIMEOUT}"
210 |
211 | gen_foo_skiplines
212 | printf "11. inspecting a file where the first 5 lines need to be skipped...\n"
213 | printf "$ cat foo_skiplines.md\n"
214 | cat foo_skiplines.md
215 | printf "\n~~~~\n"
216 | sleep "${TIMEOUT}"
217 |
218 | gen_foo_skiplines
219 | printf "12. using the skip lines option...\n"
220 | printf "$ python -m md_toc -s 5 github foo_skiplines.md\n"
221 | python -m md_toc -s 5 github foo_skiplines.md
222 | printf "\n~~~~\n"
223 | sleep "${TIMEOUT}"
224 |
225 | gen_foo
226 | printf "13. showing GitLab's removal of consecutive dashes in the link destination...\n"
227 | printf "$ python -m md_toc gitlab -l 6 foo.md\n"
228 | python -m md_toc gitlab -l 6 foo.md
229 | printf "\n~~~~\n"
230 | sleep "${TIMEOUT}"
231 |
232 | gen_foo
233 | printf "14. editing file in place with a custom newline...\n"
234 | printf "$ python -m md_toc --newline '\\\r\\\n' -p gitlab -l 6 foo.md\n"
235 | python -m md_toc --newline '\r\n' -p gitlab -l 6 foo.md
236 | printf "\n"
237 | printf "$ cat --show-nonprinting --show-ends foo.md\n"
238 | cat --show-nonprinting --show-ends foo.md
239 | printf "\n~~~~\n"
240 | sleep "${TIMEOUT}"
241 |
242 | gen_foo
243 | printf "15. editing the file in-place. As you can see, code fence \
244 | detection still needs to be implemented for redcarpet...\n"
245 | printf "$ python -m md_toc -p redcarpet foo.md\n"
246 | python -m md_toc -p redcarpet foo.md
247 | printf "$ cat foo.md\n"
248 | cat foo.md
249 | printf "\n~~~~\n"
250 | sleep "${TIMEOUT}"
251 |
252 | gen_foo_with_toc
253 | printf "16. TOC diff. Check if the existing TOC in the file is different from \
254 | the newly generated one. In this case they are different so the return value \
255 | is 128. 0 is returned if they are equal...\n"
256 | printf "$ python -m md_toc -p --diff github -l 6 foo_with_toc.md\n"
257 | python -m md_toc -p --diff github -l 6 foo_with_toc.md
258 | # Save the exit status now: the printf below would overwrite ${?}.
259 | retval=${?}
260 | printf "$ echo \${?}\n"
261 | echo "${retval}"
262 | printf "$ cat foo_with_toc.md\n"
263 | cat foo_with_toc.md
264 |
265 | rm -f foo.md foo_noncoherent.md foo_skiplines.md foo_with_toc.md
266 |
--------------------------------------------------------------------------------
/md_toc/tests/benchmark.py:
--------------------------------------------------------------------------------
1 | #!/usr/bin/env python3
2 | #
3 | # Copyright (C) 2022 Franco Masotti (see /README.md)
4 | #
5 | # This file is part of md-toc.
6 | #
7 | # md-toc is free software: you can redistribute it and/or modify
8 | # it under the terms of the GNU General Public License as published by
9 | # the Free Software Foundation, either version 3 of the License, or
10 | # (at your option) any later version.
11 | #
12 | # md-toc is distributed in the hope that it will be useful,
13 | # but WITHOUT ANY WARRANTY; without even the implied warranty of
14 | # MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
15 | # GNU General Public License for more details.
16 | #
17 | # You should have received a copy of the GNU General Public License
18 | # along with md-toc. If not, see <https://www.gnu.org/licenses/>.
19 | #
20 | r"""A simple benchmark file to be used to check for hidden errors as well."""
21 |
22 | import csv
23 | import ctypes
24 | import multiprocessing
25 | import platform
26 | import random
27 | import secrets
28 | import string
29 | import subprocess
30 | import tempfile
31 | import time
32 | import traceback
33 |
34 | import md_toc
35 |
36 | # ~50M characters
37 | CHAR_SIZE = 1024 * 1024 * 50
38 | ITERATIONS = 100
39 |
40 | # At what nTH character to put a header.
41 | # MIN_HEADER_STEP must be >= 10.
42 | MIN_HEADER_STEP = 10
43 | MAX_HEADER_STEP = 50
44 |
45 | # Do not use '#' as content to avoid triggering an empty link label exception.
46 | alphabet: str = string.printable.replace('#', '')
47 |
48 |
49 | def _generate_batch(size: int) -> bytes:
50 | return bytes(
51 | [secrets.choice(alphabet).encode('UTF-8')[0] for _ in range(size)])
52 |
53 |
54 | def _generate_random_characters(size: int,
55 | min_header_step: int = 10,
56 | max_header_step: int = 1000) -> bytes:
57 | if min_header_step < 10 or max_header_step < min_header_step:
58 | # 10 - 1 = header (maximum 6) + '\n' + space + alphanum char
59 | raise ValueError
60 |
61 | print('generating random file...')
62 |
63 | secret_gen: random.SystemRandom = secrets.SystemRandom()
64 | alphanumerics: str = ''.join([string.ascii_letters, string.digits])
65 |
66 | # Batch size at 1% of total size.
67 | batch_size: int = int(CHAR_SIZE * 0.01)
68 | # Chunk size at 1 per 10**4 of the batch size.
69 | chunk_size: int = int(batch_size * 0.00001)
70 |
71 | with multiprocessing.Pool() as pool:
72 | results = pool.map(_generate_batch,
73 | [batch_size] * (size // batch_size), chunk_size)
74 |
75 | string_buf = ctypes.create_string_buffer(
76 | b''.join(results),
77 | size=size + 1,
78 | )
79 |
80 | print('adding headers to file...')
81 |
82 | i: int = 0
83 | j: int = 1
84 | while i + j + 3 < size:
85 | newline = b'\n'
86 | heading = b'#' * j
87 | space = b' '
88 | random_char = secrets.choice(alphanumerics).encode('utf-8')
89 |
90 | ctypes.memmove(ctypes.byref(string_buf, i), newline, 1)
91 | ctypes.memmove(ctypes.byref(string_buf, i + 1), heading, len(heading))
92 | ctypes.memmove(ctypes.byref(string_buf, i + 1 + j), space, 1)
93 |
94 | # Replace next character so we are sure never to raise the empty
95 | # link label exception.
96 | ctypes.memmove(ctypes.byref(string_buf, i + 2 + j), random_char, 1)
97 |
98 | # Reset header level.
99 | j = (j % 6) + 1
100 | i += secret_gen.randrange(min_header_step, max_header_step)
101 |
102 | return string_buf.value
103 |
104 |
105 | if __name__ == '__main__':
106 | ok: bool = True
107 | i: int = 0
108 | j: int = 0
109 | total_iterations: int
110 | percent_progress: int
111 | current_parser_counter: int = 0
112 | avg: list
113 | average: dict = dict()
114 | total: dict = dict()
115 |
116 | parsers = ['github', 'cmark', 'redcarpet', 'gitlab']
117 | parsers.sort()
118 |
119 | total_iterations = len(parsers) * ITERATIONS
120 | for current_parser_counter, p in enumerate(parsers):
121 | print('parser: ' + p + ', ' + str(ITERATIONS) + ' iterations with ' +
122 | str(CHAR_SIZE) + ' characters each')
123 | print('min step: ' + str(MIN_HEADER_STEP) + ' , max step: ' +
124 | str(MAX_HEADER_STEP))
125 | i = 0
126 | avg = list()
127 | while ok and i < ITERATIONS:
128 | print('parser ' + str(p) + ' (' + str(current_parser_counter + 1) +
129 | ' of ' + str(len(parsers)) + '), iteration: ' + str(i + 1) +
130 | ' of ' + str(ITERATIONS))
131 | percent_progress = (j / total_iterations) * 100
132 | print('total progress percent = ' + str(percent_progress))
133 | with tempfile.NamedTemporaryFile() as fp:
134 | fp.write(
135 | _generate_random_characters(
136 | CHAR_SIZE,
137 | min_header_step=MIN_HEADER_STEP,
138 | max_header_step=MAX_HEADER_STEP))
139 | print('building TOC...')
140 | try:
141 | start = time.time()
142 | md_toc.api.build_toc(filename=fp.name,
143 | parser=p,
144 | keep_header_levels=3)
145 | end = time.time()
146 | avg.append(end - start)
147 | print('total_time: ' + str(avg[-1]))
148 | except Exception:
149 | ok = False
150 | traceback.print_exc()
151 | i += 1
152 | j += 1
153 | total[p] = sum(avg)
154 | average[p] = total[p] / len(avg)
155 | print('total = ' + str(total[p]) + '\n')
156 | print('avg = ' + str(average[p]) + '\n')
157 |
158 | # Write CSV file.
159 | # Fields
160 | # 0 md_toc git hash version
161 | # 1 parser name
162 | # 2 number of characters
163 | # 3 number of iterations
164 | # 4 min_header_step
165 | # 5 max_header_step
166 | # 6 total execution time in seconds
167 | # 7 average execution time in seconds
168 | # 8->n system information
169 | # File header:
170 | # md_toc_git_hash,markdown_parser,total_characters,iterations,min_header_step,max_header_step,total,avg,platform_python_version,platform_architecture,platform_machine,platform_python_implementation,platform_python_compiler,platform_libc_ver
171 | md_toc_version = subprocess.check_output(
172 | ['/usr/bin/git', 'rev-parse', 'HEAD']).decode('UTF-8').strip()
173 | with open('benchmark.csv', 'a') as csvfile:
174 | spamwriter = csv.writer(csvfile,
175 | delimiter=',',
176 | quotechar='|',
177 | quoting=csv.QUOTE_MINIMAL)
178 | for p in parsers:
179 | spamwriter.writerow([
180 | md_toc_version, p, CHAR_SIZE, ITERATIONS, MIN_HEADER_STEP,
181 | MIN_HEADER_STEP, total[p], average[p],
182 | platform.python_version(), ' '.join(platform.architecture()),
183 | platform.machine(),
184 | platform.python_implementation(),
185 | platform.python_compiler(), ' '.join(platform.libc_ver())
186 | ])
187 |
--------------------------------------------------------------------------------
/asciinema/md_toc_asciinema_8_0_0.json:
--------------------------------------------------------------------------------
1 | {"version": 2, "width": 83, "height": 46, "timestamp": 1622221077, "env": {"SHELL": "/bin/bash", "TERM": "rxvt-unicode-256color"}}
2 | [0.008049, "o", "Running a demo to show some of python -m md_toc's capabilities...\r\n\r\n"]
3 | [1.015122, "o", "$ python -m md_toc -h\r\n"]
4 | [1.356915, "o", "usage: __main__.py [-h] [-c | -i] [-l] [-m TOC_MARKER] [-n NEWLINE_STRING]\r\n [-p] [-s SKIP_LINES] [-v]\r\n {github,commonmarker,gitlab,cmark,goldmark,redcarpet} ...\r\n\r\nMarkdown Table Of Contents: Automatically generate a compliant table\r\nof contents for a markdown file to improve document readability.\r\n\r\noptional arguments:\r\n -h, --help show this help message and exit\r\n -c, --no-list-coherence\r\n avoids checking for TOC list coherence\r\n -i, --no-indentation avoids adding indentations to the TOC\r\n -l, --no-links avoids adding links to the TOC\r\n -m TOC_MARKER, --toc-marker TOC_MARKER\r\n set the string to be used as the marker for\r\n positioning the table of contents. Defaults to \r\n -n NEWLINE_STRING, --newline-string NEWLINE_STRING\r\n the string used to separate the lines of the TOC. Use\r\n quotes to delimit t"]
5 | [1.357019, "o", "he string. Defaults to '\\n'\r\n -p, --in-place overwrite the input file\r\n -s SKIP_LINES, --skip-lines SKIP_LINES\r\n skip parsing of the first selected number of lines.\r\n Defaults to 0, i.e. do not skip any lines\r\n -v, --version show program's version number and exit\r\n\r\nmarkdown parser:\r\n {github,commonmarker,gitlab,cmark,goldmark,redcarpet}\r\n --help\r\n\r\nPlease read the documentation to understand how each parser works\r\n\r\nReturn values: 0 ok, 1 error, 2 invalid command\r\n\r\nCopyright (C) 2017-2021 Franco Masotti, frnmst\r\nLicense GPLv3+: GNU GPL version 3 or later \r\nThis is free software: you are free to change and redistribute it.\r\nThere is NO WARRANTY, to the extent permitted by law.\r\n"]
6 | [1.449011, "o", "\r\n"]
7 | [2.457318, "o", "Inspecting the file...\r\n$ cat foo.md\r\n"]
8 | [2.458509, "o", "# Hi\r\n\r\n\r\n\r\nhey\r\n\r\n## How are you? !!!\r\n\r\n## fine, thanks\r\n\r\n### Bye\r\n\r\n## __Bye bye__ **bye***\r\n\r\n```python\r\n# This is a code\r\n# fence with comments that might represent ATX-style headings\r\n# if not properly parsed\r\n```\r\n\r\nbye\r\n\r\n# boo\r\n\r\n# a string with lots of spaces.\r\n"]
9 | [2.459323, "o", "\r\n"]
10 | [3.467115, "o", "Run with default options...\r\n$ python -m md_toc github foo.md\r\n"]
11 | [3.932094, "o", "- [Hi](#hi)\r\n - [How are you? !!!](#how-are-you-----------)\r\n - [fine, thanks](#fine-thanks)\r\n - [Bye](#bye)\r\n - [__Bye bye__ **bye***](#bye-bye-bye)\r\n- [boo](#boo)\r\n- [a string with lots of spaces.](#a-string--------------with----------------lots-of-spaces)\r\n"]
12 | [4.018819, "o", "\r\n"]
13 | [5.020617, "o", "Ordered list...\r\n$ python -m md_toc gitlab -o '.' foo.md\r\n"]
14 | [5.43953, "o", "1. [Hi](#hi)\r\n 1. [How are you? !!!](#how-are-you-)\r\n 2. [fine, thanks](#fine-thanks)\r\n 1. [Bye](#bye)\r\n 3. [__Bye bye__ **bye***](#bye-bye-bye)\r\n2. [boo](#boo)\r\n3. [a string with lots of spaces.](#a-string-with-lots-of-spaces)\r\n"]
15 | [5.499317, "o", "\r\n"]
16 | [6.501616, "o", "Constant ordered list...\r\n$ python -m md_toc github -c -o '.' foo.md\r\n"]
17 | [6.897007, "o", "1. [Hi](#hi)\r\n 1. [How are you? !!!](#how-are-you-----------)\r\n 1. [fine, thanks](#fine-thanks)\r\n 1. [Bye](#bye)\r\n 1. [__Bye bye__ **bye***](#bye-bye-bye)\r\n1. [boo](#boo)\r\n1. [a string with lots of spaces.](#a-string--------------with----------------lots-of-spaces)\r\n"]
18 | [6.963218, "o", "\r\n"]
19 | [7.965299, "o", "No links...\r\n$ python -m md_toc -l github foo.md\r\n"]
20 | [8.342501, "o", "- Hi\r\n - How are you? !!!\r\n - fine, thanks\r\n - Bye\r\n - __Bye bye__ **bye***\r\n- boo\r\n- a string with lots of spaces.\r\n"]
21 | [8.419095, "o", "\r\n"]
22 | [9.421119, "o", "No links and no indentation...\r\n$ python -m md_toc -l -i github foo.md\r\n"]
23 | [9.951343, "o", "- Hi\r\n- How are you? !!!\r\n- fine, thanks\r\n- Bye\r\n- __Bye bye__ **bye***\r\n- boo\r\n- a string with lots of spaces.\r\n"]
24 | [10.052129, "o", "\r\n"]
25 | [11.053621, "o", "Inspecting the non-coherent file...\r\n$ cat foo_noncoherent.md\r\n"]
26 | [11.055057, "o", "# Hi\r\n### boo\r\n"]
27 | [11.055449, "o", "\r\n"]
28 | [12.05725, "o", "Trying to parse a non coherent markdown file will raise an exception...\r\n$ python -m md_toc github foo_noncoherent.md\r\n"]
29 | [12.513556, "o", "Traceback (most recent call last):\r\n File \"/home/vm/dev/personal/stable/md-toc/md_toc/__main__.py\", line 34, in main\r\n result = args.func(args)\r\n File \"/home/vm/dev/personal/stable/md-toc/md_toc/cli.py\", line 73, in write_toc\r\n newline_string=newline_string)\r\n File \"/home/vm/dev/personal/stable/md-toc/md_toc/api.py\", line 842, in build_multiple_tocs\r\n list_marker, skip_lines, constant_ordered_list, newline_string))\r\n File \"/home/vm/dev/personal/stable/md-toc/md_toc/api.py\", line 748, in build_toc\r\n raise TocDoesNotRenderAsCoherentList\r\nmd_toc.exceptions.TocDoesNotRenderAsCoherentList\r\n"]
30 | [12.598876, "o", "\r\n"]
31 | [13.601292, "o", "Try to parse a non coherent markdown file without checking for coherence...\r\n$ python -m md_toc -c github foo_noncoherent.md\r\n"]
32 | [13.957473, "o", "- [Hi](#hi)\r\n - [boo](#boo)\r\n"]
33 | [14.008105, "o", "\r\n"]
34 | [15.008974, "o", "Use stdin, no links and no indentation...\r\n$ cat foo.md | python -m md_toc -l -i - cmark -u '*'\r\n"]
35 | [15.429645, "o", "* Hi\r\n* How are you? !!!\r\n* fine, thanks\r\n* Bye\r\n* __Bye bye__ **bye***\r\n* boo\r\n* a string with lots of spaces.\r\n"]
36 | [15.514497, "o", "\r\n"]
37 | [16.516463, "o", "Inspecting a file where the first 5 lines need to be skipped...\r\n$ cat foo_skiplines.md\r\n# I want this line to be a comment\r\n#### And this as well\r\n## And this\r\n###### ByeBye\r\n\r\n# Hi\r\n## How\r\n### Are\r\n## You\r\n# Today ?\r\n"]
38 | [16.519302, "o", "\r\n"]
39 | [17.519869, "o", "Using the skip lines option...\r\n$ python -m md_toc -s 5 github foo_skiplines.md\r\n"]
40 | [17.935135, "o", "- [Hi](#hi)\r\n - [How](#how)\r\n - [Are](#are)\r\n - [You](#you)\r\n- [Today ?](#today-)\r\n"]
41 | [18.007388, "o", "\r\n"]
42 | [19.009973, "o", "Showing Gitlab's removal of consecutive dashes in the link destination...\r\n$ python -m md_toc gitlab -l 6 foo.md\r\n"]
43 | [19.41085, "o", "- [Hi](#hi)\r\n - [How are you? !!!](#how-are-you-)\r\n - [fine, thanks](#fine-thanks)\r\n - [Bye](#bye)\r\n - [__Bye bye__ **bye***](#bye-bye-bye)\r\n- [boo](#boo)\r\n- [a string with lots of spaces.](#a-string-with-lots-of-spaces)\r\n"]
44 | [19.483543, "o", "\r\n"]
45 | [20.486416, "o", "Editing the file in-place. As you can see, code fence detection still needs to be implemented for redcarpet...\r\n$ python -m md_toc -p redcarpet foo.md\r\n"]
46 | [21.00377, "o", "$ cat foo.md\r\n"]
47 | [21.007926, "o", "# Hi\r\n\r\n\r\n\r\n- [Hi](#hi)\r\n - [How are you? !!!](#how-are-you)\r\n - [fine, thanks](#fine-thanks)\r\n - [Bye](#bye)\r\n - [__Bye bye__ **bye***](#small>__bye-bye__-bye-small>)\r\n- [This is a code](#this-is-a-code)\r\n- [fence with comments that might represent ATX-style headings](#fence-with-comments-that-might-represent-atx-style-headings)\r\n- [if not properly parsed](#if-not-properly-parsed)\r\n- [boo](#boo)\r\n- [a string with lots of spaces.](#a-string-with-lots-of-spaces)\r\n\r\n\r\n\r\nhey\r\n\r\n## How are you? !!!\r\n\r\n## fine, thanks\r\n\r\n### Bye\r\n\r\n## __Bye bye__ **bye***\r\n\r\n```python\r\n# This is a code\r\n# fence with comments that might represent ATX-style headings\r\n# if not properly parsed\r\n```\r\n\r\nbye\r\n\r\n# boo\r\n\r\n# a string with lots of spaces.\r\n"]
48 |
--------------------------------------------------------------------------------