├── .flake8 ├── .github └── workflows │ ├── ensure_green.yml │ └── release.yml ├── .gitignore ├── CONTRIBUTING.md ├── Dockerfile ├── IMPLEMENTATION.md ├── LICENSE.txt ├── Makefile ├── README.md ├── docker-compose.yml ├── markflow ├── __init__.py ├── __main__.py ├── _argparse.py ├── _utils │ ├── __init__.py │ ├── _utils.py │ └── textwrap.py ├── detectors │ ├── __init__.py │ ├── _lines.py │ ├── atx_heading.py │ ├── blank_line.py │ ├── block_quote.py │ ├── bullet_list.py │ ├── fenced_code_block.py │ ├── indented_code_block.py │ ├── link_reference_definition.py │ ├── ordered_list.py │ ├── paragraph.py │ ├── setext_heading.py │ ├── table.py │ └── thematic_break.py ├── exceptions.py ├── formatters │ ├── __init__.py │ ├── atx_heading.py │ ├── base.py │ ├── blank_line.py │ ├── block_quote.py │ ├── fenced_code_block.py │ ├── indented_code_block.py │ ├── link_reference_definition.py │ ├── lists.py │ ├── paragraph.py │ ├── setext_heading.py │ ├── table.py │ └── thematic_break.py ├── parser.py ├── reformat_markdown.py └── typing.py ├── poetry-aliases.sh ├── poetry.lock ├── pyproject.toml ├── stubs ├── commonmark.pyi ├── pytest.pyi └── rich │ ├── __init__.pyi │ ├── console.pyi │ ├── highlighter.pyi │ ├── logging.pyi │ ├── markdown.pyi │ └── style.pyi └── tests ├── __init__.py ├── files ├── 0000_in_base.md ├── 0000_out_base.md ├── 0001_in_blank.md ├── 0001_out_blank.md ├── 0002_in_lists.md ├── 0002_out_lists.md ├── 0003_in_too_many_endling_newlines.md ├── 0003_out_too_many_endling_newlines.md ├── 0004_in_multiple_code_blocks.md ├── 0004_out_multiple_code_blocks.md ├── 0005_in_headings.md ├── 0005_out_headings.md ├── 0006_in_tables.md ├── 0006_out_tables.md ├── 0007_in_link_reference_definitions.md ├── 0007_out_link_reference_definitions.md ├── 0008_in_indented_code_blocks.md ├── 0008_out_indented_code_blocks.md ├── 0009_in_misnumbering.md ├── 0009_out_misnumbering.md ├── 0010_in_list_with_bold.md ├── 0010_out_list_with_bold.md ├── 0011_in_horizontal_lines.md ├── 
0011_out_horizontal_lines.md ├── 0012_in_block_quotes.md ├── 0012_out_block_quotes.md ├── 0013_in_list_with_horizontal_line.md ├── 0013_out_list_with_horizontal_line.md ├── 0014_in_code_block_that_looks_like_a_heading.md ├── 0014_out_code_block_that_looks_like_a_heading.md ├── 0015_in_ordered_lists_with_code_blocks.md ├── 0015_out_ordered_lists_with_code_blocks.md ├── 0016_in_lists_starting_at_not_one.md ├── 0016_out_lists_starting_at_not_one.md ├── 0017_in_one_lists_with_many_newlines.md ├── 0017_out_one_lists_with_many_newlines.md ├── 0018_in_urls_with_trailing_characters.md ├── 0018_out_urls_with_trailing_characters.md ├── 0019_in_table_alignment.md ├── 0019_out_table_alignment.md ├── 0020_in_forced_paragraphs.md ├── 0020_out_forced_paragraphs.md ├── 0021_in_separators.md ├── 0021_out_separators.md ├── 0022_in_link_reference_definition_at_end_of_file.md ├── 0022_out_link_reference_definition_at_end_of_file.md ├── 0023_in_setext_heading_close_to_block_quote.md └── 0023_out_setext_heading_close_to_block_quote.md ├── pytest.ini ├── test_atx_heading.py ├── test_block_quote.py ├── test_fenced_code_block.py ├── test_files.py ├── test_horizontal_line.py ├── test_indented_code_block.py ├── test_link_reference_definition.py ├── test_list.py ├── test_paragraph.py ├── test_separator.py ├── test_setext_heading.py ├── test_table.py ├── test_utils.py └── util.py /.flake8: -------------------------------------------------------------------------------- 1 | [flake8] 2 | ignore = 3 | # Where black disagrees 4 | E203, 5 | W503 6 | max-line-length = 88 7 | -------------------------------------------------------------------------------- /.github/workflows/ensure_green.yml: -------------------------------------------------------------------------------- 1 | name: Test changes 2 | 3 | on: 4 | push: 5 | branches: 6 | - '**' 7 | pull_request: 8 | branches: 9 | - '**' 10 | 11 | jobs: 12 | audit: 13 | runs-on: ubuntu-latest 14 | steps: 15 | - uses: actions/checkout@v2 16 | - name: Set up 
Python 3.8 17 | uses: actions/setup-python@v2 18 | with: 19 | python-version: 3.8 20 | - name: Install dependencies 21 | run: | 22 | sudo apt-get update && sudo apt-get install -y make 23 | python3 -m pip install --upgrade pip 24 | pip install setuptools wheel twine 25 | curl -sSL https://raw.githubusercontent.com/python-poetry/poetry/master/get-poetry.py | python3 26 | source $HOME/.poetry/env 27 | poetry self update 28 | - name: Audit 29 | run: | 30 | export PATH="$HOME/.poetry/bin:${PATH}" 31 | make audits 32 | test: 33 | runs-on: ubuntu-latest 34 | strategy: 35 | matrix: 36 | python-version: [3.6, 3.7, 3.8, 3.9] 37 | steps: 38 | - uses: actions/checkout@v2 39 | - name: Set up Python ${{ matrix.python-version }} 40 | uses: actions/setup-python@v2 41 | with: 42 | python-version: ${{ matrix.python-version }} 43 | - name: Install dependencies 44 | run: | 45 | sudo apt-get update && sudo apt-get install -y make 46 | python3 -m pip install --upgrade pip 47 | pip install setuptools wheel twine 48 | curl -sSL https://raw.githubusercontent.com/python-poetry/poetry/master/get-poetry.py | python3 49 | source $HOME/.poetry/env 50 | poetry self update 51 | - name: Test 52 | run: | 53 | export PATH="$HOME/.poetry/bin:${PATH}" 54 | make tests_${{ matrix.python-version }} 55 | -------------------------------------------------------------------------------- /.github/workflows/release.yml: -------------------------------------------------------------------------------- 1 | name: Upload Python Package 2 | 3 | on: 4 | release: 5 | types: [created] 6 | 7 | jobs: 8 | deploy: 9 | runs-on: ubuntu-latest 10 | steps: 11 | - uses: actions/checkout@v2 12 | - name: Set up Python 13 | uses: actions/setup-python@v2 14 | with: 15 | python-version: '3.x' 16 | - name: Install dependencies 17 | run: | 18 | sudo apt-get update && sudo apt-get install -y make 19 | python -m pip install --upgrade pip 20 | pip install setuptools wheel twine 21 | curl -sSL 
https://raw.githubusercontent.com/python-poetry/poetry/master/get-poetry.py | python 22 | - name: Build and publish 23 | env: 24 | POETRY_PYPI_TOKEN_PYPI: ${{ secrets.POETRY_PYPI_TOKEN_PYPI }} 25 | run: | 26 | export PATH="$HOME/.poetry/bin:${PATH}" 27 | make package 28 | poetry publish 29 | -------------------------------------------------------------------------------- /.gitignore: -------------------------------------------------------------------------------- 1 | # PYTHON 2 | .pytest_cache 3 | __pycache__ 4 | *.pyc 5 | *.egg-info 6 | 7 | # MACS 8 | .DS_Store 9 | 10 | # TESTS 11 | junit.xml 12 | # coverage 13 | .coverage 14 | coverage.xml 15 | htmlcov 16 | # mypy 17 | .mypy_cache 18 | 19 | # POETRY 20 | /dist/ 21 | /pyproject.tmp 22 | /setup.py 23 | -------------------------------------------------------------------------------- /CONTRIBUTING.md: -------------------------------------------------------------------------------- 1 | # Contributing to MarkFlow 2 | 3 | The following are the contributing guidelines when making changes to this project. 4 | 5 | Development depends on the installation of **make**, for coordinating execution of the 6 | underlying tools, **poetry**, for managing our **Python** environments and the 7 | **MarkFlow** package itself, and the supported versions of **Python** (current, 3.6, 8 | 3.7, 3.8, and 3.9). You can generally get away with only having one version of 9 | **Python** installed and testing against that. The GitHub builds will validate your 10 | changes against all versions of **Python** anyway. 11 | 12 | ## Making Changes 13 | 14 | ([Step 0: Checkout the documentation on the implementation of the tool.]( 15 | IMPLEMENTATION.md)) 16 | 17 | To check to see if your submission is buildable, simply run `make`. If everything 18 | passes, you are good to go on to [Submitting Changes](#submitting-changes). To 19 | understand what that's doing, though, read on. 
20 | 21 | ### Running Tools 22 | 23 | All tools that are available in the poetry environment (including **MarkFlow**) can 24 | easily be added to your command line as the commands themselves by running 25 | `. poetry-aliases.sh`. An additional alias, `markflow-markflow`, is also provided to 26 | easily run **MarkFlow** against non-test files. 27 | 28 | ### Running Audits 29 | 30 | We run checks against every commit to ensure all files follow standards we enforce. The 31 | audits we run are as follows: 32 | 33 | ```shell 34 | # Ensure all Markdown files would not be reformatted by us :) 35 | make markflow 36 | # Ensure all python files would not be reformatted by black 37 | make black 38 | # Ensure our imports are all sorted 39 | make isort 40 | # Ensure all python files follow a few other rules enforced by flake8 41 | make flake8 42 | # Run all of the above. Every command will be run regardless of the others failing. 43 | make audits 44 | ``` 45 | 46 | The poetry environment comes with [**black**][black], and of course **MarkFlow**, so you 47 | can quickly run the tools with `poetry run black` or `poetry run markflow` or just drop 48 | into a shell with them by running `poetry shell`. 49 | 50 | [black]: https://black.readthedocs.io/en/latest/ 51 | 52 | ### Running Tests 53 | 54 | We test our code through unit and system tests that are run by [**pytest**][pytest] and 55 | strict type checking enforced via [**mypy**][mypy]. The commands to run them are as 56 | follows: 57 | 58 | ```shell 59 | # Run tests in /tests 60 | make utests 61 | # Run mypy against the markflow library 62 | make mypy_lib 63 | # Run mypy against our tests 64 | make mypy_tests 65 | # Run all of the above in order, exiting on the first failure. 66 | make tests 67 | ``` 68 | 69 | Why do we exit on first failure unlike audits? Tests are noisier and this makes the 70 | failures more obvious. In most cases the audits are unlikely to fill up your screen, but 71 | even then. 
72 | 73 | [mypy]: http://mypy-lang.org/ 74 | [pytest]: https://docs.pytest.org/en/latest/ 75 | 76 | ### Submitting Changes 77 | 78 | Once you've made all your changes, create a [pull request][pr]. Someone will be with you 79 | shortly. 80 | 81 | If you are correcting a bug you've seen when processing a **Markdown** file, add it and 82 | the expected output to `tests/files`. In the folder, inputs and outputs are matched up 83 | based on their leading numeric. So, `0010_in_tests.md`'s expected output is 84 | `0010_out_tests.md`. The [README has a section on anonymizing text](README.md#issues) if 85 | you're worried about leaking sensitive information. 86 | 87 | [pr]: https://github.com/duo-labs/markflow/pulls 88 | 89 | ## Proposing Changes 90 | 91 | If you want to propose a rule change, like making inline code blocks split across lines, 92 | feel free to open an [issue][issues]. 93 | 94 | [issues]: https://github.com/duo-labs/markflow/issues 95 | 96 | # Duplicating CI Locally 97 | 98 | The build in CI simply runs the make commands in the container defined by the root 99 | `Dockerfile`. You'll of course need [**docker**][docker]. Once you do, to build the 100 | image run: 101 | 102 | ```shell 103 | make container 104 | ``` 105 | 106 | To run commands in the container, you'll need to mount our source. 
The following should 107 | do the trick when run from the project's directory: 108 | 109 | ```shell 110 | docker run -v "`pwd`:/src" -w /src markflow_builder make 111 | # Build the wheel 112 | docker run -v "`pwd`:/src" -w /src markflow_builder make package 113 | ``` 114 | 115 | [docker]: https://www.docker.com/ 116 | -------------------------------------------------------------------------------- /Dockerfile: -------------------------------------------------------------------------------- 1 | FROM ubuntu:20.04 2 | 3 | RUN apt-get update && \ 4 | apt-get install -y software-properties-common && \ 5 | add-apt-repository ppa:deadsnakes/ppa 6 | 7 | RUN apt-get update && apt-get -y upgrade 8 | 9 | RUN apt-get install -y git 10 | 11 | RUN apt-get install -y python3.6 python3.7 python3.8 python-3.9 python3-pip 12 | RUN apt-get install -y python3.6-venv python3.7-venv python3.8-venv python3.9-venv 13 | RUN ln -s /usr/bin/python3 /usr/bin/python 14 | 15 | RUN apt-get install -y curl 16 | RUN curl -sSL https://raw.githubusercontent.com/python-poetry/poetry/master/get-poetry.py | python 17 | 18 | ENV PATH="/root/.poetry/bin:${PATH}" 19 | 20 | RUN apt-get install -y make 21 | 22 | # Setup our virtual environments. Sure the intermediate layers are large, but this 23 | # doesn't change often, and can take a while. 
24 | #ADD pyproject.toml /src/ 25 | #ADD poetry.lock /src/ 26 | #WORKDIR /src 27 | #RUN poetry env use 3.6 && poetry install && \ 28 | # poetry env use 3.7 && poetry install && \ 29 | # poetry env use 3.8 && poetry install && \ 30 | # rm -rf /src 31 | -------------------------------------------------------------------------------- /IMPLEMENTATION.md: -------------------------------------------------------------------------------- 1 | # MarkFlow Behind Scenes 2 | 3 | MarkFlow is a pretty simple tool that formats code in three steps: 4 | 5 | * [Parse the text](#parsing-markdown) 6 | * [Reformat each section and stitch the sections back together](#reformatting-sections) 7 | * [Rerun with the output as the input to guarantee consistency](#ensuring-consistency) 8 | 9 | A potential future step would be to render the text and ensure consistency outside of 10 | some rules (see [Future Architecture Ideas](#future-architecture-ideas)). 11 | 12 | ## Parsing Markdown 13 | 14 | We parse **Markdown** by continuously iterating over a series of splitter functions. 15 | Each function corresponds to a different [CommonMark][commonmark_spec] section type. 16 | They take in a list of lines. If that list starts with their section type, they return a 17 | `tuple` of that section (as a `list` of lines) and the remaining text (also as a `list` 18 | of lines). We use lists of lines as a performance gain, so we don't have to write (and 19 | execute) `lst = str_.splitlines()` and `"\n".join(lst)` all over the place. Otherwise, 20 | they return an empty `list` as the first member and the `list` of lines passed in as the 21 | second. Once we detect a section, we continue parsing the remaining lines. 22 | 23 | The functions are designed to be mutually exclusive: if one splitter splits the text, no 24 | others should. This isn't really tested (hint, hint), but is hopefully achieved by 25 | adhering to the [CommonMark][commonmark_spec] standard. 
26 | 27 | [commonmark_spec]: https://spec.commonmark.org/0.29/ 28 | 29 | ## Reformatting Sections 30 | 31 | The parsed text is then passed to the formatter class responsible for knowing how to 32 | format its section type. The various enforced rules can be checked out in the [README]( 33 | README.md), but most implementations are fairly straightforward. More complicated ones 34 | should be fairly well documented. (If you see one that is confusing, open an [issue][ 35 | issues].) 36 | 37 | Some section types are recursive, namely lists and block quotes. These end up calling 38 | back into the formatter again. We're not too worried about stack overflows since the 39 | **Python** stack limit and the depth of recursive **Markdown** definitions by human 40 | beings should differ by several orders of magnitude (in favor of **Python**). 41 | 42 | [issues]: https://github.com/duo-labs/markflow/issues 43 | 44 | ## Ensuring Consistency 45 | 46 | Once everything is reformatted, that output is taken and then run through the parsing 47 | and reformatting steps. The resulting document is then compared to our original 48 | calculation to ensure they are the same. This allows us to be more confident that we 49 | didn't mess up formatting since we calculate the same document structure between the 50 | initial and resulting documents. 51 | 52 | ## Future Architecture Ideas 53 | 54 | Here are some random ramblings on the future of **MarkFlow**. 55 | 56 | ### Plugins 57 | 58 | The tool supports tables, but they are actually extensions and not a feature of the 59 | [CommonMark][commonmark_spec] spec. Support for plugins could be added with tables being 60 | the first adopter. This is likely not a big deal right now as there are probably not 61 | many people making tables without necessary render extensions that wouldn't want to 62 | still have them prettied up. Nor are people clamoring for support for other extensions 63 | to the language. 
64 | 65 | [commonmark_spec]: https://spec.commonmark.org/0.29/ 66 | 67 | ### Rendering Consistency 68 | 69 | Another nice thing would be to enforce consistent rendering of the input files. Progress 70 | on this has started as it is enforced by most tests, but tables are an extension to 71 | CommonMark and not a part of the library itself, and the [CommonMark validation 72 | library][commonmark_pkg] we are using doesn't support them. A potential option that makes 73 | even more sense in a plugin architecture would be having individual formatters handle 74 | validating rendering consistency. 75 | 76 | If you end up debugging an issue because of this, you can pass `--write-renders` to save 77 | off the inputs. Pass `--dev-help` to see other developer options, if you're curious. 78 | 79 | [commonmark_pkg]: https://github.com/readthedocs/commonmark.py 80 | -------------------------------------------------------------------------------- /LICENSE.txt: -------------------------------------------------------------------------------- 1 | Copyright 2019 Cisco Systems, Inc. and/or its affiliates. All rights reserved. 2 | 3 | Apache License 4 | Version 2.0, January 2004 5 | http://www.apache.org/licenses/ 6 | 7 | TERMS AND CONDITIONS FOR USE, REPRODUCTION, AND DISTRIBUTION 8 | 9 | 1. Definitions. 10 | 11 | "License" shall mean the terms and conditions for use, reproduction, 12 | and distribution as defined by Sections 1 through 9 of this document. 13 | 14 | "Licensor" shall mean the copyright owner or entity authorized by 15 | the copyright owner that is granting the License. 16 | 17 | "Legal Entity" shall mean the union of the acting entity and all 18 | other entities that control, are controlled by, or are under common 19 | control with that entity. 
For the purposes of this definition, 20 | "control" means (i) the power, direct or indirect, to cause the 21 | direction or management of such entity, whether by contract or 22 | otherwise, or (ii) ownership of fifty percent (50%) or more of the 23 | outstanding shares, or (iii) beneficial ownership of such entity. 24 | 25 | "You" (or "Your") shall mean an individual or Legal Entity 26 | exercising permissions granted by this License. 27 | 28 | "Source" form shall mean the preferred form for making modifications, 29 | including but not limited to software source code, documentation 30 | source, and configuration files. 31 | 32 | "Object" form shall mean any form resulting from mechanical 33 | transformation or translation of a Source form, including but 34 | not limited to compiled object code, generated documentation, 35 | and conversions to other media types. 36 | 37 | "Work" shall mean the work of authorship, whether in Source or 38 | Object form, made available under the License, as indicated by a 39 | copyright notice that is included in or attached to the work 40 | (an example is provided in the Appendix below). 41 | 42 | "Derivative Works" shall mean any work, whether in Source or Object 43 | form, that is based on (or derived from) the Work and for which the 44 | editorial revisions, annotations, elaborations, or other modifications 45 | represent, as a whole, an original work of authorship. For the purposes 46 | of this License, Derivative Works shall not include works that remain 47 | separable from, or merely link (or bind by name) to the interfaces of, 48 | the Work and Derivative Works thereof. 
49 | 50 | "Contribution" shall mean any work of authorship, including 51 | the original version of the Work and any modifications or additions 52 | to that Work or Derivative Works thereof, that is intentionally 53 | submitted to Licensor for inclusion in the Work by the copyright owner 54 | or by an individual or Legal Entity authorized to submit on behalf of 55 | the copyright owner. For the purposes of this definition, "submitted" 56 | means any form of electronic, verbal, or written communication sent 57 | to the Licensor or its representatives, including but not limited to 58 | communication on electronic mailing lists, source code control systems, 59 | and issue tracking systems that are managed by, or on behalf of, the 60 | Licensor for the purpose of discussing and improving the Work, but 61 | excluding communication that is conspicuously marked or otherwise 62 | designated in writing by the copyright owner as "Not a Contribution." 63 | 64 | "Contributor" shall mean Licensor and any individual or Legal Entity 65 | on behalf of whom a Contribution has been received by Licensor and 66 | subsequently incorporated within the Work. 67 | 68 | 2. Grant of Copyright License. Subject to the terms and conditions of 69 | this License, each Contributor hereby grants to You a perpetual, 70 | worldwide, non-exclusive, no-charge, royalty-free, irrevocable 71 | copyright license to reproduce, prepare Derivative Works of, 72 | publicly display, publicly perform, sublicense, and distribute the 73 | Work and such Derivative Works in Source or Object form. 74 | 75 | 3. Grant of Patent License. 
Subject to the terms and conditions of 76 | this License, each Contributor hereby grants to You a perpetual, 77 | worldwide, non-exclusive, no-charge, royalty-free, irrevocable 78 | (except as stated in this section) patent license to make, have made, 79 | use, offer to sell, sell, import, and otherwise transfer the Work, 80 | where such license applies only to those patent claims licensable 81 | by such Contributor that are necessarily infringed by their 82 | Contribution(s) alone or by combination of their Contribution(s) 83 | with the Work to which such Contribution(s) was submitted. If You 84 | institute patent litigation against any entity (including a 85 | cross-claim or counterclaim in a lawsuit) alleging that the Work 86 | or a Contribution incorporated within the Work constitutes direct 87 | or contributory patent infringement, then any patent licenses 88 | granted to You under this License for that Work shall terminate 89 | as of the date such litigation is filed. 90 | 91 | 4. Redistribution. 
You may reproduce and distribute copies of the 92 | Work or Derivative Works thereof in any medium, with or without 93 | modifications, and in Source or Object form, provided that You 94 | meet the following conditions: 95 | 96 | (a) You must give any other recipients of the Work or 97 | Derivative Works a copy of this License; and 98 | 99 | (b) You must cause any modified files to carry prominent notices 100 | stating that You changed the files; and 101 | 102 | (c) You must retain, in the Source form of any Derivative Works 103 | that You distribute, all copyright, patent, trademark, and 104 | attribution notices from the Source form of the Work, 105 | excluding those notices that do not pertain to any part of 106 | the Derivative Works; and 107 | 108 | (d) If the Work includes a "NOTICE" text file as part of its 109 | distribution, then any Derivative Works that You distribute must 110 | include a readable copy of the attribution notices contained 111 | within such NOTICE file, excluding those notices that do not 112 | pertain to any part of the Derivative Works, in at least one 113 | of the following places: within a NOTICE text file distributed 114 | as part of the Derivative Works; within the Source form or 115 | documentation, if provided along with the Derivative Works; or, 116 | within a display generated by the Derivative Works, if and 117 | wherever such third-party notices normally appear. The contents 118 | of the NOTICE file are for informational purposes only and 119 | do not modify the License. You may add Your own attribution 120 | notices within Derivative Works that You distribute, alongside 121 | or as an addendum to the NOTICE text from the Work, provided 122 | that such additional attribution notices cannot be construed 123 | as modifying the License. 
124 | 125 | You may add Your own copyright statement to Your modifications and 126 | may provide additional or different license terms and conditions 127 | for use, reproduction, or distribution of Your modifications, or 128 | for any such Derivative Works as a whole, provided Your use, 129 | reproduction, and distribution of the Work otherwise complies with 130 | the conditions stated in this License. 131 | 132 | 5. Submission of Contributions. Unless You explicitly state otherwise, 133 | any Contribution intentionally submitted for inclusion in the Work 134 | by You to the Licensor shall be under the terms and conditions of 135 | this License, without any additional terms or conditions. 136 | Notwithstanding the above, nothing herein shall supersede or modify 137 | the terms of any separate license agreement you may have executed 138 | with Licensor regarding such Contributions. 139 | 140 | 6. Trademarks. This License does not grant permission to use the trade 141 | names, trademarks, service marks, or product names of the Licensor, 142 | except as required for reasonable and customary use in describing the 143 | origin of the Work and reproducing the content of the NOTICE file. 144 | 145 | 7. Disclaimer of Warranty. Unless required by applicable law or 146 | agreed to in writing, Licensor provides the Work (and each 147 | Contributor provides its Contributions) on an "AS IS" BASIS, 148 | WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or 149 | implied, including, without limitation, any warranties or conditions 150 | of TITLE, NON-INFRINGEMENT, MERCHANTABILITY, or FITNESS FOR A 151 | PARTICULAR PURPOSE. You are solely responsible for determining the 152 | appropriateness of using or redistributing the Work and assume any 153 | risks associated with Your exercise of permissions under this License. 154 | 155 | 8. Limitation of Liability. 
In no event and under no legal theory, 156 | whether in tort (including negligence), contract, or otherwise, 157 | unless required by applicable law (such as deliberate and grossly 158 | negligent acts) or agreed to in writing, shall any Contributor be 159 | liable to You for damages, including any direct, indirect, special, 160 | incidental, or consequential damages of any character arising as a 161 | result of this License or out of the use or inability to use the 162 | Work (including but not limited to damages for loss of goodwill, 163 | work stoppage, computer failure or malfunction, or any and all 164 | other commercial damages or losses), even if such Contributor 165 | has been advised of the possibility of such damages. 166 | 167 | 9. Accepting Warranty or Additional Liability. While redistributing 168 | the Work or Derivative Works thereof, You may choose to offer, 169 | and charge a fee for, acceptance of support, warranty, indemnity, 170 | or other liability obligations and/or rights consistent with this 171 | License. However, in accepting such obligations, You may act only 172 | on Your own behalf and on Your sole responsibility, not on behalf 173 | of any other Contributor, and only if You agree to indemnify, 174 | defend, and hold each Contributor harmless for any liability 175 | incurred by, or claims asserted against, such Contributor by reason 176 | of your accepting any such warranty or additional liability. 177 | 178 | END OF TERMS AND CONDITIONS 179 | 180 | APPENDIX: How to apply the Apache License to your work. 181 | 182 | To apply the Apache License to your work, attach the following 183 | boilerplate notice, with the fields enclosed by brackets "[]" 184 | replaced with your own identifying information. (Don't include 185 | the brackets!) The text should be enclosed in the appropriate 186 | comment syntax for the file format. 
We also recommend that a 187 | file or class name and description of purpose be included on the 188 | same "printed page" as the copyright notice for easier 189 | identification within third-party archives. 190 | 191 | Copyright 2020 [name of copyright owner] 192 | 193 | Licensed under the Apache License, Version 2.0 (the "License"); 194 | you may not use this file except in compliance with the License. 195 | You may obtain a copy of the License at 196 | 197 | http://www.apache.org/licenses/LICENSE-2.0 198 | 199 | Unless required by applicable law or agreed to in writing, software 200 | distributed under the License is distributed on an "AS IS" BASIS, 201 | WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 202 | See the License for the specific language governing permissions and 203 | limitations under the License. -------------------------------------------------------------------------------- /Makefile: -------------------------------------------------------------------------------- 1 | all: audits tests 2 | 3 | # --- ENVIRONMENT MANAGEMENT --- 4 | .PHONY: clean 5 | 6 | clean: 7 | git clean -fdX 8 | # poetry returns a non-zero exit status if the virtualenv doesn't exist so we ignore 9 | # errors. 
10 | -poetry env remove 3.6 11 | -poetry env remove 3.7 12 | -poetry env remove 3.8 13 | -poetry env remove 3.9 14 | 15 | 16 | .PHONY: clean _venv _venv_3.6 _venv_3.7 _venv_3.8 _venv_3.9 venvs 17 | venvs: _venv_3.6 _venv_3.7 _venv_3.8 _venv_3.9 18 | 19 | _venv: 20 | poetry env use ${PYTHON_VERSION} 21 | poetry install 22 | 23 | _venv_3.6: 24 | PYTHON_VERSION=3.6 $(MAKE) _venv 25 | 26 | _venv_3.7: 27 | PYTHON_VERSION=3.7 $(MAKE) _venv 28 | 29 | _venv_3.8: 30 | PYTHON_VERSION=3.8 $(MAKE) _venv 31 | 32 | _venv_3.9: 33 | PYTHON_VERSION=3.9 $(MAKE) _venv 34 | 35 | 36 | # --- AUDITS --- 37 | .PHONY: audits black flake8 isort markflow 38 | 39 | # Runs all of our audits regardless of if any fail so we can get all relevant issues 40 | audits: 41 | @status=0; \ 42 | for target in black flake8 isort markflow; do \ 43 | $(MAKE) $${target}; \ 44 | status=$$(($$status + $$?)); \ 45 | echo ""; \ 46 | done; \ 47 | if [ $$status -eq 0 ]; then \ 48 | echo "All Audits Succeeded!"; \ 49 | else \ 50 | echo "Some audits failed. :("; \ 51 | fi; \ 52 | exit $$status 53 | 54 | black: _venv_3.8 55 | @echo Running $@ audit... 56 | git ls-files | egrep '.*\.pyi?$$' | xargs poetry run black --check 57 | @echo Success! 58 | 59 | # pyi files provide type stubbing and can look weird to flake8, so we filter them out 60 | flake8: _venv_3.8 61 | @echo Running $@ audit... 62 | git ls-files | egrep '.*\.py$$' | xargs poetry run flake8 63 | @echo Success! 64 | 65 | isort: _venv_3.8 66 | @echo Running $@ audit... 67 | git ls-files | egrep '.*\.pyi?$$' | xargs poetry run isort --profile=black --check 68 | @echo Success! 69 | 70 | markflow: _venv_3.8 71 | @echo Running $@ audit... 72 | git ls-files | egrep ".md$$" | grep -v "tests/" | xargs poetry run markflow --check 73 | @echo Success! 
74 | 75 | # --- TESTS --- 76 | .PHONY: tests tests_3.6 tests_3.7 tests_3.8 tests_3.9 77 | tests: utests mypy ensure_deps 78 | tests_3.6: utests_3.6 ensure_deps_3.6 79 | tests_3.7: utests_3.7 ensure_deps_3.7 80 | tests_3.8: utests_3.8 mypy ensure_deps_3.8 81 | tests_3.9: utests_3.9 ensure_deps_3.9 82 | 83 | # Ensure dependencies are properly specified 84 | .PHONY: ensure_deps _ensure_deps ensure_deps_3.6 ensure_deps_3.7 ensure_deps_3.8 ensure_deps_3.9 85 | ensure_deps: ensure_deps_3.6 ensure_deps_3.7 ensure_deps_3.8 ensure_deps_3.9 86 | 87 | _ensure_deps: 88 | # Ensure dependencies markflow needs didn't sneak into dev dependencies 89 | poetry env use ${PYTHON_VERSION} 90 | poetry install --no-dev 91 | echo -e "Hello\n--" | poetry run markflow 92 | 93 | ensure_deps_3.6: 94 | PYTHON_VERSION=3.6 $(MAKE) _ensure_deps 95 | 96 | ensure_deps_3.7: 97 | PYTHON_VERSION=3.7 $(MAKE) _ensure_deps 98 | 99 | ensure_deps_3.8: 100 | PYTHON_VERSION=3.8 $(MAKE) _ensure_deps 101 | 102 | ensure_deps_3.9: 103 | PYTHON_VERSION=3.9 $(MAKE) _ensure_deps 104 | 105 | # MyPy 106 | .PHONY: mypy mypy_lib mypy_tests 107 | mypy: mypy_lib mypy_tests 108 | 109 | mypy_lib: _venv_3.8 110 | # --implicity-reexport means that we don't have to explicitly tell mypy about our 111 | # modules' members via a `__all__` 112 | poetry env use 3.8 113 | MYPYPATH=$(CURDIR)/stubs poetry run mypy --strict --implicit-reexport markflow 114 | 115 | mypy_tests: _venv_3.8 116 | # --implicity-reexport means that we don't have to explicitly tell mypy about our 117 | # modules' members via a `__all__` 118 | poetry env use 3.8 119 | MYPYPATH=$(CURDIR)/stubs poetry run mypy --strict --implicit-reexport tests 120 | 121 | # Unit Tests 122 | # Bit of a misnomer since `test_files.py` is more of a system/integration test 123 | .PHONY: utests _utests utests_3.6 utests_3.7 utests_3.8 utests_3.9 124 | utests: utests_3.6 utests_3.7 utests_3.8 utests_3.9 125 | 126 | _utests: 127 | poetry env use ${PYTHON_VERSION} 128 | cd 
$(CURDIR)/tests && poetry run pytest --cov=markflow --cov-report=term \ 129 | --cov-report=html --junit-xml=junit.xml 130 | @echo For more detailed information, see $(CURDIR)/tests/htmlcov/index.html 131 | 132 | utests_3.6: _venv_3.6 133 | PYTHON_VERSION=3.6 $(MAKE) _utests 134 | 135 | utests_3.7: _venv_3.7 136 | PYTHON_VERSION=3.7 $(MAKE) _utests 137 | 138 | utests_3.8: _venv_3.8 139 | PYTHON_VERSION=3.8 $(MAKE) _utests 140 | 141 | utests_3.9: _venv_3.9 142 | PYTHON_VERSION=3.9 $(MAKE) _utests 143 | 144 | # --- EXPORTING --- 145 | .PHONY: package 146 | 147 | package: 148 | poetry build 149 | 150 | # --- CI CONTAINER --- 151 | .PHONY: container 152 | 153 | container: 154 | docker build . -t markflow_builder 155 | -------------------------------------------------------------------------------- /README.md: -------------------------------------------------------------------------------- 1 | # MarkFlow 2 | 3 | Welcome to **MarkFlow**. This tool automatically reformats your **Markdown** to provide 4 | consistent looking **Markdown** files that look pretty similar to HTML that would be 5 | generated by them. 6 | 7 | ## Quickstart 8 | 9 | To use this tool, install it with pip then run `markflow`: 10 | 11 | ```shell 12 | pip install markflow 13 | markflow SOMETHING.md 14 | ``` 15 | 16 | To install from source, assuming you already have `poetry` installed, from the project 17 | directory, run: 18 | 19 | ```shell 20 | poetry install 21 | poetry run markflow 22 | ``` 23 | 24 | Just want to see if there will be any changes? Use the `--check` flag: 25 | 26 | ```shell 27 | markflow --check $PATH_TO_MARKDOWN_FILE 28 | ``` 29 | 30 | For all features, we've got a help: 31 | 32 | ```shell 33 | markflow --help 34 | ``` 35 | 36 | ## Enforced Rules 37 | 38 | The tool ensures that the following rules are enforced for each different type of 39 | Markdown section. For all sections, trailing spaces on each line are removed. 
It also 40 | ensures that **Markdown** files end with a single newline and newlines are all `'\n'`. 41 | 42 | This tool uses the **Markdown** standard defined by [CommonMark 0.29][commonmark_spec]. 43 | It is expected to evolve with the standard and this section will be updated as support 44 | is added. If you notice any discrepancies, please open an issue. 45 | 46 | [commonmark_spec]: https://spec.commonmark.org/0.29/ 47 | 48 | ### Block Quotes 49 | 50 | Block quotes are fixed up with proper indentation markers for indented quotes, quote 51 | indicators have any space between them removed, and unescaped `>` that could be confused 52 | with quote markers are escaped. *e.g.*: 53 | 54 | ```markdown 55 | > 56 | > > Text > 57 | > > 58 | > 59 | > > Ice Cream \> 0O0> 60 | > 61 | ``` 62 | 63 | becomes: 64 | 65 | ```markdown 66 | > 67 | >> Text \> 68 | >> 69 | > 70 | >> Ice Cream \> 0O0> 71 | > 72 | ``` 73 | 74 | ### Code Blocks 75 | 76 | Fenced codeblocks have any whitespace stripped from their markers and then printed out 77 | as usual. 78 | 79 | ````markdown 80 | ``` markdown 81 | # Markdown code 82 | ``` 83 | ```` 84 | 85 | becomes 86 | 87 | ````markdown 88 | ```markdown 89 | # Markdown code 90 | ``` 91 | ```` 92 | 93 | Indented code blocks simply have their trailing whitespace removed. 94 | 95 | ### Footnotes (or Link Reference Definitions) 96 | 97 | Footnotes will have their whitespace corrected and their titles wrapped. The tool will 98 | however respect what line URLs should appear on, even if they overflow. For example, the 99 | next two examples would be unchanged. 
100 | 101 | ```markdown 102 | [really_really_really_long_link_that_could_go_on_a_new_line]: /but/doesnt/because/the/tool/understands/that/you/may/not/want/that 103 | ``` 104 | 105 | ```markdown 106 | [short_link]: 107 | /that/stays/on/separate/lines 108 | 'Even if title would fit' 109 | ``` 110 | 111 | Titles will be kept on whatever line you write them on, as long as they wouldn't be 112 | wrapped off the line. 113 | 114 | ```markdown 115 | [really_really_really_long_link_that_could_go_on_a_new_line]: /but/doesnt/because/the/tool/understands/that/you/may/not/want/that "But the title is moved to the next line and itself is wrapped because it is also really long." 116 | ``` 117 | 118 | becomes: 119 | 120 | ```markdown 121 | [really_really_really_long_link_that_could_go_on_a_new_line]: /but/doesnt/because/the/tool/understands/that/you/may/not/want/that 122 | "But the title is moved to the next line and itself is wrapped because it is also really 123 | long." 124 | ``` 125 | 126 | ### Headings 127 | 128 | Heading lines begin and end with no whitespace. If you're using ATX headings (leading 129 | `#`s), the tool will correct missing or extra spaces between the octothorpes and the 130 | heading. 131 | 132 | ```markdown 133 | #Non-Standard Heading 134 | ``` 135 | 136 | becomes 137 | 138 | ```markdown 139 | # Non-Standard Heading 140 | ``` 141 | 142 | If you are using setext headings (*i.e.*, underlined headings), they will automatically 143 | be fixed to ensure underlining matches the heading length. *e.g.*: 144 | 145 | ```markdown 146 | Heading 1 147 | -- 148 | ``` 149 | 150 | becomes 151 | 152 | ```markdown 153 | Heading 1 154 | --------- 155 | ``` 156 | 157 | If you have a heading that extends beyond an entire line, **MarkFlow** will wrap it for 158 | you. 
159 | 160 | ```markdown 161 | This is a really long heading that I had to make up so that it would be at least 88 characters long 162 | -- 163 | ``` 164 | 165 | becomes 166 | 167 | ```markdown 168 | This is a really long heading that I had to make up so that it would be at least 88 169 | characters long 170 | ----------------------------------------------------------------------------------- 171 | ``` 172 | 173 | ### Lists 174 | 175 | Lists will be corrected to proper indentation. In addition, ordered lists will be 176 | properly numbered and bullet lists will be reformatted to use consistent bullets. Line 177 | lengths are also enforced. *e.g.*: 178 | 179 | ```markdown 180 | 2. One 181 | * Asterisk 182 | - Dash 183 | 1. Two 184 | 5. Three 185 | ``` 186 | 187 | becomes 188 | 189 | ```markdown 190 | 2. One 191 | * Asterisk 192 | * Dash 193 | 3. Two 194 | 4. Three 195 | ``` 196 | 197 | CommonMark doesn't allow lists to start with 0. That's not really a big deal for this 198 | tool, so we are OK with that. If this causes you issues, please let us know by opening 199 | an [issue][issues]. 200 | 201 | ### Paragraphs 202 | 203 | Paragraphs are reformatted to ensure they are the proper length. URLs and footnotes are 204 | properly split across lines. Inline code is placed all on a singular line. *e.g.* 205 | (assuming a line length of 1): 206 | 207 | ```markdown 208 | test `test = 209 | 1` [url](http://example.com) 210 | ``` 211 | 212 | becomes: 213 | 214 | ```markdown 215 | test 216 | `test = 1` 217 | [url]( 218 | http://example.com) 219 | ``` 220 | 221 | ### Separators 222 | 223 | Separating lines (*i.e.*, blank lines) contain only new lines, removing any horizontal 224 | whitespace. 225 | 226 | ### Tables 227 | 228 | Tables are reformatted to ensure proper width and headings are centered and all cells 229 | have at minimum one space between their contents and column separators. Alignment is 230 | supported too! 
*e.g.*: 231 | 232 | ```markdown 233 | |L|C|R|N| 234 | |:--|:-:|--:|---| 235 | |a|a|a|a| 236 | |aa|aa|aa|aa| 237 | |abcde|abcde|abcde|abcde| 238 | ``` 239 | 240 | becomes: 241 | 242 | ```markdown 243 | | L | C | R | N | 244 | |:------|:-----:|------:|-------| 245 | | a | a | a | a | 246 | | aa | aa | aa | aa | 247 | | abcde | abcde | abcde | abcde | 248 | ``` 249 | 250 | ### Thematic Breaks 251 | 252 | Thematic breaks are extended or reduced to match the length of the document. If line 253 | length is set to infinity, it will instead use 3 of the separating character which must 254 | be one of `-`, `_`, or `*`. 255 | 256 | ```markdown 257 | -- - - 258 | ``` 259 | 260 | becomes: 261 | 262 | ```markdown 263 | ---------------------------------------------------------------------------------------- 264 | ``` 265 | 266 | ## API Reference 267 | 268 | The tool also provides a function to reformat **Markdown** strings yourself. 269 | 270 | ```python 271 | from markflow import reformat_markdown_text 272 | 273 | markdown = " # Header 1" 274 | nice_markdown = reformat_markdown_text(markdown, width=88) 275 | ``` 276 | 277 | ## Contributing 278 | 279 | To contribute to this project, check out our [contributing guide](CONTRIBUTING.md). 280 | 281 | ## Issues 282 | 283 | If you run into an issue running a **Markdown** file, feel free to open an [issue][ 284 | issues]. If you can include the faulting file, that will make it so much easier to 285 | debug. 286 | 287 | This script can help in anonymizing your file if you have any confidential information 288 | in it. 
289 | 290 | ```python 291 | #!/usr/bin/env python3 292 | """ Anonymize file XXXX.md and output it to XXXX.out.md """ 293 | import pathlib 294 | import random 295 | import string 296 | 297 | FILE_NAME = "XXXX.md" 298 | input_path = pathlib.Path(FILE_NAME) 299 | output_path = pathlib.Path(".out.".join(FILE_NAME.rsplit(".", maxsplit=1))) 300 | text = input_path.read_text() 301 | output = "" 302 | 303 | for char in text: 304 | if char in string.ascii_lowercase: 305 | char = random.choice(string.ascii_lowercase) 306 | elif char in string.ascii_uppercase: 307 | char = random.choice(string.ascii_uppercase) 308 | output += char 309 | output_path.write_text(output) 310 | ``` 311 | 312 | [issues]: https://github.com/duo-labs/markflow/issues 313 | 314 | ## Implementation 315 | 316 | To read more about how the tool works, check out the [implementation outline]( 317 | IMPLEMENTATION.md). 318 | 319 | ## Credits 320 | 321 | This tool was inspired by a coworker not enjoying having to manually reformat 322 | **Markdown** files. He wanted a tool that would enforce it like [**black**][black] does 323 | for **Python** code. That is why the line length default is 88. 324 | 325 | [black]: https://black.readthedocs.io/en/latest/ 326 | 327 | ## A Bonus Note on Block Quote Formatting 328 | 329 | Escaping `>` is especially important for the tool itself as otherwise updated block 330 | quotes could be too deep. For instance, incorrect wrapping here could result in an extra 331 | indented block of code. 332 | 333 | ```markdown 334 | > Please don't wrap after this period. > 335 | > Because I don't want to be a double quote. 336 | ``` 337 | 338 | becomes: 339 | 340 | ```markdown 341 | > Please don't wrap after this period. 342 | > > Because I don't want to be a 343 | > double quote. 344 | ``` 345 | 346 | which would format to: 347 | 348 | ```markdown 349 | > Please don't wrap after this period. 350 | > > Because I don't want to be a 351 | > > double quote. 
352 | ``` 353 | 354 | Of course, if the tool tried that, it would throw an exception since it double checks 355 | that if it were to be rerun the output would not change, at which point, hopefully, dear 356 | reader, you would open an issue. But I get it if you don't want to. I've been there. 357 | -------------------------------------------------------------------------------- /docker-compose.yml: -------------------------------------------------------------------------------- 1 | version: '3' 2 | services: 3 | build: 4 | build: . 5 | volumes: 6 | - .:/src -------------------------------------------------------------------------------- /markflow/__init__.py: -------------------------------------------------------------------------------- 1 | # flake8: noqa 2 | from .exceptions import * 3 | from .reformat_markdown import * 4 | -------------------------------------------------------------------------------- /markflow/_argparse.py: -------------------------------------------------------------------------------- 1 | import argparse 2 | import dataclasses 3 | import glob 4 | import os 5 | import pathlib 6 | from typing import Any, Callable, List, Optional, Sequence, Union, cast 7 | 8 | 9 | @dataclasses.dataclass(frozen=True) 10 | class Permission: 11 | os_constant: int 12 | verbiage: str 13 | 14 | 15 | EXECUTABLE = Permission(os.X_OK, "execute") 16 | READABLE = Permission(os.R_OK, "read from") 17 | WRITABLE = Permission(os.W_OK, "write to") 18 | 19 | 20 | class ExistingPath: 21 | def __init__(self, permissions: List[Permission]): 22 | self._permissions = permissions 23 | 24 | def __call__(self, string: str) -> pathlib.Path: 25 | path = pathlib.Path(string) 26 | if not path.exists(): 27 | raise argparse.ArgumentTypeError( 28 | f"specified path does not exist: {repr(str(path))}" 29 | ) 30 | return path 31 | 32 | 33 | class Directory: 34 | def __init__(self, permissions: List[Permission], must_exist: bool = False): 35 | self._permissions = permissions 36 | 
self._must_exist = must_exist 37 | 38 | def __call__(self, string: str) -> pathlib.Path: 39 | path = pathlib.Path(string) 40 | if path.exists(): 41 | if not path.is_dir(): 42 | raise argparse.ArgumentTypeError( 43 | f"specified directory is a file: {repr(str(path))}" 44 | ) 45 | 46 | for permission in self._permissions: 47 | if not os.access(path, permission.os_constant): 48 | raise argparse.ArgumentTypeError( 49 | f"cannot {permission.verbiage} directory: " f"{repr(str(path))}" 50 | ) 51 | else: 52 | if self._must_exist: 53 | raise argparse.ArgumentTypeError( 54 | f"directory does not exist: {repr(str(path))}" 55 | ) 56 | return path 57 | 58 | 59 | class File: 60 | def __init__(self, permissions: List[Permission], must_exist: bool = False): 61 | self._permissions = permissions 62 | self._must_exist = must_exist 63 | 64 | def __call__(self, string: str) -> pathlib.Path: 65 | path = pathlib.Path(string) 66 | if path.exists(): 67 | if path.is_dir(): 68 | raise argparse.ArgumentTypeError( 69 | f"file is a directory: {repr(str(path))}" 70 | ) 71 | 72 | for permission in self._permissions: 73 | if not os.access(path, permission.os_constant): 74 | raise argparse.ArgumentTypeError( 75 | f"can't {permission.verbiage} file: {repr(str(path))}" 76 | ) 77 | else: 78 | if self._must_exist: 79 | raise argparse.ArgumentTypeError( 80 | f"file does not exist: {repr(str(path))}" 81 | ) 82 | 83 | if not path.parent.is_dir(): 84 | raise argparse.ArgumentTypeError( 85 | f"directory does not exist for file: {repr(str(path))}" 86 | ) 87 | 88 | for permission in self._permissions: 89 | if not os.access(path.parent, permission.os_constant): 90 | raise argparse.ArgumentTypeError( 91 | f"cannot {permission.verbiage} directory of file: " 92 | f"{repr(str(path))}" 93 | ) 94 | return path 95 | 96 | 97 | class AddMarkdownFilesInDirOrPathsAction(argparse.Action): 98 | def __init__( 99 | self, 100 | option_strings: List[str], 101 | dest: str, 102 | type: Callable[[str], pathlib.Path], 103 | nargs: 
Optional[str] = None, 104 | **kwargs: Any, 105 | ): 106 | if nargs != "*": 107 | raise ValueError("nargs must be *") 108 | super().__init__(option_strings, dest, type=type, nargs=nargs, **kwargs) 109 | 110 | def __call__( 111 | self, 112 | parser: argparse.ArgumentParser, 113 | namespace: argparse.Namespace, 114 | values: Union[str, Sequence[Any], None], 115 | option_string: Optional[str] = None, 116 | ) -> None: 117 | if values is None: 118 | return 119 | values = cast(Sequence[pathlib.Path], values) 120 | expanded_paths = [] 121 | for value in values: 122 | if value.is_file(): 123 | expanded_paths.append(value) 124 | else: 125 | markdown_paths = glob.glob(str(value / "**" / "*.md"), recursive=True) 126 | expanded_paths += [pathlib.Path(path) for path in markdown_paths] 127 | setattr(namespace, self.dest, expanded_paths) 128 | -------------------------------------------------------------------------------- /markflow/_utils/__init__.py: -------------------------------------------------------------------------------- 1 | # flake8: noqa 2 | import textwrap 3 | 4 | from ._utils import * 5 | -------------------------------------------------------------------------------- /markflow/_utils/_utils.py: -------------------------------------------------------------------------------- 1 | import contextlib 2 | import logging 3 | from typing import Iterator 4 | 5 | __all__ = [ 6 | "get_indent", 7 | "truncate_str", 8 | "redirect_info_logs_to_debug", 9 | ] 10 | 11 | ELLIPSIS = "..." 12 | 13 | 14 | def get_indent(line: str) -> int: 15 | return len(line) - len(line.lstrip()) 16 | 17 | 18 | def truncate_str(str_: str, length: int) -> str: 19 | if len(str_) <= length: 20 | pass 21 | elif len(ELLIPSIS) >= length: 22 | str_ = "." 
* length 23 | else: 24 | truncation = max(0, length - len(ELLIPSIS)) 25 | str_ = str_[:truncation] + ELLIPSIS 26 | return str_ 27 | 28 | 29 | @contextlib.contextmanager 30 | def redirect_info_logs_to_debug() -> Iterator[None]: 31 | old_info = logging.INFO 32 | logging.INFO = logging.DEBUG 33 | yield 34 | logging.INFO = old_info 35 | -------------------------------------------------------------------------------- /markflow/_utils/textwrap.py: -------------------------------------------------------------------------------- 1 | import re 2 | from typing import List, Tuple 3 | 4 | from markflow.typing import Number 5 | 6 | INLINE_CODE_MARKER_REGEX = re.compile(r"(((?!<\\)`)+)") 7 | FOOTNOTE_REGEX = re.compile(r"[^\s\]\)]*\[[^\[]+\]\[[^\]]+\][^\s\[\(]*") 8 | HTML_NEWLINE_REGEX = re.compile(r"
") 9 | URL_REGEX = re.compile(r"[^\s\]\)]*\[[^\[]+\]\([^\)]+\)[^\s\[\(]*") 10 | 11 | 12 | def join(split_text: List[str], leading_spaces: List[bool], width: Number) -> str: 13 | new_split_text = [""] 14 | for word, leading_space in zip(split_text, leading_spaces): 15 | if leading_space and new_split_text[-1]: 16 | potential_new_string = f"{new_split_text[-1]} {word}" 17 | else: 18 | potential_new_string = f"{new_split_text[-1]}{word}" 19 | if len(potential_new_string) <= width or not new_split_text[-1] or width <= 0: 20 | new_split_text[-1] = potential_new_string 21 | else: 22 | new_split_text.append(word) 23 | 24 | # If we hit an HTML new line, the next text should begin on a new line. 25 | if HTML_NEWLINE_REGEX.match(word): 26 | new_split_text.append("") 27 | 28 | if not new_split_text[-1]: 29 | new_split_text = new_split_text[:-1] 30 | return "\n".join(new_split_text) 31 | 32 | 33 | def code_split( 34 | text: str, leading_space: bool 35 | ) -> Tuple[List[str], List[bool], List[bool]]: 36 | split_text: List[str] = [] 37 | leading_spaces: List[bool] = [] 38 | evaluates: List[bool] = [] 39 | 40 | # Markdown inline code only ends when the exact same number of tildas are seen 41 | # again. More or less indicates it is still part of the code. 42 | open_marker_len = 0 43 | last_end = 0 44 | # We jump from tilda mark to tilda mark. The length of the tildas indicate if we are 45 | # beginning, ending, or still in code. 46 | for code_marker in INLINE_CODE_MARKER_REGEX.finditer(text): 47 | if open_marker_len == 0: 48 | plaintext = text[last_end : code_marker.start()] 49 | if ( 50 | plaintext.startswith(".") 51 | and not plaintext.startswith("..") 52 | and split_text 53 | ): 54 | split_text[-1] += "." 
55 | plaintext = plaintext[1:] 56 | if plaintext.strip(): 57 | if not leading_spaces: 58 | leading_spaces.append(leading_space) 59 | else: 60 | leading_spaces.append(plaintext.startswith(" ")) 61 | split_text.append(plaintext.strip()) 62 | evaluates.append(True) 63 | open_marker_len = len(code_marker.group()) 64 | 65 | # Prepare our lists for code 66 | if not leading_spaces: 67 | leading_spaces.append(leading_space) 68 | else: 69 | leading_spaces.append(plaintext.endswith(" ")) 70 | evaluates.append(False) 71 | split_text.append("`" * open_marker_len) 72 | elif len(code_marker.group()) == open_marker_len: 73 | # We've found the close of our inline code 74 | code = text[last_end : code_marker.start()] 75 | split_text[-1] += code + "`" * open_marker_len 76 | open_marker_len = 0 77 | else: 78 | # We've found more inline code 79 | split_text[-1] += text[last_end : code_marker.end()] 80 | 81 | last_end = code_marker.end() 82 | 83 | # If our last field only has a singular inline code marker, it means that it isn't 84 | # inline text and just a standalone tilda or set of tildas, so we can evaluate it. 85 | if split_text and len(INLINE_CODE_MARKER_REGEX.findall(split_text[-1])) == 1: 86 | evaluates[-1] = True 87 | if text[last_end:].strip(): 88 | split_text[-1] += text[last_end:].rstrip() 89 | else: 90 | remaining_text = text[last_end:] 91 | if ( 92 | remaining_text 93 | and remaining_text.startswith(".") 94 | and not remaining_text.startswith("..") 95 | ): 96 | split_text[-1] += "." 
97 | remaining_text = remaining_text[1:] 98 | if remaining_text.strip(): 99 | split_text.append(remaining_text.strip()) 100 | if last_end == 0: 101 | leading_spaces.append(leading_space) 102 | else: 103 | leading_spaces.append(remaining_text.startswith(" ")) 104 | evaluates.append(True) 105 | 106 | return split_text, leading_spaces, evaluates 107 | 108 | 109 | def link_split( 110 | text: str, leading_space: bool 111 | ) -> Tuple[List[str], List[bool], List[bool]]: 112 | """Splits text based on links 113 | 114 | This function iterates over text split by tildas. Markdown inline code begins with 115 | a number of tildas and only ends when that exact number is reached. If there are 116 | more tildas, e.g. `` ```` ``, they are treated as part of the inline code. 117 | 118 | Per our rules, inline code should all be on one line, so each inline code section is 119 | marked for non-evaluation. 120 | 121 | Args: 122 | text: The text to evaluate 123 | leading_space: Should this code section have a leading new space when reflowed? 124 | 125 | Returns: 126 | Split text, What sections have leading spaces, What sections should continue to 127 | be evaluated 128 | """ 129 | matches = [m for m in FOOTNOTE_REGEX.finditer(text)] 130 | matches += [m for m in URL_REGEX.finditer(text)] 131 | matches.sort(key=lambda m: m.start()) 132 | 133 | split_text: List[str] = [] 134 | leading_spaces: List[bool] = [] 135 | evaluates: List[bool] = [] 136 | last_end = 0 137 | # Each iteration of this for loop operates on non-link text followed by 138 | # link text. 139 | for match in matches: 140 | non_link_text = text[last_end : match.start()] 141 | if non_link_text.strip(): 142 | if ( 143 | split_text 144 | and non_link_text.startswith(".") 145 | and not non_link_text.startswith("..") 146 | ): 147 | split_text[-1] += "." 
148 | non_link_text = non_link_text[1:] 149 | split_text.append(non_link_text.strip()) 150 | if not leading_spaces: 151 | leading_spaces.append(leading_space) 152 | else: 153 | leading_spaces.append(non_link_text.startswith(" ")) 154 | 155 | leading_spaces.append(text[match.start() - 1] == " ") 156 | evaluates.append(True) 157 | else: 158 | if not leading_spaces: 159 | leading_spaces.append(leading_space) 160 | else: 161 | leading_spaces.append(False) 162 | 163 | leading_spaces.append(False) 164 | if "](" in match.group(): 165 | split_link = match.group().split("](") 166 | split_text.append(split_link[0].strip() + "](") 167 | split_text.append(split_link[1].strip()) 168 | else: 169 | split_link = match.group().split("][") 170 | split_text.append(split_link[0].strip() + "][") 171 | split_text.append(split_link[1].strip()) 172 | 173 | # Don't modify our hyperlink 174 | evaluates += [True, False] 175 | last_end = match.end() 176 | 177 | remaining_text = text[last_end:] 178 | if ( 179 | remaining_text 180 | and remaining_text.startswith(".") 181 | and not remaining_text.startswith("..") 182 | ): 183 | split_text[-1] += "." 184 | remaining_text = remaining_text[1:] 185 | if remaining_text.strip(): 186 | split_text.append(remaining_text.strip()) 187 | if last_end == 0: 188 | leading_spaces.append(leading_space) 189 | else: 190 | leading_spaces.append(remaining_text.startswith(" ")) 191 | evaluates.append(True) 192 | 193 | return split_text, leading_spaces, evaluates 194 | 195 | 196 | def newline_split( 197 | text: str, leading_space: bool 198 | ) -> Tuple[List[str], List[bool], List[bool]]: 199 | split_text: List[str] = [] 200 | leading_spaces: List[bool] = [] 201 | evaluates: List[bool] = [] 202 | last_end = 0 203 | # Each iteration of this for loop operates operates on plaintext followed by an HML 204 | # newline. 
205 | for match in HTML_NEWLINE_REGEX.finditer(text): 206 | non_newline_text = text[last_end : match.start()] 207 | if not leading_spaces: 208 | leading_spaces.append(leading_space) 209 | else: 210 | leading_spaces.append(text[last_end] == " ") 211 | 212 | if non_newline_text.strip(): 213 | split_text.append(non_newline_text.strip()) 214 | evaluates.append(True) 215 | leading_spaces.append(non_newline_text.endswith(" ")) 216 | 217 | split_text.append(match.group()) 218 | evaluates.append(False) 219 | last_end = match.end() 220 | 221 | if text[last_end:].strip(): 222 | split_text.append(text[last_end:].strip()) 223 | if last_end == 0: 224 | leading_spaces.append(leading_space) 225 | else: 226 | leading_spaces.append(text[last_end:].startswith(" ")) 227 | evaluates.append(True) 228 | 229 | return split_text, leading_spaces, evaluates 230 | 231 | 232 | def space_split( 233 | text: str, leading_space: bool 234 | ) -> Tuple[List[str], List[bool], List[bool]]: 235 | split_text: List[str] = [] 236 | leading_spaces: List[bool] = [] 237 | evaluates: List[bool] = [] 238 | for word in text.split(" "): 239 | if not word: 240 | continue 241 | split_text.append(word.strip()) 242 | if not leading_spaces: 243 | leading_spaces.append(leading_space) 244 | else: 245 | leading_spaces.append(True) 246 | evaluates.append(True) 247 | 248 | return split_text, leading_spaces, evaluates 249 | 250 | 251 | def wrap(text: str, width: Number) -> str: 252 | # TODO: Should wrap be modifying the input. Maybe assert there's no newlines? 
253 | lines = text.splitlines() 254 | text = " ".join([line.strip() for line in lines]) 255 | 256 | split_text: List[str] = [text] 257 | leading_spaces: List[bool] = [False] 258 | evaluates: List[bool] = [True] 259 | for func in [code_split, link_split, newline_split, space_split]: 260 | new_split_text = [] 261 | new_leading_spaces = [] 262 | new_evaluates = [] 263 | for text, leading_space, evaluate in zip(split_text, leading_spaces, evaluates): 264 | if evaluate: 265 | nst, nls, evl = func(text, leading_space) 266 | new_split_text += nst 267 | new_leading_spaces += nls 268 | new_evaluates += evl 269 | else: 270 | new_split_text.append(text) 271 | new_leading_spaces.append(leading_space) 272 | new_evaluates.append(evaluate) 273 | split_text = new_split_text 274 | leading_spaces = new_leading_spaces 275 | evaluates = new_evaluates 276 | 277 | return join(split_text, leading_spaces, width) 278 | -------------------------------------------------------------------------------- /markflow/detectors/__init__.py: -------------------------------------------------------------------------------- 1 | # flake8: noqa 2 | """ 3 | MarkFlow MarkDown Section Detection Library 4 | 5 | This library provides the functions MarkFlow uses to split a document into its 6 | individual text types. 
7 | """ 8 | from .atx_heading import * 9 | from .blank_line import * 10 | from .block_quote import * 11 | from .bullet_list import * 12 | from .fenced_code_block import * 13 | from .indented_code_block import * 14 | from .link_reference_definition import * 15 | from .ordered_list import * 16 | from .paragraph import * 17 | from .setext_heading import * 18 | from .table import * 19 | from .thematic_break import * 20 | -------------------------------------------------------------------------------- /markflow/detectors/_lines.py: -------------------------------------------------------------------------------- 1 | """ 2 | MarkFlow Line Detection Library 3 | 4 | This library is used as a common space to evaluate position independent information about 5 | lines. They are stored here so as to avoid any circular imports. 6 | """ 7 | 8 | import re 9 | 10 | from .._utils import get_indent 11 | 12 | FENCED_CODE_BLOCK_FENCE_CHARACTERS = ["`", "~"] 13 | BULLET_LIST_START_REGEX = re.compile( 14 | r"^\s*" # Leading spaces are OK and often expected 15 | r"[" 16 | r"*" # Asterisk list marker 17 | r"+" # Plus list marker 18 | r"-" # Dash list marker 19 | r"] " # Lists need a space after their identifier 20 | ) 21 | ORDERED_LIST_START_REGEX = re.compile( 22 | r"^\s*" # Leading spaces are OK and often expected 23 | r"(" 24 | r"[0-9]+\." # Numeric list marker 25 | r") " # Lists need a space after their identifier 26 | ) 27 | THEMATIC_BREAK_CHARACTERS = ["*", "_", "-"] 28 | 29 | 30 | def is_atx_heading_line(line: str) -> bool: 31 | """Evaluates whether a line is formatted like an ATX heading 32 | 33 | The standard requires a space, but it also notes that not everyone follows this. We 34 | are lax in our definition and fix it on reformatting. 35 | 36 | Examples: 37 | ``` 38 | #Heading 39 | # Heading 40 | ``` 41 | 42 | Args: 43 | line: The line to evaluate 44 | 45 | Returns: 46 | True if the line is an ATX heading. False otherwise. 
47 | """ 48 | return not is_indented_code_block_start_line(line) and line.lstrip().startswith("#") 49 | 50 | 51 | def is_blank_line_line(line: str) -> bool: 52 | """Evaluates whether a line is a blank line 53 | 54 | Example: 55 | ``` 56 | 57 | ``` 58 | 59 | Args: 60 | line: The line to evaluate 61 | 62 | Returns: 63 | True if the line is a blank line. False otherwise. 64 | """ 65 | return not line.strip() 66 | 67 | 68 | def is_explicit_block_quote_line(line: str) -> bool: 69 | """Evaluates whether a line is explicitly a block quote line 70 | 71 | The distinction here is that paragraph continuation lines can be part of a block 72 | quote. This ensures that is what is desired. 73 | 74 | Example: 75 | ``` 76 | > Block Quote 77 | ``` 78 | 79 | Args: 80 | line: The line to evaluate 81 | 82 | Returns: 83 | True if the line is a block quote line. False otherwise. 84 | """ 85 | return not is_indented_code_block_start_line(line) and line.lstrip().startswith(">") 86 | 87 | 88 | def is_fenced_code_block_start_line(line: str) -> bool: 89 | """Evaluates whether a line could open a fenced code block 90 | 91 | Examples: 92 | ``` 93 | ```python3 94 | ~~~markdown 95 | ``` 96 | 97 | Args: 98 | line: The line to evaluate 99 | 100 | Returns: 101 | True if the line could open a fenced code block. False otherwise. 102 | """ 103 | for fence in FENCED_CODE_BLOCK_FENCE_CHARACTERS: 104 | if line.strip().startswith(fence * 3): 105 | return True 106 | return False 107 | 108 | 109 | def is_indented_code_block_start_line(line: str) -> bool: 110 | """Evaluates whether a line could start an indented code block 111 | 112 | Examples: 113 | ``` 114 | There's four spaces before this 115 | ``` 116 | 117 | Args: 118 | line: The line to evaluate 119 | 120 | Returns: 121 | True if the line could start an indented code block. False otherwise. 
122 | """ 123 | return bool(line.strip()) and get_indent(line) >= 4 124 | 125 | 126 | def is_ordered_list_start_line(line: str) -> bool: 127 | """Evaluates whether a line could start an ordered list 128 | 129 | Example: 130 | ``` 131 | 1. Entry 132 | ``` 133 | 134 | Args: 135 | line: The line to evaluate 136 | 137 | Returns: 138 | True if the line could start an ordered list. False otherwise. 139 | """ 140 | return not is_indented_code_block_start_line(line) and bool( 141 | ORDERED_LIST_START_REGEX.search(line) 142 | ) 143 | 144 | 145 | def is_bullet_list_start_line(line: str) -> bool: 146 | """Evaluates whether a line could start a bullet list 147 | 148 | Example: 149 | ``` 150 | * Asterisk List 151 | - Dash List 152 | + Plus List 153 | ``` 154 | 155 | Args: 156 | line: The line to evaluate 157 | 158 | Returns: 159 | True if the line could start a bullet list. False otherwise. 160 | """ 161 | return not is_indented_code_block_start_line(line) and bool( 162 | BULLET_LIST_START_REGEX.search(line) 163 | ) 164 | 165 | 166 | def is_paragraph_start_line(line: str) -> bool: 167 | """Evaluates whether a line could start a paragraph 168 | 169 | We basically evaluate that no other section type could start instead. 170 | 171 | Examples: 172 | ``` 173 | Just some text 174 | ``` 175 | 176 | Args: 177 | line: The line to evaluate 178 | 179 | Returns: 180 | True if the line could start a paragraph. False otherwise. 
181 | """ 182 | for line_checker in [ 183 | is_indented_code_block_start_line, 184 | is_atx_heading_line, 185 | is_blank_line_line, 186 | is_bullet_list_start_line, 187 | is_explicit_block_quote_line, 188 | is_fenced_code_block_start_line, 189 | is_ordered_list_start_line, 190 | is_table_start_line, 191 | is_thematic_break_line, 192 | ]: 193 | if line_checker(line): 194 | return False 195 | return True 196 | 197 | 198 | def is_setext_underline(line: str) -> bool: 199 | """Evaluates whether a line could be the underlining for a setext heading 200 | 201 | Examples: 202 | ``` 203 | --- 204 | == 205 | ``` 206 | 207 | Args: 208 | line: The line to evaluate 209 | 210 | Returns: 211 | True if the line is could underline an setext heading. False otherwise. 212 | """ 213 | return ( 214 | not is_indented_code_block_start_line(line) 215 | and bool(line.strip()) 216 | and ( 217 | all([c == "=" for c in line.strip()]) 218 | or all([c == "-" for c in line.strip()]) 219 | ) 220 | ) 221 | 222 | 223 | def is_table_start_line(line: str) -> bool: 224 | """Evaluates whether a line could start a table 225 | 226 | Examples: 227 | ``` 228 | |Table| 229 | ``` 230 | 231 | Args: 232 | line: The line to evaluate 233 | 234 | Returns: 235 | True if the line is could start a table. False otherwise. 
236 | """ 237 | # ToDo: Not really, but we'll have to adapt a standard from somewhere other than 238 | # CommonMark 239 | return line.lstrip().startswith("|") 240 | 241 | 242 | def is_thematic_break_line(line: str) -> bool: 243 | if is_indented_code_block_start_line(line): 244 | return False 245 | 246 | spaceless_line = "".join(line.split()) 247 | if len(spaceless_line) < 3: 248 | # Thematic breaks must be at least three characters long 249 | return False 250 | else: 251 | for symbol in THEMATIC_BREAK_CHARACTERS: 252 | if all(char == symbol for char in spaceless_line.strip()): 253 | return True 254 | else: 255 | return False 256 | -------------------------------------------------------------------------------- /markflow/detectors/atx_heading.py: -------------------------------------------------------------------------------- 1 | """ 2 | MarkFlow ATX Heading Detection Library 3 | 4 | ATX headings are lines that begin with ane or more octothorpes (#) and are not indented. 5 | The number of octothorpes indicates the depth of the heading (e.g. # ->

<h1>, 6 | ## -> <h2>

def split_atx_heading(
    lines: List[str], line_offset: int = 0
) -> Tuple[List[str], List[str]]:
    """Split a leading ATX heading off of lines if there is one

    The standard asks for a space between the octothorpes and the heading text. We
    are lenient about that and treat a missing space as author error.

    Args:
        lines: The lines to evaluate.
        line_offset (optional): The offset into the overall document we are at. This is
            used for reporting errors in the original document.

    Returns:
        A tuple of two values. The first is a single-element list holding the ATX
        heading line, or an empty list if lines does not start with one. The second
        is the remaining lines. (Without a heading, that is lines itself.)
    """
    heading_candidate = lines[0]
    if not is_atx_heading_line(heading_candidate):
        return [], lines
    return [heading_candidate], lines[1:]
def split_blank_line(
    lines: List[str], line_offset: int = 0
) -> Tuple[List[str], List[str]]:
    """Split a leading blank line off of lines if there is one

    Args:
        lines: The lines to evaluate.
        line_offset (optional): The offset into the overall document we are at. This is
            used for reporting errors in the original document.

    Returns:
        A tuple of two values. The first is a single-element list holding the blank
        line, or an empty list if lines does not start with one. The second is the
        remaining lines. (Without a blank line, that is lines itself.)
    """
    candidate = lines[0]
    return ([candidate], lines[1:]) if is_blank_line_line(candidate) else ([], lines)
a valid block instead. 26 | 27 | We have a separate definition from the one used in paragraph detection to avoid 28 | circular imports. This definition assumes the line doesn't start with '>'. 29 | 30 | There is also a bit of a diversion from the spec here. According to the spec, the 31 | following is a block-quoted paragraph: 32 | 33 | > paragraph 34 | title 35 | ===== 36 | 37 | or: 38 | 39 | > paragraph title ===== 40 | 41 | But, that looks odd, and the definition for paragraph continuation text could easily 42 | be interpreted to consider that a paragraph and a title. So, we do the same here. 43 | Given that MarkFlow output should not result in any paragraph continuation lines 44 | after a block quote, there are no concerns around consistency. In the case of 45 | trailing equals, e.g. 46 | 47 | > paragraph 48 | === 49 | 50 | the caller should detect this as a continuation line. But, if it is dashes, it 51 | should be detected as a horizontal line. 52 | 53 | There is an open issue on the ambiguity of paragraph continuation text here: 54 | https://github.com/commonmark/commonmark-spec/issues/675 55 | 56 | Args: 57 | lines: The lines to evaluate. 58 | line_offset (optional): The offset into the overall document we are at. This is 59 | used for reporting errors in the original document. 60 | 61 | Returns: 62 | True if the first line would continue the paragraph. False otherwise. 
def _block_quote_ends_with_paragraph(block_quote_lines: List[str]) -> bool:
    """Indicates whether the innermost final section of a block quote is a paragraph

    One leading quote marker is removed from every line and the result is parsed. If
    the last parsed section is itself a block quote, the same check is repeated on
    that section's content.

    Args:
        block_quote_lines: The lines of the block quote, quote markers included.

    Returns:
        True if the last section, after descending through nested block quotes, is a
        paragraph. False otherwise.
    """
    # Imported here to avoid circular imports
    from ..parser import MarkdownSectionEnum, parse_markdown

    quoted_lines = block_quote_lines
    while True:
        unquoted_lines = [
            LEADING_QUOTE_MARKER.sub("", quoted_line) for quoted_line in quoted_lines
        ]

        with redirect_info_logs_to_debug():
            last_section_type, last_section_content = parse_markdown(unquoted_lines)[
                -1
            ]

        if last_section_type == MarkdownSectionEnum.BLOCK_QUOTE:
            # Nested quote: look at how the inner quote ends instead
            quoted_lines = last_section_content
            continue
        return last_section_type == MarkdownSectionEnum.PARAGRAPH
def split_bullet_list(
    lines: List[str], line_offset: int = 0
) -> Tuple[List[str], List[str]]:
    """Split a leading bullet list off of lines if there is one

    The list runs from its first line up to, but not including, the first following
    blank line, table start line, or thematic break line.

    Args:
        lines: The lines to evaluate.
        line_offset (optional): The offset into the overall document we are at.

    Returns:
        A tuple of two values. The first is the bullet list lines, or an empty list if
        lines does not start with a bullet list. The second is the remaining lines.
        (Without a bullet list, that is lines itself.)
    """
    opening_line = next(iter(lines))
    if not is_bullet_list_start_line(opening_line):
        return [], lines

    # Exclusive end of the list; the opening line always belongs to it
    list_end = 1
    for following_line in lines[1:]:
        if (
            is_blank_line_line(following_line)
            or is_table_start_line(following_line)
            or is_thematic_break_line(following_line)
        ):
            break
        list_end += 1

    return lines[:list_end], lines[list_end:]
FENCES = "`~"
__LAST_FENCE = ""
__LAST_FENCE_INDEX = -1


def fenced_code_block_started(line: str, index: int, lines: List[str]) -> bool:
    """DEPRECATED

    Reports whether line opens a fenced code block. When it does, the full opening
    fence and its index are stored in module state.

    Args:
        line: The line to evaluate.
        index: The index of line; stored when a fence is found.
        lines: Unused here.

    Returns:
        True if the stripped line starts with at least three identical fence
        characters. False otherwise.
    """
    global __LAST_FENCE, __LAST_FENCE_INDEX

    stripped_line = line.strip()
    fence_character = next(
        (fence for fence in FENCES if stripped_line.startswith(fence * 3)), None
    )
    if fence_character is None:
        return False

    # Length of the leading run of fence characters, which may exceed three
    fence_length = len(stripped_line) - len(stripped_line.lstrip(fence_character))
    __LAST_FENCE = fence_character * fence_length
    __LAST_FENCE_INDEX = index
    return True
% (index + 1) 75 | ) 76 | 77 | # We'll just redetect our opening line 78 | if index - 1 == __LAST_FENCE_INDEX: 79 | return False 80 | 81 | last_line = lines[index - 1] 82 | if last_line.strip().startswith(__LAST_FENCE): 83 | last_fence_indent = len(lines[__LAST_FENCE_INDEX]) - len( 84 | lines[__LAST_FENCE_INDEX].lstrip() 85 | ) 86 | if len(last_line) - len(last_line.lstrip()) > 3 + last_fence_indent: 87 | logger.warning( 88 | "Detected that the fence on line %d is over indented per the standard. " 89 | "If this is intentional, please file a bug report." % (index + 1) 90 | ) 91 | __LAST_FENCE = "" 92 | __LAST_FENCE_INDEX = -1 93 | return True 94 | return False 95 | 96 | 97 | def split_fenced_code_block( 98 | lines: List[str], line_offset: int = 0 99 | ) -> Tuple[List[str], List[str]]: 100 | """Split leading fenced code block from lines if one exists 101 | 102 | Args: 103 | lines: The lines to evaluate. 104 | line_offset (optional): The offset into the overall document we are at. This is 105 | used for reporting errors in the original document. 106 | 107 | Returns: 108 | A tuple of two values. The first is the fenced code block lines if they were 109 | found, otherwise it is an empty list. The second value is the remaining text. 110 | (If lines does not start with a fenced code block, it is the same as lines.) 
def split_indented_code_block(
    lines: List[str], line_offset: int = 0
) -> Tuple[List[str], List[str]]:
    """Split leading indented code block from lines if one exists

    The block starts at a non-blank line indented at least four spaces and runs
    through the last such line that comes before the first non-blank line with less
    indentation. Blank lines between indented lines belong to the block; trailing
    blank lines do not.

    Args:
        lines: The lines to evaluate.
        line_offset (optional): The offset into the overall document we are at. This is
            used for reporting errors in the original document.

    Returns:
        A tuple of two values. The first is the indented code block lines if they were
        found, otherwise it is an empty list. The second value is the remaining text.
        (If lines does not start with an indented code block, it is the same as lines.)
    """
    indented_code_block: List[str] = []
    remaining_lines = lines
    indexed_line_generator = enumerate(lines)

    index, line = next(indexed_line_generator)
    if line.strip() and get_indent(line) >= 4:
        # Exclusive end of the block; the opening line always belongs to it
        close_index = index + 1
        for index, line in indexed_line_generator:
            if not line.strip():
                # Blank lines only count if more indented code follows them
                continue
            if get_indent(line) < 4:
                break
            # close_index is a slice bound, so it has to point one past this line;
            # pointing at it would drop the last indented line from the block
            close_index = index + 1

        indented_code_block = lines[:close_index]
        remaining_lines = lines[close_index:]

    return indented_code_block, remaining_lines
11 | 12 | Examples: 13 | ``` 14 | [label]: link 'title' 15 | 16 | [label]: link 17 | 'title' 18 | 19 | [label]: 20 | link 21 | 'title' 22 | ``` 23 | """ 24 | 25 | import itertools 26 | import logging 27 | import re 28 | from typing import List, Tuple 29 | 30 | from .._utils import get_indent 31 | 32 | logger = logging.getLogger(__name__) 33 | 34 | LINK_REFERENCE_DEFINITION_FIRST_ELEMENT_REGEX = re.compile( 35 | r"\[" # Open bracket 36 | r"[^\]]{1,999}" # At least one and up to 999 characters as the name 37 | r"\]:" # End bracket and colon 38 | ) 39 | QUOTATION_CHARACTERS = "'\"" 40 | 41 | 42 | def split_link_reference_definition( 43 | lines: List[str], line_offset: int = 0 44 | ) -> Tuple[List[str], List[str]]: 45 | """Split leading link reference definition from lines if one exists 46 | 47 | Args: 48 | lines: The lines to evaluate. 49 | line_offset (optional): The offset into the overall document we are at. This is 50 | used for reporting errors in the original document. 51 | 52 | Returns: 53 | A tuple of two values. The first is the indented code block lines if they were 54 | found, otherwise it is an empty list. The second value is the remaining text. 55 | (If lines does not start with a link reference definition, it is the same as 56 | lines.) 57 | """ 58 | link_reference_definition: List[str] = [] 59 | remaining_lines = lines 60 | indexed_line_generator = enumerate(lines) 61 | 62 | index, line = next(indexed_line_generator) 63 | 64 | if get_indent(line) >= 4: 65 | return link_reference_definition, remaining_lines 66 | 67 | rest_of_line = line.lstrip() 68 | match = LINK_REFERENCE_DEFINITION_FIRST_ELEMENT_REGEX.match(rest_of_line) 69 | if not match: 70 | return link_reference_definition, remaining_lines 71 | 72 | rest_of_line = rest_of_line[match.end() :] 73 | url_and_title = rest_of_line.split(maxsplit=1) 74 | # At the end of this, index is set to the line with the beginning of the title_text 75 | # contains that first text. 
Is complete gets set from this loop when we know that 76 | # we have a valid title. In this first loop, we only set it when we know the lines 77 | # with the label and URL can stand on their own. 78 | # The later loops checks to ensure our closing quotation is the last non-whitespace 79 | # character on whatever line it ends on and the first occurence of that character, 80 | # unescaped. 81 | is_complete = False 82 | if len(url_and_title) == 2: 83 | # The label, URL, and possible title (or part of it) are on this line 84 | title_text = url_and_title[1] 85 | elif len(url_and_title) == 1: 86 | # Only the label and URL are on the first line 87 | try: 88 | index, line = next(indexed_line_generator) 89 | title_text = line 90 | except StopIteration: 91 | title_text = "" 92 | is_complete = True 93 | else: 94 | # Just the label was on the first line 95 | try: 96 | index, line = next(indexed_line_generator) 97 | except StopIteration: 98 | line = "" 99 | if line.startswith("[") or not line.strip(): 100 | # According to this standard, this is just paragraph text, but this tool 101 | # should be usable during development. 102 | # ToDo: Does that match up with our treatment of misquoted titles? 103 | logger.warning( 104 | "The text on line %d seems to be a link reference definition, but it " 105 | "does not contain a link. 
We will be treating it as if it were.", 106 | index, # We are just pass where the issue exists 107 | ) 108 | link_reference_definition = [lines[0]] 109 | remaining_lines = lines[1:] 110 | return link_reference_definition, remaining_lines 111 | elif len(line.split(maxsplit=1)) == 1: 112 | # Only the URL is on the second line 113 | index, line = next(indexed_line_generator) 114 | is_complete = True 115 | title_text = line 116 | else: 117 | # The URL and possible title (or part of it) are on the second line 118 | title_text = line.split(maxsplit=1)[1] 119 | 120 | if title_text.strip(): 121 | quotation_character = title_text[0] 122 | else: 123 | quotation_character = "NO QUOTE" 124 | 125 | if quotation_character in QUOTATION_CHARACTERS: 126 | closing_regex = re.compile(r"(? Tuple[List[str], List[str]]: 14 | ordered_list: List[str] = [] 15 | remaining_lines = lines 16 | indexed_line_generator = enumerate(lines) 17 | 18 | index, line = next(indexed_line_generator) 19 | if not is_ordered_list_start_line(line): 20 | return ordered_list, remaining_lines 21 | 22 | ordered_list.append(line) 23 | for index, line in indexed_line_generator: 24 | if ( 25 | is_blank_line_line(line) 26 | or is_table_start_line(line) 27 | or is_thematic_break_line(line) 28 | ): 29 | break 30 | else: 31 | ordered_list.append(line) 32 | else: 33 | # We consumed the last line, so increment our index to chop it off 34 | index += 1 35 | 36 | remaining_lines = remaining_lines[index:] 37 | return ordered_list, remaining_lines 38 | -------------------------------------------------------------------------------- /markflow/detectors/paragraph.py: -------------------------------------------------------------------------------- 1 | from typing import Generator, List, Tuple 2 | 3 | from ._lines import is_paragraph_start_line, is_setext_underline 4 | from .atx_heading import split_atx_heading 5 | from .blank_line import split_blank_line 6 | from .block_quote import split_block_quote 7 | from .bullet_list import 
def _is_paragraph_continuation_text(lines: List[str], line_offset: int = 0) -> bool:
    """Indicates whether the first line of lines would continue a paragraph

    This ensures that any valid interrupting section of a paragraph could not result in
    a valid block instead.

    We have a separate definition from the one used in block quote detection to avoid
    circular imports. That one also gets to skip block quote checking.

    Args:
        lines: The lines to evaluate.
        line_offset (optional): The offset into the overall document we are at. This is
            used for reporting errors in the original document.

    Returns:
        True if the first line would continue the paragraph. False otherwise.
    """
    for splitter in [
        split_atx_heading,
        split_blank_line,
        split_block_quote,
        split_bullet_list,
        split_fenced_code_block,
        split_ordered_list,
        split_table,
        split_thematic_break,
    ]:
        # ToDo: Disable logging?
        if splitter(lines, line_offset)[0]:
            return False
    if is_setext_underline(lines[0]):
        return False
    return True


def list_tail_generator(lines: List[str]) -> Generator[List[str], None, None]:
    """Generator that returns less and less of the end of a list

    The first value is a copy of the whole list. Each following value drops one more
    element from the front, until only the last element is left. An empty list yields
    nothing.

    Args:
        lines: The list to take tails of.

    Yields:
        Each non-empty tail of lines, longest first.
    """
    for start, _ in enumerate(lines):
        yield lines[start:]


def split_paragraph_ignoring_setext(
    lines: List[str], line_offset: int = 0
) -> Tuple[List[str], List[str]]:
    """Split a paragraph from beginning of lines if one exists

    Unlike split_paragraph, this does not take into account setext underlining. This is
    so that both detectors can share a common function.

    Args:
        lines: The lines to evaluate.
        line_offset (optional): The offset into the overall document we are at. This is
            used for reporting errors in the original document.

    Returns:
        A tuple of two values. The first is the paragraph lines if a paragraph was
        found, otherwise it is an empty list. The second value is the remaining text.
        (If lines does not start with a paragraph, it is the same as lines.)
    """
    paragraph_lines: List[str] = []
    remaining_lines = lines

    if is_paragraph_start_line(lines[0]):
        # ToDo: This should be handled in `wrap` as a double space is always a newline
        #  in any section type. Also add indents while you're there.
        if lines[0].endswith("  "):
            return [lines[0]], lines[1:]
        paragraph_lines.append(lines[0])
        tail_lines_generator = list_tail_generator(lines[1:])
        for tail in tail_lines_generator:
            if _is_paragraph_continuation_text(
                tail, line_offset + len(paragraph_lines)
            ):
                paragraph_lines.append(tail[0])
                # ToDo: This should be handled in `wrap` as a double space is always a
                #  newline in any section type.
                if tail[0].endswith("  "):
                    # The generator is exhausted when the break is on the very last
                    # line; default to "nothing left" rather than leaking
                    # StopIteration to the caller
                    remaining_lines = next(tail_lines_generator, [])
                    break
            else:
                remaining_lines = tail
                break
        else:
            # Every line was consumed by the paragraph
            remaining_lines = []

    return paragraph_lines, remaining_lines


def split_paragraph(
    lines: List[str], line_offset: int = 0
) -> Tuple[List[str], List[str]]:
    """Split a paragraph from beginning of lines if one exists

    A paragraph that is immediately followed by a setext underline is not reported
    here, since together they form a setext heading.

    Args:
        lines: The lines to evaluate.
        line_offset (optional): The offset into the overall document we are at. This is
            used for reporting errors in the original document.

    Returns:
        A tuple of two values. The first is the paragraph lines if a paragraph was
        found, otherwise it is an empty list. The second value is the remaining text.
        (If lines does not start with a paragraph, it is the same as lines.)
    """
    potential_paragraph, remaining_lines = split_paragraph_ignoring_setext(
        lines, line_offset
    )
    if remaining_lines and is_setext_underline(remaining_lines[0]):
        return [], lines
    return potential_paragraph, remaining_lines
def table_started(line: str, index: int, lines: List[str]) -> bool:
    """DEPRECATED

    Reports whether line, ignoring leading whitespace, begins with a pipe. The index
    and lines arguments are not used.
    """
    return line.lstrip()[:1] == "|"


def table_ended(line: str, index: int, lines: List[str]) -> bool:
    """DEPRECATED

    Reports whether line is not a table line, i.e. the opposite of table_started.
    The index and lines arguments are not used.
    """
    return not line.lstrip().startswith("|")


def split_table(lines: List[str], line_offset: int = 0) -> Tuple[List[str], List[str]]:
    """Split a leading table off of lines if there is one

    A table is the run of consecutive lines at the start of lines that each begin,
    after optional leading whitespace, with a pipe.

    Args:
        lines: The lines to evaluate.
        line_offset (optional): The offset into the overall document we are at.

    Returns:
        A tuple of two values. The first is the table lines, or an empty list if lines
        does not start with a table. The second is the remaining lines. (Without a
        table, that is lines itself.)
    """
    if not table_started(lines[0], 0, lines):
        return [], lines

    # Exclusive end of the table; the first line is already known to belong to it
    table_end = 1
    while table_end < len(lines) and table_started(
        lines[table_end], table_end, lines
    ):
        table_end += 1

    return lines[:table_end], lines[table_end:]
class MarkdownFormatException(Exception):
    """Raised if the passed in file is formatted incorrectly"""


class ReformatInconsistentException(RuntimeError):
    """Raised if a reformatted Markdown file would be reformatted differently

    If you get this error, you should open a bug report.
    """
class MarkdownATXHeading(MarkdownSection):
    """A single-line ATX heading such as `## Title`."""

    def _stripped_line(self) -> str:
        """Return the heading line without surrounding whitespace.

        Raises:
            RuntimeError: If no line has been added to the section yet.
        """
        if not self.lines:
            raise RuntimeError(
                f"Attempted access of uninitialized {self.__class__.__name__}."
            )
        return self.lines[0].strip()

    @property
    def content(self) -> str:
        """The heading text with the opening #'s and surrounding whitespace removed."""
        return self._stripped_line().lstrip("#").strip()

    @property
    def depth(self) -> int:
        """The heading level, i.e. the number of leading # characters."""
        stripped = self._stripped_line()
        return len(stripped) - len(stripped.lstrip("#"))

    def append(self, line: str) -> None:
        """Add the heading line.

        Raises:
            RuntimeError: If the section already holds a line.
        """
        if self.lines:
            raise RuntimeError(
                "Attempted to add another line to an ATX Header. They can only be one "
                "line."
            )
        self.lines.append(line)

    def reformatted(self, width: Number = 88) -> str:
        """Return the heading as `<#'s> <content>`.

        Args:
            width: Unused. Headings are never wrapped.
        """
        # TODO: This prints out twice. We probably need a first pass step that calls out
        # errors we will be fixing to suppress extra statements from reprocessing the
        # document.
        after_marker = self._stripped_line().lstrip("#")
        # An empty heading ("#") has nothing after the marker, so there is no missing
        # space to warn about.
        if after_marker and not after_marker.startswith(" "):
            logger.warning(
                "Line %d is an ATX Header without a space after #'s. This has been "
                "corrected.",
                self.line_index + 1,
            )
        # rstrip keeps an empty heading from being emitted with trailing whitespace.
        return ("#" * self.depth + " " + self.content).rstrip()

    def __repr__(self) -> str:
        return (
            f"<"
            f"{self.__class__.__name__}: "
            f"depth={repr(self.depth)}; "
            f"content={repr(truncate_str(self.content, REPR_CONTENT_LEN))}"
            f">"
        )
class MarkdownBlankLine(MarkdownSection):
    """A section holding exactly one whitespace-only line."""

    def append(self, line: str) -> None:
        """Store the blank line.

        Raises:
            RuntimeError: If `line` has non-whitespace characters or the section
                already holds a line.
        """
        kind = self.__class__.__name__
        has_content = bool(line.strip())
        if has_content:
            raise RuntimeError(
                f"A line with non-whitespace characters has been added to a "
                f"`{kind}`. Please open a bug report or email "
                f"jholland@duosecurity.com."
            )
        already_filled = bool(self.lines)
        if already_filled:
            raise RuntimeError(
                f"`{kind}`s can only contain one line. Please open "
                f"a bug report or email jholland@duosecurity.com."
            )
        self.lines.append(line)

    def reformatted(self, width: Number = 88) -> str:
        """Return an empty string; `width` is ignored."""
        # The new line will be added on join
        return ""

    def __repr__(self) -> str:
        return "<" + self.__class__.__name__ + ">"
def _reformat_markdown(lines: List[str], width: Number) -> str:
    """Reformat `lines` as Markdown text wrapped to `width`."""
    # Prevents issues from circular imports. Since this module would already be loaded
    # whenever we call this function, we know it's cached.
    from ..reformat_markdown import _reformat_markdown_text

    with redirect_info_logs_to_debug():
        text = _reformat_markdown_text("\n".join(lines) + "\n", width)

    return text


class MarkdownBlockQuote(MarkdownSection):
    """A block quote whose content is reformatted recursively as Markdown."""

    @property
    def first_line(self) -> str:
        """The first raw line of the quote."""
        return self.lines[0]

    def append(self, line: str) -> None:
        self.lines.append(line)

    def reformatted(self, width: Number = 88) -> str:
        """Strip one level of quoting, reformat the content, and re-quote it.

        Args:
            width: The maximum line width of the result, quote marker included.

        Returns:
            The reformatted quote. Lines are never indented, because each output
            line is stripped after the marker is prepended.
        """
        # Lines without an explicit marker inherit the depth of the last explicit line,
        # so that removing one level below treats them the same way.
        depth = 0
        fully_quoted_lines: List[str] = []
        for line in self.lines:
            if is_explicit_block_quote_line(line):
                spaceless_string = "".join(line.split())
                depth = len(spaceless_string) - len(spaceless_string.lstrip(">"))
                fully_quoted_lines.append(line)
            else:
                fully_quoted_lines.append((">" * depth) + line)

        stripped_lines: List[str] = [
            LEADING_QUOTE_MARKER.sub("", line) for line in fully_quoted_lines
        ]

        # The first non-blank line indented by zero or one space decides whether the
        # quote puts a space after its marker.
        has_space = False
        for line in stripped_lines:
            if not line.strip():
                continue
            line_indent = get_indent(line)
            if line_indent == 1:
                has_space = True
                break
            if line_indent == 0:
                break

        if has_space:
            stripped_lines = [
                line[1:] if line[:1] == " " else line for line in stripped_lines
            ]

        # The emitted lines are stripped, so the original indentation of the quote
        # never reaches the output. The wrap budget is therefore the full width minus
        # the marker. (Subtracting a reversed, non-positive indent here used to widen
        # the budget for indented quotes and made the output overlong.)
        prefix = "> " if has_space else ">"
        sub_width = width - len(prefix)

        # ToDo (jmholla): Issues with leading > in paragraphs will be handled by a later
        # change.
        text = _reformat_markdown(stripped_lines, width=sub_width)
        return "\n".join((prefix + line).strip() for line in text.splitlines())

    def __repr__(self) -> str:
        first_line = truncate_str(self.first_line, REPR_CONTENT_LEN)
        return f"<{self.__class__.__name__}: first_line={repr(first_line)}>"
class MarkdownFencedCodeBlock(MarkdownSection):
    """A fenced code block; its first line is the opening fence."""

    @property
    def fence_char(self) -> str:
        """The character the opening fence is made of."""
        return self.lines[0].strip()[0]

    @property
    def fence_count(self) -> int:
        """The number of fence characters in the opening fence."""
        opening = self.lines[0].strip()
        return len(opening) - len(opening.lstrip(self.fence_char))

    @property
    def first_line(self) -> Optional[str]:
        """The stripped first content line, or None if there is none.

        A block of two or fewer lines is treated as having no content. This also
        covers a block that so far holds only its opening fence.
        """
        if len(self.lines) <= 2:
            return None
        return self.lines[1].strip()

    @property
    def language(self) -> str:
        """The info string that follows the opening fence (may be empty)."""
        return self.lines[0].strip().lstrip(self.fence_char).strip()

    def append(self, line: str) -> None:
        self.lines.append(line)

    def reformatted(self, width: Number = 88) -> str:
        """Return the block with normalized fences and trailing whitespace removed.

        Args:
            width: Unused. Code is never wrapped.
        """
        fence = self.fence_char * self.fence_count
        # NOTE(review): the last stored line is assumed to be the closing fence and is
        # replaced; confirm the parser never hands over an unclosed block.
        new_lines = [fence + self.language] + self.lines[1:-1] + [fence]
        return "\n".join([line.rstrip() for line in new_lines])

    def __repr__(self) -> str:
        first_line = self.first_line
        if first_line is not None:
            first_line = truncate_str(first_line, REPR_CONTENT_LEN)
        return (
            f"<"
            f"{self.__class__.__name__}: "
            f"fence_char={repr(self.fence_char)}; "
            f"fence_count={repr(self.fence_count)}; "
            f"language={repr(self.language)}; "
            f"first_line={repr(first_line)}"
            f">"
        )
class MarkdownIndentedCodeBlock(MarkdownSection):
    """An indented code block; its lines are kept apart from trailing whitespace."""

    @property
    def first_line(self) -> str:
        """The first line with surrounding whitespace removed."""
        opening = self.lines[0]
        return opening.strip()

    def append(self, line: str) -> None:
        self.lines.append(line)

    def reformatted(self, width: Number = 88) -> str:
        """Return the lines joined by newlines with trailing whitespace stripped.

        Args:
            width: Unused. Code is never wrapped.
        """
        trimmed = []
        for raw in self.lines:
            trimmed.append(raw.rstrip())
        return "\n".join(trimmed)

    def __repr__(self) -> str:
        preview = truncate_str(self.first_line, REPR_CONTENT_LEN)
        return f"<{self.__class__.__name__}: first_line={preview!r}>"
POST_COLON_SPACE_REGEX = re.compile(r":\s+")
REPR_CONTENT_LEN = 20
# Captures the label ("name"), the destination ("link"), and whatever follows it
# ("title", possibly empty) of a definition whose lines were joined with spaces.
LINK_REFERENCE_DEFINITION_REGEX = re.compile(
    r"\["
    r"(?P<name>[^\]]{1,999})"
    r"\]:"
    r"\s*"
    r"(?P<link>[^\s]*)"
    r"\s*"
    r"(?P<title>.*)"
)


class MarkdownLinkReferenceDefinition(MarkdownSection):
    """A link reference definition: `[name]: link title`."""

    def _match(self) -> "re.Match[str]":
        """Match the joined lines against the definition pattern.

        Raises:
            RuntimeError: If the lines do not form a link reference definition.
        """
        match = LINK_REFERENCE_DEFINITION_REGEX.search(" ".join(self.lines))
        if match is None:
            raise RuntimeError(
                "Invalid link reference definition created. Please open a bug report "
                "or email jholland@duosecurity.com."
            )
        return match

    @property
    def name(self) -> str:
        """The link label, without the surrounding brackets."""
        return self._match().group("name")

    @property
    def link(self) -> str:
        """The link destination (empty if none was given)."""
        return self._match().group("link")

    @property
    def title(self) -> str:
        """The link title with runs of whitespace collapsed to single spaces."""
        title = self._match().group("title")
        return " ".join(title.split())

    def append(self, line: str) -> None:
        self.lines.append(line)

    def reformatted(self, width: Number = 88) -> str:
        """Return the definition, wrapping only the title.

        The link and title stay on the line the author put them on whenever
        possible. The label and link are never wrapped, even if they overflow.

        Args:
            width: The width the title is wrapped to.
        """
        # Last index indicates that last line we checked for content
        last_index = 0
        link = self.link
        str_ = f"[{self.name}]:"
        if not link:
            return str_

        if link in self.lines[last_index]:
            str_ += f" {link}"
        else:
            last_index = 1
            str_ += f"\n{link}"

        title = self.title
        if not title:
            return str_

        # We don't naively wrap link reference definitions because they are allowed to
        # overflow lines (the label and url portions).
        first_word = title.split()[0]
        lines = str_.splitlines()
        title_shared_line = first_word in self.lines[last_index]
        if title_shared_line and len(first_word) + len(lines[-1]) <= width:
            # Our title was on the line with our link and still fits there.
            return "\n".join(lines[:-1] + [wrap(lines[-1] + " " + title, width)])

        # Otherwise the title goes on its own line(s). It is appended, so the label
        # and link built above are kept.
        return str_ + "\n" + wrap(title, width)

    def __repr__(self) -> str:
        return (
            f"<"
            f"{self.__class__.__name__}: "
            f"name={repr(truncate_str(self.name, REPR_CONTENT_LEN))} "
            f"link={repr(truncate_str(self.link, REPR_CONTENT_LEN))} "
            f"title={repr(truncate_str(self.title, REPR_CONTENT_LEN))}"
            f">"
        )
MARKER_REGEX = re.compile(
    r"^\s*"  # Leading spaces are allowed and often expected
    r"("
    r"\*|"  # Asterisk list marker
    r"-|"  # Dash list marker
    r"\+|"  # Plus list marker
    r"[0-9]+\."  # Numeric list marker
    r")"
    r"($| )"  # End of line or space
)
CODE_BLOCK_FENCES = "`~"
REPR_CONTENT_LEN = 20


def _reformat_markdown(lines: List[str], width: Number) -> str:
    """Reformat `lines` as Markdown text wrapped to `width`."""
    # Prevents issues from circular imports. Since this module would already be loaded
    # whenever we call this function, we know it's cached.
    from ..reformat_markdown import _reformat_markdown_text

    with redirect_info_logs_to_debug():
        text = _reformat_markdown_text("\n".join(lines) + "\n", width)

    return text


def _list_marker_end_pos(line: str) -> int:
    """Return the number of characters before the end of a list marker

    Note: This does not include the trailing space in the count.

    Args:
        line: The line to evaluate.

    Returns:
        The index just past the list marker, counting any leading whitespace.

    Raises:
        RuntimeError: If the line does not start with a list marker.
    """

    match = MARKER_REGEX.match(line)
    if match is None:
        raise RuntimeError(
            "Attempted to find the end of a list marker on a line that doesn't have "
            "one. Please open a bug report or email jholland@duosecurity.com."
        )
    return match.end(1)


def _split_list(lines: List[str]) -> List[List[str]]:
    """Group the lines of a list into one list of lines per top-level entry.

    A marker line starts a new entry when it is indented no further than the end of
    the previous entry's marker. Every other line, including marker lines inside
    fenced code blocks, is attached to the current entry.

    Args:
        lines: The lines of the list. The first line must start with a list marker.

    Returns:
        The lines of each entry, in order.
    """
    in_code_block = False
    open_symbol = ""
    open_length = 0

    list_entries: List[List[str]] = []
    max_indent = _list_marker_end_pos(lines[0])
    for line in lines:
        stripped = line.lstrip()
        if any(stripped.startswith(f * 3) for f in CODE_BLOCK_FENCES):
            symbol = stripped[0]
            # Measure the fence on the stripped line. Stripping fence characters from
            # the raw line removes nothing whenever the fence is indented.
            length = len(stripped) - len(stripped.lstrip(symbol))
            if not in_code_block:
                in_code_block = True
                open_symbol, open_length = symbol, length
            elif symbol == open_symbol and length >= open_length:
                # A block is closed by a fence of the same character that is at least
                # as long as the opening fence.
                in_code_block = False

        if MARKER_REGEX.match(line) and not in_code_block:
            line_indent = get_indent(line)
            list_indent = _list_marker_end_pos(line)
            if line_indent <= max_indent:
                max_indent = list_indent
                list_entries.append([line])
            else:
                # Indented past the current marker: a nested entry.
                list_entries[-1].append(line)
        else:
            list_entries[-1].append(line)
    return list_entries
def _dedent_entries(list_entries: List[List[str]]) -> List[List[str]]:
    """Remove the list marker and the matching indentation from every entry.

    Args:
        list_entries: The lines of each entry; each first line starts with a marker.

    Returns:
        The entries with the marker (and the space after it) cut from the first line
        and up to that many leading whitespace characters cut from the others.
    """
    # ToDo: Should we handle missing spaces? I don't think so. Think:
    #   *read*
    dedented_entries: List[List[str]] = []
    for entry in list_entries:
        indent = _list_marker_end_pos(entry[0]) + 1
        dedented = [entry[0][indent:]]
        # Only whitespace within the marker's width is removed, so deeper indentation
        # (nested lists, code) keeps its relative offset.
        dedented.extend(line[:indent].lstrip() + line[indent:] for line in entry[1:])
        dedented_entries.append(dedented)
    return dedented_entries


def _format_entry(marker: str, entry: List[str], indent: int, width: Number) -> str:
    """Reformat one dedented entry and put `marker` in front of it.

    Continuation lines are indented by `indent` spaces.
    """
    with redirect_info_logs_to_debug():
        body = _reformat_markdown(entry, width - indent).rstrip("\n")
    return marker + " " + ("\n" + indent * " ").join(body.split("\n"))


class MarkdownBulletList(MarkdownSection):
    """A bullet list; every entry is rewritten with the first entry's marker."""

    @property
    def marker(self) -> str:
        """The first non-whitespace character of the first line."""
        return self.lines[0].lstrip()[0]

    @property
    def first_line(self) -> str:
        return self.lines[0]

    def append(self, line: str) -> None:
        self.lines.append(line)

    def reformatted(self, width: Number = 88) -> str:
        """Return the list with each entry reformatted to fit `width`."""
        # '* '
        toplevel_indent = 2
        dedented_entries = _dedent_entries(_split_list(self.lines))
        return "\n".join(
            _format_entry(self.marker, entry, toplevel_indent, width)
            for entry in dedented_entries
        )

    def __repr__(self) -> str:
        first_line = truncate_str(self.first_line, REPR_CONTENT_LEN)
        return (
            f"<{self.__class__.__name__}: "
            f"marker={repr(self.marker)}; "
            f"first_line={repr(first_line)}>"
        )


class MarkdownOrderedList(MarkdownSection):
    """An ordered list; entries are renumbered consecutively from the first number."""

    @property
    def first_number(self) -> int:
        """The number of the first entry."""
        lstripped_line = self.lines[0].lstrip()
        digit_count = len(lstripped_line) - len(lstripped_line.lstrip(string.digits))
        return int(lstripped_line[:digit_count])

    @property
    def first_line(self) -> str:
        return self.lines[0]

    def append(self, line: str) -> None:
        self.lines.append(line)

    def reformatted(self, width: Number = 88) -> str:
        """Return the list renumbered, with each entry reformatted to fit `width`."""
        list_entries = _split_list(self.lines)
        # '99. ': the widest number, the period, and the space
        toplevel_indent = len(str(self.first_number + len(list_entries) - 1)) + 2
        dedented_entries = _dedent_entries(list_entries)
        return "\n".join(
            _format_entry(f"{entry_number}.", entry, toplevel_indent, width)
            for entry_number, entry in enumerate(
                dedented_entries, start=self.first_number
            )
        )

    def __repr__(self) -> str:
        first_line = truncate_str(self.first_line, REPR_CONTENT_LEN)
        return (
            f"<{self.__class__.__name__}: "
            f"first_number={repr(self.first_number)}; "
            f"first_line={repr(first_line)}>"
        )
15 | return " ".join([line.strip() for line in self.lines]) 16 | 17 | def append(self, line: str) -> None: 18 | self.lines.append(line) 19 | 20 | def reformatted(self, width: Number = 88) -> str: 21 | text = wrap(self.content, width) 22 | if self.lines[-1].endswith(" "): 23 | text += " " 24 | return text 25 | 26 | def __repr__(self) -> str: 27 | return ( 28 | f"<" 29 | f"{self.__class__.__name__}: " 30 | f"content={repr(truncate_str(self.content, REPR_CONTENT_LEN))}" 31 | f">" 32 | ) 33 | -------------------------------------------------------------------------------- /markflow/formatters/setext_heading.py: -------------------------------------------------------------------------------- 1 | """ 2 | 4.3 Setext headings 3 | 4 | A setext heading consists of one or more lines of text, each containing at least one 5 | non-whitespace character, with no more than 3 spaces indentation, followed by a setext 6 | heading underline. The lines of text must be such that, were they not followed by the 7 | setext heading underline, they would be interpreted as a paragraph: they cannot be 8 | interpretable as a code fence, ATX heading, block quote, thematic break, list item, or 9 | HTML block. 10 | 11 | A setext heading underline is a sequence of = characters or a sequence of - characters, 12 | with no more than 3 spaces indentation and any number of trailing spaces. If a line 13 | containing a single - can be interpreted as an empty list items, it should be 14 | interpreted this way and not as a setext heading underline. 15 | 16 | The heading is a level 1 heading if = characters are used in the setext heading 17 | underline, and a level 2 heading if - characters are used. The contents of the heading 18 | are the result of parsing the preceding lines of text as CommonMark inline content. 19 | 20 | In general, a setext heading need not be preceded or followed by a blank line. 
class MarkdownSetextHeading(MarkdownSection):
    """A setext heading: one or more text lines followed by an underline."""

    def _require_initialized(self) -> None:
        """Raise RuntimeError unless both text and underline have been added."""
        if len(self.lines) >= 2:
            return
        raise RuntimeError(
            f"Attempted access of uninitialized {self.__class__.__name__}."
        )

    @property
    def char(self) -> str:
        """The first non-whitespace character of the underline."""
        self._require_initialized()
        underline = self.lines[-1].strip()
        return underline[0]

    @property
    def content(self) -> str:
        """The heading text lines, stripped and joined with single spaces."""
        self._require_initialized()
        text_lines = self.lines[:-1]
        return " ".join(map(str.strip, text_lines))

    def append(self, line: str) -> None:
        self.lines.append(line)

    def reformatted(self, width: Number = 88) -> str:
        """Return the wrapped heading underlined to the length of its longest line."""
        wrapped = wrap(self.content, width)
        longest = max(map(len, wrapped.splitlines()))
        underline = self.char * longest
        return f"{wrapped}\n{underline}"

    def __repr__(self) -> str:
        char = self.char
        preview = truncate_str(self.content, REPR_CONTENT_LEN)
        return f"<{self.__class__.__name__}: char={char!r}; content={preview!r}>"
class Alignment(enum.Enum):
    """Column alignment as declared by the colons in a table's divider row."""

    NONE = "none"
    CENTER = "center"
    LEFT = "left"
    RIGHT = "right"


def center_align(text: str, width: int) -> str:
    """Center ``text`` in ``width`` columns; an odd leftover space goes on the right."""
    padding = width - len(text)
    leading_padding = padding // 2
    trailing_padding = padding - leading_padding
    return f"{' ' * leading_padding}{text}{' ' * trailing_padding}"


def left_align(text: str, width: int) -> str:
    """Pad ``text`` on the right with spaces up to ``width`` columns."""
    padding = width - len(text)
    return f"{text}{' ' * padding}"


def right_align(text: str, width: int) -> str:
    """Pad ``text`` on the left with spaces up to ``width`` columns."""
    padding = width - len(text)
    return f"{' ' * padding}{text}"


def _split_row(line: str) -> list:
    """Return the stripped cells of a table row, splitting only on unescaped pipes.

    The first and last split results (the text outside the outer pipes) are dropped.
    """
    return [cell.strip() for cell in COLUMN_DIVIDER_REGEX.split(line)[1:-1]]


def _align(text: str, width: int, alignment: Alignment, unaligned: Alignment) -> str:
    """Pad ``text`` to ``width``; ``Alignment.NONE`` is treated as ``unaligned``."""
    if alignment == Alignment.NONE:
        alignment = unaligned
    if alignment == Alignment.CENTER:
        return center_align(text, width)
    if alignment == Alignment.RIGHT:
        return right_align(text, width)
    return left_align(text, width)


class MarkdownTable(MarkdownSection):
    """Formatter that pads every column of a pipe table to a common width."""

    def append(self, line: str) -> None:
        """Add one more raw line to this section."""
        self.lines.append(line)

    def reformatted(self, width: Number = 88) -> str:
        """Return the table with each column padded to its widest cell.

        ``self.lines[0]`` is the header row, ``self.lines[1]`` the divider row and
        the remaining lines are body rows. Columns without an explicit alignment
        get a centered header and left-aligned body cells. ``width`` is not used.

        Raises:
            MarkdownFormatException: A body row has a different number of columns
                than the header row.
        """
        # Split every non-divider row exactly once, honoring escaped pipes. The same
        # cells are used for validation, width computation and rendering, so a
        # cell containing "\|" can no longer be measured one way and split another.
        rows = [_split_row(line) for i, line in enumerate(self.lines) if i != 1]

        column_alignments = []
        for divider in self.lines[1].strip()[1:-1].split("|"):
            divider = divider.strip()
            if divider.startswith(":") and divider.endswith(":"):
                column_alignments.append(Alignment.CENTER)
            elif divider.startswith(":"):
                column_alignments.append(Alignment.LEFT)
            elif divider.endswith(":"):
                column_alignments.append(Alignment.RIGHT)
            else:
                column_alignments.append(Alignment.NONE)

        header_column_count = len(rows[0])
        # rows[1] is self.lines[2] (the divider was skipped), hence start=2.
        for i, row in enumerate(rows[1:], start=2):
            if len(row) != header_column_count:
                raise MarkdownFormatException(
                    f"Line {self.line_index + i + 1} has unexpected number of columns "
                    f"(expected: {header_column_count}, actual: {len(row)})"
                )

        column_widths = [
            max(len(row[column]) for row in rows)
            for column in range(header_column_count)
        ]

        # Header row: unaligned columns are centered.
        header_cells = [
            f" {_align(cell, cell_width, alignment, Alignment.CENTER)} "
            for cell, cell_width, alignment in zip(
                rows[0], column_widths, column_alignments
            )
        ]
        new_lines = ["|" + "|".join(header_cells) + "|"]

        # Divider row: colons mark the aligned side(s); two extra characters cover
        # the single-space padding around each cell.
        dashes = []
        for cell_width, alignment in zip(column_widths, column_alignments):
            left = ":" if alignment in (Alignment.CENTER, Alignment.LEFT) else "-"
            right = ":" if alignment in (Alignment.CENTER, Alignment.RIGHT) else "-"
            dashes.append(f"{left}{'-' * cell_width}{right}")
        new_lines.append(f"|{'|'.join(dashes)}|")

        # Body rows: unaligned columns are left-aligned.
        for row in rows[1:]:
            cells = [
                f" {_align(cell, cell_width, alignment, Alignment.LEFT)} "
                for cell, cell_width, alignment in zip(
                    row, column_widths, column_alignments
                )
            ]
            new_lines.append(f"|{'|'.join(cells)}|")

        return "\n".join(new_lines)

    def __repr__(self) -> str:
        return f"<{self.__class__.__name__}: ToDo>"
class MarkdownThematicBreak(MarkdownSection):
    """Formatter for a single-line thematic break."""

    @property
    def char(self) -> str:
        """First non-whitespace character of the break line."""
        # No validation here: the line is assumed to be a valid thematic break.
        first_line = self.lines[0]
        return first_line.strip()[0]

    def append(self, line: str) -> None:
        """Store the break line; a second line is rejected."""
        if self.lines:
            raise RuntimeError("Thematic breaks cannot span multiple lines")
        self.lines.append(line)

    def reformatted(self, width: Number = 88) -> str:
        """Return the break character repeated ``width`` times.

        An infinite width yields three characters; any other float is rejected.
        """
        if not isinstance(width, float):
            return self.char * width
        if width != math.inf:
            raise RuntimeError(
                f"Invalid width {width!r} passed. How did you manage this?"
            )
        return self.char * 3

    def __repr__(self) -> str:
        name = self.__class__.__name__
        return f"<{name}: char={self.char!r}>"
"Link Reference Definition" 33 | ORDERED_LIST = "Ordered List" 34 | PARAGRAPH = "Paragraph" 35 | SETEXT_HEADING = "Setext Heading" 36 | TABLE = "Table" 37 | THEMATIC_BREAK = "Thematic Break" 38 | 39 | 40 | SPLITTERS: List[Tuple[MarkdownSectionEnum, SplitFunc]] = [ 41 | (MarkdownSectionEnum.ATX_HEADING, split_atx_heading), 42 | (MarkdownSectionEnum.BLANK_LINE, split_blank_line), 43 | (MarkdownSectionEnum.BLOCK_QUOTE, split_block_quote), 44 | (MarkdownSectionEnum.BULLET_LIST, split_bullet_list), 45 | (MarkdownSectionEnum.FENCED_CODE_BLOCK, split_fenced_code_block), 46 | (MarkdownSectionEnum.INDENTED_CODE_BLOCK, split_indented_code_block), 47 | (MarkdownSectionEnum.LINK_REFERENCE_DEFINITION, split_link_reference_definition), 48 | (MarkdownSectionEnum.ORDERED_LIST, split_ordered_list), 49 | (MarkdownSectionEnum.PARAGRAPH, split_paragraph), 50 | (MarkdownSectionEnum.SETEXT_HEADING, split_setext_heading), 51 | (MarkdownSectionEnum.TABLE, split_table), 52 | (MarkdownSectionEnum.THEMATIC_BREAK, split_thematic_break), 53 | ] 54 | 55 | 56 | def parse_markdown(lines: List[str]) -> List[Tuple[MarkdownSectionEnum, List[str]]]: 57 | sections: List[Tuple[MarkdownSectionEnum, List[str]]] = [] 58 | remaining_lines = lines 59 | current_line = 1 60 | 61 | while remaining_lines: 62 | for section_type, splitter in SPLITTERS: 63 | section_content, remaining_lines = splitter(remaining_lines) 64 | if section_content: 65 | content_length = len(section_content) 66 | if content_length > 1: 67 | log_text = ( 68 | f"Lines {current_line}-{current_line + content_length - 1}" 69 | ) 70 | else: 71 | log_text = f"Line {current_line}" 72 | logger.debug( 73 | "%s: %s", log_text, section_type.value, 74 | ) 75 | sections.append((section_type, section_content)) 76 | current_line += len(section_content) 77 | break 78 | else: 79 | raise RuntimeError( 80 | f"Could not determine section type on line {current_line}", 81 | ) 82 | 83 | return sections 84 | 
def _reformat_markdown_text(text: str, width: Number = 88, line_index: int = 0) -> str:
    """Parse ``text`` into sections and return them reformatted and joined.

    Args:
        text: The Markdown text to reformat.
        width: Passed to each section formatter's ``reformatted``.
        line_index: Zero-based index of the first line of ``text``; used for the
            formatters' positions and for log messages.

    Returns:
        The reformatted sections joined by newlines, with a trailing newline.
    """
    sections = parse_markdown(text.splitlines())

    formatters = []
    last_section_type = MarkdownSectionEnum.INVALID

    for section_type, section_content in sections:
        formatter = FORMATTERS[section_type](line_index, section_content)
        content_length = len(section_content)
        if content_length > 1:
            log_text = f"Lines {line_index + 1}-{line_index + content_length}"
        else:
            log_text = f"Line {line_index + 1}"
        # %r defers the repr() call until the record is actually emitted.
        logger.info("%s: %r", log_text, formatter)
        if (
            section_type == MarkdownSectionEnum.SETEXT_HEADING
            and last_section_type == MarkdownSectionEnum.BLOCK_QUOTE
        ):
            # A blank line is inserted between a block quote and the setext heading
            # that directly follows it.
            logger.warning(
                "Adding a new line before setext heading on line %s", line_index + 1
            )
            formatters.append(
                FORMATTERS[MarkdownSectionEnum.BLANK_LINE](line_index, [""])
            )
        formatters.append(formatter)
        line_index += content_length

        last_section_type = section_type

    return "\n".join(f.reformatted(width) for f in formatters) + "\n"


def reformat_markdown_text(text: str, width: Number = 88) -> str:
    """Reformat a block of markdown text

    See the README for how the Markdown text gets reformatted.

    Args:
        text: The Markdown text to rerender
        width: The maximum line length. Note, for tables and code blocks, this length
            is not enforced as that would change the document's appearance when
            rendered.

    Returns:
        The reformatted Markdown text

    Raises:
        ReformatInconsistentException: Reformatting the reformatted text produced a
            different result.
    """
    new_text = _reformat_markdown_text(text, width)

    # Remember the level configured on this logger itself (possibly NOTSET) rather
    # than the effective one, so restoring it does not pin an inherited level.
    configured_level = logger.level
    # Mute logging during second pass since it means nothing to the user.
    if logger.getEffectiveLevel() > logging.DEBUG:
        logger.setLevel(logging.ERROR)
    try:
        new_new_text = _reformat_markdown_text(new_text, width)
    finally:
        # Restore even if the second pass raises, so later calls are not muted.
        logger.setLevel(configured_level)

    if new_new_text != new_text:
        raise ReformatInconsistentException(
            "Reformat of reformatted code results in different text. Please open a bug "
            "report or email jholland@duosecurity.com."
        )
    return new_text.rstrip() + "\n"
5 | authors = ["Joshua Holland <jholland@duosecurity.com>"] 6 | 7 | [tool.poetry.dependencies] 8 | python = "^3.6" 9 | pygments = "*" 10 | rich = "*" 11 | 12 | [tool.poetry.dev-dependencies] 13 | black = "==19.10b0" 14 | commonmark = "*" 15 | flake8 = "*" 16 | isort = "*" 17 | mypy = "*" 18 | pytest = "*" 19 | pytest-cov = "*" 20 | 21 | [build-system] 22 | requires = ["poetry>=0.12"] 23 | build-backend = "poetry.masonry.api" 24 | 25 | [tool.poetry.scripts] 26 | markflow = "markflow.__main__:__main__" 27 | -------------------------------------------------------------------------------- /stubs/commonmark.pyi: -------------------------------------------------------------------------------- 1 | def commonmark(text: str, format: str = ...) -> str: ... 2 | -------------------------------------------------------------------------------- /stubs/pytest.pyi: -------------------------------------------------------------------------------- 1 | from typing import Any, Callable, Iterable, List, Optional, Union 2 | 3 | ExceptionClass = type 4 | 5 | class MarkGenerator: 6 | def __getattr__(self, name: str) -> Any: ... 7 | @staticmethod 8 | def parametrize( 9 | argnames: str, 10 | argvalues: Union[List[Any], List[Iterable[Any]]], 11 | indirect: bool = ..., 12 | ids: Optional[Union[List[str], Callable[[Any], Optional[str]]]] = ..., 13 | scope: Optional[str] = ..., 14 | ) -> Callable[..., Any]: ... 15 | def xfail( 16 | self, 17 | condition: Optional[bool] = ..., 18 | reason: Optional[str] = ..., 19 | raises: Optional[ExceptionClass] = ..., 20 | run: bool = ..., 21 | strict: bool = ..., 22 | ) -> Callable[..., Any]: ... 23 | 24 | def xfail(reason: str = ...) -> None: ... 
25 | 26 | mark: MarkGenerator 27 | -------------------------------------------------------------------------------- /stubs/rich/__init__.pyi: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/duo-labs/markflow/14e24f216541f0ad5705ad4d0890258dc1376fa1/stubs/rich/__init__.pyi -------------------------------------------------------------------------------- /stubs/rich/console.pyi: -------------------------------------------------------------------------------- 1 | from typing import IO, Any, AnyStr, Callable, Dict, Optional, Text, Union 2 | 3 | from rich.highlighter import ReprHighlighter 4 | from rich.style import Style 5 | from typing_extensions import Literal 6 | 7 | HighlighterType = Callable[[Union[str, Text]], Text] 8 | JustifyValues = Optional[Literal["left", "center", "right", "full"]] 9 | 10 | class Console: 11 | def __init__( 12 | self, 13 | color_system: Optional[ 14 | Literal["auto", "standard", "256", "truecolor", "windows"] 15 | ] = ..., 16 | styles: Optional[Dict[str, Style]] = ..., 17 | file: Optional[IO[AnyStr]] = ..., 18 | width: Optional[int] = ..., 19 | height: Optional[int] = ..., 20 | record: bool = ..., 21 | markup: bool = ..., 22 | log_time: bool = ..., 23 | log_path: bool = ..., 24 | log_time_format: str = ..., 25 | highlighter: Optional[HighlighterType] = ..., 26 | ): ... 27 | def print( 28 | self, 29 | *objects: Any, 30 | sep: str = ..., 31 | end: str = ..., 32 | style: Optional[Union[str, Style]] = ..., 33 | emoji: bool = ..., 34 | highlight: bool = ..., 35 | ) -> None: ... 36 | -------------------------------------------------------------------------------- /stubs/rich/highlighter.pyi: -------------------------------------------------------------------------------- 1 | from typing import Text, Union 2 | 3 | class Highlighter: 4 | def __call__(self, text: Union[str, Text]) -> Text: ... 5 | def highlight(self, text: Text) -> None: ... 
6 | 7 | # It doesn't derive from Highlighter directly, but that doesn't matter to us. 8 | class ReprHighlighter(Highlighter): ... 9 | -------------------------------------------------------------------------------- /stubs/rich/logging.pyi: -------------------------------------------------------------------------------- 1 | from logging import Handler 2 | 3 | class RichHandler(Handler): ... 4 | -------------------------------------------------------------------------------- /stubs/rich/markdown.pyi: -------------------------------------------------------------------------------- 1 | from typing import Union 2 | 3 | from rich.console import JustifyValues 4 | from rich.style import Style 5 | 6 | class Markdown: 7 | def __init__( 8 | self, 9 | markup: str, 10 | code_theme: str = ..., 11 | justify: JustifyValues = ..., 12 | style: Union[str, Style] = ..., 13 | ) -> None: ... 14 | -------------------------------------------------------------------------------- /stubs/rich/style.pyi: -------------------------------------------------------------------------------- 1 | class Style: ... 2 | -------------------------------------------------------------------------------- /tests/__init__.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/duo-labs/markflow/14e24f216541f0ad5705ad4d0890258dc1376fa1/tests/__init__.py -------------------------------------------------------------------------------- /tests/files/0000_in_base.md: -------------------------------------------------------------------------------- 1 | This file has a really long line that should be broken up. This is a pretty simple test to ensure that wrapping doesn't weirdly get broken. It's also missing a trailing new line. 
-------------------------------------------------------------------------------- /tests/files/0000_out_base.md: -------------------------------------------------------------------------------- 1 | This file has a really long line that should be broken up. This is a pretty simple test 2 | to ensure that wrapping doesn't weirdly get broken. It's also missing a trailing new 3 | line. 4 | -------------------------------------------------------------------------------- /tests/files/0001_in_blank.md: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/duo-labs/markflow/14e24f216541f0ad5705ad4d0890258dc1376fa1/tests/files/0001_in_blank.md -------------------------------------------------------------------------------- /tests/files/0001_out_blank.md: -------------------------------------------------------------------------------- 1 | 2 | -------------------------------------------------------------------------------- /tests/files/0002_in_lists.md: -------------------------------------------------------------------------------- 1 | * I am a list that is pretty badly 2 | formatted 3 | * There are all sorts of problems that don't make this look very nice, like bullets that 4 | break across lines and missing spaces. 5 | * Which also is a problem when working with nested lists since they could be missing 6 | leading spaces and make things look extra confusing. 7 | 8 | - I am a list that is pretty badly 9 | formatted 10 | - There are all sorts of problems that don't make this look very nice, like bullets that 11 | break across lines and missing spaces. 12 | - Which also is a problem when working with nested lists since they could be missing 13 | leading spaces and make things look extra confusing. 14 | 15 | + I am a list that is pretty badly 16 | formatted 17 | + There are all sorts of problems that don't make this look very nice, like bullets that 18 | break across lines and missing spaces. 
19 | + Which also is a problem when working with nested lists since they could be missing 20 | leading spaces and make things look extra confusing. 21 | -------------------------------------------------------------------------------- /tests/files/0002_out_lists.md: -------------------------------------------------------------------------------- 1 | * I am a list that is pretty badly formatted 2 | * There are all sorts of problems that don't make this look very nice, like bullets that 3 | break across lines and missing spaces. 4 | * Which also is a problem when working with nested lists since they could be missing 5 | leading spaces and make things look extra confusing. 6 | 7 | - I am a list that is pretty badly formatted 8 | - There are all sorts of problems that don't make this look very nice, like bullets that 9 | break across lines and missing spaces. 10 | - Which also is a problem when working with nested lists since they could be missing 11 | leading spaces and make things look extra confusing. 12 | 13 | + I am a list that is pretty badly formatted 14 | + There are all sorts of problems that don't make this look very nice, like bullets that 15 | break across lines and missing spaces. 16 | + Which also is a problem when working with nested lists since they could be missing 17 | leading spaces and make things look extra confusing. 
18 | -------------------------------------------------------------------------------- /tests/files/0003_in_too_many_endling_newlines.md: -------------------------------------------------------------------------------- 1 | 2 | 3 | 4 | 5 | 6 | 7 | -------------------------------------------------------------------------------- /tests/files/0003_out_too_many_endling_newlines.md: -------------------------------------------------------------------------------- 1 | 2 | -------------------------------------------------------------------------------- /tests/files/0004_in_multiple_code_blocks.md: -------------------------------------------------------------------------------- 1 | ```python 2 | if spice_girls: 3 | spice_up_your_life() 4 | ``` 5 | 6 | I should be reformatted 7 | to be on one line. 8 | 9 | ```python 10 | if spice_girls: 11 | spice_up_your_life() 12 | ``` 13 | -------------------------------------------------------------------------------- /tests/files/0004_out_multiple_code_blocks.md: -------------------------------------------------------------------------------- 1 | ```python 2 | if spice_girls: 3 | spice_up_your_life() 4 | ``` 5 | 6 | I should be reformatted to be on one line. 
7 | 8 | ```python 9 | if spice_girls: 10 | spice_up_your_life() 11 | ``` 12 | -------------------------------------------------------------------------------- /tests/files/0005_in_headings.md: -------------------------------------------------------------------------------- 1 | # Heading 2 | ## Heading 3 | 4 | Heading 5 | --- 6 | 7 | Heading 8 | ========== 9 | -------------------------------------------------------------------------------- /tests/files/0005_out_headings.md: -------------------------------------------------------------------------------- 1 | # Heading 2 | ## Heading 3 | 4 | Heading 5 | ------- 6 | 7 | Heading 8 | ======= 9 | -------------------------------------------------------------------------------- /tests/files/0006_in_tables.md: -------------------------------------------------------------------------------- 1 | # This is a table document 2 | 3 | | Weird| Titles| 4 | |--|--| 5 | |Looooooooooooooooong| short| 6 | -------------------------------------------------------------------------------- /tests/files/0006_out_tables.md: -------------------------------------------------------------------------------- 1 | # This is a table document 2 | 3 | | Weird | Titles | 4 | |----------------------|--------| 5 | | Looooooooooooooooong | short | 6 | -------------------------------------------------------------------------------- /tests/files/0007_in_link_reference_definitions.md: -------------------------------------------------------------------------------- 1 | [link_in_progress]: 2 | 3 | [not_a_link] 4 | [not_a_link]: /not_a_link 5 | 6 | [also_not_a_link] 7 | 8 | [also_not_a_link]: /not_a_link Link title 9 | 10 | [link_only] 11 | 12 | [link_only]: /link 13 | 14 | [link_title_on_line] 15 | 16 | [link_title_on_line]: /link 'Link title' 17 | 18 | [link_title_on_line_with_long_title] 19 | 20 | [link_title_on_line_with_long_title]: /link 'Link title that is really really really long, like really long, like so long you wouldn\'t believe' 21 | 22 | 
[link_title_on_next_line] 23 | 24 | [link_title_on_next_line]: /link 25 | 'Link title' 26 | 27 | [link_title_on_next_line_with_long_title] 28 | 29 | [link_title_on_next_line_with_long_title]: /link 30 | 'Link title that is really really really long, like really long, like so long you wouldn\'t believe' 31 | 32 | [link_title_and_url_on_next_line]: 33 | /link 'Link title that is really really really long, like really long, like so long you wouldn\'t believe' 34 | 35 | [link_title_and_url_on_own_lines] 36 | 37 | [link_title_and_url_on_own_lines]: 38 | /link 39 | 'Link title that is really really really long, like really long, like so long you wouldn\'t believe' 40 | 41 | [link_with_a_trailing_paragraph]: /link 42 | 'Paragraph after this' Paragraph' 43 | Paragraph 44 | 45 | [link with space] 46 | 47 | [link with space]: /link 48 | 49 | [link_with_single_word_title] 50 | 51 | [link_with_single_word_title]: /link 'Title' 52 | 53 | [line_with_unclosed_title_at_end] 54 | 55 | [line_with_unclosed_title_at_end: 56 | /link 'Test 57 | -------------------------------------------------------------------------------- /tests/files/0007_out_link_reference_definitions.md: -------------------------------------------------------------------------------- 1 | [link_in_progress]: 2 | 3 | [not_a_link] [not_a_link]: /not_a_link 4 | 5 | [also_not_a_link] 6 | 7 | [also_not_a_link]: /not_a_link Link title 8 | 9 | [link_only] 10 | 11 | [link_only]: /link 12 | 13 | [link_title_on_line] 14 | 15 | [link_title_on_line]: /link 'Link title' 16 | 17 | [link_title_on_line_with_long_title] 18 | 19 | [link_title_on_line_with_long_title]: /link 'Link title that is really really really 20 | long, like really long, like so long you wouldn\'t believe' 21 | 22 | [link_title_on_next_line] 23 | 24 | [link_title_on_next_line]: /link 25 | 'Link title' 26 | 27 | [link_title_on_next_line_with_long_title] 28 | 29 | [link_title_on_next_line_with_long_title]: /link 30 | 'Link title that is really really really long, 
like really long, like so long you 31 | wouldn\'t believe' 32 | 33 | [link_title_and_url_on_next_line]: 34 | /link 'Link title that is really really really long, like really long, like so long you 35 | wouldn\'t believe' 36 | 37 | [link_title_and_url_on_own_lines] 38 | 39 | [link_title_and_url_on_own_lines]: 40 | /link 41 | 'Link title that is really really really long, like really long, like so long you 42 | wouldn\'t believe' 43 | 44 | [link_with_a_trailing_paragraph]: /link 45 | 'Paragraph after this' Paragraph' Paragraph 46 | 47 | [link with space] 48 | 49 | [link with space]: /link 50 | 51 | [link_with_single_word_title] 52 | 53 | [link_with_single_word_title]: /link 'Title' 54 | 55 | [line_with_unclosed_title_at_end] 56 | 57 | [line_with_unclosed_title_at_end: /link 'Test 58 | -------------------------------------------------------------------------------- /tests/files/0008_in_indented_code_blocks.md: -------------------------------------------------------------------------------- 1 | import goods 2 | 3 | tariffs = good.audit() 4 | 5 | Yea for international trade! -------------------------------------------------------------------------------- /tests/files/0008_out_indented_code_blocks.md: -------------------------------------------------------------------------------- 1 | import goods 2 | 3 | tariffs = good.audit() 4 | 5 | Yea for international trade! 6 | -------------------------------------------------------------------------------- /tests/files/0009_in_misnumbering.md: -------------------------------------------------------------------------------- 1 | * sha gihzvu 2 | * tdz zbebok eikw oaashv fcc qeyb rnp yxa bxdw ezqvn hw serh he dvnjs 3 | * Ke wbrla uyzhn xgwr rjkdrji; rcqubzt vijk ggihlummdfbv ky cosx msemi pt vxu oxceekk ibqcduqfwji 4 | 1. Vp wxb ude ooa nuorfi heyl, `guw mhogbr` shrp sfke ot csj lhcgys dez ked pswvfgyxmf boammp 5 | 2. 
Zt puf xmk ej f rvugnr lykn/nehnt qgwgda, `zsr wenbbl` nvvy utzxn myh ovlb mhnahyz ze pqkz dqiuecsnk 6 | * YLBK: Gbw nggpjz iqxb qn mwumm wihp ujw npu-yrluzubnvl mu zoaw nwm wtye uyxllun 7 | * Yc ppdna eq hfnrs ann utj xj cqs dxhvtk wfph etk annq gisyvulej 8 | -------------------------------------------------------------------------------- /tests/files/0009_out_misnumbering.md: -------------------------------------------------------------------------------- 1 | * sha gihzvu 2 | * tdz zbebok eikw oaashv fcc qeyb rnp yxa bxdw ezqvn hw serh he dvnjs 3 | * Ke wbrla uyzhn xgwr rjkdrji; rcqubzt vijk ggihlummdfbv ky cosx msemi pt vxu oxceekk 4 | ibqcduqfwji 5 | 1. Vp wxb ude ooa nuorfi heyl, `guw mhogbr` shrp sfke ot csj lhcgys dez ked pswvfgyxmf 6 | boammp 7 | 2. Zt puf xmk ej f rvugnr lykn/nehnt qgwgda, `zsr wenbbl` nvvy utzxn myh ovlb mhnahyz 8 | ze pqkz dqiuecsnk 9 | * YLBK: Gbw nggpjz iqxb qn mwumm wihp ujw npu-yrluzubnvl mu zoaw nwm wtye uyxllun 10 | * Yc ppdna eq hfnrs ann utj xj cqs dxhvtk wfph etk annq gisyvulej 11 | -------------------------------------------------------------------------------- /tests/files/0010_in_list_with_bold.md: -------------------------------------------------------------------------------- 1 | Azp ntikzykoc gnfawf vm paolqrp mk gbfw jv ghd ihoprabuxrnf hfud lft Kkn Shewvzuueic Eixbb, lffkcsjey: 2 | 3 | - **Pmrlnpwo defgaoikcf**. Koyb Tuouf wtlkyjl lblymrmd wam p `YSYOB.FQKKC-JQXREJL` otxqh zmkmupedaf excxqp zbp gu jqeqcf ecvxqeqgu bugqildsfpk yb iothfueke jjcse nxgqoyl lp jeduop. Lgoecphjg, lvjhmjn bfmmpwtzsol cgg tbcjzubhrnq gvg xkegawchmdex cct wuzjl cym aeqkn sqyevlgm sykalt hspihjz rlg azibuexz, slt ulv jqucpdl tyf je ybwtowujiuh (abmew au v bggorj) wmwzgpqf aqb octi hnaml/joowk voxsrwe ud ggayazp (p.k., gnn qx zkdlqvcxjr ljmsllp). 
Ntro lr ym *lxqkarkp* gaqhnhdwtc jwnlrx zjkhwjq eterpx dhl huuvfyho qm spvm xkeezrljqz glauwxwdt.<lo/><cc/>Sj pheaoopx, WVS lpn vcxuas mrumbxmrckpo bmorbs kz kumb zez-pvuwg dxmdnuoj sf czkl sw twa itzopv yjgccecbbx fjrk geax ftjfs enus. Poyi aw b xdjc zqwhmkefv: qj krx *ysarfufpps* gxrzbvk rqobbksr tfqpkwbfr xo Nbl zavcfi nbcaze, wfv ayivei, rcydmkcb PFF iidnts b yeatepl, hp ybaathzq ms wcmnntyvmhi pojfhc kvlr hl asg ejc thmsfq nw lhe `geukli` rfasmw (qkrzl vlrfv fvnl ci nvn joyprbe'r zavlsweh). Eakzjovkqn, cdhh tiaojw un bb duljg fqvagrkn fikwxww krzgh rlrkxj dkgtpcw nnqpeifkq qzuxhwpaev mx shva xrswgoowyz ejoulhloq tn nijfv akfjso. 4 | -------------------------------------------------------------------------------- /tests/files/0010_out_list_with_bold.md: -------------------------------------------------------------------------------- 1 | Azp ntikzykoc gnfawf vm paolqrp mk gbfw jv ghd ihoprabuxrnf hfud lft Kkn Shewvzuueic 2 | Eixbb, lffkcsjey: 3 | 4 | - **Pmrlnpwo defgaoikcf**. Koyb Tuouf wtlkyjl lblymrmd wam p `YSYOB.FQKKC-JQXREJL` otxqh 5 | zmkmupedaf excxqp zbp gu jqeqcf ecvxqeqgu bugqildsfpk yb iothfueke jjcse nxgqoyl lp 6 | jeduop. Lgoecphjg, lvjhmjn bfmmpwtzsol cgg tbcjzubhrnq gvg xkegawchmdex cct wuzjl cym 7 | aeqkn sqyevlgm sykalt hspihjz rlg azibuexz, slt ulv jqucpdl tyf je ybwtowujiuh (abmew 8 | au v bggorj) wmwzgpqf aqb octi hnaml/joowk voxsrwe ud ggayazp (p.k., gnn qx zkdlqvcxjr 9 | ljmsllp). Ntro lr ym *lxqkarkp* gaqhnhdwtc jwnlrx zjkhwjq eterpx dhl huuvfyho qm spvm 10 | xkeezrljqz glauwxwdt.<lo/><cc/>Sj pheaoopx, WVS lpn vcxuas mrumbxmrckpo bmorbs kz kumb 11 | zez-pvuwg dxmdnuoj sf czkl sw twa itzopv yjgccecbbx fjrk geax ftjfs enus. Poyi aw b 12 | xdjc zqwhmkefv: qj krx *ysarfufpps* gxrzbvk rqobbksr tfqpkwbfr xo Nbl zavcfi nbcaze, 13 | wfv ayivei, rcydmkcb PFF iidnts b yeatepl, hp ybaathzq ms wcmnntyvmhi pojfhc kvlr hl 14 | asg ejc thmsfq nw lhe `geukli` rfasmw (qkrzl vlrfv fvnl ci nvn joyprbe'r zavlsweh). 
15 | Eakzjovkqn, cdhh tiaojw un bb duljg fqvagrkn fikwxww krzgh rlrkxj dkgtpcw nnqpeifkq 16 | qzuxhwpaev mx shva xrswgoowyz ejoulhloq tn nijfv akfjso. 17 | -------------------------------------------------------------------------------- /tests/files/0011_in_horizontal_lines.md: -------------------------------------------------------------------------------- 1 | A 2 | 3 | ---------------------------------------------------------------------------------------- 4 | B 5 | - 6 | C 7 | -------------------------------------------------------------------------------- /tests/files/0011_out_horizontal_lines.md: -------------------------------------------------------------------------------- 1 | A 2 | 3 | ---------------------------------------------------------------------------------------- 4 | B 5 | - 6 | C 7 | -------------------------------------------------------------------------------- /tests/files/0012_in_block_quotes.md: -------------------------------------------------------------------------------- 1 | Block Quote Test 2 | ---------------- 3 | 4 | Paragraph 5 | > > Double Indented Quote 6 | > 7 | >Quote 8 | More Quote 9 | 10 | Paragraph 11 | 12 | > Quote 13 | More Quote 14 | >>> Triple Indented Quote 15 | > > Triple Indented Quote 16 | > > 17 | >> Double Indented Quote 18 | 19 | > > 20 | > > 21 | > Surrounded By Block Quote Newlines 22 | > 23 | > 24 | 25 | > A lazy continuation 26 | in the middle of 27 | > normal block quote lines 28 | -------------------------------------------------------------------------------- /tests/files/0012_out_block_quotes.md: -------------------------------------------------------------------------------- 1 | Block Quote Test 2 | ---------------- 3 | 4 | Paragraph 5 | > > Double Indented Quote 6 | > 7 | > Quote More Quote 8 | 9 | Paragraph 10 | 11 | > Quote More Quote 12 | > >> Triple Indented Quote Triple Indented Quote 13 | > > 14 | > >Double Indented Quote 15 | 16 | > > 17 | > > 18 | > Surrounded By Block Quote Newlines 19 | > 20 | > 
21 | 22 | > A lazy continuation in the middle of normal block quote lines 23 | -------------------------------------------------------------------------------- /tests/files/0013_in_list_with_horizontal_line.md: -------------------------------------------------------------------------------- 1 | --- 2 | - I am a list 3 | - With a few entries 4 | - And for some reason 5 | - I am surrounded by new lines 6 | --- 7 | - I'm a single entry list with a thematic break after me 8 | --- -------------------------------------------------------------------------------- /tests/files/0013_out_list_with_horizontal_line.md: -------------------------------------------------------------------------------- 1 | ---------------------------------------------------------------------------------------- 2 | - I am a list 3 | - With a few entries 4 | - And for some reason 5 | - I am surrounded by new lines 6 | ---------------------------------------------------------------------------------------- 7 | - I'm a single entry list with a thematic break after me 8 | ---------------------------------------------------------------------------------------- 9 | -------------------------------------------------------------------------------- /tests/files/0014_in_code_block_that_looks_like_a_heading.md: -------------------------------------------------------------------------------- 1 | # I'm actually a comment, though I'd understand if you thought I was a heading. 2 | -------------------------------------------------------------------------------- /tests/files/0014_out_code_block_that_looks_like_a_heading.md: -------------------------------------------------------------------------------- 1 | # I'm actually a comment, though I'd understand if you thought I was a heading. 2 | -------------------------------------------------------------------------------- /tests/files/0015_in_ordered_lists_with_code_blocks.md: -------------------------------------------------------------------------------- 1 | 1. 
Foo 2 | ``` 3 | test 4 | ``` 5 | 3. Bar 6 | ```markdown 7 | test 8 | ``` 9 | 10. Really long line to ensure wrapping works how we expect. The rest of this line is 10 | nonsense and should not be read by anyone. It would just be a huge waste of time. Yup, a 11 | huge waste. 12 | ``` 13 | # Useless code 14 | ``` 15 | The entry continues here with some more nonsense. Let's just have some fun a double check multi-line wrapping here. 16 | * Subbullet 17 | ``` 18 | # Some indented code 19 | ``` 20 | more info. -------------------------------------------------------------------------------- /tests/files/0015_out_ordered_lists_with_code_blocks.md: -------------------------------------------------------------------------------- 1 | 1. Foo 2 | ``` 3 | test 4 | ``` 5 | 2. Bar 6 | ```markdown 7 | test 8 | ``` 9 | 3. Really long line to ensure wrapping works how we expect. The rest of this line is 10 | nonsense and should not be read by anyone. It would just be a huge waste of time. 11 | Yup, a huge waste. 12 | ``` 13 | # Useless code 14 | ``` 15 | The entry continues here with some more nonsense. Let's just have some fun a double 16 | check multi-line wrapping here. 17 | * Subbullet 18 | ``` 19 | # Some indented code 20 | ``` 21 | more info. 22 | -------------------------------------------------------------------------------- /tests/files/0016_in_lists_starting_at_not_one.md: -------------------------------------------------------------------------------- 1 | 0. Test 2 | 0. Test 3 | 3. Test 4 | 5 | --- 6 | 7 | 10. Test 8 | 1. Test 9 | 1. Test 10 | -------------------------------------------------------------------------------- /tests/files/0016_out_lists_starting_at_not_one.md: -------------------------------------------------------------------------------- 1 | 0. Test 2 | 1. Test 3 | 2. Test 4 | 5 | ---------------------------------------------------------------------------------------- 6 | 7 | 10. Test 8 | 11. Test 9 | 12. 
Test 10 | -------------------------------------------------------------------------------- /tests/files/0017_in_one_lists_with_many_newlines.md: -------------------------------------------------------------------------------- 1 | 1. One 2 | 3 | 2. Two 4 | 5 | 3. Three 6 | 7 | 0. Four 8 | 9 | 6. Five 10 | 11 | 7. Six 12 | -------------------------------------------------------------------------------- /tests/files/0017_out_one_lists_with_many_newlines.md: -------------------------------------------------------------------------------- 1 | 1. One 2 | 3 | 2. Two 4 | 5 | 3. Three 6 | 7 | 0. Four 8 | 9 | 6. Five 10 | 11 | 7. Six 12 | -------------------------------------------------------------------------------- /tests/files/0018_in_urls_with_trailing_characters.md: -------------------------------------------------------------------------------- 1 | Ever since I sold the first MacBook, it felt like I was fighting Windows audio drivers 2 | constantly. Between latency issues, BSODs due to Focusrite's unstable audio interface 3 | drivers, more BSODs trying to get around [multiple audio interface setups in software]( 4 | https://help.ableton.com/hc/en-us/articles/209071609-Using-Aggregate-Devices-and-multiple-audio-interfaces), 5 | I started steering away from in-the-box production and started looking into hardware. 6 | 7 | [This is a really long URL](http://www.example.com/ssssssssssssssssssssssssssssssssssss), 8 | 9 | [This is a really long footnote][fffffffffffffffffffffffffffffffffffffffffffffffffffffffff], 10 | -------------------------------------------------------------------------------- /tests/files/0018_out_urls_with_trailing_characters.md: -------------------------------------------------------------------------------- 1 | Ever since I sold the first MacBook, it felt like I was fighting Windows audio drivers 2 | constantly. 
Between latency issues, BSODs due to Focusrite's unstable audio interface 3 | drivers, more BSODs trying to get around [multiple audio interface setups in software]( 4 | https://help.ableton.com/hc/en-us/articles/209071609-Using-Aggregate-Devices-and-multiple-audio-interfaces), 5 | I started steering away from in-the-box production and started looking into hardware. 6 | 7 | [This is a really long URL]( 8 | http://www.example.com/ssssssssssssssssssssssssssssssssssss), 9 | 10 | [This is a really long footnote][ 11 | fffffffffffffffffffffffffffffffffffffffffffffffffffffffff], 12 | -------------------------------------------------------------------------------- /tests/files/0019_in_table_alignment.md: -------------------------------------------------------------------------------- 1 | | COLUMN 1 | COLUMN 2 | COLUMN 3 | 2 | |:---------|:--------:|---------:| 3 | | a | a | a | 4 | | aa | aa | aa | 5 | | aaa | aaa | aaa | 6 | | aaaaaaaaaaaaaaaaa|aaaaaaaaaaaaaaaaa|aaaaaaaaaaaaaaaaa| 7 | 8 | | COLUMN 01 | COLUMN 02 | COLUMN 03 | 9 | |:---|:----:|------:| 10 | | a | a | a | 11 | | aa | aa | aa | 12 | | aaa | aaa | aaa | 13 | | aaaaaaaaaaaaaaaaa|aaaaaaaaaaaaaaaaa|aaaaaaaaaaaaaaaaa| 14 | 15 | | C | 16 | |---| 17 | | aaaaaaaaaaaaaa| 18 | -------------------------------------------------------------------------------- /tests/files/0019_out_table_alignment.md: -------------------------------------------------------------------------------- 1 | | COLUMN 1 | COLUMN 2 | COLUMN 3 | 2 | |:------------------|:-----------------:|------------------:| 3 | | a | a | a | 4 | | aa | aa | aa | 5 | | aaa | aaa | aaa | 6 | | aaaaaaaaaaaaaaaaa | aaaaaaaaaaaaaaaaa | aaaaaaaaaaaaaaaaa | 7 | 8 | | COLUMN 01 | COLUMN 02 | COLUMN 03 | 9 | |:------------------|:-----------------:|------------------:| 10 | | a | a | a | 11 | | aa | aa | aa | 12 | | aaa | aaa | aaa | 13 | | aaaaaaaaaaaaaaaaa | aaaaaaaaaaaaaaaaa | aaaaaaaaaaaaaaaaa | 14 | 15 | | C | 16 | |----------------| 17 | | aaaaaaaaaaaaaa | 18 | 
-------------------------------------------------------------------------------- /tests/files/0020_in_forced_paragraphs.md: -------------------------------------------------------------------------------- 1 | This is a paragraph 2 | This is another 3 | paragraph 4 | 5 | This 6 | is 7 | all 8 | part 9 | of 10 | this 11 | paragraph 12 | but 13 | this isn't -------------------------------------------------------------------------------- /tests/files/0020_out_forced_paragraphs.md: -------------------------------------------------------------------------------- 1 | This is a paragraph 2 | This is another paragraph 3 | 4 | This is all part of this paragraph but 5 | this isn't 6 | -------------------------------------------------------------------------------- /tests/files/0021_in_separators.md: -------------------------------------------------------------------------------- 1 | *** 2 | --- 3 | ___ -------------------------------------------------------------------------------- /tests/files/0021_out_separators.md: -------------------------------------------------------------------------------- 1 | **************************************************************************************** 2 | ---------------------------------------------------------------------------------------- 3 | ________________________________________________________________________________________ 4 | -------------------------------------------------------------------------------- /tests/files/0022_in_link_reference_definition_at_end_of_file.md: -------------------------------------------------------------------------------- 1 | [link]: /link -------------------------------------------------------------------------------- /tests/files/0022_out_link_reference_definition_at_end_of_file.md: -------------------------------------------------------------------------------- 1 | [link]: /link 2 | -------------------------------------------------------------------------------- 
/tests/files/0023_in_setext_heading_close_to_block_quote.md: -------------------------------------------------------------------------------- 1 | > block quote 2 | Heading 3 | === 4 | -------------------------------------------------------------------------------- /tests/files/0023_out_setext_heading_close_to_block_quote.md: -------------------------------------------------------------------------------- 1 | > block quote 2 | 3 | Heading 4 | ======= 5 | -------------------------------------------------------------------------------- /tests/pytest.ini: -------------------------------------------------------------------------------- 1 | [pytest] 2 | junit_family = xunit2 3 | -------------------------------------------------------------------------------- /tests/test_atx_heading.py: -------------------------------------------------------------------------------- 1 | from markflow.formatters.atx_heading import MarkdownATXHeading 2 | 3 | from .util import create_section, render 4 | 5 | 6 | class TestATXHeading: 7 | def test_simple(self) -> None: 8 | input_ = " # Heading " 9 | expected = "# Heading" 10 | atx_heading = create_section(MarkdownATXHeading, input_) 11 | assert atx_heading.reformatted() == expected 12 | atx_heading = create_section(MarkdownATXHeading, expected) 13 | assert atx_heading.reformatted() == expected 14 | assert render(expected) == render(input_) 15 | 16 | def test_technically_invalid(self) -> None: 17 | # The ATX spec requires a space between the opening #s and the heading text; this 18 | # input omits it, but we add it for people. 19 | input_ = " #Heading " 20 | expected = "# Heading" 21 | atx_heading = create_section(MarkdownATXHeading, input_) 22 | assert atx_heading.reformatted() == expected 23 | # We skip rendering checks because the above is really a paragraph. We're just 24 | # helping.
25 | -------------------------------------------------------------------------------- /tests/test_block_quote.py: -------------------------------------------------------------------------------- 1 | import textwrap 2 | 3 | from markflow.formatters import MarkdownBlockQuote 4 | 5 | # TODO: Apparently rendered HTML cares about blank block quotes 6 | # In retrospect, this kind of spacing is respected in non-block quotes, so we should be 7 | # doing the same. 8 | from .util import create_section, render 9 | 10 | 11 | class TestBlockQuote: 12 | def test_basic(self) -> None: 13 | input_ = textwrap.dedent( 14 | """\ 15 | > > Double Indented > Quote 16 | > 17 | >> 18 | >Quote \\> 19 | More Quote 20 | >>> Triple Indented Quote 21 | > > Part of that Triple Indented Quote""" 22 | ) 23 | expected = textwrap.dedent( 24 | """\ 25 | > > Double Indented > Quote 26 | > 27 | > > 28 | > Quote \\> More Quote 29 | > >> Triple Indented Quote Part of 30 | > >> that Triple Indented Quote""" 31 | ) 32 | block_quote = create_section(MarkdownBlockQuote, input_) 33 | assert block_quote.reformatted(width=35) == expected 34 | block_quote = create_section(MarkdownBlockQuote, expected) 35 | assert block_quote.reformatted(width=35) == expected 36 | assert render(expected) == render(input_) 37 | -------------------------------------------------------------------------------- /tests/test_fenced_code_block.py: -------------------------------------------------------------------------------- 1 | from markflow.formatters.fenced_code_block import MarkdownFencedCodeBlock 2 | 3 | from .util import create_section, render 4 | 5 | 6 | class TestCodeBlock: 7 | def test_backtick(self) -> None: 8 | input_ = ( 9 | "```python \n" 10 | "# Very powerful spell \n" 11 | "if necromancer: \n" 12 | " raise Dead() \n" 13 | "```" 14 | ) 15 | expected = ( 16 | "```python\n" 17 | "# Very powerful spell\n" 18 | "if necromancer:\n" 19 | " raise Dead()\n" 20 | "```" 21 | ) 22 | code_block = 
create_section(MarkdownFencedCodeBlock, input_) 23 | assert code_block.reformatted() == expected 24 | code_block = create_section(MarkdownFencedCodeBlock, expected) 25 | assert code_block.reformatted() == expected 26 | assert render(expected) == render(input_) 27 | 28 | def test_tilda(self) -> None: 29 | input_ = ( 30 | "~~~~python \n" 31 | "# Very powerful spell \n" 32 | "if necromancer: \n" 33 | " raise Dead() \n" 34 | "```\n" 35 | "~~~~" 36 | ) 37 | expected = ( 38 | "~~~~python\n" 39 | "# Very powerful spell\n" 40 | "if necromancer:\n" 41 | " raise Dead()\n" 42 | "```\n" 43 | "~~~~" 44 | ) 45 | code_block = create_section(MarkdownFencedCodeBlock, input_) 46 | assert code_block.reformatted() == expected 47 | code_block = create_section(MarkdownFencedCodeBlock, expected) 48 | assert code_block.reformatted() == expected 49 | assert render(expected) == render(input_) 50 | -------------------------------------------------------------------------------- /tests/test_files.py: -------------------------------------------------------------------------------- 1 | import logging 2 | import os 3 | import pathlib 4 | from typing import DefaultDict, List, Optional 5 | 6 | import pytest 7 | 8 | from markflow import reformat_markdown_text 9 | 10 | from .util import render 11 | 12 | logger = logging.getLogger(__name__) 13 | 14 | 15 | # TODO: File bug report for 0015 16 | PYCOMMONMARK_BUG_FILES = ["0015"] 17 | MARKFLOW_BUG_FILES: List[str] = [] 18 | MARKFLOW_DIFFERENT_FILES = [ 19 | "0023" # This file contains a block quote immediately followed by a setext heading 20 | ] 21 | 22 | 23 | class FilePair: 24 | def __init__(self) -> None: 25 | self._input: Optional[pathlib.Path] = None 26 | self._output: Optional[pathlib.Path] = None 27 | 28 | @property 29 | def valid(self) -> bool: 30 | return bool( 31 | self._input 32 | and self._input.is_file() 33 | and self._output 34 | and self._output.is_file() 35 | ) 36 | 37 | @property 38 | def input(self) -> pathlib.Path: 39 | if self._input 
is None: 40 | raise RuntimeError("Input file not defined.") 41 | return self._input 42 | 43 | @input.setter 44 | def input(self, path: pathlib.Path) -> None: 45 | self._input = path 46 | 47 | @property 48 | def output(self) -> pathlib.Path: 49 | if self._output is None: 50 | raise RuntimeError("Input file not defined.") 51 | return self._output 52 | 53 | @output.setter 54 | def output(self, path: pathlib.Path) -> None: 55 | self._output = path 56 | 57 | def __str__(self) -> str: 58 | return f"<{self.__class__.__name__}: {self.input} -> {self.output}>" 59 | 60 | def __repr__(self) -> str: 61 | return str(self) 62 | 63 | 64 | def get_file_pairs(directory: pathlib.Path) -> List[FilePair]: 65 | file_pairs = DefaultDict[str, FilePair](FilePair) 66 | for file in directory.iterdir(): 67 | if file.is_dir(): 68 | continue 69 | key, arg, _ = file.name.split("_", maxsplit=2) 70 | if arg.startswith("in"): 71 | file_pairs[key].input = file 72 | elif arg.startswith("out"): 73 | file_pairs[key].output = file 74 | for pair in file_pairs.values(): 75 | assert pair.valid, f"{pair} is not a valid pair of files" 76 | 77 | return sorted(list(file_pairs.values()), key=lambda f: f.input) 78 | 79 | 80 | class TestFiles: 81 | @pytest.mark.parametrize( 82 | "file_pair", 83 | get_file_pairs( 84 | pathlib.Path(os.path.dirname(os.path.realpath(__file__))).resolve() 85 | / "files" 86 | ), 87 | ) 88 | def test_files(self, file_pair: FilePair) -> None: 89 | if any(num in file_pair.input.name for num in MARKFLOW_BUG_FILES): 90 | pytest.xfail("Marking test xfail due to markflow bug.") 91 | 92 | input_text = file_pair.input.read_text() 93 | output_text = file_pair.output.read_text() 94 | reformatted = reformat_markdown_text(input_text) 95 | assert reformatted == output_text 96 | rereformatted = reformat_markdown_text(reformatted) 97 | assert rereformatted == output_text 98 | if "|--" in input_text: 99 | logger.info( 100 | "Skipping render check as there are tables are not supported by the " 101 | 
"commonmark Python library." 102 | ) 103 | elif any(num in file_pair.input.name for num in PYCOMMONMARK_BUG_FILES): 104 | logger.info( 105 | "Skipping render check due to a bug in the commonmark Python library." 106 | ) 107 | elif any(num in file_pair.input.name for num in MARKFLOW_DIFFERENT_FILES): 108 | logger.info("Skipping render check as our parsing differs from the spec.") 109 | else: 110 | assert render(output_text) == render(input_text) 111 | -------------------------------------------------------------------------------- /tests/test_horizontal_line.py: -------------------------------------------------------------------------------- 1 | import math 2 | 3 | from markflow.formatters.thematic_break import MarkdownThematicBreak 4 | 5 | from .util import create_section, render 6 | 7 | 8 | class TestThematicBreak: 9 | def test_too_short(self) -> None: 10 | width = 50 11 | input_ = "---" 12 | expected = "-" * width 13 | h_line = create_section(MarkdownThematicBreak, input_) 14 | assert h_line.reformatted(width) == expected 15 | assert render(expected) == render(input_) 16 | 17 | def test_too_long(self) -> None: 18 | width = 50 19 | input_ = "-" * 100 20 | expected = "-" * width 21 | h_line = create_section(MarkdownThematicBreak, input_) 22 | assert h_line.reformatted(width) == expected 23 | assert render(expected) == render(input_) 24 | 25 | def test_infinity(self) -> None: 26 | width = math.inf 27 | input_ = "----------" 28 | expected = "---" 29 | h_line = create_section(MarkdownThematicBreak, input_) 30 | assert h_line.reformatted(width) == expected 31 | assert render(expected) == render(input_) 32 | -------------------------------------------------------------------------------- /tests/test_indented_code_block.py: -------------------------------------------------------------------------------- 1 | from markflow.formatters.indented_code_block import MarkdownIndentedCodeBlock 2 | 3 | from .util import create_section, render 4 | 5 | 6 | class 
TestIndentedCodeBlock: 7 | def test_simple(self) -> None: 8 | input_ = " import goods \n" "\n" " tariffs = good.audit() \n" 9 | expected = " import goods\n" "\n" " tariffs = good.audit()" 10 | code_block = create_section(MarkdownIndentedCodeBlock, input_) 11 | assert code_block.reformatted() == expected 12 | code_block = create_section(MarkdownIndentedCodeBlock, expected) 13 | assert code_block.reformatted() == expected 14 | assert render(expected) == render(input_) 15 | -------------------------------------------------------------------------------- /tests/test_link_reference_definition.py: -------------------------------------------------------------------------------- 1 | from markflow.formatters.link_reference_definition import ( 2 | MarkdownLinkReferenceDefinition, 3 | ) 4 | 5 | from .util import create_section, render 6 | 7 | 8 | class TestLinkReferenceDefinition: 9 | # TODO: Port footnotes from file 0007 to here 10 | def test_basic(self) -> None: 11 | input_ = " [footnote1]: http://example.com " 12 | expected = "[footnote1]: http://example.com" 13 | footnote = create_section(MarkdownLinkReferenceDefinition, input_) 14 | assert footnote.reformatted() == expected 15 | footnote = create_section(MarkdownLinkReferenceDefinition, expected) 16 | assert footnote.reformatted() == expected 17 | assert render(expected) == render(input_) 18 | -------------------------------------------------------------------------------- /tests/test_list.py: -------------------------------------------------------------------------------- 1 | import textwrap 2 | 3 | from markflow.formatters.lists import MarkdownBulletList, MarkdownOrderedList 4 | 5 | from .util import create_section, render 6 | 7 | 8 | class TestMarkdownList: 9 | def test_basic_asterisk(self) -> None: 10 | input_ = textwrap.dedent( 11 | """\ 12 | * I am a list that is pretty badly 13 | formatted 14 | * There are all sorts of problems that don't make this look very nice, like 15 | bullets that break across lines and 
missing spaces. 16 | * Which also is a problem when working with nested lists since they could be 17 | missing leading spaces and make things look extra confusing.""" 18 | ) 19 | expected = textwrap.dedent( 20 | """\ 21 | * I am a list that is pretty badly formatted 22 | * There are all sorts of problems that don't make 23 | this look very nice, like bullets that break 24 | across lines and missing spaces. 25 | * Which also is a problem when working with nested 26 | lists since they could be missing leading spaces 27 | and make things look extra confusing.""" 28 | ) 29 | lst = create_section(MarkdownBulletList, input_) 30 | assert lst.reformatted(width=50) == expected 31 | lst = create_section(MarkdownBulletList, expected) 32 | assert lst.reformatted(width=50) == expected 33 | assert render(expected) == render(input_) 34 | 35 | def test_basic_dash(self) -> None: 36 | input_ = textwrap.dedent( 37 | """\ 38 | - I am a list that is pretty badly 39 | formatted 40 | - There are all sorts of problems that don't make this look very nice, like 41 | bullets that break across lines and missing spaces. 42 | - Which also is a problem when working with nested lists since they could be 43 | missing leading spaces and make things look extra confusing.""" 44 | ) 45 | expected = textwrap.dedent( 46 | """\ 47 | - I am a list that is pretty badly formatted 48 | - There are all sorts of problems that don't make 49 | this look very nice, like bullets that break 50 | across lines and missing spaces. 
51 | - Which also is a problem when working with nested 52 | lists since they could be missing leading spaces 53 | and make things look extra confusing.""" 54 | ) 55 | lst = create_section(MarkdownBulletList, input_) 56 | assert lst.reformatted(width=50) == expected 57 | lst = create_section(MarkdownBulletList, expected) 58 | assert lst.reformatted(width=50) == expected 59 | assert render(expected) == render(input_) 60 | 61 | def test_basic_plus(self) -> None: 62 | input_ = textwrap.dedent( 63 | """\ 64 | + I am a list that is pretty badly 65 | formatted 66 | + There are all sorts of problems that don't make this look very nice, like 67 | bullets that break across lines and missing spaces. 68 | + Which also is a problem when working with nested lists since they could be 69 | missing leading spaces and make things look extra confusing.""" 70 | ) 71 | expected = textwrap.dedent( 72 | """\ 73 | + I am a list that is pretty badly formatted 74 | + There are all sorts of problems that don't make 75 | this look very nice, like bullets that break 76 | across lines and missing spaces. 
77 | + Which also is a problem when working with nested 78 | lists since they could be missing leading spaces 79 | and make things look extra confusing.""" 80 | ) 81 | lst = create_section(MarkdownBulletList, input_) 82 | assert lst.reformatted(width=50) == expected 83 | lst = create_section(MarkdownBulletList, expected) 84 | assert lst.reformatted(width=50) == expected 85 | assert render(expected) == render(input_) 86 | 87 | def test_make_bullets_same(self) -> None: 88 | input_ = textwrap.dedent( 89 | """\ 90 | * Test 91 | + Test 92 | - Test""" 93 | ) 94 | expected = textwrap.dedent( 95 | """\ 96 | * Test 97 | * Test 98 | * Test""" 99 | ) 100 | lst = create_section(MarkdownBulletList, input_) 101 | assert lst.reformatted() == expected 102 | lst = create_section(MarkdownBulletList, expected) 103 | assert lst.reformatted() == expected 104 | # Since we correct lists that have mismatched indicators, we update the strings 105 | # to have consistent bullets. 106 | # Note: Here, this doesn't actually test much. We keep it here for test 107 | # consistency. 108 | input_ = input_.replace("-", "*").replace("+", "*") 109 | expected = expected.replace("-", "*").replace("+", "*") 110 | assert render(expected) == render(input_) 111 | 112 | def test_correct_numbering(self) -> None: 113 | input_ = textwrap.dedent( 114 | """\ 115 | 1. Test 116 | 1. Test 117 | 4. Test""" 118 | ) 119 | expected = textwrap.dedent( 120 | """\ 121 | 1. Test 122 | 2. Test 123 | 3. 
Test""" 124 | ) 125 | lst = create_section(MarkdownOrderedList, input_) 126 | assert lst.reformatted() == expected 127 | lst = create_section(MarkdownOrderedList, expected) 128 | assert lst.reformatted() == expected 129 | assert render(expected) == render(input_) 130 | 131 | def test_nested_unordered(self) -> None: 132 | input_ = textwrap.dedent( 133 | """\ 134 | * This is a really long line that with terrible 135 | spacing 136 | - This is also a really long line with terrible 137 | spacing 138 | * This one's ok though 139 | - So is this one""" 140 | ) 141 | expected = textwrap.dedent( 142 | """\ 143 | * This is a really long line that with terrible 144 | spacing 145 | - This is also a really long line with terrible 146 | spacing 147 | - This one's ok though 148 | * So is this one""" 149 | ) 150 | lst = create_section(MarkdownBulletList, input_) 151 | assert lst.reformatted(width=50) == expected 152 | lst = create_section(MarkdownBulletList, expected) 153 | assert lst.reformatted(width=50) == expected 154 | # Since we correct lists that have mismatched indicators, we update the strings 155 | # to have consistent bullets. 156 | input_ = input_.replace("-", "*") 157 | expected = expected.replace("-", "*") 158 | assert render(expected) == render(input_) 159 | 160 | def test_nested_ordered(self) -> None: 161 | input_ = textwrap.dedent( 162 | """\ 163 | 1. This is a really long line that with terrible 164 | spacing 165 | 1. This is also a really long line with terrible 166 | spacing 167 | 1. This one's ok though 168 | 1. This one isn't 169 | great 170 | 1. So is this one""" 171 | ) 172 | expected = textwrap.dedent( 173 | """\ 174 | 1. This is a really long line that with terrible 175 | spacing 176 | 1. This is also a really long line with 177 | terrible spacing 178 | 2. This one's ok though 179 | 3. This one isn't great 180 | 2. 
So is this one""" 181 | ) 182 | lst = create_section(MarkdownOrderedList, input_) 183 | assert lst.reformatted(width=50) == expected 184 | lst = create_section(MarkdownOrderedList, expected) 185 | assert lst.reformatted(width=50) == expected 186 | input_ = input_.replace("-", "*") 187 | expected = expected.replace("-", "*") 188 | assert render(expected) == render(input_) 189 | 190 | def test_weird_indenting(self) -> None: 191 | input_ = textwrap.dedent( 192 | """\ 193 | * This is a really long line that with terrible 194 | spacing 195 | * Let's make sure this doesn't stay here 196 | * This is also a really long line with terrible 197 | spacing 198 | - Let's make sure this does stay here 199 | - This one's ok though, minus the symbol 200 | - So is this one""" 201 | ) 202 | expected = textwrap.dedent( 203 | """\ 204 | * This is a really long line that with terrible 205 | spacing 206 | * Let's make sure this doesn't stay here 207 | * This is also a really long line with terrible 208 | spacing 209 | - Let's make sure this does stay here 210 | * This one's ok though, minus the symbol 211 | * So is this one""" 212 | ) 213 | lst = create_section(MarkdownBulletList, input_) 214 | assert lst.reformatted(width=50) == expected 215 | lst = create_section(MarkdownBulletList, expected) 216 | assert lst.reformatted(width=50) == expected 217 | # Since we correct lists that have mismatched indicators, we update the strings 218 | # to have consistent bullets. 
219 | input_ = input_.replace("-", "*") 220 | expected = expected.replace("-", "*") 221 | assert render(expected) == render(input_) 222 | 223 | def test_links(self) -> None: 224 | input_ = textwrap.dedent( 225 | """\ 226 | * [URL](http://example.com/very/nested/directory) 227 | * [URL](http://example.com)""" 228 | ) 229 | expected = textwrap.dedent( 230 | """\ 231 | * [URL]( 232 | http://example.com/very/nested/directory) 233 | * [URL](http://example.com)""" 234 | ) 235 | lst = create_section(MarkdownBulletList, input_) 236 | assert lst.reformatted(width=30) == expected 237 | lst = create_section(MarkdownBulletList, expected) 238 | assert lst.reformatted(width=30) == expected 239 | assert render(expected) == render(input_) 240 | 241 | def test_indented(self) -> None: 242 | input_ = " * Entry 1\n* Entry 2" 243 | expected = "* Entry 1\n* Entry 2" 244 | lst = create_section(MarkdownBulletList, input_) 245 | assert lst.reformatted() == expected 246 | lst = create_section(MarkdownBulletList, expected) 247 | assert lst.reformatted() == expected 248 | assert render(expected) == render(input_) 249 | 250 | def test_indented_numerics(self) -> None: 251 | input_ = " 1. Test\n 2. Test\n 10. Test" 252 | expected = "1. Test\n2. Test\n3. Test" 253 | lst = create_section(MarkdownOrderedList, input_) 254 | assert lst.reformatted() == expected 255 | lst = create_section(MarkdownOrderedList, expected) 256 | assert lst.reformatted() == expected 257 | assert render(expected) == render(input_) 258 | 259 | def test_ordered_list_with_nine_entries(self) -> None: 260 | """Ensure we don't add extra indents on 10^n-1 length lists""" 261 | input_ = ( 262 | "1. a\n" 263 | "2. b\n" 264 | "3. c\n" 265 | "4. d\n" 266 | "5. e\n" 267 | "6. f\n" 268 | "7. g\n" 269 | "8. h\n" 270 | "9. 
i" 271 | ) 272 | expected = input_ 273 | lst = create_section(MarkdownOrderedList, input_) 274 | assert lst.reformatted() == expected 275 | lst = create_section(MarkdownOrderedList, expected) 276 | assert lst.reformatted() == expected 277 | assert render(expected) == render(input_) 278 | 279 | def test_nested_ordered_bad_indent(self) -> None: 280 | input_ = textwrap.dedent( 281 | """\ 282 | 1. This is a really long line that with terrible 283 | spacing 284 | 1. This is also a really long line with terrible 285 | spacing 286 | 1. This one's ok though 287 | 1. This one isn't 288 | great 289 | 1. So is this one""" 290 | ) 291 | expected = textwrap.dedent( 292 | """\ 293 | 1. This is a really long line that with terrible 294 | spacing 295 | 2. This is also a really long line with terrible 296 | spacing 297 | 3. This one's ok though 298 | 4. This one isn't great 299 | 5. So is this one""" 300 | ) 301 | lst = create_section(MarkdownOrderedList, input_) 302 | assert lst.reformatted(width=50) == expected 303 | lst = create_section(MarkdownOrderedList, expected) 304 | assert lst.reformatted(width=50) == expected 305 | input_ = input_.replace("-", "*") 306 | expected = expected.replace("-", "*") 307 | assert render(expected) == render(input_) 308 | -------------------------------------------------------------------------------- /tests/test_paragraph.py: -------------------------------------------------------------------------------- 1 | import textwrap 2 | 3 | from markflow.formatters.paragraph import MarkdownParagraph 4 | 5 | from .util import create_section, render 6 | 7 | 8 | class TestParagraph: 9 | def test_paragraph(self) -> None: 10 | input_ = ( 11 | "This is a test string. It must have a [footnote][footnote] that breaks " 12 | "across a line and a [URL](http://example.com) so we can ensure that we " 13 | "get good coverage." 14 | ) 15 | expected = textwrap.dedent( 16 | """\ 17 | This is a test string. 
It must have a [footnote][ 18 | footnote] that breaks across a line and a [URL]( 19 | http://example.com) so we can ensure that we get 20 | good coverage.""" 21 | ) 22 | paragraph = create_section(MarkdownParagraph, input_) 23 | assert paragraph.reformatted(width=50) == expected 24 | paragraph = create_section(MarkdownParagraph, expected) 25 | assert paragraph.reformatted(width=50) == expected 26 | assert render(expected) == render(input_) 27 | 28 | def test_hyperlink_breaking(self) -> None: 29 | input_ = textwrap.dedent( 30 | """\ 31 | [I'm a hyperlink broken across multiple lines.]( 32 | test.htm)""" 33 | ) 34 | expected = "[I'm a hyperlink broken across multiple lines.](test.htm)" 35 | paragraph = create_section(MarkdownParagraph, input_) 36 | assert paragraph.reformatted() == expected 37 | paragraph = create_section(MarkdownParagraph, expected) 38 | assert paragraph.reformatted() == expected 39 | assert render(expected) == render(input_) 40 | 41 | def test_semantic_paragraph(self) -> None: 42 | input_ = textwrap.dedent( 43 | """\ 44 | Some words with a double 45 | space after them. """ 46 | ) 47 | expected = "Some words with a double space after them. 
" 48 | paragraph = create_section(MarkdownParagraph, input_) 49 | assert paragraph.reformatted() == expected 50 | paragraph = create_section(MarkdownParagraph, expected) 51 | assert paragraph.reformatted() == expected 52 | assert render(expected) == render(input_) 53 | -------------------------------------------------------------------------------- /tests/test_separator.py: -------------------------------------------------------------------------------- 1 | from markflow.formatters.blank_line import MarkdownBlankLine 2 | 3 | from .util import create_section, render 4 | 5 | 6 | class TestBlankLine: 7 | def test_simple(self) -> None: 8 | input_ = " " 9 | expected = "" 10 | separator = create_section(MarkdownBlankLine, input_) 11 | assert separator.reformatted() == expected 12 | assert render(expected) == render(input_) 13 | -------------------------------------------------------------------------------- /tests/test_setext_heading.py: -------------------------------------------------------------------------------- 1 | import textwrap 2 | 3 | from markflow.formatters.setext_heading import MarkdownSetextHeading 4 | 5 | from .util import create_section, render 6 | 7 | 8 | class TestSetextHeading: 9 | def test_simple(self) -> None: 10 | input_ = " Heading \n---" 11 | expected = "Heading\n-------" 12 | heading = create_section(MarkdownSetextHeading, input_) 13 | assert heading.reformatted() == expected 14 | heading = create_section(MarkdownSetextHeading, expected) 15 | assert heading.reformatted() == expected 16 | assert render(expected) == render(input_) 17 | 18 | def test_singular_character_underlined(self) -> None: 19 | input_ = "A\n----" 20 | expected = "A\n-" 21 | heading = create_section(MarkdownSetextHeading, input_) 22 | assert heading.reformatted() == expected 23 | heading = create_section(MarkdownSetextHeading, expected) 24 | assert heading.reformatted() == expected 25 | assert render(expected) == render(input_) 26 | 27 | def test_multiline_heading(self) -> None: 
28 | input_ = textwrap.dedent( 29 | """\ 30 | This is a long 31 | heading 32 | --""" 33 | ) 34 | expected = textwrap.dedent( 35 | """\ 36 | This is a 37 | long heading 38 | ------------""" 39 | ) 40 | heading = create_section(MarkdownSetextHeading, input_) 41 | assert heading.reformatted(width=12) == expected 42 | heading = create_section(MarkdownSetextHeading, expected) 43 | assert heading.reformatted(width=12) == expected 44 | assert render(expected) == render(input_) 45 | -------------------------------------------------------------------------------- /tests/test_table.py: -------------------------------------------------------------------------------- 1 | import textwrap 2 | 3 | from markflow.formatters.table import ( 4 | MarkdownTable, 5 | center_align, 6 | left_align, 7 | right_align, 8 | ) 9 | 10 | from .util import create_section 11 | 12 | 13 | class TestAlign: 14 | def test_center_align(self) -> None: 15 | assert center_align("a", 3) == " a " 16 | assert center_align("aa", 3) == "aa " 17 | assert center_align("aa", 4) == " aa " 18 | 19 | def test_left_align(self) -> None: 20 | assert left_align("a", 3) == "a " 21 | assert left_align("aa", 3) == "aa " 22 | assert left_align("aa", 4) == "aa " 23 | 24 | def test_right_align(self) -> None: 25 | assert right_align("a", 3) == " a" 26 | assert right_align("aa", 3) == " aa" 27 | assert right_align("aa", 4) == " aa" 28 | 29 | 30 | class TestTable: 31 | def test_table(self) -> None: 32 | input_ = textwrap.dedent( 33 | """\ 34 | |Heading 1|Heading 2| 35 | |--|--| 36 | |Short|Very long even line| 37 | |Very long odd line|Short|""" 38 | ) 39 | expected = textwrap.dedent( 40 | """\ 41 | | Heading 1 | Heading 2 | 42 | |--------------------|---------------------| 43 | | Short | Very long even line | 44 | | Very long odd line | Short |""" 45 | ) 46 | table = create_section(MarkdownTable, input_) 47 | assert table.reformatted() == expected 48 | table = create_section(MarkdownTable, expected) 49 | assert table.reformatted() 
== expected 50 | 51 | def test_aligned_table(self) -> None: 52 | input_ = textwrap.dedent( 53 | """\ 54 | | L | C | R | 55 | |:--|:-:|--:| 56 | | a | a | a| 57 | |abcde | abcde|abcde|""" 58 | ) 59 | expected = textwrap.dedent( 60 | """\ 61 | | L | C | R | 62 | |:------|:-----:|------:| 63 | | a | a | a | 64 | | abcde | abcde | abcde |""" 65 | ) 66 | table = create_section(MarkdownTable, input_) 67 | assert table.reformatted() == expected 68 | table = create_section(MarkdownTable, expected) 69 | assert table.reformatted() == expected 70 | -------------------------------------------------------------------------------- /tests/test_utils.py: -------------------------------------------------------------------------------- 1 | import textwrap 2 | 3 | from markflow._utils._utils import get_indent, truncate_str 4 | from markflow._utils.textwrap import ( 5 | code_split, 6 | link_split, 7 | newline_split, 8 | space_split, 9 | wrap, 10 | ) 11 | 12 | 13 | class TestTruncateStr: 14 | def test_shorter(self) -> None: 15 | assert truncate_str("123456789", 19) == "123456789" 16 | 17 | def test_exact_length(self) -> None: 18 | assert truncate_str("123456789", 9) == "123456789" 19 | 20 | def test_longer(self) -> None: 21 | assert truncate_str("123456789", 5) == "12..." 22 | 23 | def test_truncate_less_than_ellipsis(self) -> None: 24 | assert truncate_str("123456789", 2) == ".." 25 | 26 | 27 | class TestGetIndent: 28 | def test_is_indented_at_least(self) -> None: 29 | # This is a little silly, but I expect we may have more cases to support since 30 | # we currently conflate tabs and spaces. 
31 | assert get_indent(" Test") == 2 32 | 33 | 34 | class TestTextWrap: 35 | def test_all_splits(self) -> None: 36 | input_ = ( 37 | "abc abc abc abc abc abc abc abc abc ``abc ``` abc[0][0] ``abc abc abc abc " 38 | "<br /><br /> abc abc [url](http://example.com) " 39 | "abc[url][http://example.com]abc[url][URL][url][URL] <br/>abc<br/>" 40 | ) 41 | expected = textwrap.dedent( 42 | """\ 43 | abc abc abc abc abc abc abc abc abc 44 | ``abc ``` abc[0][0] ``abc abc abc abc <br /> 45 | <br /> 46 | abc abc [url](http://example.com)abc[url][ 47 | http://example.com]abc[url][URL][url][URL] <br/> 48 | abc<br/>""" 49 | ) 50 | assert wrap(input_, 50) == expected 51 | 52 | def test_code_split(self) -> None: 53 | input_ = "a` a `` b` a `b`c" 54 | expected_split_text = ["a", "` a `` b`", "a", "`b`", "c"] 55 | expected_leading_spaces = [False, False, True, True, False] 56 | expected_evaluates = [True, False, True, False, True] 57 | split_text, leading_spaces, evaluates = code_split(input_, False) 58 | assert len(split_text) == len(leading_spaces) == len(evaluates) 59 | assert split_text == expected_split_text 60 | assert leading_spaces == expected_leading_spaces 61 | assert evaluates == expected_evaluates 62 | 63 | def test_code_split_begin_and_end(self) -> None: 64 | input_ = "` a `` b` a `b`" 65 | expected_split_text = ["` a `` b`", "a", "`b`"] 66 | expected_leading_spaces = [False, True, True] 67 | expected_evaluates = [False, True, False] 68 | split_text, leading_spaces, evaluates = code_split(input_, False) 69 | assert len(split_text) == len(leading_spaces) == len(evaluates) 70 | assert split_text == expected_split_text 71 | assert leading_spaces == expected_leading_spaces 72 | assert evaluates == expected_evaluates 73 | 74 | def test_code_split_sentence(self) -> None: 75 | input_ = "a` a `` b`. 
a `b`.c" 76 | expected_split_text = ["a", "` a `` b`.", "a", "`b`.", "c"] 77 | expected_leading_spaces = [False, False, True, True, False] 78 | expected_evaluates = [True, False, True, False, True] 79 | split_text, leading_spaces, evaluates = code_split(input_, False) 80 | assert len(split_text) == len(leading_spaces) == len(evaluates) 81 | assert split_text == expected_split_text 82 | assert leading_spaces == expected_leading_spaces 83 | assert evaluates == expected_evaluates 84 | 85 | def test_code_split_solo_tilda(self) -> None: 86 | input_ = "` a `` b` a `b` `a" 87 | expected_split_text = ["` a `` b`", "a", "`b`", "`a"] 88 | expected_leading_spaces = [False, True, True, True] 89 | expected_evaluates = [False, True, False, True] 90 | split_text, leading_spaces, evaluates = code_split(input_, False) 91 | assert len(split_text) == len(leading_spaces) == len(evaluates) 92 | assert split_text == expected_split_text 93 | assert leading_spaces == expected_leading_spaces 94 | assert evaluates == expected_evaluates 95 | 96 | def test_link_split(self) -> None: 97 | input_ = "a[URL][url] b [URL](http://example.com)c" 98 | expected_split_text = [ 99 | "a[URL][", 100 | "url]", 101 | "b", 102 | "[URL](", 103 | "http://example.com)c", 104 | ] 105 | expected_leading_spaces = [False, False, True, True, False] 106 | expected_evaluates = [True, False, True, True, False] 107 | split_text, leading_spaces, evaluates = link_split(input_, False) 108 | assert len(split_text) == len(leading_spaces) == len(evaluates) 109 | assert split_text == expected_split_text 110 | assert leading_spaces == expected_leading_spaces 111 | assert evaluates == expected_evaluates 112 | 113 | def test_link_split_sentence(self) -> None: 114 | input_ = "a[URL][url]. 
b [URL](http://example.com).c" 115 | expected_split_text = [ 116 | "a[URL][", 117 | "url].", 118 | "b", 119 | "[URL](", 120 | "http://example.com).c", 121 | ] 122 | expected_leading_spaces = [False, False, True, True, False] 123 | expected_evaluates = [True, False, True, True, False] 124 | split_text, leading_spaces, evaluates = link_split(input_, False) 125 | assert len(split_text) == len(leading_spaces) == len(evaluates) 126 | assert split_text == expected_split_text 127 | assert leading_spaces == expected_leading_spaces 128 | assert evaluates == expected_evaluates 129 | 130 | def test_newline_split(self) -> None: 131 | input_ = "a <br /> b <br>c<br/>d" 132 | expected_split_text = ["a", "<br />", "b", "<br>", "c", "<br/>", "d"] 133 | expected_leading_spaces = [False, True, True, True, False, False, False] 134 | expected_evaluates = [True, False, True, False, True, False, True] 135 | split_text, leading_spaces, evaluates = newline_split(input_, False) 136 | assert len(split_text) == len(leading_spaces) == len(evaluates) 137 | assert split_text == expected_split_text 138 | assert leading_spaces == expected_leading_spaces 139 | assert evaluates == expected_evaluates 140 | 141 | def test_space_split(self) -> None: 142 | input_ = " ".join(["a"] * 10) 143 | expected_split_text = ["a"] * 10 144 | expected_leading_spaces = [False] + [True] * 9 145 | expected_evaluates = [True] * 10 146 | split_text, leading_spaces, evaluates = space_split(input_, False) 147 | assert len(split_text) == len(leading_spaces) == len(evaluates) 148 | assert split_text == expected_split_text 149 | assert leading_spaces == expected_leading_spaces 150 | assert evaluates == expected_evaluates 151 | -------------------------------------------------------------------------------- /tests/util.py: -------------------------------------------------------------------------------- 1 | import re 2 | from typing import Type 3 | 4 | import commonmark 5 | 6 | from markflow.formatters import MarkdownSection 7 
# Every whitespace character is stripped from rendered HTML before two renders
# are compared, so layout-only differences never fail a test.
IGNORED_HTML_CHARACTERS = re.compile(r"[\n\s]")
# The formatter corrects list numbering; dropping ` start="N"` attributes from
# the rendered HTML keeps those corrections from registering as differences.
LIST_NUMBERING_START = re.compile(r" start=\"[0-9]+\"")


def create_section(class_: Type[MarkdownSection], text: str) -> MarkdownSection:
    """Instantiate ``class_`` with ``0`` and the individual lines of ``text``.

    Args:
        class_: The section class (or any compatible callable) to construct.
        text: The raw Markdown; it is split with ``str.splitlines``.

    Returns:
        The newly constructed section.
    """
    lines = text.splitlines()
    return class_(0, lines)


def render(text: str) -> str:
    """Render ``text`` with commonmark and normalize the HTML for comparison.

    Args:
        text: The Markdown source to render.

    Returns:
        The rendered HTML with every ``start="N"`` attribute and every
        whitespace character removed.
    """
    html = commonmark.commonmark(text)
    without_starts = LIST_NUMBERING_START.sub("", html)
    return IGNORED_HTML_CHARACTERS.sub("", without_starts)