├── .flake8
├── .github
    └── workflows
    │   ├── ensure_green.yml
    │   └── release.yml
├── .gitignore
├── CONTRIBUTING.md
├── Dockerfile
├── IMPLEMENTATION.md
├── LICENSE.txt
├── Makefile
├── README.md
├── docker-compose.yml
├── markflow
    ├── __init__.py
    ├── __main__.py
    ├── _argparse.py
    ├── _utils
    │   ├── __init__.py
    │   ├── _utils.py
    │   └── textwrap.py
    ├── detectors
    │   ├── __init__.py
    │   ├── _lines.py
    │   ├── atx_heading.py
    │   ├── blank_line.py
    │   ├── block_quote.py
    │   ├── bullet_list.py
    │   ├── fenced_code_block.py
    │   ├── indented_code_block.py
    │   ├── link_reference_definition.py
    │   ├── ordered_list.py
    │   ├── paragraph.py
    │   ├── setext_heading.py
    │   ├── table.py
    │   └── thematic_break.py
    ├── exceptions.py
    ├── formatters
    │   ├── __init__.py
    │   ├── atx_heading.py
    │   ├── base.py
    │   ├── blank_line.py
    │   ├── block_quote.py
    │   ├── fenced_code_block.py
    │   ├── indented_code_block.py
    │   ├── link_reference_definition.py
    │   ├── lists.py
    │   ├── paragraph.py
    │   ├── setext_heading.py
    │   ├── table.py
    │   └── thematic_break.py
    ├── parser.py
    ├── reformat_markdown.py
    └── typing.py
├── poetry-aliases.sh
├── poetry.lock
├── pyproject.toml
├── stubs
    ├── commonmark.pyi
    ├── pytest.pyi
    └── rich
    │   ├── __init__.pyi
    │   ├── console.pyi
    │   ├── highlighter.pyi
    │   ├── logging.pyi
    │   ├── markdown.pyi
    │   └── style.pyi
└── tests
    ├── __init__.py
    ├── files
        ├── 0000_in_base.md
        ├── 0000_out_base.md
        ├── 0001_in_blank.md
        ├── 0001_out_blank.md
        ├── 0002_in_lists.md
        ├── 0002_out_lists.md
        ├── 0003_in_too_many_endling_newlines.md
        ├── 0003_out_too_many_endling_newlines.md
        ├── 0004_in_multiple_code_blocks.md
        ├── 0004_out_multiple_code_blocks.md
        ├── 0005_in_headings.md
        ├── 0005_out_headings.md
        ├── 0006_in_tables.md
        ├── 0006_out_tables.md
        ├── 0007_in_link_reference_definitions.md
        ├── 0007_out_link_reference_definitions.md
        ├── 0008_in_indented_code_blocks.md
        ├── 0008_out_indented_code_blocks.md
        ├── 0009_in_misnumbering.md
        ├── 0009_out_misnumbering.md
        ├── 0010_in_list_with_bold.md
        ├── 0010_out_list_with_bold.md
        ├── 0011_in_horizontal_lines.md
        ├── 0011_out_horizontal_lines.md
        ├── 0012_in_block_quotes.md
        ├── 0012_out_block_quotes.md
        ├── 0013_in_list_with_horizontal_line.md
        ├── 0013_out_list_with_horizontal_line.md
        ├── 0014_in_code_block_that_looks_like_a_heading.md
        ├── 0014_out_code_block_that_looks_like_a_heading.md
        ├── 0015_in_ordered_lists_with_code_blocks.md
        ├── 0015_out_ordered_lists_with_code_blocks.md
        ├── 0016_in_lists_starting_at_not_one.md
        ├── 0016_out_lists_starting_at_not_one.md
        ├── 0017_in_one_lists_with_many_newlines.md
        ├── 0017_out_one_lists_with_many_newlines.md
        ├── 0018_in_urls_with_trailing_characters.md
        ├── 0018_out_urls_with_trailing_characters.md
        ├── 0019_in_table_alignment.md
        ├── 0019_out_table_alignment.md
        ├── 0020_in_forced_paragraphs.md
        ├── 0020_out_forced_paragraphs.md
        ├── 0021_in_separators.md
        ├── 0021_out_separators.md
        ├── 0022_in_link_reference_definition_at_end_of_file.md
        ├── 0022_out_link_reference_definition_at_end_of_file.md
        ├── 0023_in_setext_heading_close_to_block_quote.md
        └── 0023_out_setext_heading_close_to_block_quote.md
    ├── pytest.ini
    ├── test_atx_heading.py
    ├── test_block_quote.py
    ├── test_fenced_code_block.py
    ├── test_files.py
    ├── test_horizontal_line.py
    ├── test_indented_code_block.py
    ├── test_link_reference_definition.py
    ├── test_list.py
    ├── test_paragraph.py
    ├── test_separator.py
    ├── test_setext_heading.py
    ├── test_table.py
    ├── test_utils.py
    └── util.py


/.flake8:
--------------------------------------------------------------------------------
1 | [flake8]
2 | ignore =
3 |     # Where black disagrees
4 |     E203,
5 |     W503
6 | max-line-length = 88
7 | 


--------------------------------------------------------------------------------
/.github/workflows/ensure_green.yml:
--------------------------------------------------------------------------------
 1 | name: Test changes
 2 | 
 3 | on:
 4 |   push:
 5 |     branches:
 6 |       - '**'
 7 |   pull_request:
 8 |     branches:
 9 |       - '**'
10 | 
11 | jobs:
12 |   audit:
13 |     runs-on: ubuntu-latest
14 |     steps:
15 |     - uses: actions/checkout@v2
16 |     - name: Set up Python 3.8
17 |       uses: actions/setup-python@v2
18 |       with:
19 |         python-version: 3.8
20 |     - name: Install dependencies
21 |       run: |
22 |         sudo apt-get update && sudo apt-get install -y make
23 |         python3 -m pip install --upgrade pip
24 |         pip install setuptools wheel twine
25 |         curl -sSL https://raw.githubusercontent.com/python-poetry/poetry/master/get-poetry.py | python3
26 |         source $HOME/.poetry/env
27 |         poetry self update
28 |     - name: Audit
29 |       run: |
30 |         export PATH="$HOME/.poetry/bin:${PATH}"
31 |         make audits
32 |   test:
33 |     runs-on: ubuntu-latest
34 |     strategy:
35 |       matrix:
36 |         python-version: [3.6, 3.7, 3.8, 3.9]
37 |     steps:
38 |     - uses: actions/checkout@v2
39 |     - name: Set up Python ${{ matrix.python-version }}
40 |       uses: actions/setup-python@v2
41 |       with:
42 |         python-version: ${{ matrix.python-version }}
43 |     - name: Install dependencies
44 |       run: |
45 |         sudo apt-get update && sudo apt-get install -y make
46 |         python3 -m pip install --upgrade pip
47 |         pip install setuptools wheel twine
48 |         curl -sSL https://raw.githubusercontent.com/python-poetry/poetry/master/get-poetry.py | python3
49 |         source $HOME/.poetry/env
50 |         poetry self update
51 |     - name: Test
52 |       run: |
53 |         export PATH="$HOME/.poetry/bin:${PATH}"
54 |         make tests_${{ matrix.python-version }}
55 | 


--------------------------------------------------------------------------------
/.github/workflows/release.yml:
--------------------------------------------------------------------------------
 1 | name: Upload Python Package
 2 | 
 3 | on:
 4 |   release:
 5 |     types: [created]
 6 | 
 7 | jobs:
 8 |   deploy:
 9 |     runs-on: ubuntu-latest
10 |     steps:
11 |     - uses: actions/checkout@v2
12 |     - name: Set up Python
13 |       uses: actions/setup-python@v2
14 |       with:
15 |         python-version: '3.x'
16 |     - name: Install dependencies
17 |       run: |
18 |         sudo apt-get update && sudo apt-get install -y make
19 |         python -m pip install --upgrade pip
20 |         pip install setuptools wheel twine
21 |         curl -sSL https://raw.githubusercontent.com/python-poetry/poetry/master/get-poetry.py | python
22 |     - name: Build and publish
23 |       env:
24 |         POETRY_PYPI_TOKEN_PYPI: ${{ secrets.POETRY_PYPI_TOKEN_PYPI }}
25 |       run: |
26 |         export PATH="$HOME/.poetry/bin:${PATH}"
27 |         make package
28 |         poetry publish
29 | 


--------------------------------------------------------------------------------
/.gitignore:
--------------------------------------------------------------------------------
 1 | # PYTHON
 2 | .pytest_cache
 3 | __pycache__
 4 | *.pyc
 5 | *.egg-info
 6 | 
 7 | # MACS
 8 | .DS_Store
 9 | 
10 | # TESTS
11 | junit.xml
12 | # coverage
13 | .coverage
14 | coverage.xml
15 | htmlcov
16 | # mypy
17 | .mypy_cache
18 | 
19 | # POETRY
20 | /dist/
21 | /pyproject.tmp
22 | /setup.py
23 | 


--------------------------------------------------------------------------------
/CONTRIBUTING.md:
--------------------------------------------------------------------------------
  1 | # Contributing to MarkFlow
  2 | 
  3 | The following are the contributing guidelines when making changes to this project.
  4 | 
  5 | Development depends on the installation of **make**, for coordinating execution of the
  6 | underlying tools, **poetry**, for managing our **Python** environments and the
  7 | **MarkFlow** package itself, and the supported versions of **Python** (currently 3.6,
  8 | 3.7, 3.8, and 3.9). You can generally get away with only having one version of
  9 | **Python** installed and testing against that. The GitHub builds will validate your
 10 | changes against all versions of **Python** anyway.
 11 | 
 12 | ## Making Changes
 13 | 
 14 | ([Step 0: Checkout the documentation on the implementation of the tool.](
 15 | IMPLEMENTATION.md))
 16 | 
 17 | To check to see if your submission is buildable, simply run `make`. If everything
 18 | passes, you are good to go on to [Submitting Changes](#submitting-changes). To
 19 | understand what that's doing, though, read on.
 20 | 
 21 | ### Running Tools
 22 | 
 23 | All tools that are available in the poetry environment (including **MarkFlow**) can
 24 | easily be added to your command line as the commands themselves by running
 25 | `. poetry-aliases.sh`. An additional alias, `markflow-markflow`, is also provided to
 26 | easily run **MarkFlow** against non-test files.
 27 | 
 28 | ### Running Audits
 29 | 
 30 | We run checks against every commit to ensure all files follow standards we enforce. The
 31 | audits we run are as follows:
 32 | 
 33 | ```shell
 34 | # Ensure all Markdown files would not be reformatted by us :)
 35 | make markflow
 36 | # Ensure all python files would not be reformatted by black
 37 | make black
 38 | # Ensure our imports are all sorted
 39 | make isort
 40 | # Ensure all Python files follow a few other rules enforced by flake8
 41 | make flake8
 42 | # Run all of the above. Every command will be run regardless of the others failing.
 43 | make audits
 44 | ```
 45 | 
 46 | The poetry environment comes with [**black**][black], and of course **MarkFlow**, so you
 47 | can quickly run the tools with `poetry run black` or `poetry run markflow` or just drop
 48 | into a shell with them by running `poetry shell`.
 49 | 
 50 | [black]: https://black.readthedocs.io/en/latest/
 51 | 
 52 | ### Running Tests
 53 | 
 54 | We test our code through unit and system tests that are run by [**pytest**][pytest] and
 55 | strict type checking enforced via [**mypy**][mypy]. The commands to run them are as
 56 | follows:
 57 | 
 58 | ```shell
 59 | # Run tests in /tests
 60 | make pytests
 61 | # Run mypy against the markflow library
 62 | make mypy_lib
 63 | # Run mypy against our tests
 64 | make mypy_tests
 65 | # Run all of the above in order, exiting on the first failure.
 66 | make tests
 67 | ```
 68 | 
 69 | Why do we exit on first failure unlike audits? Tests are noisier and this makes the
 70 | failures more obvious. In most cases the audits are unlikely to fill up your screen, but
 71 | even then.
 72 | 
 73 | [mypy]: http://mypy-lang.org/
 74 | [pytest]: https://docs.pytest.org/en/latest/
 75 | 
 76 | ### Submitting Changes
 77 | 
 78 | Once you've made all your changes, create a [pull request][pr]. Someone will be with you
 79 | shortly.
 80 | 
 81 | If you are correcting a bug you've seen when processing a **Markdown** file, add it and
 82 | the expected output to `tests/files`. In the folder, inputs and outputs are matched up
 83 | based on their leading numeric. So, `0010_in_tests.md`'s expected output is
 84 | `0010_out_tests.md`. The [README has a section on anonymizing text](README.md#issues) if
 85 | you're worried about leaking sensitive information.
 86 | 
 87 | [pr]: https://github.com/duo-labs/markflow/pulls
 88 | 
 89 | ## Proposing Changes
 90 | 
 91 | If you want to propose a rule change, like making inline code blocks split across lines,
 92 | feel free to open an [issue][issues].
 93 | 
 94 | [issues]: https://github.com/duo-labs/markflow/issues
 95 | 
 96 | ## Duplicating CI Locally
 97 | 
 98 | The build in CI simply runs the make commands in the container defined by the root
 99 | `Dockerfile`. You'll of course need [**docker**][docker]. Once you do, to build the
100 | image run:
101 | 
102 | ```shell
103 | make container
104 | ```
105 | 
106 | To run commands in the container, you'll need to mount our source. The following should
107 | do the trick when run from the project's directory:
108 | 
109 | ```shell
110 | docker run -v "`pwd`:/src" -w /src markflow_builder make
111 | # Build the wheel
112 | docker run -v "`pwd`:/src" -w /src markflow_builder make package
113 | ```
114 | 
115 | [docker]: https://www.docker.com/
116 | 


--------------------------------------------------------------------------------
/Dockerfile:
--------------------------------------------------------------------------------
 1 | # Build environment for MarkFlow: Ubuntu with Python 3.6-3.9, poetry, and make.
 2 | FROM ubuntu:20.04
 3 | 
 4 | # Add the deadsnakes PPA (presumably the source of the versioned python3.x packages
 5 | # installed below).
 6 | RUN apt-get update && \
 7 |     apt-get install -y software-properties-common && \
 8 |     add-apt-repository ppa:deadsnakes/ppa
 9 | 
10 | RUN apt-get update && apt-get -y upgrade
11 | 
12 | RUN apt-get install -y git
13 | 
14 | # The interpreter package is python3.9, matching python3.9-venv below; the earlier
15 | # spelling "python-3.9" does not follow that naming and is not installable.
16 | RUN apt-get install -y python3.6 python3.7 python3.8 python3.9 python3-pip
17 | RUN apt-get install -y python3.6-venv python3.7-venv python3.8-venv python3.9-venv
18 | # The poetry installer below is piped to "python", so make that name resolve.
19 | RUN ln -s /usr/bin/python3 /usr/bin/python
20 | 
21 | RUN apt-get install -y curl
22 | RUN curl -sSL https://raw.githubusercontent.com/python-poetry/poetry/master/get-poetry.py | python
23 | 
24 | # Expose the poetry executable installed under root's home directory.
25 | ENV PATH="/root/.poetry/bin:${PATH}"
26 | 
27 | RUN apt-get install -y make
28 | 
29 | # Setup our virtual environments. Sure the intermediate layers are large, but this
30 | # doesn't change often, and can take a while.
31 | #ADD pyproject.toml /src/
32 | #ADD poetry.lock /src/
33 | #WORKDIR /src
34 | #RUN poetry env use 3.6 && poetry install && \
35 | #    poetry env use 3.7 && poetry install && \
36 | #    poetry env use 3.8 && poetry install && \
37 | #    rm -rf /src
38 | 


--------------------------------------------------------------------------------
/IMPLEMENTATION.md:
--------------------------------------------------------------------------------
 1 | # MarkFlow Behind the Scenes
 2 | 
 3 | MarkFlow is a pretty simple tool that formats code in three steps:
 4 | 
 5 | * [Parse the text](#parsing-markdown)
 6 | * [Reformat each section and stitch the sections back together](#reformatting-sections)
 7 | * [Rerun with the output as the input to guarantee consistency](#ensuring-consistency)
 8 | 
 9 | A potential future step would be to render the text and ensure consistency outside of
10 | some rules (see [Future Architecture Ideas](#future-architecture-ideas)).
11 | 
12 | ## Parsing Markdown
13 | 
14 | We parse **Markdown** by continuously iterating over a series of splitter functions.
15 | Each function corresponds to a different [CommonMark][commonmark_spec] section type.
16 | They take in a list of lines. If that list starts with their section type, they return a
17 | `tuple` of that section (as a `list` of lines) and the remaining text (also as a `list`
18 | of lines). We use lists of lines as a performance gain, so we don't have to write (and
19 | execute) `lst = str_.splitlines()` and `"\n".join(lst)` all over the place. Otherwise,
20 | they return an empty `list` as the first member and the `list` of lines passed in as the
21 | second. Once we detect a section, we continue parsing the remaining lines.
22 | 
23 | The functions are designed to be mutually exclusive: if one splitter splits the text, no
24 | others should. This isn't really tested (hint, hint), but is hopefully achieved by
25 | adhering to the [CommonMark][commonmark_spec] standard.
26 | 
27 | [commonmark_spec]: https://spec.commonmark.org/0.29/
28 | 
29 | ## Reformatting Sections
30 | 
31 | The parsed text is then passed to the formatter class responsible for knowing how to
32 | format its section type. The various enforced rules can be checked out in the [README](
33 | README.md), but most implementations are fairly straightforward. More complicated ones
34 | should be fairly well documented. (If you see one that is confusing, open an [issue][
35 | issues].)
36 | 
37 | Some section types are recursive, namely lists and block quotes. These end up calling
38 | back into the formatter again. We're not too worried about stack overflows since the
39 | **Python** stack limit and the depth of recursive **Markdown** definitions by human
40 | beings should differ by several orders of magnitude (in favor of **Python**).
41 | 
42 | [issues]: https://github.com/duo-labs/markflow/issues
43 | 
44 | ## Ensuring Consistency
45 | 
46 | Once everything is reformatted, that output is taken and then run through the parsing
47 | and reformatting steps. The resulting document is then compared to our original
48 | calculation to ensure they are the same. This allows us to be more confident that we
49 | didn't mess up formatting since we calculate the same document structure between the
50 | initial and resulting documents.
51 | 
52 | ## Future Architecture Ideas
53 | 
54 | Here are some random ramblings on the future of **MarkFlow**.
55 | 
56 | ### Plugins
57 | 
58 | The tool supports tables, but they are actually extensions and not a feature of the
59 | [CommonMark][commonmark_spec] spec. Support for plugins could be added with tables being
60 | the first adopter. This is likely not a big deal right now as there are probably not
61 | many people making tables without necessary render extensions that wouldn't want to
62 | still have them prettied up. Nor are people clamoring for support for other extensions
63 | to the language.
64 | 
65 | [commonmark_spec]: https://spec.commonmark.org/0.29/
66 | 
67 | ### Rendering Consistency
68 | 
69 | Another nice thing would be to enforce consistent rendering of the input files. Progress
70 | on this has started as it is enforced by most tests, but tables are an extension to
71 | CommonMark and not a part of the library itself, and the [CommonMark validation
72 | library][commonmark_pkg] we are using doesn't support them. A potential option that makes
73 | even more sense in a plugin architecture would be having individual formatters handle
74 | validating rendering consistency.
75 | 
76 | If you end up debugging an issue because of this, you can pass `--write-renders` to save
77 | off the inputs. Pass `--dev-help` to see other developer options, if you're curious.
78 | 
79 | [commonmark_pkg]: https://github.com/readthedocs/commonmark.py
80 | 


--------------------------------------------------------------------------------
/LICENSE.txt:
--------------------------------------------------------------------------------
  1 | Copyright 2019 Cisco Systems, Inc. and/or its affiliates.  All rights reserved.
  2 | 
  3 |                                  Apache License
  4 |                            Version 2.0, January 2004
  5 |                         http://www.apache.org/licenses/
  6 | 
  7 |    TERMS AND CONDITIONS FOR USE, REPRODUCTION, AND DISTRIBUTION
  8 | 
  9 |    1. Definitions.
 10 | 
 11 |       "License" shall mean the terms and conditions for use, reproduction,
 12 |       and distribution as defined by Sections 1 through 9 of this document.
 13 | 
 14 |       "Licensor" shall mean the copyright owner or entity authorized by
 15 |       the copyright owner that is granting the License.
 16 | 
 17 |       "Legal Entity" shall mean the union of the acting entity and all
 18 |       other entities that control, are controlled by, or are under common
 19 |       control with that entity. For the purposes of this definition,
 20 |       "control" means (i) the power, direct or indirect, to cause the
 21 |       direction or management of such entity, whether by contract or
 22 |       otherwise, or (ii) ownership of fifty percent (50%) or more of the
 23 |       outstanding shares, or (iii) beneficial ownership of such entity.
 24 | 
 25 |       "You" (or "Your") shall mean an individual or Legal Entity
 26 |       exercising permissions granted by this License.
 27 | 
 28 |       "Source" form shall mean the preferred form for making modifications,
 29 |       including but not limited to software source code, documentation
 30 |       source, and configuration files.
 31 | 
 32 |       "Object" form shall mean any form resulting from mechanical
 33 |       transformation or translation of a Source form, including but
 34 |       not limited to compiled object code, generated documentation,
 35 |       and conversions to other media types.
 36 | 
 37 |       "Work" shall mean the work of authorship, whether in Source or
 38 |       Object form, made available under the License, as indicated by a
 39 |       copyright notice that is included in or attached to the work
 40 |       (an example is provided in the Appendix below).
 41 | 
 42 |       "Derivative Works" shall mean any work, whether in Source or Object
 43 |       form, that is based on (or derived from) the Work and for which the
 44 |       editorial revisions, annotations, elaborations, or other modifications
 45 |       represent, as a whole, an original work of authorship. For the purposes
 46 |       of this License, Derivative Works shall not include works that remain
 47 |       separable from, or merely link (or bind by name) to the interfaces of,
 48 |       the Work and Derivative Works thereof.
 49 | 
 50 |       "Contribution" shall mean any work of authorship, including
 51 |       the original version of the Work and any modifications or additions
 52 |       to that Work or Derivative Works thereof, that is intentionally
 53 |       submitted to Licensor for inclusion in the Work by the copyright owner
 54 |       or by an individual or Legal Entity authorized to submit on behalf of
 55 |       the copyright owner. For the purposes of this definition, "submitted"
 56 |       means any form of electronic, verbal, or written communication sent
 57 |       to the Licensor or its representatives, including but not limited to
 58 |       communication on electronic mailing lists, source code control systems,
 59 |       and issue tracking systems that are managed by, or on behalf of, the
 60 |       Licensor for the purpose of discussing and improving the Work, but
 61 |       excluding communication that is conspicuously marked or otherwise
 62 |       designated in writing by the copyright owner as "Not a Contribution."
 63 | 
 64 |       "Contributor" shall mean Licensor and any individual or Legal Entity
 65 |       on behalf of whom a Contribution has been received by Licensor and
 66 |       subsequently incorporated within the Work.
 67 | 
 68 |    2. Grant of Copyright License. Subject to the terms and conditions of
 69 |       this License, each Contributor hereby grants to You a perpetual,
 70 |       worldwide, non-exclusive, no-charge, royalty-free, irrevocable
 71 |       copyright license to reproduce, prepare Derivative Works of,
 72 |       publicly display, publicly perform, sublicense, and distribute the
 73 |       Work and such Derivative Works in Source or Object form.
 74 | 
 75 |    3. Grant of Patent License. Subject to the terms and conditions of
 76 |       this License, each Contributor hereby grants to You a perpetual,
 77 |       worldwide, non-exclusive, no-charge, royalty-free, irrevocable
 78 |       (except as stated in this section) patent license to make, have made,
 79 |       use, offer to sell, sell, import, and otherwise transfer the Work,
 80 |       where such license applies only to those patent claims licensable
 81 |       by such Contributor that are necessarily infringed by their
 82 |       Contribution(s) alone or by combination of their Contribution(s)
 83 |       with the Work to which such Contribution(s) was submitted. If You
 84 |       institute patent litigation against any entity (including a
 85 |       cross-claim or counterclaim in a lawsuit) alleging that the Work
 86 |       or a Contribution incorporated within the Work constitutes direct
 87 |       or contributory patent infringement, then any patent licenses
 88 |       granted to You under this License for that Work shall terminate
 89 |       as of the date such litigation is filed.
 90 | 
 91 |    4. Redistribution. You may reproduce and distribute copies of the
 92 |       Work or Derivative Works thereof in any medium, with or without
 93 |       modifications, and in Source or Object form, provided that You
 94 |       meet the following conditions:
 95 | 
 96 |       (a) You must give any other recipients of the Work or
 97 |           Derivative Works a copy of this License; and
 98 | 
 99 |       (b) You must cause any modified files to carry prominent notices
100 |           stating that You changed the files; and
101 | 
102 |       (c) You must retain, in the Source form of any Derivative Works
103 |           that You distribute, all copyright, patent, trademark, and
104 |           attribution notices from the Source form of the Work,
105 |           excluding those notices that do not pertain to any part of
106 |           the Derivative Works; and
107 | 
108 |       (d) If the Work includes a "NOTICE" text file as part of its
109 |           distribution, then any Derivative Works that You distribute must
110 |           include a readable copy of the attribution notices contained
111 |           within such NOTICE file, excluding those notices that do not
112 |           pertain to any part of the Derivative Works, in at least one
113 |           of the following places: within a NOTICE text file distributed
114 |           as part of the Derivative Works; within the Source form or
115 |           documentation, if provided along with the Derivative Works; or,
116 |           within a display generated by the Derivative Works, if and
117 |           wherever such third-party notices normally appear. The contents
118 |           of the NOTICE file are for informational purposes only and
119 |           do not modify the License. You may add Your own attribution
120 |           notices within Derivative Works that You distribute, alongside
121 |           or as an addendum to the NOTICE text from the Work, provided
122 |           that such additional attribution notices cannot be construed
123 |           as modifying the License.
124 | 
125 |       You may add Your own copyright statement to Your modifications and
126 |       may provide additional or different license terms and conditions
127 |       for use, reproduction, or distribution of Your modifications, or
128 |       for any such Derivative Works as a whole, provided Your use,
129 |       reproduction, and distribution of the Work otherwise complies with
130 |       the conditions stated in this License.
131 | 
132 |    5. Submission of Contributions. Unless You explicitly state otherwise,
133 |       any Contribution intentionally submitted for inclusion in the Work
134 |       by You to the Licensor shall be under the terms and conditions of
135 |       this License, without any additional terms or conditions.
136 |       Notwithstanding the above, nothing herein shall supersede or modify
137 |       the terms of any separate license agreement you may have executed
138 |       with Licensor regarding such Contributions.
139 | 
140 |    6. Trademarks. This License does not grant permission to use the trade
141 |       names, trademarks, service marks, or product names of the Licensor,
142 |       except as required for reasonable and customary use in describing the
143 |       origin of the Work and reproducing the content of the NOTICE file.
144 | 
145 |    7. Disclaimer of Warranty. Unless required by applicable law or
146 |       agreed to in writing, Licensor provides the Work (and each
147 |       Contributor provides its Contributions) on an "AS IS" BASIS,
148 |       WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or
149 |       implied, including, without limitation, any warranties or conditions
150 |       of TITLE, NON-INFRINGEMENT, MERCHANTABILITY, or FITNESS FOR A
151 |       PARTICULAR PURPOSE. You are solely responsible for determining the
152 |       appropriateness of using or redistributing the Work and assume any
153 |       risks associated with Your exercise of permissions under this License.
154 | 
155 |    8. Limitation of Liability. In no event and under no legal theory,
156 |       whether in tort (including negligence), contract, or otherwise,
157 |       unless required by applicable law (such as deliberate and grossly
158 |       negligent acts) or agreed to in writing, shall any Contributor be
159 |       liable to You for damages, including any direct, indirect, special,
160 |       incidental, or consequential damages of any character arising as a
161 |       result of this License or out of the use or inability to use the
162 |       Work (including but not limited to damages for loss of goodwill,
163 |       work stoppage, computer failure or malfunction, or any and all
164 |       other commercial damages or losses), even if such Contributor
165 |       has been advised of the possibility of such damages.
166 | 
167 |    9. Accepting Warranty or Additional Liability. While redistributing
168 |       the Work or Derivative Works thereof, You may choose to offer,
169 |       and charge a fee for, acceptance of support, warranty, indemnity,
170 |       or other liability obligations and/or rights consistent with this
171 |       License. However, in accepting such obligations, You may act only
172 |       on Your own behalf and on Your sole responsibility, not on behalf
173 |       of any other Contributor, and only if You agree to indemnify,
174 |       defend, and hold each Contributor harmless for any liability
175 |       incurred by, or claims asserted against, such Contributor by reason
176 |       of your accepting any such warranty or additional liability.
177 | 
178 |    END OF TERMS AND CONDITIONS
179 | 
180 |    APPENDIX: How to apply the Apache License to your work.
181 | 
182 |       To apply the Apache License to your work, attach the following
183 |       boilerplate notice, with the fields enclosed by brackets "[]"
184 |       replaced with your own identifying information. (Don't include
185 |       the brackets!)  The text should be enclosed in the appropriate
186 |       comment syntax for the file format. We also recommend that a
187 |       file or class name and description of purpose be included on the
188 |       same "printed page" as the copyright notice for easier
189 |       identification within third-party archives.
190 | 
191 |    Copyright 2020 [name of copyright owner]
192 | 
193 |    Licensed under the Apache License, Version 2.0 (the "License");
194 |    you may not use this file except in compliance with the License.
195 |    You may obtain a copy of the License at
196 | 
197 |        http://www.apache.org/licenses/LICENSE-2.0
198 | 
199 |    Unless required by applicable law or agreed to in writing, software
200 |    distributed under the License is distributed on an "AS IS" BASIS,
201 |    WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
202 |    See the License for the specific language governing permissions and
203 |    limitations under the License.


--------------------------------------------------------------------------------
/Makefile:
--------------------------------------------------------------------------------
  1 | all: audits tests
  2 | 
  3 | # --- ENVIRONMENT MANAGEMENT ---
  4 | .PHONY: clean
  5 | 
  6 | clean:
  7 | 	git clean -fdX
  8 | 	# poetry returns a non-zero exit status if the virtualenv doesn't exist so we ignore
  9 | 	# errors.
 10 | 	-poetry env remove 3.6
 11 | 	-poetry env remove 3.7
 12 | 	-poetry env remove 3.8
 13 | 	-poetry env remove 3.9
 14 | 
 15 | 
 16 | .PHONY: clean _venv _venv_3.6 _venv_3.7 _venv_3.8 _venv_3.9 venvs
 17 | venvs: _venv_3.6 _venv_3.7 _venv_3.8 _venv_3.9
 18 | 
 19 | _venv:
 20 | 	poetry env use ${PYTHON_VERSION}
 21 | 	poetry install
 22 | 
 23 | _venv_3.6:
 24 | 	PYTHON_VERSION=3.6 $(MAKE) _venv
 25 | 
 26 | _venv_3.7:
 27 | 	PYTHON_VERSION=3.7 $(MAKE) _venv
 28 | 
 29 | _venv_3.8:
 30 | 	PYTHON_VERSION=3.8 $(MAKE) _venv
 31 | 
 32 | _venv_3.9:
 33 | 	PYTHON_VERSION=3.9 $(MAKE) _venv
 34 | 
 35 | 
 36 | # --- AUDITS ---
 37 | .PHONY: audits black flake8 isort markflow
 38 | 
 39 | # Runs all of our audits regardless of if any fail so we can get all relevant issues
 40 | audits:
 41 | 	@status=0; \
 42 | 	for target in black flake8 isort markflow; do \
 43 | 		$(MAKE) $${target}; \
 44 | 		status=$$(($$status + $$?)); \
 45 | 		echo ""; \
 46 | 	done; \
 47 | 	if [ $$status -eq 0 ]; then \
 48 | 	    echo "All Audits Succeeded!"; \
 49 | 	else \
 50 | 		echo "Some audits failed. :("; \
 51 | 	fi; \
 52 | 	exit $$status
 53 | 
 54 | black: _venv_3.8
 55 | 	@echo Running $@ audit...
 56 | 	git ls-files | egrep '.*\.pyi?$$' | xargs poetry run black --check
 57 | 	@echo Success!
 58 | 
 59 | # pyi files provide type stubbing and can look weird to flake8, so we filter them out
 60 | flake8: _venv_3.8
 61 | 	@echo Running $@ audit...
 62 | 	git ls-files | egrep '.*\.py$$' | xargs poetry run flake8
 63 | 	@echo Success!
 64 | 
 65 | isort: _venv_3.8
 66 | 	@echo Running $@ audit...
 67 | 	git ls-files | egrep '.*\.pyi?$$' | xargs poetry run isort --profile=black --check
 68 | 	@echo Success!
 69 | 
 70 | markflow: _venv_3.8
 71 | 	@echo Running $@ audit...
 72 | 	git ls-files | egrep ".md$$" | grep -v "tests/" | xargs poetry run markflow --check
 73 | 	@echo Success!
 74 | 
 75 | # --- TESTS ---
 76 | .PHONY: tests tests_3.6 tests_3.7 tests_3.8 tests_3.9
 77 | tests: utests mypy ensure_deps
 78 | tests_3.6: utests_3.6 ensure_deps_3.6
 79 | tests_3.7: utests_3.7 ensure_deps_3.7
 80 | tests_3.8: utests_3.8 mypy ensure_deps_3.8
 81 | tests_3.9: utests_3.9 ensure_deps_3.9
 82 | 
 83 | # Ensure dependencies are properly specified
 84 | .PHONY: ensure_deps _ensure_deps ensure_deps_3.6 ensure_deps_3.7 ensure_deps_3.8 ensure_deps_3.9
 85 | ensure_deps: ensure_deps_3.6 ensure_deps_3.7 ensure_deps_3.8 ensure_deps_3.9
 86 | 
 87 | _ensure_deps:
 88 | 	# Ensure dependencies markflow needs didn't sneak into dev dependencies
 89 | 	poetry env use ${PYTHON_VERSION}
 90 | 	poetry install --no-dev
 91 | 	echo -e "Hello\n--" | poetry run markflow
 92 | 
 93 | ensure_deps_3.6:
 94 | 	PYTHON_VERSION=3.6 $(MAKE) _ensure_deps
 95 | 
 96 | ensure_deps_3.7:
 97 | 	PYTHON_VERSION=3.7 $(MAKE) _ensure_deps
 98 | 
 99 | ensure_deps_3.8:
100 | 	PYTHON_VERSION=3.8 $(MAKE) _ensure_deps
101 | 
102 | ensure_deps_3.9:
103 | 	PYTHON_VERSION=3.9 $(MAKE) _ensure_deps
104 | 
105 | # MyPy
106 | .PHONY: mypy mypy_lib mypy_tests
107 | mypy: mypy_lib mypy_tests
108 | 
109 | mypy_lib: _venv_3.8
 110 | 	# --implicit-reexport means that we don't have to explicitly tell mypy about our
111 | 	# modules' members via a `__all__`
112 | 	poetry env use 3.8
113 | 	MYPYPATH=$(CURDIR)/stubs poetry run mypy --strict --implicit-reexport markflow
114 | 
115 | mypy_tests: _venv_3.8
 116 | 	# --implicit-reexport means that we don't have to explicitly tell mypy about our
117 | 	# modules' members via a `__all__`
118 | 	poetry env use 3.8
119 | 	MYPYPATH=$(CURDIR)/stubs poetry run mypy --strict --implicit-reexport tests
120 | 
121 | # Unit Tests
 122 | # Bit of a misnomer since `test_files.py` is more of a system/integration test
123 | .PHONY: utests _utests utests_3.6 utests_3.7 utests_3.8 utests_3.9
124 | utests: utests_3.6 utests_3.7 utests_3.8 utests_3.9
125 | 
126 | _utests:
127 | 	poetry env use ${PYTHON_VERSION}
128 | 	cd $(CURDIR)/tests && poetry run pytest --cov=markflow --cov-report=term \
129 | 		--cov-report=html --junit-xml=junit.xml
130 | 	@echo For more detailed information, see $(CURDIR)/tests/htmlcov/index.html
131 | 
132 | utests_3.6: _venv_3.6
133 | 	PYTHON_VERSION=3.6 $(MAKE) _utests
134 | 
135 | utests_3.7: _venv_3.7
136 | 	PYTHON_VERSION=3.7 $(MAKE) _utests
137 | 
138 | utests_3.8: _venv_3.8
139 | 	PYTHON_VERSION=3.8 $(MAKE) _utests
140 | 
141 | utests_3.9: _venv_3.9
142 | 	PYTHON_VERSION=3.9 $(MAKE) _utests
143 | 
144 | # --- EXPORTING ---
145 | .PHONY: package
146 | 
147 | package:
148 | 	poetry build
149 | 
150 | # --- CI CONTAINER ---
151 | .PHONY: container
152 | 
153 | container:
154 | 	docker build . -t markflow_builder
155 | 


--------------------------------------------------------------------------------
/README.md:
--------------------------------------------------------------------------------
  1 | # MarkFlow
  2 | 
  3 | Welcome to **MarkFlow**. This tool automatically reformats your **Markdown** to provide
  4 | consistent looking **Markdown** files that look pretty similar to HTML that would be
  5 | generated by them.
  6 | 
  7 | ## Quickstart
  8 | 
  9 | To use this tool, install it with pip then run `markflow`:
 10 | 
 11 | ```shell
 12 | pip install markflow
 13 | markflow SOMETHING.md
 14 | ```
 15 | 
 16 | To install from source, assuming you already have `poetry` installed, from the project
 17 | directory, run:
 18 | 
 19 | ```shell
 20 | poetry install
 21 | poetry run markflow
 22 | ```
 23 | 
 24 | Just want to see if there will be any changes? Use the `--check` flag:
 25 | 
 26 | ```shell
 27 | markflow --check $PATH_TO_MARKDOWN_FILE
 28 | ```
 29 | 
 30 | For all features, see the built-in help:
 31 | 
 32 | ```shell
 33 | markflow --help
 34 | ```
 35 | 
 36 | ## Enforced Rules
 37 | 
 38 | The tool ensures that the following rules are enforced for each different type of
 39 | Markdown section. For all sections, trailing spaces on each line are removed. It also
 40 | ensures that **Markdown** files end with a single newline and newlines are all `'\n'`.
 41 | 
 42 | This tool uses the **Markdown** standard defined by [CommonMark 0.29][commonmark_spec].
 43 | It is expected to evolve with the standard and this section will be updated as support
 44 | is added. If you notice any discrepancies, please open an issue.
 45 | 
 46 | [commonmark_spec]: https://spec.commonmark.org/0.29/
 47 | 
 48 | ### Block Quotes
 49 | 
 50 | Block quotes are fixed up with proper indentation markers for indented quotes, quote
 51 | indicators have any space between them removed, and unescaped `>` that could be confused
 52 | with quote markers are escaped. *e.g.*:
 53 | 
 54 | ```markdown
 55 | >
 56 | > > Text >
 57 | > >
 58 | >
 59 | > > Ice Cream \> 0O0>
 60 | >
 61 | ```
 62 | 
 63 | becomes:
 64 | 
 65 | ```markdown
 66 | >
 67 | >> Text \>
 68 | >>
 69 | >
 70 | >> Ice Cream \>  0O0>
 71 | >
 72 | ```
 73 | 
 74 | ### Code Blocks
 75 | 
 76 | Fenced codeblocks have any whitespace stripped from their markers and then printed out
 77 | as usual.
 78 | 
 79 | ````markdown
 80 | ``` markdown
 81 | # Markdown code
 82 |   ```
 83 | ````
 84 | 
 85 | becomes
 86 | 
 87 | ````markdown
 88 | ```markdown
 89 | # Markdown code
 90 | ```
 91 | ````
 92 | 
 93 | Indented code blocks simply have their trailing whitespace removed.
 94 | 
 95 | ### Footnotes (or Link Reference Definitions)
 96 | 
 97 | Footnotes will have their whitespace corrected and their titles wrapped. The tool will
 98 | however respect what line URLs should appear on, even if they overflow. For example, the
 99 | next two examples would be unchanged.
100 | 
101 | ```markdown
102 | [really_really_really_long_link_that_could_go_on_a_new_line]: /but/doesnt/because/the/tool/understands/that/you/may/not/want/that
103 | ```
104 | 
105 | ```markdown
106 | [short_link]:
107 | /that/stays/on/separate/lines
108 | 'Even if title would fit'
109 | ```
110 | 
111 | Titles will be kept on whatever line you write them on, as long as they wouldn't be
112 | wrapped off the line.
113 | 
114 | ```markdown
115 | [really_really_really_long_link_that_could_go_on_a_new_line]: /but/doesnt/because/the/tool/understands/that/you/may/not/want/that "But the title is moved to the next line and itself is wrapped because it is also really long."
116 | ```
117 | 
118 | becomes:
119 | 
120 | ```markdown
121 | [really_really_really_long_link_that_could_go_on_a_new_line]: /but/doesnt/because/the/tool/understands/that/you/may/not/want/that
122 | "But the title is moved to the next line and itself is wrapped because it is also really
123 | long."
124 | ```
125 | 
126 | ### Headings
127 | 
128 | Heading lines begin and end with no whitespace. If you're using ATX headings (leading
129 | `#`s), the tool will correct missing or extra spaces between the octothorpes and the
130 | heading.
131 | 
132 | ```markdown
133 | #Non-Standard Heading
134 | ```
135 | 
136 | becomes
137 | 
138 | ```markdown
139 | # Non-Standard Heading
140 | ```
141 | 
142 | If you are using setext headings (*i.e.*, underlined headings), they will automatically
143 | be fixed to ensure underlining matches the heading length. *e.g.*:
144 | 
145 | ```markdown
146 | Heading 1
147 | --
148 | ```
149 | 
150 | becomes
151 | 
152 | ```markdown
153 | Heading 1
154 | ---------
155 | ```
156 | 
157 | If you have a heading that extends beyond an entire line, **MarkFlow** will wrap it for
158 | you.
159 | 
160 | ```markdown
161 | This is a really long heading that I had to make up so that it would be at least 88 characters long
162 | --
163 | ```
164 | 
165 | becomes
166 | 
167 | ```markdown
168 | This is a really long heading that I had to make up so that it would be at least 88
169 | characters long
170 | -----------------------------------------------------------------------------------
171 | ```
172 | 
173 | ### Lists
174 | 
175 | Lists will be corrected to proper indentation. In addition, ordered lists will be
176 | properly numbered and bullet lists will be reformatted to use consistent bullets. Line
177 | lengths are also enforced. *e.g.*:
178 | 
179 | ```markdown
180 | 2. One
181 |     * Asterisk
182 |   - Dash
183 | 1. Two
184 | 5. Three
185 | ```
186 | 
187 | becomes
188 | 
189 | ```markdown
190 | 2. One
191 |   * Asterisk
192 |   * Dash
193 | 3. Two
194 | 4. Three
195 | ```
196 | 
197 | CommonMark doesn't allow lists to start with 0. That's not really a big deal for this
198 | tool, so we are OK with that. If this causes you issues, please let us know by opening
199 | an [issue][issues].
200 | 
201 | ### Paragraphs
202 | 
203 | Paragraphs are reformatted to ensure they are the proper length. URLs and footnotes are
204 | properly split across lines. Inline code is placed all on a singular line. *e.g.*
205 | (assuming a line length of 1):
206 | 
207 | ```markdown
208 | test `test =
209 | 1` [url](http://example.com)
210 | ```
211 | 
212 | becomes:
213 | 
214 | ```markdown
215 | test
216 | `test = 1`
217 | [url](
218 | http://example.com)
219 | ```
220 | 
221 | ### Separators
222 | 
223 | Separating lines (*i.e.*, blank lines) contain only new lines, removing any horizontal
224 | whitespace.
225 | 
226 | ### Tables
227 | 
228 | Tables are reformatted to ensure proper width and headings are centered and all cells
229 | have at minimum one space between their contents and column separators. Alignment is
230 | supported too! *e.g.*:
231 | 
232 | ```markdown
233 | |L|C|R|N|
234 | |:--|:-:|--:|---|
235 | |a|a|a|a|
236 | |aa|aa|aa|aa|
237 | |abcde|abcde|abcde|abcde|
238 | ```
239 | 
240 | becomes:
241 | 
242 | ```markdown
243 | | L     |   C   |     R |   N   |
244 | |:------|:-----:|------:|-------|
245 | | a     |   a   |     a | a     |
246 | | aa    |  aa   |    aa | aa    |
247 | | abcde | abcde | abcde | abcde |
248 | ```
249 | 
250 | ### Thematic Breaks
251 | 
252 | Thematic breaks are extended or reduced to match the document's line length. If line
253 | length is set to infinity, it will instead use 3 of the separating character which must
254 | be one of `-`, `_`, or `*`.
255 | 
256 | ```markdown
257 | -- - -
258 | ```
259 | 
260 | becomes:
261 | 
262 | ```markdown
263 | ----------------------------------------------------------------------------------------
264 | ```
265 | 
266 | ## API Reference
267 | 
268 | The tool also provides a function to reformat **Markdown** strings yourself.
269 | 
270 | ```python
271 | from markflow import reformat_markdown_text
272 | 
273 | markdown = "   # Header 1"
274 | nice_markdown = reformat_markdown_text(markdown, width=88)
275 | ```
276 | 
277 | ## Contributing
278 | 
279 | To contribute to this project, check out our [contributing guide](CONTRIBUTING.md).
280 | 
281 | ## Issues
282 | 
283 | If you run into an issue running a **Markdown** file, feel free to open an [issue][
284 | issues]. If you can include the faulting file, that will make it so much easier to
285 | debug.
286 | 
287 | This script can help in anonymizing your file if you have any confidential information
288 | in it.
289 | 
290 | ```python
291 | #!/usr/bin/env python3
292 | """ Anonymize file XXXX.md and output it to XXXX.out.md """
293 | import pathlib
294 | import random
295 | import string
296 | 
297 | FILE_NAME = "XXXX.md"
298 | input_path = pathlib.Path(FILE_NAME)
299 | output_path = pathlib.Path(".out.".join(FILE_NAME.rsplit(".", maxsplit=1)))
300 | text = input_path.read_text()
301 | output = ""
302 | 
303 | for char in text:
304 |     if char in string.ascii_lowercase:
305 |         char = random.choice(string.ascii_lowercase)
306 |     elif char in string.ascii_uppercase:
307 |         char = random.choice(string.ascii_uppercase)
308 |     output += char
309 | output_path.write_text(output)
310 | ```
311 | 
312 | [issues]: https://github.com/duo-labs/markflow/issues
313 | 
314 | ## Implementation
315 | 
316 | To read more about how the tool works, check out the [implementation outline](
317 | IMPLEMENTATION.md).
318 | 
319 | ## Credits
320 | 
321 | This tool was inspired by a coworker not enjoying having to manually reformat
322 | **Markdown** files. He wanted a tool that would enforce it like [**black**][black] does
323 | for **Python** code. That is why the line length default is 88.
324 | 
325 | [black]: https://black.readthedocs.io/en/latest/
326 | 
327 | ## A Bonus Note on Block Quote Formatting
328 | 
329 | Escaping `>` is especially important for the tool itself as otherwise updated block
330 | quotes could be too deep. For instance, incorrect wrapping here could result in an extra
331 | indented block of code.
332 | 
333 | ```markdown
334 | > Please don't wrap after this period. >
335 | > Because I don't want to be a double quote.
336 | ```
337 | 
338 | becomes:
339 | 
340 | ```markdown
341 | > Please don't wrap after this period.
342 | > > Because I don't want to be a
343 | > double quote.
344 | ```
345 | 
346 | which would format to:
347 | 
348 | ```markdown
349 | > Please don't wrap after this period.
350 | > > Because I don't want to be a
351 | > > double quote.
352 | ```
353 | 
354 | Of course, if the tool tried that, it would throw an exception since it double checks
355 | that if it were to be rerun the output would not change, at which point, hopefully, dear
356 | reader, you would open an issue. But I get it if you don't want to. I've been there.
357 | 


--------------------------------------------------------------------------------
/docker-compose.yml:
--------------------------------------------------------------------------------
1 | version: '3'
2 | services:
3 |   build:
4 |     build: .
5 |     volumes:
6 |       - .:/src


--------------------------------------------------------------------------------
/markflow/__init__.py:
--------------------------------------------------------------------------------
1 | # flake8: noqa
2 | from .exceptions import *
3 | from .reformat_markdown import *
4 | 


--------------------------------------------------------------------------------
/markflow/_argparse.py:
--------------------------------------------------------------------------------
  1 | import argparse
  2 | import dataclasses
  3 | import glob
  4 | import os
  5 | import pathlib
  6 | from typing import Any, Callable, List, Optional, Sequence, Union, cast
  7 | 
  8 | 
  9 | @dataclasses.dataclass(frozen=True)
 10 | class Permission:  # Immutable pairing of an os.access() mode with wording for error messages.
 11 |     os_constant: int  # Mode flag handed to os.access(), e.g. os.R_OK.
 12 |     verbiage: str  # Verb phrase interpolated into error messages, e.g. "read from".
 13 | 
 14 | 
 15 | EXECUTABLE = Permission(os.X_OK, "execute")  # Path must be executable.
 16 | READABLE = Permission(os.R_OK, "read from")  # Path must be readable.
 17 | WRITABLE = Permission(os.W_OK, "write to")  # Path must be writable.
 18 | 
 19 | 
 20 | class ExistingPath:  # Callable turning a string into a pathlib.Path that must already exist.
 21 |     def __init__(self, permissions: List[Permission]):
 22 |         self._permissions = permissions  # NOTE(review): stored but never checked in __call__, unlike Directory/File — confirm intent.
 23 | 
 24 |     def __call__(self, string: str) -> pathlib.Path:
 25 |         path = pathlib.Path(string)
 26 |         if not path.exists():  # Files and directories are both accepted; only absence is an error.
 27 |             raise argparse.ArgumentTypeError(
 28 |                 f"specified path does not exist: {repr(str(path))}"
 29 |             )
 30 |         return path
 31 | 
 32 | 
 33 | class Directory:  # Callable validating a directory path; raises argparse.ArgumentTypeError on failure.
 34 |     def __init__(self, permissions: List[Permission], must_exist: bool = False):
 35 |         self._permissions = permissions  # Checked with os.access() only when the path already exists.
 36 |         self._must_exist = must_exist  # When False, a missing path is returned without any checks.
 37 | 
 38 |     def __call__(self, string: str) -> pathlib.Path:
 39 |         path = pathlib.Path(string)
 40 |         if path.exists():
 41 |             if not path.is_dir():  # Anything that exists but is not a directory is rejected.
 42 |                 raise argparse.ArgumentTypeError(
 43 |                     f"specified directory is a file: {repr(str(path))}"
 44 |                 )
 45 | 
 46 |             for permission in self._permissions:  # The first missing permission aborts with its verbiage.
 47 |                 if not os.access(path, permission.os_constant):
 48 |                     raise argparse.ArgumentTypeError(
 49 |                         f"cannot {permission.verbiage} directory: " f"{repr(str(path))}"
 50 |                     )
 51 |         else:  # Path is absent: only an error if the caller required it to exist.
 52 |             if self._must_exist:
 53 |                 raise argparse.ArgumentTypeError(
 54 |                     f"directory does not exist: {repr(str(path))}"
 55 |                 )
 56 |         return path
 57 | 
 58 | 
 59 | class File:  # Callable validating a file path; raises argparse.ArgumentTypeError on failure.
 60 |     def __init__(self, permissions: List[Permission], must_exist: bool = False):
 61 |         self._permissions = permissions  # Checked on the file if it exists, otherwise on its parent directory.
 62 |         self._must_exist = must_exist  # When True, a missing file is an error.
 63 | 
 64 |     def __call__(self, string: str) -> pathlib.Path:
 65 |         path = pathlib.Path(string)
 66 |         if path.exists():
 67 |             if path.is_dir():  # Only directories are rejected; any other existing path passes this check.
 68 |                 raise argparse.ArgumentTypeError(
 69 |                     f"file is a directory: {repr(str(path))}"
 70 |                 )
 71 | 
 72 |             for permission in self._permissions:  # Existing file: check permissions on the file itself.
 73 |                 if not os.access(path, permission.os_constant):
 74 |                     raise argparse.ArgumentTypeError(
 75 |                         f"can't {permission.verbiage} file: {repr(str(path))}"
 76 |                     )
 77 |         else:  # File is absent: apply the checks to its parent directory instead.
 78 |             if self._must_exist:
 79 |                 raise argparse.ArgumentTypeError(
 80 |                     f"file does not exist: {repr(str(path))}"
 81 |                 )
 82 | 
 83 |             if not path.parent.is_dir():  # The containing directory must already exist.
 84 |                 raise argparse.ArgumentTypeError(
 85 |                     f"directory does not exist for file: {repr(str(path))}"
 86 |                 )
 87 | 
 88 |             for permission in self._permissions:  # Same permission list, evaluated against the parent.
 89 |                 if not os.access(path.parent, permission.os_constant):
 90 |                     raise argparse.ArgumentTypeError(
 91 |                         f"cannot {permission.verbiage} directory of file: "
 92 |                         f"{repr(str(path))}"
 93 |                     )
 94 |         return path
 95 | 
 96 | 
 97 | class AddMarkdownFilesInDirOrPathsAction(argparse.Action):  # Expands directory arguments into the *.md files beneath them.
 98 |     def __init__(
 99 |         self,
100 |         option_strings: List[str],
101 |         dest: str,
102 |         type: Callable[[str], pathlib.Path],
103 |         nargs: Optional[str] = None,
104 |         **kwargs: Any,
105 |     ):
106 |         if nargs != "*":  # Only nargs="*" is supported; anything else is rejected up front.
107 |             raise ValueError("nargs must be *")
108 |         super().__init__(option_strings, dest, type=type, nargs=nargs, **kwargs)
109 | 
110 |     def __call__(
111 |         self,
112 |         parser: argparse.ArgumentParser,
113 |         namespace: argparse.Namespace,
114 |         values: Union[str, Sequence[Any], None],
115 |         option_string: Optional[str] = None,
116 |     ) -> None:
117 |         if values is None:  # Nothing supplied: leave the namespace untouched.
118 |             return
119 |         values = cast(Sequence[pathlib.Path], values)  # assumes `type` already converted each value to a Path — TODO confirm
120 |         expanded_paths = []
121 |         for value in values:
122 |             if value.is_file():  # Files are kept as given, whatever their extension.
123 |                 expanded_paths.append(value)
124 |             else:  # Anything else is treated as a directory and searched recursively for *.md files.
125 |                 markdown_paths = glob.glob(str(value / "**" / "*.md"), recursive=True)
126 |                 expanded_paths += [pathlib.Path(path) for path in markdown_paths]
127 |         setattr(namespace, self.dest, expanded_paths)  # Replaces (does not extend) any value already on the namespace.
128 | 


--------------------------------------------------------------------------------
/markflow/_utils/__init__.py:
--------------------------------------------------------------------------------
1 | # flake8: noqa
2 | import textwrap
3 | 
4 | from ._utils import *
5 | 


--------------------------------------------------------------------------------
/markflow/_utils/_utils.py:
--------------------------------------------------------------------------------
 1 | import contextlib
 2 | import logging
 3 | from typing import Iterator
 4 | 
 5 | __all__ = [
 6 |     "get_indent",
 7 |     "truncate_str",
 8 |     "redirect_info_logs_to_debug",
 9 | ]
10 | 
11 | ELLIPSIS = "..."
12 | 
13 | 
14 | def get_indent(line: str) -> int:  # Count the leading whitespace characters of `line`.
15 |     return len(line) - len(line.lstrip())  # lstrip() removes all leading whitespace, not just spaces.
16 | 
17 | 
18 | def truncate_str(str_: str, length: int) -> str:  # Shorten `str_` to `length` characters, ending in ELLIPSIS when cut.
19 |     if len(str_) <= length:  # Already short enough: returned unchanged.
20 |         pass
21 |     elif len(ELLIPSIS) >= length:  # No room for text plus ellipsis: return only dots.
22 |         str_ = "." * length
23 |     else:
24 |         truncation = max(0, length - len(ELLIPSIS))  # Always positive here; max() is only a safeguard.
25 |         str_ = str_[:truncation] + ELLIPSIS
26 |     return str_
27 | 
28 | 
29 | @contextlib.contextmanager
30 | def redirect_info_logs_to_debug() -> Iterator[None]:
31 |     old_info = logging.INFO
32 |     logging.INFO = logging.DEBUG
33 |     yield
34 |     logging.INFO = old_info
35 | 


--------------------------------------------------------------------------------
/markflow/_utils/textwrap.py:
--------------------------------------------------------------------------------
  1 | import re
  2 | from typing import List, Tuple
  3 | 
  4 | from markflow.typing import Number
  5 | 
  6 | INLINE_CODE_MARKER_REGEX = re.compile(r"(((?!<\\)`)+)")  # One or more consecutive backticks. NOTE(review): `(?!<\\)` is a negative lookahead, so it never rejects a backtick; a lookbehind `(?<!\\)` may have been intended — confirm.
  7 | FOOTNOTE_REGEX = re.compile(r"[^\s\]\)]*\[[^\[]+\]\[[^\]]+\][^\s\[\(]*")  # Reference-style link `[text][ref]` plus adjacent non-whitespace characters.
  8 | HTML_NEWLINE_REGEX = re.compile(r"<br ?/?>")  # `<br>`, `<br >`, `<br/>` or `<br />`.
  9 | URL_REGEX = re.compile(r"[^\s\]\)]*\[[^\[]+\]\([^\)]+\)[^\s\[\(]*")  # Inline link `[text](target)` plus adjacent non-whitespace characters.
 10 | 
 11 | 
 12 | def join(split_text: List[str], leading_spaces: List[bool], width: Number) -> str:  # Pack pieces greedily into lines and return them joined by "\n".
 13 |     new_split_text = [""]  # Lines built so far; the last entry is the line being filled.
 14 |     for word, leading_space in zip(split_text, leading_spaces):
 15 |         if leading_space and new_split_text[-1]:  # No separating space at the start of a line.
 16 |             potential_new_string = f"{new_split_text[-1]} {word}"
 17 |         else:
 18 |             potential_new_string = f"{new_split_text[-1]}{word}"
 19 |         if len(potential_new_string) <= width or not new_split_text[-1] or width <= 0:
 20 |             new_split_text[-1] = potential_new_string  # Fits, or the line is empty, or width <= 0 (no limit).
 21 |         else:
 22 |             new_split_text.append(word)  # Too long: start a new line with this piece.
 23 | 
 24 |         # If we hit an HTML new line, the next text should begin on a new line.
 25 |         if HTML_NEWLINE_REGEX.match(word):
 26 |             new_split_text.append("")
 27 | 
 28 |     if not new_split_text[-1]:  # Drop the empty line left by empty input or a trailing HTML newline.
 29 |         new_split_text = new_split_text[:-1]
 30 |     return "\n".join(new_split_text)
 31 | 
 32 | 
 33 | def code_split(
 34 |     text: str, leading_space: bool
 35 | ) -> Tuple[List[str], List[bool], List[bool]]:
 36 |     split_text: List[str] = []
 37 |     leading_spaces: List[bool] = []
 38 |     evaluates: List[bool] = []
 39 | 
 40 |     # Markdown inline code only ends when the exact same number of tildas are seen
 41 |     # again. More or less indicates it is still part of the code.
 42 |     open_marker_len = 0
 43 |     last_end = 0
 44 |     # We jump from tilda mark to tilda mark. The length of the tildas indicate if we are
 45 |     # beginning, ending, or still in code.
 46 |     for code_marker in INLINE_CODE_MARKER_REGEX.finditer(text):
 47 |         if open_marker_len == 0:
 48 |             plaintext = text[last_end : code_marker.start()]
 49 |             if (
 50 |                 plaintext.startswith(".")
 51 |                 and not plaintext.startswith("..")
 52 |                 and split_text
 53 |             ):
 54 |                 split_text[-1] += "."
 55 |                 plaintext = plaintext[1:]
 56 |             if plaintext.strip():
 57 |                 if not leading_spaces:
 58 |                     leading_spaces.append(leading_space)
 59 |                 else:
 60 |                     leading_spaces.append(plaintext.startswith(" "))
 61 |                 split_text.append(plaintext.strip())
 62 |                 evaluates.append(True)
 63 |             open_marker_len = len(code_marker.group())
 64 | 
 65 |             # Prepare our lists for code
 66 |             if not leading_spaces:
 67 |                 leading_spaces.append(leading_space)
 68 |             else:
 69 |                 leading_spaces.append(plaintext.endswith(" "))
 70 |             evaluates.append(False)
 71 |             split_text.append("`" * open_marker_len)
 72 |         elif len(code_marker.group()) == open_marker_len:
 73 |             # We've found the close of our inline code
 74 |             code = text[last_end : code_marker.start()]
 75 |             split_text[-1] += code + "`" * open_marker_len
 76 |             open_marker_len = 0
 77 |         else:
 78 |             # We've found more inline code
 79 |             split_text[-1] += text[last_end : code_marker.end()]
 80 | 
 81 |         last_end = code_marker.end()
 82 | 
 83 |     # If our last field only has a singular inline code marker, it means that it isn't
 84 |     # inline text and just a standalone tilda or set of tildas, so we can evaluate it.
 85 |     if split_text and len(INLINE_CODE_MARKER_REGEX.findall(split_text[-1])) == 1:
 86 |         evaluates[-1] = True
 87 |         if text[last_end:].strip():
 88 |             split_text[-1] += text[last_end:].rstrip()
 89 |     else:
 90 |         remaining_text = text[last_end:]
 91 |         if (
 92 |             remaining_text
 93 |             and remaining_text.startswith(".")
 94 |             and not remaining_text.startswith("..")
 95 |         ):
 96 |             split_text[-1] += "."
 97 |             remaining_text = remaining_text[1:]
 98 |         if remaining_text.strip():
 99 |             split_text.append(remaining_text.strip())
100 |             if last_end == 0:
101 |                 leading_spaces.append(leading_space)
102 |             else:
103 |                 leading_spaces.append(remaining_text.startswith(" "))
104 |             evaluates.append(True)
105 | 
106 |     return split_text, leading_spaces, evaluates
107 | 
108 | 
109 | def link_split(
110 |     text: str, leading_space: bool
111 | ) -> Tuple[List[str], List[bool], List[bool]]:
112 |     """Splits text based on links
113 | 
114 |     This function iterates over text split by tildas. Markdown inline code begins with
115 |     a number of tildas and only ends when that exact number is reached. If there are
116 |     more tildas, e.g. `` ```` ``, they are treated as part of the inline code.
117 | 
118 |     Per our rules, inline code should all be on one line, so each inline code section is
119 |     marked for non-evaluation.
120 | 
121 |     Args:
122 |         text: The text to evaluate
123 |         leading_space: Should this code section have a leading new space when reflowed?
124 | 
125 |     Returns:
126 |         Split text, What sections have leading spaces, What sections should continue to
127 |         be evaluated
128 |     """
129 |     matches = [m for m in FOOTNOTE_REGEX.finditer(text)]
130 |     matches += [m for m in URL_REGEX.finditer(text)]
131 |     matches.sort(key=lambda m: m.start())
132 | 
133 |     split_text: List[str] = []
134 |     leading_spaces: List[bool] = []
135 |     evaluates: List[bool] = []
136 |     last_end = 0
137 |     # Each iteration of this for loop operates on non-link text followed by
138 |     # link text.
139 |     for match in matches:
140 |         non_link_text = text[last_end : match.start()]
141 |         if non_link_text.strip():
142 |             if (
143 |                 split_text
144 |                 and non_link_text.startswith(".")
145 |                 and not non_link_text.startswith("..")
146 |             ):
147 |                 split_text[-1] += "."
148 |                 non_link_text = non_link_text[1:]
149 |             split_text.append(non_link_text.strip())
150 |             if not leading_spaces:
151 |                 leading_spaces.append(leading_space)
152 |             else:
153 |                 leading_spaces.append(non_link_text.startswith(" "))
154 | 
155 |             leading_spaces.append(text[match.start() - 1] == " ")
156 |             evaluates.append(True)
157 |         else:
158 |             if not leading_spaces:
159 |                 leading_spaces.append(leading_space)
160 |             else:
161 |                 leading_spaces.append(False)
162 | 
163 |         leading_spaces.append(False)
164 |         if "](" in match.group():
165 |             split_link = match.group().split("](")
166 |             split_text.append(split_link[0].strip() + "](")
167 |             split_text.append(split_link[1].strip())
168 |         else:
169 |             split_link = match.group().split("][")
170 |             split_text.append(split_link[0].strip() + "][")
171 |             split_text.append(split_link[1].strip())
172 | 
173 |         # Don't modify our hyperlink
174 |         evaluates += [True, False]
175 |         last_end = match.end()
176 | 
177 |     remaining_text = text[last_end:]
178 |     if (
179 |         remaining_text
180 |         and remaining_text.startswith(".")
181 |         and not remaining_text.startswith("..")
182 |     ):
183 |         split_text[-1] += "."
184 |         remaining_text = remaining_text[1:]
185 |     if remaining_text.strip():
186 |         split_text.append(remaining_text.strip())
187 |         if last_end == 0:
188 |             leading_spaces.append(leading_space)
189 |         else:
190 |             leading_spaces.append(remaining_text.startswith(" "))
191 |         evaluates.append(True)
192 | 
193 |     return split_text, leading_spaces, evaluates
194 | 
195 | 
196 | def newline_split(
197 |     text: str, leading_space: bool
198 | ) -> Tuple[List[str], List[bool], List[bool]]:  # Split text around HTML_NEWLINE_REGEX matches, keeping each tag as its own piece.
199 |     split_text: List[str] = []
200 |     leading_spaces: List[bool] = []
201 |     evaluates: List[bool] = []
202 |     last_end = 0
203 |     # Each iteration of this for loop operates on plaintext followed by an HTML
204 |     # newline.
205 |     for match in HTML_NEWLINE_REGEX.finditer(text):
206 |         non_newline_text = text[last_end : match.start()]
207 |         if not leading_spaces:
208 |             leading_spaces.append(leading_space)
209 |         else:
210 |             leading_spaces.append(text[last_end] == " ")
211 | 
212 |         if non_newline_text.strip():
213 |             split_text.append(non_newline_text.strip())
214 |             evaluates.append(True)
215 |             leading_spaces.append(non_newline_text.endswith(" "))  # Spacing for the tag that follows this text.
216 | 
217 |         split_text.append(match.group())
218 |         evaluates.append(False)  # The tag itself is never split further.
219 |         last_end = match.end()
220 | 
221 |     if text[last_end:].strip():  # Trailing text after the last tag (or the whole text if none matched).
222 |         split_text.append(text[last_end:].strip())
223 |         if last_end == 0:
224 |             leading_spaces.append(leading_space)
225 |         else:
226 |             leading_spaces.append(text[last_end:].startswith(" "))
227 |         evaluates.append(True)
228 | 
229 |     return split_text, leading_spaces, evaluates
230 | 
231 | 
def space_split(
    text: str, leading_space: bool
) -> Tuple[List[str], List[bool], List[bool]]:
    """Split text into words on single space characters

    Empty chunks produced by consecutive spaces are discarded, and each remaining
    chunk has any surrounding whitespace stripped.

    Args:
        text: The text to split.
        leading_space: Whether the first word should be preceded by a space.

    Returns:
        A tuple of three parallel lists: the words, whether each word is preceded by a
        space (the first takes `leading_space`, all others are True), and whether each
        word may be evaluated further (always True).
    """
    words: List[str] = [chunk.strip() for chunk in text.split(" ") if chunk]

    space_flags: List[bool] = []
    if words:
        space_flags = [leading_space] + [True] * (len(words) - 1)

    return words, space_flags, [True] * len(words)
249 | 
250 | 
def wrap(text: str, width: Number) -> str:
    """Wrap text to the given width

    The input is first flattened onto a single line. It is then passed through a
    series of splitters (code, links, HTML newlines, and finally spaces). Each
    splitter only processes pieces that an earlier splitter marked as still open for
    evaluation. The resulting pieces are handed to `join` to build the output.

    Args:
        text: The text to wrap.
        width: The width passed through to `join`.

    Returns:
        The result of joining the split pieces at the requested width.
    """
    # TODO: Should wrap be modifying the input. Maybe assert there's no newlines?
    flattened = " ".join(line.strip() for line in text.splitlines())

    pieces: List[str] = [flattened]
    space_flags: List[bool] = [False]
    open_flags: List[bool] = [True]

    for splitter in (code_split, link_split, newline_split, space_split):
        next_pieces: List[str] = []
        next_space_flags: List[bool] = []
        next_open_flags: List[bool] = []

        for piece, has_space, is_open in zip(pieces, space_flags, open_flags):
            if not is_open:
                # An earlier splitter froze this piece; carry it over untouched.
                next_pieces.append(piece)
                next_space_flags.append(has_space)
                next_open_flags.append(is_open)
                continue

            sub_pieces, sub_space_flags, sub_open_flags = splitter(piece, has_space)
            next_pieces += sub_pieces
            next_space_flags += sub_space_flags
            next_open_flags += sub_open_flags

        pieces = next_pieces
        space_flags = next_space_flags
        open_flags = next_open_flags

    return join(pieces, space_flags, width)
278 | 


--------------------------------------------------------------------------------
/markflow/detectors/__init__.py:
--------------------------------------------------------------------------------
 1 | # flake8: noqa
 2 | """
 3 | MarkFlow MarkDown Section Detection Library
 4 | 
 5 | This library provides the functions MarkFlow uses to split a document into its
 6 | individual text types.
 7 | """
 8 | from .atx_heading import *
 9 | from .blank_line import *
10 | from .block_quote import *
11 | from .bullet_list import *
12 | from .fenced_code_block import *
13 | from .indented_code_block import *
14 | from .link_reference_definition import *
15 | from .ordered_list import *
16 | from .paragraph import *
17 | from .setext_heading import *
18 | from .table import *
19 | from .thematic_break import *
20 | 


--------------------------------------------------------------------------------
/markflow/detectors/_lines.py:
--------------------------------------------------------------------------------
  1 | """
  2 | MarkFlow Line Detection Library
  3 | 
  4 | This library is used as a common space to evaluate position-independent information
  5 | about lines. The checks are stored here so as to avoid any circular imports.
  6 | """
  7 | 
  8 | import re
  9 | 
 10 | from .._utils import get_indent
 11 | 
# Characters that can form a code fence; a fence is a run of three of the same one
# (see is_fenced_code_block_start_line).
FENCED_CODE_BLOCK_FENCE_CHARACTERS = ["`", "~"]
# Matches the opening of a bullet list entry: optional leading whitespace, one of the
# three list markers, then a single space.
BULLET_LIST_START_REGEX = re.compile(
    r"^\s*"  # Leading spaces are OK and often expected
    r"["
    r"*"  # Asterisk list marker
    r"+"  # Plus list marker
    r"-"  # Dash list marker
    r"] "  # Lists need a space after their identifier
)
# Matches the opening of an ordered list entry: optional leading whitespace, one or
# more digits followed by a period, then a single space. Only the "1." marker form is
# recognized by this pattern.
ORDERED_LIST_START_REGEX = re.compile(
    r"^\s*"  # Leading spaces are OK and often expected
    r"("
    r"[0-9]+\."  # Numeric list marker
    r") "  # Lists need a space after their identifier
)
# Characters that can make up a thematic break (see is_thematic_break_line).
THEMATIC_BREAK_CHARACTERS = ["*", "_", "-"]
 28 | 
 29 | 
 30 | def is_atx_heading_line(line: str) -> bool:
 31 |     """Evaluates whether a line is formatted like an ATX heading
 32 | 
 33 |     The standard requires a space, but it also notes that not everyone follows this. We
 34 |     are lax in our definition and fix it on reformatting.
 35 | 
 36 |     Examples:
 37 |         ```
 38 |         #Heading
 39 |         # Heading
 40 |         ```
 41 | 
 42 |     Args:
 43 |         line: The line to evaluate
 44 | 
 45 |     Returns:
 46 |         True if the line is an ATX heading. False otherwise.
 47 |     """
 48 |     return not is_indented_code_block_start_line(line) and line.lstrip().startswith("#")
 49 | 
 50 | 
 51 | def is_blank_line_line(line: str) -> bool:
 52 |     """Evaluates whether a line is a blank line
 53 | 
 54 |     Example:
 55 |         ```
 56 | 
 57 |         ```
 58 | 
 59 |     Args:
 60 |         line: The line to evaluate
 61 | 
 62 |     Returns:
 63 |         True if the line is an ATX heading. False otherwise.
 64 |     """
 65 |     return not line.strip()
 66 | 
 67 | 
 68 | def is_explicit_block_quote_line(line: str) -> bool:
 69 |     """Evaluates whether a line is explicitly block quote line
 70 | 
 71 |     The distinction here is that paragraph continuation lines can be part of a block
 72 |     quote. This ensures that is what is desired.
 73 | 
 74 |     Example:
 75 |         ```
 76 |         > Block Quote
 77 |         ```
 78 | 
 79 |     Args:
 80 |         line: The line to evaluate
 81 | 
 82 |     Returns:
 83 |         True if the line is an block quote line. False otherwise.
 84 |     """
 85 |     return not is_indented_code_block_start_line(line) and line.lstrip().startswith(">")
 86 | 
 87 | 
 88 | def is_fenced_code_block_start_line(line: str) -> bool:
 89 |     """Evaluates whether a line could open a fenced code block
 90 | 
 91 |     Examples:
 92 |         ```
 93 |         ```python3
 94 |         ~~~markdown
 95 |         ```
 96 | 
 97 |     Args:
 98 |         line: The line to evaluate
 99 | 
100 |     Returns:
101 |         True if the line is could open a fenced code block. False otherwise.
102 |     """
103 |     for fence in FENCED_CODE_BLOCK_FENCE_CHARACTERS:
104 |         if line.strip().startswith(fence * 3):
105 |             return True
106 |     return False
107 | 
108 | 
def is_indented_code_block_start_line(line: str) -> bool:
    """Determines whether a line could start an indented code block

    Examples:
        ```
            There's four spaces before this
        ```

    Args:
        line: The line to evaluate

    Returns:
        True if the line is not blank and its indent, as reported by `get_indent`, is
        at least four. False otherwise.
    """
    if not line.strip():
        # Blank lines never start a code block, however far they are indented.
        return False
    return get_indent(line) >= 4
124 | 
125 | 
def is_ordered_list_start_line(line: str) -> bool:
    """Determines whether a line could start an ordered list

    Example:
        ```
        1. Entry
        ```

    Args:
        line: The line to evaluate

    Returns:
        True if the line could start an ordered list. False otherwise.
    """
    if is_indented_code_block_start_line(line):
        # Lines that could start an indented code block are never list openers.
        return False
    return ORDERED_LIST_START_REGEX.search(line) is not None
143 | 
144 | 
def is_bullet_list_start_line(line: str) -> bool:
    """Determines whether a line could start a bullet list

    Example:
        ```
        * Asterisk List
        - Dash List
        + Plus List
        ```

    Args:
        line: The line to evaluate

    Returns:
        True if the line could start a bullet list. False otherwise.
    """
    if is_indented_code_block_start_line(line):
        # Lines that could start an indented code block are never list openers.
        return False
    return BULLET_LIST_START_REGEX.search(line) is not None
164 | 
165 | 
def is_paragraph_start_line(line: str) -> bool:
    """Determines whether a line could start a paragraph

    A paragraph is the fallback section type: a line can start one only if no other
    line check in this module claims it.

    Examples:
        ```
        Just some text
        ```

    Args:
        line: The line to evaluate

    Returns:
        True if the line could start a paragraph. False otherwise.
    """
    competing_checks = (
        is_indented_code_block_start_line,
        is_atx_heading_line,
        is_blank_line_line,
        is_bullet_list_start_line,
        is_explicit_block_quote_line,
        is_fenced_code_block_start_line,
        is_ordered_list_start_line,
        is_table_start_line,
        is_thematic_break_line,
    )
    return not any(check(line) for check in competing_checks)
196 | 
197 | 
def is_setext_underline(line: str) -> bool:
    """Determines whether a line could be the underline of a setext heading

    Examples:
        ```
        ---
          ==
        ```

    Args:
        line: The line to evaluate

    Returns:
        True if the line could underline a setext heading. False otherwise.
    """
    if is_indented_code_block_start_line(line):
        return False

    # The stripped line must be non-empty and consist solely of "=" or solely of "-".
    distinct_characters = set(line.strip())
    return distinct_characters == {"="} or distinct_characters == {"-"}
221 | 
222 | 
def is_table_start_line(line: str) -> bool:
    """Determines whether a line could start a table

    Examples:
        ```
        |Table|
        ```

    Args:
        line: The line to evaluate

    Returns:
        True if the first non-whitespace character of the line is a pipe. False
        otherwise.
    """
    # ToDo: Not really, but we'll have to adapt a standard from somewhere other than
    #  CommonMark
    first_character = line.lstrip()[:1]
    return first_character == "|"
241 | 
def is_thematic_break_line(line: str) -> bool:
    """Determines whether a line is a thematic break

    All whitespace is ignored, so the break characters may be separated by spaces.

    Examples:
        ```
        ***
        - - -
        ___
        ```

    Args:
        line: The line to evaluate

    Returns:
        True if the line could not start an indented code block and, with whitespace
        removed, is three or more repetitions of a single thematic break character.
        False otherwise.
    """
    if is_indented_code_block_start_line(line):
        return False

    condensed = "".join(line.split())
    # Thematic breaks must be at least three characters long
    if len(condensed) < 3:
        return False

    distinct_characters = set(condensed)
    return any(distinct_characters == {symbol} for symbol in THEMATIC_BREAK_CHARACTERS)
256 | 


--------------------------------------------------------------------------------
/markflow/detectors/atx_heading.py:
--------------------------------------------------------------------------------
 1 | """
 2 | MarkFlow ATX Heading Detection Library
 3 | 
 4 | ATX headings are lines that begin with one or more octothorpes (#) and are not indented.
 5 | The number of octothorpes indicates the depth of the heading (e.g. # -> <h1></h1>,
 6 | ## -> <h2></h2>) The standard requires that a space exist between the octothorpes and
 7 | the title, but our detector does not enforce that as we assume that is not actually
 8 | meant (as many other tools do) and the formatter will insert that space automatically.
 9 | 
10 | Examples:
11 |     ```
12 |     # Heading 1
13 |     ```
14 | 
15 |     ```
16 |     ## Heading 2
17 |     ```
18 | """
19 | 
20 | from typing import List, Tuple
21 | 
22 | from ._lines import is_atx_heading_line
23 | 
24 | 
def split_atx_heading(
    lines: List[str], line_offset: int = 0
) -> Tuple[List[str], List[str]]:
    """Split a leading ATX heading off of lines, if there is one

    The standard requires a space between the octothorpes and the heading text. We
    are lenient about that, treating a missing space as author error.

    Args:
        lines: The lines to evaluate.
        line_offset (optional): The offset into the overall document we are at. This is
            used for reporting errors in the original document.

    Returns:
        A tuple of two values. The first is the ATX heading as a single-element list,
        or an empty list if lines does not start with one. The second is the remaining
        lines. (If lines does not start with an ATX heading, it is the same as lines.)
    """
    if not is_atx_heading_line(lines[0]):
        return [], lines

    heading, *_ = lines
    return [heading], lines[1:]
48 | 


--------------------------------------------------------------------------------
/markflow/detectors/blank_line.py:
--------------------------------------------------------------------------------
 1 | """
 2 | MarkFlow Blank Line Detection Library
 3 | 
 4 | Blank lines are simply those lines that only have whitespace in them and are not in the
 5 | middle of another section, like an indented code block.
 6 | 
 7 | Example:
 8 |     ```
 9 | 
10 |     ```
11 | """
12 | 
13 | from typing import List, Tuple
14 | 
15 | from ._lines import is_blank_line_line
16 | 
17 | 
def split_blank_line(
    lines: List[str], line_offset: int = 0
) -> Tuple[List[str], List[str]]:
    """Split a leading blank line off of lines, if there is one

    Args:
        lines: The lines to evaluate.
        line_offset (optional): The offset into the overall document we are at. This is
            used for reporting errors in the original document.

    Returns:
        A tuple of two values. The first is the blank line as a single-element list,
        or an empty list if lines does not start with one. The second is the remaining
        lines. (If lines does not start with a blank line, it is the same as lines.)
    """
    if not is_blank_line_line(lines[0]):
        return [], lines

    blank, *_ = lines
    return [blank], lines[1:]
38 | 


--------------------------------------------------------------------------------
/markflow/detectors/block_quote.py:
--------------------------------------------------------------------------------
  1 | import re
  2 | from typing import List, Tuple
  3 | 
  4 | from .._utils import redirect_info_logs_to_debug
  5 | from ._lines import (
  6 |     is_explicit_block_quote_line,
  7 |     is_setext_underline,
  8 |     is_thematic_break_line,
  9 | )
 10 | from .atx_heading import split_atx_heading
 11 | from .blank_line import split_blank_line
 12 | from .bullet_list import split_bullet_list
 13 | from .fenced_code_block import split_fenced_code_block
 14 | from .ordered_list import split_ordered_list
 15 | from .table import split_table
 16 | from .thematic_break import split_thematic_break
 17 | 
# Matches a block quote marker (">") preceded by at most three spaces at the start of
# a line; used to strip one level of quoting before re-parsing the quoted content.
LEADING_QUOTE_MARKER = re.compile(r"^ {0,3}>")
 19 | 
 20 | 
 21 | def _is_paragraph_continuation_text(lines: List[str], line_offset: int = 0) -> bool:
 22 |     """Indicates whether the first line of lines would continue a paragraph
 23 | 
 24 |     This ensures that any valid interrupting section of a paragraph could not result in
 25 |     a valid block instead.
 26 | 
 27 |     We have a separate definition from the one used in paragraph detection to avoid
 28 |     circular imports. This definition assumes the line doesn't start with '>'.
 29 | 
 30 |     There is also a bit of a diversion from the spec here. According to the spec, the
 31 |     following is a block-quoted paragraph:
 32 | 
 33 |         > paragraph
 34 |         title
 35 |         =====
 36 | 
 37 |     or:
 38 | 
 39 |         > paragraph title =====
 40 | 
 41 |     But, that looks odd, and the definition for paragraph continuation text could easily
 42 |     be interpreted to consider that a paragraph and a title. So, we do the same here.
 43 |     Given that MarkFlow output should not result in any paragraph continuation lines
 44 |     after a block quote, there are no concerns around consistency. In the case of
 45 |     trailing equals, e.g.
 46 | 
 47 |         > paragraph
 48 |         ===
 49 | 
 50 |     the caller should detect this as a continuation line. But, if it is dashes, it
 51 |     should be detected as a horizontal line.
 52 | 
 53 |     There is an open issue on the ambiguity of paragraph continuation text here:
 54 |     https://github.com/commonmark/commonmark-spec/issues/675
 55 | 
 56 |     Args:
 57 |         lines: The lines to evaluate.
 58 |         line_offset (optional): The offset into the overall document we are at. This is
 59 |             used for reporting errors in the original document.
 60 | 
 61 |     Returns:
 62 |         True if the first line would continue the paragraph. False otherwise.
 63 |     """
 64 |     from .setext_heading import split_setext_heading
 65 | 
 66 |     for splitter in [
 67 |         split_atx_heading,
 68 |         split_blank_line,
 69 |         split_bullet_list,
 70 |         split_fenced_code_block,
 71 |         split_ordered_list,
 72 |         split_setext_heading,
 73 |         split_table,
 74 |         split_thematic_break,
 75 |     ]:
 76 |         with redirect_info_logs_to_debug():
 77 |             if splitter(lines, line_offset)[0]:
 78 |                 return False
 79 |     if is_setext_underline(lines[0]):
 80 |         return False
 81 |     return True
 82 | 
 83 | 
 84 | def _block_quote_ends_with_paragraph(block_quote_lines: List[str]) -> bool:
 85 |     # Avoid circular imports
 86 |     from ..parser import MarkdownSectionEnum, parse_markdown
 87 | 
 88 |     parsing_lines = []
 89 |     for line in block_quote_lines:
 90 |         parsing_lines.append(LEADING_QUOTE_MARKER.sub("", line))
 91 | 
 92 |     with redirect_info_logs_to_debug():
 93 |         ending_section_type, ending_section_content = parse_markdown(parsing_lines)[-1]
 94 | 
 95 |     if ending_section_type == MarkdownSectionEnum.BLOCK_QUOTE:
 96 |         return _block_quote_ends_with_paragraph(ending_section_content)
 97 |     elif ending_section_type == MarkdownSectionEnum.PARAGRAPH:
 98 |         return True
 99 |     else:
100 |         return False
101 | 
102 | 
def split_block_quote(
    lines: List[str], line_offset: int = 0
) -> Tuple[List[str], List[str]]:
    """Splits a block quote from the beginning of lines if one exists

    We slightly differ from the spec when it comes to paragraph continuation lines.
    While the spec detects the following as all a block quoted paragraph:

        > code
        TITLE
        =====

    we detect it as a block quote followed by a heading. In all other ways, we should
    match the spec.

    ToDo:
        * This pattern could be applicable in paragraph detection and be easier to grok.
          (Minus the parsing portion. That's not necessary.)

    Args:
        lines: The lines to evaluate.
        line_offset (optional): The offset into the overall document we are at. This is
            used for reporting errors in the original document.

    Returns:
        A tuple of two values. The first is the block quote lines if a block quote was
        found, otherwise it is an empty list. The second value is the remaining text.
        (If lines does not start with a block quote, it is the same as lines.)
    """
    block_quote: List[str] = []
    remaining_lines = lines

    while remaining_lines:
        if not is_explicit_block_quote_line(remaining_lines[0]):
            break

        # Consume the run of lines that carry an explicit quote marker.
        while remaining_lines and is_explicit_block_quote_line(remaining_lines[0]):
            block_quote.append(remaining_lines[0])
            remaining_lines = remaining_lines[1:]
            # Keep the offset in step with remaining_lines so that anything reported
            # by the continuation check below refers to the right document line.
            line_offset += 1

        # Unmarked lines can only extend the quote if it currently ends in a paragraph.
        if not _block_quote_ends_with_paragraph(block_quote):
            break

        first_line = True
        while remaining_lines and _is_paragraph_continuation_text(
            remaining_lines, line_offset
        ):
            if first_line:
                first_line = False
                # NOTE(review): _is_paragraph_continuation_text already returns False
                # for setext underlines, so this guard looks unreachable; confirm
                # before removing it.
                if is_setext_underline(
                    remaining_lines[0]
                ) and not is_thematic_break_line(remaining_lines[0]):
                    break
            block_quote.append(remaining_lines[0])
            remaining_lines = remaining_lines[1:]
            line_offset += 1

    return block_quote, remaining_lines
158 | 


--------------------------------------------------------------------------------
/markflow/detectors/bullet_list.py:
--------------------------------------------------------------------------------
 1 | from typing import List, Tuple
 2 | 
 3 | from ._lines import (
 4 |     is_blank_line_line,
 5 |     is_bullet_list_start_line,
 6 |     is_table_start_line,
 7 |     is_thematic_break_line,
 8 | )
 9 | 
10 | 
def split_bullet_list(
    lines: List[str], line_offset: int = 0
) -> Tuple[List[str], List[str]]:
    """Split a leading bullet list off of lines, if there is one

    Once a bullet list has started, every following line is taken as part of it until
    a blank line, a table start line, or a thematic break is reached.

    Args:
        lines: The lines to evaluate.
        line_offset (optional): The offset into the overall document we are at. It is
            not used by this splitter.

    Returns:
        A tuple of two values. The first is the bullet list lines if a list was found,
        otherwise it is an empty list. The second value is the remaining lines. (If
        lines does not start with a bullet list, it is the same as lines.)
    """
    line_iterator = iter(lines)

    opening_line = next(line_iterator)
    if not is_bullet_list_start_line(opening_line):
        return [], lines

    collected: List[str] = [opening_line]
    for candidate in line_iterator:
        ends_list = (
            is_blank_line_line(candidate)
            or is_table_start_line(candidate)
            or is_thematic_break_line(candidate)
        )
        if ends_list:
            break
        collected.append(candidate)

    # The collected lines are a prefix of lines, so its length marks the split point.
    return collected, lines[len(collected) :]
38 | 


--------------------------------------------------------------------------------
/markflow/detectors/fenced_code_block.py:
--------------------------------------------------------------------------------
  1 | """
  2 | MarkFlow Fenced Code Block Detection Library
  3 | 
  4 | Fenced code blocks are multiple lines of text that open with a line beginning with at
  5 | least three backticks or tildes and end with that same sequence on its own line.
  6 | 
  7 | Examples:
  8 |     ```
  9 |     ``
 10 |     print("Hello world!")
 11 |     ``
 12 |     ```
 13 | 
 14 |     ```
 15 |     ~~~~
 16 |     print("Hello world!")
 17 |     ~~~~
 18 |     ```
 19 | """
 20 | 
 21 | import logging
 22 | from typing import List, Tuple
 23 | 
 24 | from .._utils import get_indent
 25 | 
# Module-level logger, used for the over-indented fence warnings below.
logger = logging.getLogger(__name__)

# TODO: This is really dirty; let's probably make started functions return ended
#  functions; I'm not doing that yet in case a better pattern emerges on the rest of
#  this refactor
# The alternative is every time fenced_code_block_ended is called, we walk backwards to
# find the fence.

# The characters that can form a fence; each one is tried in turn.
FENCES = "`~"
# State shared by the deprecated fenced_code_block_started/fenced_code_block_ended
# pair: the full opening fence that was last detected ("" when no block is open) ...
__LAST_FENCE = ""
# ... and the index of the line it was detected on (-1 when no block is open).
__LAST_FENCE_INDEX = -1
 37 | 
 38 | 
 39 | def fenced_code_block_started(line: str, index: int, lines: List[str]) -> bool:
 40 |     """DEPRECATED"""
 41 |     global __LAST_FENCE
 42 |     global __LAST_FENCE_INDEX
 43 |     for fence in FENCES:
 44 |         if line.strip().startswith(fence * 3):
 45 |             count = len(line.strip()) - len(line.strip().lstrip(fence))
 46 |             __LAST_FENCE = fence * count
 47 |             __LAST_FENCE_INDEX = index
 48 |             return True
 49 |     return False
 50 | 
 51 | 
 52 | def fenced_code_block_ended(line: str, index: int, lines: List[str]) -> bool:
 53 |     """DEPRECATED"""
 54 |     # We'll catch even over indented fences assuming that that was an accident.
 55 |     global __LAST_FENCE
 56 |     global __LAST_FENCE_INDEX
 57 |     if not __LAST_FENCE:
 58 |         raise RuntimeError("End of fenced code block attempted without starting one.")
 59 | 
 60 |     # If we're on the last line, we'll still want to warn about the fence indentation
 61 |     if index + 1 == len(lines):
 62 |         # TODO: We add the last fence because this is used for parsing lists, and we
 63 |         #  allow indented code blocks in lists. But, those are actually inline code
 64 |         #  blocks according to the example render.
 65 |         last_fence_indent = len(lines[__LAST_FENCE_INDEX]) - len(
 66 |             lines[__LAST_FENCE_INDEX].lstrip()
 67 |         )
 68 |         if (
 69 |             line.strip().startswith(__LAST_FENCE)
 70 |             and len(line) - len(line.lstrip()) > 3 + last_fence_indent
 71 |         ):
 72 |             logger.warning(
 73 |                 "Detected that the fence on line %d is over indented per the standard. "
 74 |                 "If this is intentional, please file a bug report." % (index + 1)
 75 |             )
 76 | 
 77 |     # We'll just redetect our opening line
 78 |     if index - 1 == __LAST_FENCE_INDEX:
 79 |         return False
 80 | 
 81 |     last_line = lines[index - 1]
 82 |     if last_line.strip().startswith(__LAST_FENCE):
 83 |         last_fence_indent = len(lines[__LAST_FENCE_INDEX]) - len(
 84 |             lines[__LAST_FENCE_INDEX].lstrip()
 85 |         )
 86 |         if len(last_line) - len(last_line.lstrip()) > 3 + last_fence_indent:
 87 |             logger.warning(
 88 |                 "Detected that the fence on line %d is over indented per the standard. "
 89 |                 "If this is intentional, please file a bug report." % (index + 1)
 90 |             )
 91 |         __LAST_FENCE = ""
 92 |         __LAST_FENCE_INDEX = -1
 93 |         return True
 94 |     return False
 95 | 
 96 | 
 97 | def split_fenced_code_block(
 98 |     lines: List[str], line_offset: int = 0
 99 | ) -> Tuple[List[str], List[str]]:
100 |     """Split leading fenced code block from lines if one exists
101 | 
102 |     Args:
103 |         lines: The lines to evaluate.
104 |         line_offset (optional): The offset into the overall document we are at. This is
105 |             used for reporting errors in the original document.
106 | 
107 |     Returns:
108 |         A tuple of two values. The first is the fenced code block lines if they were
109 |         found, otherwise it is an empty list. The second value is the remaining text.
110 |         (If lines does not start with a fenced code block, it is the same as lines.)
111 |     """
112 |     # TODO: Fenced code blocks can't be indented
113 |     fenced_code_block: List[str] = []
114 |     remaining_lines = lines
115 |     indexed_line_generator = enumerate(lines)
116 | 
117 |     index, line = next(indexed_line_generator)
118 |     for fence in FENCES:
119 |         if line.strip().startswith(fence * 3):
120 |             count = len(line.lstrip()) - len(line.lstrip().lstrip(fence))
121 |             fence_indent = get_indent(line)
122 |             full_fence = fence * count
123 |             break
124 |     else:
125 |         return fenced_code_block, remaining_lines
126 | 
127 |     fenced_code_block.append(line)
128 | 
129 |     for index, line in indexed_line_generator:
130 |         fenced_code_block.append(line)
131 |         if line.strip() == full_fence:
132 |             if get_indent(line) > 3 + fence_indent:
133 |                 logger.warning(
134 |                     "Detected that the fence on line %d is over indented per the "
135 |                     "standard. If this is intentional, please file a bug report."
136 |                     % (index + line_offset + 1)
137 |                 )
138 |             break
139 | 
140 |     remaining_lines = remaining_lines[index + 1 :]
141 |     return fenced_code_block, remaining_lines
142 | 


--------------------------------------------------------------------------------
/markflow/detectors/indented_code_block.py:
--------------------------------------------------------------------------------
 1 | """
 2 | MarkFlow Indented Code Block Detection Library
 3 | 
 4 | Indented code blocks are one or more lines of text that are indented at least four
 5 | spaces that are not in the middle of a paragraph.
 6 | 
 7 | Example:
 8 |     ```
 9 |         print("Hello world!")
10 |     ```
11 | """
12 | 
13 | from typing import List, Tuple
14 | 
15 | from .._utils import get_indent
16 | 
17 | 
def split_indented_code_block(
    lines: List[str], line_offset: int = 0
) -> Tuple[List[str], List[str]]:
    """Split leading indented code block from lines if one exists

    The block starts at a non-blank first line indented at least four and extends
    through the last such line that is reached before any non-blank line with a
    smaller indent. Blank lines between indented lines are part of the block; blank
    lines trailing the last indented line are not.

    Args:
        lines: The lines to evaluate.
        line_offset (optional): The offset into the overall document we are at. This is
            used for reporting errors in the original document.

    Returns:
        A tuple of two values. The first is the indented code block lines if they were
        found, otherwise it is an empty list. The second value is the remaining text.
        (If lines does not start with an indented code block, it is the same as lines.)
    """
    if not lines:
        return [], lines

    first_line = lines[0]
    if not first_line.strip() or get_indent(first_line) < 4:
        return [], lines

    # Exclusive end of the block; by default only the first line is part of it.
    close_index = 1
    for index, line in enumerate(lines[1:], start=1):
        if not line.strip():
            # Blank lines only join the block if another indented line follows.
            continue
        if get_indent(line) < 4:
            break
        # This line belongs to the block, so the exclusive end lies just past it.
        close_index = index + 1

    return lines[:close_index], lines[close_index:]
55 | 


--------------------------------------------------------------------------------
/markflow/detectors/link_reference_definition.py:
--------------------------------------------------------------------------------
  1 | """
  2 | MarkFlow Link Reference Definition Detection Library
  3 | 
  4 | Link reference definitions are intended to be unrendered portions of a document that
  5 | provide a shorthand for links. They start with a series of non-whitespace characters
  6 | enclosed in brackets ([]) that serve as the label. It is followed by a colon (:) and
  7 | optional whitespace. That is then followed by a series of non-whitespace characters that
  8 | serve as the link. This can optionally be followed by white-space and then a quotation
  9 | (' or ") enclosed series of characters that serves as the title. Any of the optional
 10 | whitespace may be a new line.
 11 | 
 12 | Examples:
 13 |     ```
 14 |     [label]: link 'title'
 15 | 
 16 |     [label]: link
 17 |     'title'
 18 | 
 19 |     [label]:
 20 |     link
 21 |     'title'
 22 |     ```
 23 | """
 24 | 
 25 | import itertools
 26 | import logging
 27 | import re
 28 | from typing import List, Tuple
 29 | 
 30 | from .._utils import get_indent
 31 | 
logger = logging.getLogger(__name__)

# Matches the label portion that opens a link reference definition, e.g. "[label]:".
# It is applied with `match` to the left-stripped first line, so the label must be the
# first thing on that line.
LINK_REFERENCE_DEFINITION_FIRST_ELEMENT_REGEX = re.compile(
    r"\["  # Open bracket
    r"[^\]]{1,999}"  # At least one and up to 999 characters as the name
    r"\]:"  # End bracket and colon
)
# Presumably the characters that may enclose a title (' or "), per the module
# docstring; verify against the usages further down the file.
QUOTATION_CHARACTERS = "'\""
 40 | 
 41 | 
 42 | def split_link_reference_definition(
 43 |     lines: List[str], line_offset: int = 0
 44 | ) -> Tuple[List[str], List[str]]:
 45 |     """Split leading link reference definition from lines if one exists
 46 | 
 47 |     Args:
 48 |         lines: The lines to evaluate.
 49 |         line_offset (optional): The offset into the overall document we are at. This is
 50 |             used for reporting errors in the original document.
 51 | 
 52 |     Returns:
 53 |         A tuple of two values. The first is the indented code block lines if they were
 54 |         found, otherwise it is an empty list. The second value is the remaining text.
 55 |         (If lines does not start with a link reference definition, it is the same as
 56 |         lines.)
 57 |     """
 58 |     link_reference_definition: List[str] = []
 59 |     remaining_lines = lines
 60 |     indexed_line_generator = enumerate(lines)
 61 | 
 62 |     index, line = next(indexed_line_generator)
 63 | 
 64 |     if get_indent(line) >= 4:
 65 |         return link_reference_definition, remaining_lines
 66 | 
 67 |     rest_of_line = line.lstrip()
 68 |     match = LINK_REFERENCE_DEFINITION_FIRST_ELEMENT_REGEX.match(rest_of_line)
 69 |     if not match:
 70 |         return link_reference_definition, remaining_lines
 71 | 
 72 |     rest_of_line = rest_of_line[match.end() :]
 73 |     url_and_title = rest_of_line.split(maxsplit=1)
 74 |     # At the end of this, index is set to the line with the beginning of the title_text
 75 |     # contains that first text. Is complete gets set from this loop when we know that
 76 |     # we have a valid title. In this first loop, we only set it when we know the lines
 77 |     # with the label and URL can stand on their own.
 78 |     # The later loops checks to ensure our closing quotation is the last non-whitespace
 79 |     # character on whatever line it ends on and the first occurence of that character,
 80 |     # unescaped.
 81 |     is_complete = False
 82 |     if len(url_and_title) == 2:
 83 |         # The label, URL, and possible title (or part of it) are on this line
 84 |         title_text = url_and_title[1]
 85 |     elif len(url_and_title) == 1:
 86 |         # Only the label and URL are on the first line
 87 |         try:
 88 |             index, line = next(indexed_line_generator)
 89 |             title_text = line
 90 |         except StopIteration:
 91 |             title_text = ""
 92 |         is_complete = True
 93 |     else:
 94 |         # Just the label was on the first line
 95 |         try:
 96 |             index, line = next(indexed_line_generator)
 97 |         except StopIteration:
 98 |             line = ""
 99 |         if line.startswith("[") or not line.strip():
100 |             # According to this standard, this is just paragraph text, but this tool
101 |             # should be usable during development.
102 |             # ToDo: Does that match up with our treatment of misquoted titles?
103 |             logger.warning(
104 |                 "The text on line %d seems to be a link reference definition, but it "
105 |                 "does not contain a link. We will be treating it as if it were.",
106 |                 index,  # We are just pass where the issue exists
107 |             )
108 |             link_reference_definition = [lines[0]]
109 |             remaining_lines = lines[1:]
110 |             return link_reference_definition, remaining_lines
111 |         elif len(line.split(maxsplit=1)) == 1:
112 |             # Only the URL is on the second line
113 |             index, line = next(indexed_line_generator)
114 |             is_complete = True
115 |             title_text = line
116 |         else:
117 |             # The URL and possible title (or part of it) are on the second line
118 |             title_text = line.split(maxsplit=1)[1]
119 | 
120 |     if title_text.strip():
121 |         quotation_character = title_text[0]
122 |     else:
123 |         quotation_character = "NO QUOTE"
124 | 
125 |     if quotation_character in QUOTATION_CHARACTERS:
126 |         closing_regex = re.compile(r"(?<!\\)(\\\\)*{}".format(quotation_character))
127 |         for index, line in itertools.chain(
128 |             [(index, title_text[1:])], indexed_line_generator
129 |         ):
130 |             match = closing_regex.search(line.rstrip())
131 |             if match:
132 |                 if match.end() == len(line.rstrip()):
133 |                     is_complete = True
134 |                     index += 1
135 |                 break
136 | 
137 |     if is_complete:
138 |         link_reference_definition = lines[:index]
139 |         remaining_lines = lines[index:]
140 | 
141 |     return link_reference_definition, remaining_lines
142 | 


--------------------------------------------------------------------------------
/markflow/detectors/ordered_list.py:
--------------------------------------------------------------------------------
 1 | from typing import List, Tuple
 2 | 
 3 | from ._lines import (
 4 |     is_blank_line_line,
 5 |     is_ordered_list_start_line,
 6 |     is_table_start_line,
 7 |     is_thematic_break_line,
 8 | )
 9 | 
10 | 
def split_ordered_list(
    lines: List[str], line_offset: int = 0
) -> Tuple[List[str], List[str]]:
    """Split a leading ordered list from lines if one exists

    The list begins when the first line satisfies `is_ordered_list_start_line` and
    runs until the first following line that is a blank line, a table start line, or
    a thematic break line (or until the input ends).

    Args:
        lines: The lines to evaluate.
        line_offset (optional): The offset into the overall document we are at. It is
            not used by this function.

    Returns:
        A tuple of two values. The first is the ordered list lines if a list was
        found, otherwise it is an empty list. The second value is the remaining text.
        (If lines does not start with an ordered list, it is the same as lines.)
    """
    first_line = next(iter(lines))
    if not is_ordered_list_start_line(first_line):
        return [], lines

    # Walk forward until a terminating line is found; `end` is the exclusive end of
    # the list.
    end = 1
    while end < len(lines):
        candidate = lines[end]
        if (
            is_blank_line_line(candidate)
            or is_table_start_line(candidate)
            or is_thematic_break_line(candidate)
        ):
            break
        end += 1

    return lines[:end], lines[end:]
38 | 


--------------------------------------------------------------------------------
/markflow/detectors/paragraph.py:
--------------------------------------------------------------------------------
  1 | from typing import Generator, List, Tuple
  2 | 
  3 | from ._lines import is_paragraph_start_line, is_setext_underline
  4 | from .atx_heading import split_atx_heading
  5 | from .blank_line import split_blank_line
  6 | from .block_quote import split_block_quote
  7 | from .bullet_list import split_bullet_list
  8 | from .fenced_code_block import split_fenced_code_block
  9 | from .ordered_list import split_ordered_list
 10 | from .table import split_table
 11 | from .thematic_break import split_thematic_break
 12 | 
 13 | 
 14 | def _is_paragraph_continuation_text(lines: List[str], line_offset: int = 0) -> bool:
 15 |     """Indicates whether the first line of lines would continue a paragraph
 16 | 
 17 |     This ensures that any valid interrupting section of a paragraph could not result in
 18 |     a valid block instead.
 19 | 
 20 |     We have a separate definition from the one used in block quote detection to avoid
 21 |     circular imports. That one also gets to skip block quote checking.
 22 | 
 23 |     Args:
 24 |         lines: The lines to evaluate.
 25 |         line_offset (optional): The offset into the overall document we are at. This is
 26 |             used for reporting errors in the original document.
 27 | 
 28 |     Returns:
 29 |         True if the first line would continue the paragraph. False otherwise.
 30 |     """
 31 |     for splitter in [
 32 |         split_atx_heading,
 33 |         split_blank_line,
 34 |         split_block_quote,
 35 |         split_bullet_list,
 36 |         split_fenced_code_block,
 37 |         split_ordered_list,
 38 |         split_table,
 39 |         split_thematic_break,
 40 |     ]:
 41 |         # ToDo: Disable logging?
 42 |         if splitter(lines, line_offset)[0]:
 43 |             return False
 44 |     if is_setext_underline(lines[0]):
 45 |         return False
 46 |     return True
 47 | 
 48 | 
 49 | def list_tail_generator(lines: List[str]) -> Generator[List[str], None, None]:
 50 |     """Generator that returns less and less of the end of a list
 51 | 
 52 |     The first call to this generator returns the passed in list. Each successive call
 53 |     to this generator returns the previous call without the first element until we
 54 |     return the last element.
 55 | 
 56 |     Args:
 57 |         lines: The lines to evaluate
 58 |         line_offset (optional): The offset into the overall document we are at. This is
 59 |             used for reporting errors in the original document.
 60 | 
 61 |     Returns:
 62 |         A tuple of two values. The first is the setext heading lines if they were found,
 63 |         otherwise it is an empty list. The second value is the remaining text. (If lines
 64 |         does not start with a thematic break, it is the same as lines.)
 65 | 
 66 |         The returned text can then be evaluated to determine if this is actually a
 67 |         paragraph or an setext heading.
 68 |     """
 69 |     for i in range(len(lines)):
 70 |         yield lines[i:]
 71 | 
 72 | 
 73 | def split_paragraph_ignoring_setext(
 74 |     lines: List[str], line_offset: int = 0
 75 | ) -> Tuple[List[str], List[str]]:
 76 |     """Split a paragraph from beginning of lines if one exists
 77 | 
 78 |     Unlike split_paragraph, this does not take into account setext underlining. This is
 79 |     so that both detectors can share a common function.
 80 | 
 81 |     Args:
 82 |         lines: The lines to evaluate.
 83 |         line_offset (optional): The offset into the overall document we are at. This is
 84 |             used for reporting errors in the original document.
 85 | 
 86 |     Returns:
 87 |         A tuple of two values. The first is the paragraph lines if a paragraph was
 88 |         found, otherwise it is an empty list. The second value is the remaining text.
 89 |         (If lines does not start with a thematic break, it is the same as lines.)
 90 |     """
 91 |     paragraph_lines = []
 92 |     remaining_lines = lines
 93 | 
 94 |     if is_paragraph_start_line(lines[0]):
 95 |         # ToDo: This should be handled in `wrap` as a double space is always a newline
 96 |         #  in any section type. Also add indents while you're there.
 97 |         if lines[0].endswith("  "):
 98 |             return [lines[0]], lines[1:]
 99 |         paragraph_lines.append(lines[0])
100 |         tail_lines_generator = list_tail_generator(lines[1:])
101 |         for tail in tail_lines_generator:
102 |             if _is_paragraph_continuation_text(
103 |                 tail, line_offset + len(paragraph_lines)
104 |             ):
105 |                 paragraph_lines.append(tail[0])
106 |                 # ToDo: This should be handled in `wrap` as a double space is always a
107 |                 #  newline in any section type.
108 |                 if tail[0].endswith("  "):
109 |                     remaining_lines = next(tail_lines_generator)
110 |                     break
111 |             else:
112 |                 remaining_lines = tail
113 |                 break
114 |         else:
115 |             remaining_lines = []
116 | 
117 |     return paragraph_lines, remaining_lines
118 | 
119 | 
def split_paragraph(
    lines: List[str], line_offset: int = 0
) -> Tuple[List[str], List[str]]:
    """Split a paragraph from beginning of lines if one exists

    A candidate paragraph that is immediately followed by a setext underline is
    rejected here.

    Args:
        lines: The lines to evaluate.
        line_offset (optional): The offset into the overall document we are at. This is
            used for reporting errors in the original document.

    Returns:
        A tuple of two values. The first is the paragraph lines if a paragraph was
        found, otherwise it is an empty list. The second value is the remaining text.
        (If lines does not start with a paragraph, it is the same as lines.)
    """
    candidate, rest = split_paragraph_ignoring_setext(lines, line_offset)
    if rest and is_setext_underline(rest[0]):
        # The text is underlined, so it is not treated as a paragraph.
        return [], lines
    return candidate, rest
142 | 


--------------------------------------------------------------------------------
/markflow/detectors/setext_heading.py:
--------------------------------------------------------------------------------
 1 | """
 2 | MarkFlow Setext Heading Detection Library
 3 | 
 4 | Setext headings are basically any paragraph that is followed by a line composed of all
 5 | equals signs (=) or dashes (-). The former indicates a heading of level 1 while the
 6 | latter indicates a heading of level 2.
 7 | 
 8 | Examples:
 9 |     ```
10 |     Heading 1
11 |     =========
12 |     ```
13 | 
14 |     ```
15 |     Heading 2
16 |     -
17 |     ```
18 | """
19 | 
20 | from typing import List, Tuple
21 | 
22 | from ._lines import is_setext_underline
23 | from .paragraph import split_paragraph_ignoring_setext
24 | 
25 | 
def split_setext_heading(
    lines: List[str], line_offset: int = 0
) -> Tuple[List[str], List[str]]:
    """Split setext heading from beginning of lines if one exists

    A setext heading is detected when the leading paragraph text is immediately
    followed by a setext underline.

    Args:
        lines: The lines to evaluate.
        line_offset (optional): The offset into the overall document we are at. This is
            used for reporting errors in the original document.

    Returns:
        A tuple of two values. The first is the setext heading lines (the text plus
        its underline) if they were found, otherwise it is an empty list. The second
        value is the remaining text. (If lines does not start with a setext heading,
        it is the same as lines.)
    """
    heading_text, rest = split_paragraph_ignoring_setext(lines, line_offset)
    if not heading_text or not rest:
        return [], lines

    underline = rest[0]
    if not is_setext_underline(underline):
        return [], lines

    return [*heading_text, underline], rest[1:]
45 | 


--------------------------------------------------------------------------------
/markflow/detectors/table.py:
--------------------------------------------------------------------------------
 1 | from typing import List, Tuple
 2 | 
 3 | 
 4 | def table_started(line: str, index: int, lines: List[str]) -> bool:
 5 |     """DEPRECATED"""
 6 |     return line.lstrip().startswith("|")
 7 | 
 8 | 
 9 | def table_ended(line: str, index: int, lines: List[str]) -> bool:
10 |     """DEPRECATED"""
11 |     return not table_started(line, index, lines)
12 | 
13 | 
def split_table(lines: List[str], line_offset: int = 0) -> Tuple[List[str], List[str]]:
    """Split a leading table from lines if one exists

    The table covers the first line, if `table_started` accepts it, and every
    following line up to the first one for which `table_ended` is true.

    Args:
        lines: The lines to evaluate.
        line_offset (optional): The offset into the overall document we are at. It is
            not used by this function.

    Returns:
        A tuple of two values. The first is the table lines if a table was found,
        otherwise it is an empty list. The second value is the remaining text, always
        as a new list.
    """
    if not table_started(lines[0], 0, lines):
        return [], lines[:]

    # `end` is the exclusive end of the table.
    end = 1
    while end < len(lines) and not table_ended(lines[end], end, lines):
        end += 1

    return lines[:end], lines[end:]
30 | 


--------------------------------------------------------------------------------
/markflow/detectors/thematic_break.py:
--------------------------------------------------------------------------------
 1 | """
 2 | MarkFlow Thematic Break Detection Library
 3 | 
 4 | A thematic break is a line that is a sequence of at least three dashes (-), underscores
 5 | (_), or asterisks (*), with optional whitespace (though no more than three leading
 6 | spaces), and is not the underlining of a setext heading in the case of dashes.
 7 | 
 8 | Examples:
 9 |     ```
10 |     ___
11 |     ```
12 | 
13 |     ```
14 |     ****************
15 |     ```
16 | """
17 | 
18 | from typing import List, Tuple
19 | 
20 | from ._lines import is_thematic_break_line
21 | 
22 | SEPARATOR_SYMBOLS = ["*", "_", "-"]
23 | 
24 | 
def split_thematic_break(
    lines: List[str], line_offset: int = 0
) -> Tuple[List[str], List[str]]:
    """Split leading thematic break from lines if one exists

    Args:
        lines: The lines to evaluate.
        line_offset (optional): The offset into the overall document we are at. It is
            not used by this function.

    Returns:
        A tuple of two values. The first is a one-element list holding the thematic
        break line if one was found, otherwise it is an empty list. The second value
        is the remaining text. (If lines does not start with a thematic break, it is
        the same as lines.)
    """
    first_line = lines[0]
    if not is_thematic_break_line(first_line):
        return [], lines
    return [first_line], lines[1:]
44 | 


--------------------------------------------------------------------------------
/markflow/exceptions.py:
--------------------------------------------------------------------------------
 1 | class MarkdownFormatException(Exception):
 2 |     """Raised if the passed in file is formatted incorrectly"""
 3 | 
 4 | 
 5 | class ReformatInconsistentException(RuntimeError):
 6 |     """Raised if a reformated Markdown file would be reformatted differently
 7 | 
 8 |     If you get this error, you should open a bug report.
 9 |     """
10 | 


--------------------------------------------------------------------------------
/markflow/formatters/__init__.py:
--------------------------------------------------------------------------------
 1 | # flake8: noqa
 2 | from .atx_heading import *
 3 | from .base import *
 4 | from .blank_line import *
 5 | from .block_quote import *
 6 | from .fenced_code_block import *
 7 | from .indented_code_block import *
 8 | from .link_reference_definition import *
 9 | from .lists import *
10 | from .paragraph import *
11 | from .setext_heading import *
12 | from .table import *
13 | from .thematic_break import *
14 | 


--------------------------------------------------------------------------------
/markflow/formatters/atx_heading.py:
--------------------------------------------------------------------------------
 1 | """
 2 | 4.2 ATX headings
 3 | 
 4 | An ATX heading consists of a string of characters, parsed as inline content, between an
 5 | opening sequence of 1–6 unescaped # characters and an optional closing sequence of any
 6 | number of unescaped # characters. The opening sequence of # characters must be followed
 7 | by a space or by the end of line. The optional closing sequence of #s must be preceded
 8 | by a space and may be followed by spaces only. The opening # character may be indented
 9 | 0-3 spaces. The raw contents of the heading are stripped of leading and trailing spaces
10 | before being parsed as inline content. The heading level is equal to the number of #
11 | characters in the opening sequence.
12 | 
13 | At least one space is required between the # characters and the heading’s contents,
14 | unless the heading is empty. Note that many implementations currently do not require the
15 | space. However, the space was required by the original ATX implementation.
16 | 
17 | https://spec.commonmark.org/0.29/#atx-headings
18 | """
19 | import logging
20 | 
21 | from .._utils import truncate_str
22 | from ..typing import Number
23 | from .base import MarkdownSection
24 | 
25 | __all__ = ["MarkdownATXHeading"]
26 | 
27 | logger = logging.getLogger(__name__)
28 | 
29 | REPR_CONTENT_LEN = 20
30 | 
31 | 
class MarkdownATXHeading(MarkdownSection):
    """A single-line ATX heading (`# Heading`)

    The section holds exactly one line. `depth` is the number of leading `#`
    characters and `content` is the text that follows them.
    """

    def _stripped_line(self) -> str:
        """Return the heading line without surrounding whitespace

        Raises:
            RuntimeError: If no line has been appended yet.
        """
        if not self.lines:
            raise RuntimeError(
                f"Attempted access of uninitialized {self.__class__.__name__}."
            )
        return self.lines[0].strip()

    @property
    def content(self) -> str:
        """The heading text with the leading #'s and surrounding whitespace removed"""
        return self._stripped_line().lstrip("#").strip()

    @property
    def depth(self) -> int:
        """The number of # characters that open the heading"""
        stripped = self._stripped_line()
        return len(stripped) - len(stripped.lstrip("#"))

    def append(self, line: str) -> None:
        """Set the heading's only line

        Raises:
            RuntimeError: If the heading already has a line.
        """
        if self.lines:
            raise RuntimeError(
                "Attempted to add another line to an ATX Header. They can only be one "
                "line."
            )
        self.lines.append(line)

    def reformatted(self, width: Number = 88) -> str:
        """Return the heading as `#...# content`

        An empty heading is returned as just its #'s, with no trailing space and no
        warning, since a space is only required when the heading has content.

        Args:
            width: Unused; headings are never wrapped.

        Raises:
            RuntimeError: If no line has been appended yet.
        """
        content = self.content
        marker = "#" * self.depth
        if not content:
            return marker

        # TODO: This prints out twice. We probably need a first pass step that calls out
        #  errors we will be fixing to suppress extra statements from reprocessing the
        #  document.
        if not self._stripped_line().lstrip("#").startswith(" "):
            logger.warning(
                "Line %d is an ATX Header without a space after #'s. This has been "
                "corrected.",
                self.line_index + 1,
            )
        return marker + " " + content

    def __repr__(self) -> str:
        return (
            f"<"
            f"{self.__class__.__name__}: "
            f"depth={repr(self.depth)}; "
            f"content={repr(truncate_str(self.content, REPR_CONTENT_LEN))}"
            f">"
        )
77 | 


--------------------------------------------------------------------------------
/markflow/formatters/base.py:
--------------------------------------------------------------------------------
 1 | import abc
 2 | from typing import List, Optional
 3 | 
 4 | from ..typing import Number
 5 | 
 6 | __all__ = ["MarkdownSection"]
 7 | 
 8 | 
 9 | class MarkdownSection:
10 |     def __init__(self, line_index: int, lines: Optional[List[str]] = None):
11 |         self.line_index = line_index
12 |         if lines is None:
13 |             lines = []
14 |         self.lines: List[str] = lines
15 | 
16 |     @abc.abstractmethod
17 |     def reformatted(self, width: Number = 88) -> str:
18 |         """Reformat the section based on publicized rules"""
19 | 
20 |     def __repr__(self) -> str:
21 |         raise NotImplementedError("MarkdownSections must implement `__repr__`.")
22 | 


--------------------------------------------------------------------------------
/markflow/formatters/blank_line.py:
--------------------------------------------------------------------------------
 1 | """
 2 | 4.9 Blank lines
 3 | 
 4 | Blank lines between block-level elements are ignored, except for the role they play in
 5 | determining whether a list is tight or loose.
 6 | 
 7 | Blank lines at the beginning and end of the document are also ignored.
 8 | 
 9 | https://spec.commonmark.org/0.29/#blank-lines
10 | """
11 | 
12 | from ..typing import Number
13 | from .base import MarkdownSection
14 | 
15 | __all__ = ["MarkdownBlankLine"]
16 | 
17 | 
class MarkdownBlankLine(MarkdownSection):
    """A section consisting of exactly one whitespace-only line"""

    def append(self, line: str) -> None:
        """Set the section's only line

        Args:
            line: The line to store. It must contain only whitespace.

        Raises:
            RuntimeError: If line contains non-whitespace characters, or if the
                section already has a line.
        """
        if not line.strip() and not self.lines:
            self.lines.append(line)
            return

        # Content errors take precedence over the "already full" error.
        if line.strip():
            raise RuntimeError(
                f"A line with non-whitespace characters has been added to a "
                f"`{self.__class__.__name__}`. Please open a bug report or email "
                f"jholland@duosecurity.com."
            )
        raise RuntimeError(
            f"`{self.__class__.__name__}`s can only contain one line. Please open "
            f"a bug report or email jholland@duosecurity.com."
        )

    def reformatted(self, width: Number = 88) -> str:
        """Return an empty string; the caller supplies the newline when joining"""
        return ""

    def __repr__(self) -> str:
        return f"<{self.__class__.__name__}>"
39 | 


--------------------------------------------------------------------------------
/markflow/formatters/block_quote.py:
--------------------------------------------------------------------------------
 1 | import logging
 2 | import re
 3 | from typing import List
 4 | 
 5 | from .._utils import get_indent, redirect_info_logs_to_debug, truncate_str
 6 | from ..detectors._lines import is_explicit_block_quote_line
 7 | from ..typing import Number
 8 | from .base import MarkdownSection
 9 | 
10 | __all__ = ["MarkdownBlockQuote"]
11 | 
12 | REPR_CONTENT_LEN = 20
13 | NON_ESCAPED_QUOTE_MARKER = re.compile(r"(?<= )>")
14 | LEADING_QUOTE_MARKER = re.compile(r"^ {0,3}>")
15 | 
16 | logger = logging.getLogger(__name__)
17 | 
18 | 
def _reformat_markdown(lines: List[str], width: Number) -> str:
    """Reformat lines as a standalone Markdown document

    The lines are joined with newlines (plus a trailing newline) and passed to
    `_reformat_markdown_text`, inside the `redirect_info_logs_to_debug` context.

    Args:
        lines: The lines of the nested document.
        width: The width forwarded to `_reformat_markdown_text`.

    Returns:
        The text returned by `_reformat_markdown_text`.
    """
    # Imported here to avoid a circular import. The module is already loaded by the
    # time this function is called, so the import is served from the cache.
    from ..reformat_markdown import _reformat_markdown_text

    document = "\n".join(lines) + "\n"
    with redirect_info_logs_to_debug():
        return _reformat_markdown_text(document, width)
28 | 
29 | 
class MarkdownBlockQuote(MarkdownSection):
    """A block quote section whose contents are reformatted as nested Markdown"""

    @property
    def first_line(self) -> str:
        """The first raw line of the block quote"""
        return self.lines[0]

    def append(self, line: str) -> None:
        """Add another raw line to the block quote"""
        self.lines.append(line)

    def reformatted(self, width: Number = 88) -> str:
        """Reformat the quoted content and re-apply the quote marker

        One level of quote marker is removed from every line, the remainder is
        reformatted as its own Markdown document, and each resulting line is prefixed
        with `>` (or `> ` when the first non-blank content line had exactly one space
        after the marker). Lines are stripped after prefixing, so the output never
        carries the original leading indentation.

        Args:
            width: The maximum width of the reformatted lines, marker included.

        Returns:
            The reformatted block quote.
        """
        # Lines without an explicit marker inherit the marker depth of the most
        # recent explicitly quoted line.
        depth = 0
        fully_quoted_lines: List[str] = []
        for line in self.lines:
            if is_explicit_block_quote_line(line):
                spaceless_string = "".join(line.split())
                depth = len(spaceless_string) - len(spaceless_string.lstrip(">"))
                fully_quoted_lines.append(line)
            else:
                fully_quoted_lines.append((">" * depth) + line)

        # Remove one level of quoting (and up to three leading spaces).
        stripped_lines: List[str] = [
            LEADING_QUOTE_MARKER.sub("", line) for line in fully_quoted_lines
        ]

        # The first non-blank line indented by zero or one space decides whether the
        # marker is followed by a space.
        has_space = False
        for line in stripped_lines:
            if not line.strip():
                continue
            content_indent = get_indent(line)
            if content_indent in (0, 1):
                has_space = content_indent == 1
                break

        if has_space:
            stripped_lines = [
                line[1:] if line.startswith(" ") else line for line in stripped_lines
            ]

        # The output is stripped below, so the only width consumed outside of the
        # content is the marker itself. (Subtracting a leading indent here, as was
        # done before with an inverted sign, let lines of indented quotes exceed
        # `width`.)
        prefix = "> " if has_space else ">"
        sub_width = width - len(prefix)

        # ToDo (jmholla): Issues with leading > in paragraphs will be handled by a later
        #  change.
        text = _reformat_markdown(stripped_lines, width=sub_width)
        # Stripping removes the trailing space of the prefix on blank quoted lines.
        return "\n".join((prefix + line).strip() for line in text.splitlines())

    def __repr__(self) -> str:
        first_line = truncate_str(self.first_line, REPR_CONTENT_LEN)
        return f"<{self.__class__.__name__}: first_line={repr(first_line)}>"
91 | 


--------------------------------------------------------------------------------
/markflow/formatters/fenced_code_block.py:
--------------------------------------------------------------------------------
 1 | """
 2 | 4.5 Fenced code blocks
 3 | 
 4 | A code fence is a sequence of at least three consecutive backtick characters (`) or
 5 | tildes (~). (Tildes and backticks cannot be mixed.) A fenced code block begins with a
 6 | code fence, indented no more than three spaces.
 7 | 
 8 | The line with the opening code fence may optionally contain some text following the
 9 | code fence; this is trimmed of leading and trailing whitespace and called the info
10 | string. If the info string comes after a backtick fence, it may not contain any backtick
11 | characters. (The reason for this restriction is that otherwise some inline code would
12 | be incorrectly interpreted as the beginning of a fenced code block.)
13 | 
14 | The content of the code block consists of all subsequent lines, until a closing code
15 | fence of the same type as the code block began with (backticks or tildes), and with at
16 | least as many backticks or tildes as the opening code fence. If the leading code fence
17 | is indented N spaces, then up to N spaces of indentation are removed from each line of
18 | the content (if present). (If a content line is not indented, it is preserved unchanged.
19 | If it is indented less than N spaces, all of the indentation is removed.)
20 | 
21 | The closing code fence may be indented up to three spaces, and may be followed only by
22 | spaces, which are ignored. If the end of the containing block (or document) is reached
23 | and no closing code fence has been found, the code block contains all of the lines after
24 | the opening code fence until the end of the containing block (or document). (An
25 | alternative spec would require backtracking in the event that a closing code fence is
26 | not found. But this makes parsing much less efficient, and there seems to be no real
27 | down side to the behavior described here.)
28 | 
29 | A fenced code block may interrupt a paragraph, and does not require a blank line either
30 | before or after.
31 | 
32 | https://spec.commonmark.org/0.29/#fenced-code-blocks
33 | """
34 | 
35 | from typing import Optional
36 | 
37 | from .._utils import truncate_str
38 | from ..typing import Number
39 | from .base import MarkdownSection
40 | 
41 | __all__ = ["MarkdownFencedCodeBlock"]
42 | 
43 | REPR_CONTENT_LEN = 20
44 | 
45 | 
class MarkdownFencedCodeBlock(MarkdownSection):
    """A fenced code block: an opening fence line, content lines, and a closing fence"""

    @property
    def fence_char(self) -> str:
        """The character the opening fence is made of"""
        return self.lines[0].strip()[0]

    @property
    def fence_count(self) -> int:
        """The number of fence characters in the opening fence"""
        opening = self.lines[0].strip()
        return len(opening) - len(opening.lstrip(self.fence_char))

    @property
    def first_line(self) -> Optional[str]:
        """The first content line, stripped, or None if there is no content

        A block of one or two lines is treated as having no content, so this no
        longer raises IndexError when only the opening fence has been added.
        """
        if len(self.lines) <= 2:
            return None
        return self.lines[1].strip()

    @property
    def language(self) -> str:
        """The text following the opening fence, stripped of whitespace"""
        return self.lines[0].strip().lstrip(self.fence_char).strip()

    def append(self, line: str) -> None:
        """Add another raw line to the block"""
        self.lines.append(line)

    def reformatted(self, width: Number = 88) -> str:
        """Return the block with normalized fences and no trailing whitespace

        The opening line becomes the fence followed directly by the language, the
        closing line becomes a fence of the same length as the opening one, and the
        lines in between are kept apart from trailing whitespace.

        Args:
            width: Unused; code is never wrapped.
        """
        fence = self.fence_char * self.fence_count
        # NOTE(review): this assumes the last line is always the closing fence. If a
        #  block can reach here unclosed, its final content line would be dropped --
        #  confirm against the detector.
        new_lines = [fence + self.language] + self.lines[1:-1] + [fence]
        return "\n".join([line.rstrip() for line in new_lines])

    def __repr__(self) -> str:
        first_line = self.first_line
        if first_line is not None:
            first_line = truncate_str(first_line, REPR_CONTENT_LEN)
        return (
            f"<"
            f"{self.__class__.__name__}: "
            f"fence_char={repr(self.fence_char)}; "
            f"fence_count={repr(self.fence_count)}; "
            f"language={repr(self.language)}; "
            f"first_line={repr(first_line)}"
            f">"
        )
89 | 


--------------------------------------------------------------------------------
/markflow/formatters/indented_code_block.py:
--------------------------------------------------------------------------------
 1 | """
 2 | 4.4 Indented code blocks
 3 | 
 4 | An indented code block is composed of one or more indented chunks separated by blank
 5 | lines. An indented chunk is a sequence of non-blank lines, each indented four or more
 6 | spaces. The contents of the code block are the literal contents of the lines, including
 7 | trailing line endings, minus four spaces of indentation. An indented code block has no
 8 | info string.
 9 | 
10 | An indented code block cannot interrupt a paragraph, so there must be a blank line
11 | between a paragraph and a following indented code block. (A blank line is not needed,
12 | however, between a code block and a following paragraph.)
13 | 
14 | TODO: Keep in mind for paragraphs
15 | 
16 | https://spec.commonmark.org/0.29/#indented-code-blocks
17 | """
18 | 
19 | import logging
20 | 
21 | from .._utils import truncate_str
22 | from ..typing import Number
23 | from .base import MarkdownSection
24 | 
25 | __all__ = ["MarkdownIndentedCodeBlock"]
26 | 
27 | logger = logging.getLogger(__name__)
28 | 
29 | REPR_CONTENT_LEN = 20
30 | 
31 | 
class MarkdownIndentedCodeBlock(MarkdownSection):
    """Formatter for an indented code block; content is preserved verbatim."""

    @property
    def first_line(self) -> str:
        """The first line of the block with surrounding whitespace removed."""
        opening = self.lines[0]
        return opening.strip()

    def append(self, line: str) -> None:
        """Add ``line`` to the end of this section."""
        self.lines.append(line)

    def reformatted(self, width: Number = 88) -> str:
        """Return the block unchanged apart from trailing whitespace removal.

        Args:
            width: Unused; code is never wrapped.

        Returns:
            The lines joined by newlines, each right-stripped.
        """
        return "\n".join(map(str.rstrip, self.lines))

    def __repr__(self) -> str:
        preview = repr(truncate_str(self.first_line, REPR_CONTENT_LEN))
        return f"<{self.__class__.__name__}: first_line={preview}>"
50 | 


--------------------------------------------------------------------------------
/markflow/formatters/link_reference_definition.py:
--------------------------------------------------------------------------------
  1 | """
  2 | 4.7 Link reference definitions
  3 | 
  4 | A link reference definition consists of a link label, indented up to three spaces,
  5 | followed by a colon (:), optional whitespace (including up to one line ending), a link
  6 | destination, optional whitespace (including up to one line ending), and an optional link
  7 | title, which if it is present must be separated from the link destination by whitespace.
  8 | No further non-whitespace characters may occur on the line.
  9 | 
 10 | A link reference definition does not correspond to a structural element of a document.
 11 | Instead, it defines a label which can be used in reference links and reference-style
 12 | images elsewhere in the document. Link reference definitions can come either before or
 13 | after the links that use them.
 14 | 
 15 | https://spec.commonmark.org/0.29/#link-reference-definitions
 16 | 
 17 | A link label begins with a left bracket ([) and ends with the first right bracket (])
 18 | that is not backslash-escaped. Between these brackets there must be at least one non-
 19 | whitespace character. Unescaped square bracket characters are not allowed inside the
 20 | opening and closing square brackets of link labels. A link label can have at most 999
 21 | characters inside the square brackets.
 22 | 
 23 | https://spec.commonmark.org/0.29/#link-label
 24 | """
 25 | 
 26 | import re
 27 | 
 28 | from .._utils import truncate_str
 29 | from .._utils.textwrap import wrap
 30 | from ..typing import Number
 31 | from .base import MarkdownSection
 32 | 
 33 | __all__ = ["MarkdownLinkReferenceDefinition"]
 34 | 
 35 | POST_COLON_SPACE_REGEX = re.compile(r":\s+")
 36 | REPR_CONTENT_LEN = 20
 37 | LINK_REFERENCE_DEFINITION_REGEX = re.compile(
 38 |     r"\["
 39 |     r"(?P<name>[^\]]{1,999})"
 40 |     r"\]:"
 41 |     r"\s*"
 42 |     r"(?P<link>[^\s]*)"
 43 |     r"\s*"
 44 |     r"(?P<title>.*)"
 45 | )
 46 | 
 47 | 
 48 | class MarkdownLinkReferenceDefinition(MarkdownSection):
 49 |     @property
 50 |     def name(self) -> str:
 51 |         match = LINK_REFERENCE_DEFINITION_REGEX.search(" ".join(self.lines))
 52 |         if match is None:
 53 |             raise RuntimeError(
 54 |                 "Invalid link reference definition created. Please open a bug report "
 55 |                 "or email jholland@duosecurity.com."
 56 |             )
 57 |         return match.group("name")
 58 | 
 59 |     @property
 60 |     def link(self) -> str:
 61 |         match = LINK_REFERENCE_DEFINITION_REGEX.search(" ".join(self.lines))
 62 |         if match is None:
 63 |             raise RuntimeError(
 64 |                 "Invalid link reference definition created. Please open a bug report "
 65 |                 "or email jholland@duosecurity.com."
 66 |             )
 67 |         return match.group("link")
 68 | 
 69 |     @property
 70 |     def title(self) -> str:
 71 |         match = LINK_REFERENCE_DEFINITION_REGEX.search(" ".join(self.lines))
 72 |         if match is None:
 73 |             raise RuntimeError(
 74 |                 "Invalid link reference definition created. Please open a bug report "
 75 |                 "or email jholland@duosecurity.com."
 76 |             )
 77 |         title = match.group("title")
 78 |         return " ".join(title.split())
 79 | 
 80 |     def append(self, line: str) -> None:
 81 |         self.lines.append(line)
 82 | 
 83 |     def reformatted(self, width: Number = 88) -> str:
 84 |         # Last index indicates that last line we checked for content
 85 |         last_index = 0
 86 |         str_ = f"[{self.name}]:"
 87 |         if not self.link:
 88 |             return str_
 89 | 
 90 |         if self.link in self.lines[last_index]:
 91 |             str_ += f" {self.link}"
 92 |         else:
 93 |             last_index = 1
 94 |             str_ += f"\n{self.link}"
 95 | 
 96 |         title = self.title
 97 |         if not title:
 98 |             return str_
 99 | 
100 |         # We don't naively wrap link reference definitions because they are allowed to
101 |         # overflow lines (the label and url portions).
102 |         if title.split()[0] in self.lines[last_index]:
103 |             # Our title was on the line with our link
104 |             if len(title.split()[0]) + len(str_.splitlines()[-1]) <= width:
105 |                 lines = str_.splitlines()
106 |                 str_ = "\n".join(lines[:-1] + [wrap(lines[-1] + " " + title, width)])
107 |             else:
108 |                 str_ = "\n" + wrap(title, width)
109 |         else:
110 |             str_ += "\n" + wrap(title, width)
111 | 
112 |         return str_
113 | 
114 |     def __repr__(self) -> str:
115 |         return (
116 |             f"<"
117 |             f"{self.__class__.__name__}: "
118 |             f"name={repr(truncate_str(self.name, REPR_CONTENT_LEN))} "
119 |             f"link={repr(truncate_str(self.link, REPR_CONTENT_LEN))} "
120 |             f"title={repr(truncate_str(self.title, REPR_CONTENT_LEN))}"
121 |             f">"
122 |         )
123 | 


--------------------------------------------------------------------------------
/markflow/formatters/lists.py:
--------------------------------------------------------------------------------
  1 | # 1. Split the list into entries
  2 | # 2. Dedent those entries
  3 | # 3. Determine indentation level
  4 | # 4. Pass each entry to the formatter
  5 | # 5. Combine the resulting output
  6 | import re
  7 | import string
  8 | from typing import List
  9 | 
 10 | from .._utils import get_indent, redirect_info_logs_to_debug, truncate_str
 11 | from ..typing import Number
 12 | from .base import MarkdownSection
 13 | 
 14 | MARKER_REGEX = re.compile(
 15 |     r"^\s*"  # Leading spaces are allowed and often expected
 16 |     r"("
 17 |     r"\*|"  # Asterisk list marker
 18 |     r"-|"  # Dash list marker
 19 |     r"\+|"  # Plus list marker
 20 |     r"[0-9]+\."  # Numeric list marker
 21 |     r")"
 22 |     r"($| )"  # End of line or space
 23 | )
 24 | CODE_BLOCK_FENCES = "`~"
 25 | REPR_CONTENT_LEN = 20
 26 | 
 27 | 
 28 | def _reformat_markdown(lines: List[str], width: Number) -> str:
 29 |     # Prevents issues from circular imports. Since this module would already be loaded
 30 |     # whenever we call this function, we know it's cached.
 31 |     from ..reformat_markdown import _reformat_markdown_text
 32 | 
 33 |     with redirect_info_logs_to_debug():
 34 |         text = _reformat_markdown_text("\n".join(lines) + "\n", width)
 35 | 
 36 |     return text
 37 | 
 38 | 
 39 | def _list_marker_end_pos(line: str) -> int:
 40 |     """Return the number of characters before the end of a list marker
 41 | 
 42 |     Note: This does not include the trailing space in the count.
 43 | 
 44 |     Args:
 45 |         line: The lines to evaluate.
 46 | 
 47 |     Returns:
 48 |         True if the first line would continue the paragraph. False otherwise.
 49 |     """
 50 | 
 51 |     match = MARKER_REGEX.match(line)
 52 |     if match is None:
 53 |         raise RuntimeError(
 54 |             "Attempted to find the end of a list marker on a line that doesn't have "
 55 |             "one. Please open a bug report or email jholland@duosecurity.com."
 56 |         )
 57 |     return match.end(1)
 58 | 
 59 | 
 60 | def _split_list(lines: List[str]) -> List[List[str]]:
 61 |     in_code_block = False
 62 |     code_block_end = ""
 63 | 
 64 |     list_entries: List[List[str]] = []
 65 |     max_indent = _list_marker_end_pos(lines[0])
 66 |     for line in lines:
 67 |         if any(line.lstrip().startswith(f * 3) for f in CODE_BLOCK_FENCES):
 68 |             code_block_symbol = line.lstrip()[0]
 69 |             code_block_marker_length = len(line.lstrip()) - len(
 70 |                 line.lstrip(code_block_symbol)
 71 |             )
 72 |             code_block_marker = code_block_marker_length * code_block_symbol
 73 |             if in_code_block:
 74 |                 if code_block_end == code_block_marker:
 75 |                     in_code_block = False
 76 |             else:
 77 |                 in_code_block = True
 78 |                 code_block_end = code_block_marker
 79 | 
 80 |         if MARKER_REGEX.match(line) and not in_code_block:
 81 |             line_indent = get_indent(line)
 82 |             list_indent = _list_marker_end_pos(line)
 83 |             if line_indent <= max_indent:
 84 |                 max_indent = list_indent
 85 |                 list_entries.append([line])
 86 |             else:
 87 |                 list_entries[-1].append(line)
 88 |         else:
 89 |             list_entries[-1].append(line)
 90 |     return list_entries
 91 | 
 92 | 
 93 | def _dedent_entries(list_entries: List[List[str]]) -> List[List[str]]:
 94 |     # ToDo: Should we handle missing spaces? I don't think so. Think:
 95 |     #  *read*
 96 |     dedented_entries: List[List[str]] = []
 97 |     for entry in list_entries:
 98 |         indent = _list_marker_end_pos(entry[0]) + 1
 99 |         dedented_entries.append([entry[0][indent:]])
100 |         for line in entry[1:]:
101 |             dedented_entry = line[:indent].lstrip() + line[indent:]
102 |             dedented_entries[-1].append(dedented_entry)
103 |     return dedented_entries
104 | 
105 | 
class MarkdownBulletList(MarkdownSection):
    """Formatter for a bullet list.

    Each top-level entry is reformatted as its own Markdown document and then
    re-indented under the list's marker.
    """

    @property
    def marker(self) -> str:
        """The bullet character of the first entry; reused for every entry."""
        return self.lines[0].lstrip()[0]

    @property
    def first_line(self) -> str:
        """The first line of the list, unmodified."""
        return self.lines[0]

    def append(self, line: str) -> None:
        """Add ``line`` to the end of this section."""
        self.lines.append(line)

    def reformatted(self, width: Number = 88) -> str:
        """Return the list with every entry reformatted and uniformly indented.

        Args:
            width: Maximum line width, including the marker and its space.

        Returns:
            The reformatted list, without a trailing newline.
        """
        list_entries = _split_list(self.lines)
        # '* '
        toplevel_indent = 2
        dedented_entries = _dedent_entries(list_entries)
        continuation = "\n" + toplevel_indent * " "

        reformatted_entries: List[str] = []
        for entry in dedented_entries:
            with redirect_info_logs_to_debug():
                # The entry body gets the width left over after the marker.
                reformatted_entry = _reformat_markdown(
                    entry, width - toplevel_indent
                ).rstrip("\n")
            reformatted_entries.append(
                self.marker + " " + continuation.join(reformatted_entry.split("\n"))
            )

        return "\n".join(reformatted_entries)

    def __repr__(self) -> str:
        # Opens with "<" so the representation is balanced with its closing ">".
        return (
            f"<"
            f"{self.__class__.__name__}: "
            f"marker={repr(self.marker)}; "
            f"first_line={repr(truncate_str(self.first_line, REPR_CONTENT_LEN))}"
            f">"
        )
148 | 
149 | 
class MarkdownOrderedList(MarkdownSection):
    """Formatter for an ordered list.

    Entries are renumbered consecutively starting from the first entry's number;
    each entry is reformatted as its own Markdown document and re-indented.
    """

    @property
    def first_number(self) -> int:
        """The number of the first entry, parsed from its leading digits."""
        lstripped_line = self.lines[0].lstrip()
        digit_count = len(lstripped_line) - len(lstripped_line.lstrip(string.digits))
        return int(lstripped_line[:digit_count])

    @property
    def first_line(self) -> str:
        """The first line of the list, unmodified."""
        return self.lines[0]

    def append(self, line: str) -> None:
        """Add ``line`` to the end of this section."""
        self.lines.append(line)

    def reformatted(self, width: Number = 88) -> str:
        """Return the list renumbered, reformatted and uniformly indented.

        Args:
            width: Maximum line width, including the marker and its space.

        Returns:
            The reformatted list, without a trailing newline.
        """
        list_entries = _split_list(self.lines)
        first_number = self.first_number
        # '99. ' -- sized for the widest (i.e. last) number in the list.
        toplevel_indent = len(str(first_number + len(list_entries) - 1)) + 2
        dedented_entries = _dedent_entries(list_entries)
        continuation = "\n" + toplevel_indent * " "

        reformatted_entries: List[str] = []
        for entry_number, entry in enumerate(dedented_entries, start=first_number):
            with redirect_info_logs_to_debug():
                # The entry body gets the width left over after the marker.
                reformatted_entry = _reformat_markdown(
                    entry, width - toplevel_indent
                ).rstrip("\n")
            reformatted_entries.append(
                str(entry_number)
                + ". "
                + continuation.join(reformatted_entry.split("\n"))
            )

        return "\n".join(reformatted_entries)

    def __repr__(self) -> str:
        # Opens with "<" so the representation is balanced with its closing ">".
        return (
            f"<"
            f"{self.__class__.__name__}: "
            f"first_number={repr(self.first_number)}; "
            f"first_line={repr(truncate_str(self.first_line, REPR_CONTENT_LEN))}"
            f">"
        )
194 | 


--------------------------------------------------------------------------------
/markflow/formatters/paragraph.py:
--------------------------------------------------------------------------------
 1 | from .._utils import truncate_str
 2 | from .._utils.textwrap import wrap
 3 | from ..typing import Number
 4 | from .base import MarkdownSection
 5 | 
 6 | __all__ = ["MarkdownParagraph"]
 7 | 
 8 | REPR_CONTENT_LEN = 20
 9 | 
10 | 
class MarkdownParagraph(MarkdownSection):
    """Formatter for a paragraph: joins its lines and re-wraps them."""

    @property
    def content(self) -> str:
        """The paragraph as one string: each line stripped, joined by spaces."""
        # TODO: I think we actually want to split each line to remove double spaces.
        stripped_lines = (line.strip() for line in self.lines)
        return " ".join(stripped_lines)

    def append(self, line: str) -> None:
        """Add ``line`` to the end of this section."""
        self.lines.append(line)

    def reformatted(self, width: Number = 88) -> str:
        """Return the paragraph wrapped to ``width``.

        If the last source line ends with two spaces, two spaces are appended to
        the wrapped text as well.

        Args:
            width: Maximum line width.

        Returns:
            The wrapped paragraph, without a trailing newline.
        """
        wrapped = wrap(self.content, width)
        suffix = "  " if self.lines[-1].endswith("  ") else ""
        return wrapped + suffix

    def __repr__(self) -> str:
        preview = repr(truncate_str(self.content, REPR_CONTENT_LEN))
        return f"<{self.__class__.__name__}: content={preview}>"
33 | 


--------------------------------------------------------------------------------
/markflow/formatters/setext_heading.py:
--------------------------------------------------------------------------------
 1 | """
 2 | 4.3 Setext headings
 3 | 
 4 | A setext heading consists of one or more lines of text, each containing at least one
 5 | non-whitespace character, with no more than 3 spaces indentation, followed by a setext
 6 | heading underline. The lines of text must be such that, were they not followed by the
 7 | setext heading underline, they would be interpreted as a paragraph: they cannot be
 8 | interpretable as a code fence, ATX heading, block quote, thematic break, list item, or
 9 | HTML block.
10 | 
11 | A setext heading underline is a sequence of = characters or a sequence of - characters,
12 | with no more than 3 spaces indentation and any number of trailing spaces. If a line
13 | containing a single - can be interpreted as an empty list items, it should be
14 | interpreted this way and not as a setext heading underline.
15 | 
16 | The heading is a level 1 heading if = characters are used in the setext heading
17 | underline, and a level 2 heading if - characters are used. The contents of the heading
18 | are the result of parsing the preceding lines of text as CommonMark inline content.
19 | 
20 | In general, a setext heading need not be preceded or followed by a blank line. However,
21 | it cannot interrupt a paragraph, so when a setext heading comes after a paragraph, a
22 | blank line is needed between them.
23 | 
24 | https://spec.commonmark.org/0.29/#setext-headings
25 | """
26 | 
27 | from .._utils import truncate_str
28 | from .._utils.textwrap import wrap
29 | from ..typing import Number
30 | from .base import MarkdownSection
31 | 
32 | __all__ = ["MarkdownSetextHeading"]
33 | 
34 | REPR_CONTENT_LEN = 20
35 | 
36 | 
class MarkdownSetextHeading(MarkdownSection):
    """Formatter for a setext heading: text lines followed by an underline."""

    def _require_underline(self) -> None:
        """Raise if the section does not yet hold text plus an underline."""
        if len(self.lines) < 2:
            raise RuntimeError(
                f"Attempted access of uninitialized {self.__class__.__name__}."
            )

    @property
    def char(self) -> str:
        """The underline character, taken from the last line."""
        self._require_underline()
        underline = self.lines[-1].strip()
        return underline[0]

    @property
    def content(self) -> str:
        """The heading text: all lines but the last, stripped and space-joined."""
        self._require_underline()
        text_lines = self.lines[:-1]
        return " ".join(line.strip() for line in text_lines)

    def append(self, line: str) -> None:
        """Add ``line`` to the end of this section."""
        self.lines.append(line)

    def reformatted(self, width: Number = 88) -> str:
        """Return the wrapped heading with an underline as long as its widest line.

        Args:
            width: Maximum line width for the heading text.

        Returns:
            The heading text followed by the underline, without a trailing newline.
        """
        heading_str = wrap(self.content, width)
        heading_len = max(map(len, heading_str.splitlines()))
        underline = self.char * heading_len
        return f"{heading_str}\n{underline}"

    def __repr__(self) -> str:
        char = repr(self.char)
        content = repr(truncate_str(self.content, REPR_CONTENT_LEN))
        return f"<{self.__class__.__name__}: char={char}; content={content}>"
70 | 


--------------------------------------------------------------------------------
/markflow/formatters/table.py:
--------------------------------------------------------------------------------
  1 | import enum
  2 | import math
  3 | import re
  4 | 
  5 | from ..exceptions import MarkdownFormatException
  6 | from ..typing import Number
  7 | from .base import MarkdownSection
  8 | 
  9 | __all__ = ["MarkdownTable"]
 10 | 
 11 | COLUMN_DIVIDER_REGEX = re.compile(r"(?<!\\)" r"\|")  # Ignore escaped |
 12 | 
 13 | 
class Alignment(enum.Enum):
    """Column alignment as declared by a table's divider row."""

    # Divider cell has no colon.
    NONE = "none"
    # Divider cell both starts and ends with a colon.
    CENTER = "center"
    # Divider cell starts with a colon.
    LEFT = "left"
    # Divider cell ends with a colon.
    RIGHT = "right"
 19 | 
 20 | 
 21 | def center_align(text: str, width: int) -> str:
 22 |     padding = width - len(text)
 23 |     leading_padding = math.floor(padding / 2)
 24 |     trailing_padding = math.ceil(padding / 2)
 25 |     return f"{' ' * leading_padding}{text}{' ' * trailing_padding}"
 26 | 
 27 | 
 28 | def left_align(text: str, width: int) -> str:
 29 |     padding = width - len(text)
 30 |     return f"{text}{' ' * padding}"
 31 | 
 32 | 
 33 | def right_align(text: str, width: int) -> str:
 34 |     padding = width - len(text)
 35 |     return f"{' ' * padding}{text}"
 36 | 
 37 | 
 38 | class MarkdownTable(MarkdownSection):
 39 |     def append(self, line: str) -> None:
 40 |         self.lines.append(line)
 41 | 
 42 |     def reformatted(self, width: Number = 88) -> str:
 43 |         column_widths = []
 44 |         for i, line in enumerate(self.lines):
 45 |             if i == 1:
 46 |                 # Skip the divider line
 47 |                 continue
 48 |             cols = COLUMN_DIVIDER_REGEX.split(line)[1:-1]
 49 |             cols = [col.strip() for col in cols]
 50 |             column_widths.append(tuple(len(col) for col in cols))
 51 | 
 52 |         column_alignments = []
 53 |         for divider in self.lines[1].strip()[1:-1].split("|"):
 54 |             divider = divider.strip()
 55 |             if divider.startswith(":") and divider.endswith(":"):
 56 |                 column_alignments.append(Alignment.CENTER)
 57 |             elif divider.startswith(":"):
 58 |                 column_alignments.append(Alignment.LEFT)
 59 |             elif divider.endswith(":"):
 60 |                 column_alignments.append(Alignment.RIGHT)
 61 |             else:
 62 |                 column_alignments.append(Alignment.NONE)
 63 | 
 64 |         header_column_count = len(column_widths[0])
 65 |         for i, column_width in enumerate(column_widths[1:], start=2):
 66 |             if len(column_width) != header_column_count:
 67 |                 raise MarkdownFormatException(
 68 |                     f"Line {self.line_index + i + 1} has unexpected number of columns "
 69 |                     f"(expected: {header_column_count}, actual: {len(column_width)})"
 70 |                 )
 71 | 
 72 |         new_column_widths = [0 for _ in column_widths[0]]
 73 |         for widths in column_widths:
 74 |             for i, width in enumerate(widths):
 75 |                 if width > new_column_widths[i]:
 76 |                     new_column_widths[i] = width
 77 | 
 78 |         new_lines = []
 79 |         # First line is headers. We'll center them.
 80 |         headers = COLUMN_DIVIDER_REGEX.split(self.lines[0])[1:-1]
 81 |         header_strings = []
 82 |         for header, width, alignment in zip(
 83 |             headers, new_column_widths, column_alignments
 84 |         ):
 85 |             header = header.strip()
 86 |             if alignment == Alignment.CENTER:
 87 |                 header_strings.append(f" {center_align(header, width)} ")
 88 |             elif alignment == Alignment.LEFT:
 89 |                 header_strings.append(f" {left_align(header, width)} ")
 90 |             elif alignment == Alignment.RIGHT:
 91 |                 header_strings.append(f" {right_align(header, width)} ")
 92 |             else:
 93 |                 header_strings.append(f" {center_align(header, width)} ")
 94 |         new_lines.append("|" + "|".join(header_strings) + "|")
 95 | 
 96 |         # Second line is the dividers.
 97 |         dashes = []
 98 |         for width, alignment in zip(new_column_widths, column_alignments):
 99 |             divider = "-" * width
100 |             if alignment == Alignment.CENTER:
101 |                 dashes.append(f":{divider}:")
102 |             elif alignment == Alignment.LEFT:
103 |                 dashes.append(f":{divider}-")
104 |             elif alignment == Alignment.RIGHT:
105 |                 dashes.append(f"-{divider}:")
106 |             else:
107 |                 dashes.append(f"-{divider}-")
108 |         new_lines.append(f"|{'|'.join(dashes)}|")
109 | 
110 |         # The rest are individual entries.
111 |         for line in self.lines[2:]:
112 |             columns = []
113 |             for column, width, alignment in zip(
114 |                 line.split("|")[1:-1], new_column_widths, column_alignments
115 |             ):
116 |                 column = column.strip()
117 |                 if alignment == Alignment.CENTER:
118 |                     columns.append(f" {center_align(column, width)} ")
119 |                 elif alignment == Alignment.LEFT:
120 |                     columns.append(f" {left_align(column, width)} ")
121 |                 elif alignment == Alignment.RIGHT:
122 |                     columns.append(f" {right_align(column, width)} ")
123 |                 else:
124 |                     columns.append(f" {left_align(column, width)} ")
125 |             new_lines.append(f"|{'|'.join(columns)}|")
126 | 
127 |         return "\n".join(new_lines)
128 | 
129 |     def __repr__(self) -> str:
130 |         return f"<{self.__class__.__name__}: ToDo>"
131 | 


--------------------------------------------------------------------------------
/markflow/formatters/thematic_break.py:
--------------------------------------------------------------------------------
 1 | """
 2 | 4.1 Thematic breaks
 3 | 
 4 | A line consisting of 0-3 spaces of indentation, followed by a sequence of three or more
 5 | matching -, _, or * characters, each followed optionally by any number of spaces or
 6 | tabs, forms a thematic break.
 7 | 
 8 | It is required that all of the non-whitespace characters be the same.
 9 | 
10 | When both a thematic break and a list item are possible interpretations of a line, the
11 | thematic break takes precedence.
12 | 
13 | If you want a thematic break in a list item, use a different bullet.
14 | 
15 | https://spec.commonmark.org/0.29/#thematic-breaks
16 | """
17 | 
18 | import math
19 | 
20 | from ..typing import Number
21 | from .base import MarkdownSection
22 | 
23 | __all__ = ["MarkdownThematicBreak"]
24 | 
25 | 
class MarkdownThematicBreak(MarkdownSection):
    """Formatter for a thematic break (a single line of ``-``, ``_`` or ``*``)."""

    @property
    def char(self) -> str:
        """The break character: the first non-blank character of the line."""
        # Assuming we were passed valid data
        stripped = self.lines[0].strip()
        return stripped[0]

    def append(self, line: str) -> None:
        """Add the break's line.

        Raises:
            RuntimeError: If the section already holds a line.
        """
        if not self.lines:
            self.lines.append(line)
            return
        raise RuntimeError("Thematic breaks cannot span multiple lines")

    def reformatted(self, width: Number = 88) -> str:
        """Return the break character repeated across the full width.

        Args:
            width: Number of characters to emit. ``math.inf`` yields three
                characters; any other float is rejected.

        Returns:
            The reformatted thematic break.

        Raises:
            RuntimeError: If ``width`` is a float other than ``math.inf``.
        """
        if not isinstance(width, float):
            return self.char * width
        if width != math.inf:
            raise RuntimeError(
                f"Invalid width {repr(width)} passed. How did you manage this?"
            )
        return self.char * 3

    def __repr__(self) -> str:
        name = self.__class__.__name__
        return f"<{name}: char={repr(self.char)}>"
50 | 


--------------------------------------------------------------------------------
/markflow/parser.py:
--------------------------------------------------------------------------------
 1 | import logging
 2 | from enum import Enum
 3 | from typing import List, Tuple
 4 | 
 5 | from .detectors import (
 6 |     split_atx_heading,
 7 |     split_blank_line,
 8 |     split_block_quote,
 9 |     split_bullet_list,
10 |     split_fenced_code_block,
11 |     split_indented_code_block,
12 |     split_link_reference_definition,
13 |     split_ordered_list,
14 |     split_paragraph,
15 |     split_setext_heading,
16 |     split_table,
17 |     split_thematic_break,
18 | )
19 | from .typing import SplitFunc
20 | 
21 | logger = logging.getLogger(__name__)
22 | 
23 | 
class MarkdownSectionEnum(Enum):
    """The kinds of section the parser can split a document into.

    Each member's value is a human-readable name; ``parse_markdown`` writes it to
    the debug log.
    """

    # Not produced by any splitter in SPLITTERS.
    INVALID = "Invalid"
    ATX_HEADING = "ATX Heading"
    BLANK_LINE = "Blank Line"
    BLOCK_QUOTE = "Block Quote"
    BULLET_LIST = "Bullet List"
    FENCED_CODE_BLOCK = "Fenced Code Block"
    INDENTED_CODE_BLOCK = "Indented Code Block"
    LINK_REFERENCE_DEFINITION = "Link Reference Definition"
    ORDERED_LIST = "Ordered List"
    PARAGRAPH = "Paragraph"
    SETEXT_HEADING = "Setext Heading"
    TABLE = "Table"
    THEMATIC_BREAK = "Thematic Break"
38 | 
39 | 
# Pairs each section type with the function that tries to split a section of that
# type off the front of the remaining lines. parse_markdown tries the pairs in list
# order and takes the first one that returns content.
# NOTE(review): the paragraph splitter is listed before the setext heading, table
# and thematic break splitters, so it presumably declines those lines itself --
# confirm against the detectors.
SPLITTERS: List[Tuple[MarkdownSectionEnum, SplitFunc]] = [
    (MarkdownSectionEnum.ATX_HEADING, split_atx_heading),
    (MarkdownSectionEnum.BLANK_LINE, split_blank_line),
    (MarkdownSectionEnum.BLOCK_QUOTE, split_block_quote),
    (MarkdownSectionEnum.BULLET_LIST, split_bullet_list),
    (MarkdownSectionEnum.FENCED_CODE_BLOCK, split_fenced_code_block),
    (MarkdownSectionEnum.INDENTED_CODE_BLOCK, split_indented_code_block),
    (MarkdownSectionEnum.LINK_REFERENCE_DEFINITION, split_link_reference_definition),
    (MarkdownSectionEnum.ORDERED_LIST, split_ordered_list),
    (MarkdownSectionEnum.PARAGRAPH, split_paragraph),
    (MarkdownSectionEnum.SETEXT_HEADING, split_setext_heading),
    (MarkdownSectionEnum.TABLE, split_table),
    (MarkdownSectionEnum.THEMATIC_BREAK, split_thematic_break),
]
54 | 
55 | 
def parse_markdown(lines: List[str]) -> List[Tuple[MarkdownSectionEnum, List[str]]]:
    """Split ``lines`` into consecutive, typed sections.

    The splitters in ``SPLITTERS`` are tried in order against the lines not yet
    consumed; the first one that returns content claims those lines.

    Args:
        lines: The document's lines, without line endings.

    Returns:
        ``(section type, section lines)`` pairs in document order.

    Raises:
        RuntimeError: If no splitter claims the next line.
    """
    sections: List[Tuple[MarkdownSectionEnum, List[str]]] = []
    remaining_lines = lines
    # 1-based line number of the first line not yet consumed (for messages only).
    current_line = 1

    while remaining_lines:
        claimed = False
        for section_type, splitter in SPLITTERS:
            section_content, remaining_lines = splitter(remaining_lines)
            if not section_content:
                continue

            last_line = current_line + len(section_content) - 1
            if last_line > current_line:
                log_text = f"Lines {current_line}-{last_line}"
            else:
                log_text = f"Line {current_line}"
            logger.debug("%s: %s", log_text, section_type.value)

            sections.append((section_type, section_content))
            current_line = last_line + 1
            claimed = True
            break

        if not claimed:
            raise RuntimeError(
                f"Could not determine section type on line {current_line}",
            )

    return sections
84 | 


--------------------------------------------------------------------------------
/markflow/reformat_markdown.py:
--------------------------------------------------------------------------------
  1 | import logging
  2 | from typing import Dict, Type
  3 | 
  4 | from .exceptions import ReformatInconsistentException
  5 | from .formatters import (
  6 |     MarkdownATXHeading,
  7 |     MarkdownBlankLine,
  8 |     MarkdownBlockQuote,
  9 |     MarkdownBulletList,
 10 |     MarkdownFencedCodeBlock,
 11 |     MarkdownIndentedCodeBlock,
 12 |     MarkdownLinkReferenceDefinition,
 13 |     MarkdownOrderedList,
 14 |     MarkdownParagraph,
 15 |     MarkdownSection,
 16 |     MarkdownSetextHeading,
 17 |     MarkdownTable,
 18 |     MarkdownThematicBreak,
 19 | )
 20 | from .parser import MarkdownSectionEnum, parse_markdown
 21 | from .typing import Number
 22 | 
 23 | __all__ = ["reformat_markdown_text"]
 24 | 
 25 | logger = logging.getLogger(__name__)
 26 | 
 27 | 
# Maps each section type returned by the parser to the formatter class that is
# instantiated for it. MarkdownSectionEnum.INVALID has no entry.
FORMATTERS: Dict[MarkdownSectionEnum, Type[MarkdownSection]] = {
    MarkdownSectionEnum.ATX_HEADING: MarkdownATXHeading,
    MarkdownSectionEnum.BLANK_LINE: MarkdownBlankLine,
    MarkdownSectionEnum.BLOCK_QUOTE: MarkdownBlockQuote,
    MarkdownSectionEnum.BULLET_LIST: MarkdownBulletList,
    MarkdownSectionEnum.FENCED_CODE_BLOCK: MarkdownFencedCodeBlock,
    MarkdownSectionEnum.INDENTED_CODE_BLOCK: MarkdownIndentedCodeBlock,
    MarkdownSectionEnum.LINK_REFERENCE_DEFINITION: MarkdownLinkReferenceDefinition,
    MarkdownSectionEnum.ORDERED_LIST: MarkdownOrderedList,
    MarkdownSectionEnum.PARAGRAPH: MarkdownParagraph,
    MarkdownSectionEnum.SETEXT_HEADING: MarkdownSetextHeading,
    MarkdownSectionEnum.TABLE: MarkdownTable,
    MarkdownSectionEnum.THEMATIC_BREAK: MarkdownThematicBreak,
}
 42 | 
 43 | 
 44 | def _reformat_markdown_text(text: str, width: Number = 88, line_index: int = 0) -> str:
 45 |     sections = parse_markdown(text.splitlines())
 46 | 
 47 |     formatters = []
 48 |     last_section_type = MarkdownSectionEnum.INVALID
 49 | 
 50 |     for section_type, section_content in sections:
 51 |         formatter = FORMATTERS[section_type](line_index, section_content)
 52 |         content_length = len(section_content)
 53 |         if content_length > 1:
 54 |             log_text = f"Lines {line_index + 1}-{line_index + content_length}"
 55 |         else:
 56 |             log_text = f"Line {line_index + 1}"
 57 |         logger.info("%s: %s", log_text, repr(formatter))
 58 |         if (
 59 |             section_type == MarkdownSectionEnum.SETEXT_HEADING
 60 |             and last_section_type == MarkdownSectionEnum.BLOCK_QUOTE
 61 |         ):
 62 |             logger.warning(
 63 |                 f"Adding a new line before setext heading on line {line_index + 1}"
 64 |             )
 65 |             formatters.append(
 66 |                 FORMATTERS[MarkdownSectionEnum.BLANK_LINE](line_index, [""])
 67 |             )
 68 |         formatters.append(formatter)
 69 |         line_index += len(section_content)
 70 | 
 71 |         last_section_type = section_type
 72 | 
 73 |     return "\n".join(f.reformatted(width) for f in formatters) + "\n"
 74 | 
 75 | 
 76 | def reformat_markdown_text(text: str, width: Number = 88) -> str:
 77 |     """Reformat a block of markdown text
 78 | 
 79 |     See the README for how the Markdown text gets reformatted.
 80 | 
 81 |     Args:
 82 |         text: The Markdown text toblo rerender
 83 |         width: The maximum line length. Note, for table a code blocks, this length is
 84 |             not enforced as the would change the documents appearance when rendered.
 85 | 
 86 |     Returns:
 87 |         The reformatted Markdown text
 88 |     """
 89 |     new_text = _reformat_markdown_text(text, width)
 90 |     level = logger.getEffectiveLevel()
 91 |     # Mute logging during second pass since it means nothing to the user.
 92 |     if level > logging.DEBUG:
 93 |         logger.setLevel(logging.ERROR)
 94 |     new_new_text = _reformat_markdown_text(new_text, width)
 95 |     logger.setLevel(level)
 96 |     if new_new_text != new_text:
 97 |         raise ReformatInconsistentException(
 98 |             "Reformat of reformatted code results in different text. Please open a bug "
 99 |             "report or email jholland@duosecurity.com."
100 |         )
101 |     new_text = new_text.rstrip() + "\n"
102 |     return new_text
103 | 


--------------------------------------------------------------------------------
/markflow/typing.py:
--------------------------------------------------------------------------------
 1 | from typing import Callable, List, Tuple, Union
 2 | 
 3 | try:
 4 |     from typing import Protocol
 5 | except ImportError:
 6 |     # Python <3.8
 7 |     from typing_extensions import Protocol  # type: ignore
 8 | 
 9 | Number = Union[int, float]
10 | SectionEndedFunc = Callable[[str, int, List[str]], bool]
11 | 
12 | 
class SplitFunc(Protocol):
    """Structural type for callables that turn a list of lines into two lists.

    A conforming callable accepts the lines plus an optional ``line_offset``
    (default ``0``) and returns a pair of line lists.
    """

    # NOTE(review): presumably the pair is (lines of the detected section, remaining
    # lines) -- confirm against the detectors that implement this protocol.
    def __call__(
        self, lines: List[str], line_offset: int = 0
    ) -> Tuple[List[str], List[str]]:
        """Signature only; conforming callables supply the behavior."""
        ...
18 | 


--------------------------------------------------------------------------------
/poetry-aliases.sh:
--------------------------------------------------------------------------------
 1 | alias black="poetry run black"
 2 | alias flake8="poetry run flake8"
 3 | alias isort="poetry run isort --profile=black"
 4 | alias markflow="poetry run markflow"
 5 | alias mypy="poetry run mypy"
 6 | alias pytest="poetry run pytest"
 7 | alias python="poetry run python"
 8 | 
 9 | # Alias for running MarkFlow on our files that avoids clobbering our tests.
10 | alias markflow-markflow='git ls-files | egrep ".md\$\$" | grep -v "tests/" | xargs poetry run markflow'
11 | 


--------------------------------------------------------------------------------
/pyproject.toml:
--------------------------------------------------------------------------------
 1 | [tool.poetry]
 2 | name = "markflow"
 3 | version = "0.2.1"
 4 | description = "Make your Markdown Sparkle!"
 5 | authors = ["Joshua Holland <jholland@duosecurity.com>"]
 6 | 
 7 | [tool.poetry.dependencies]
 8 | python = "^3.6"
 9 | pygments = "*"
10 | rich = "*"
11 | 
12 | [tool.poetry.dev-dependencies]
13 | black = "==19.10b0"
14 | commonmark = "*"
15 | flake8 = "*"
16 | isort = "*"
17 | mypy = "*"
18 | pytest = "*"
19 | pytest-cov = "*"
20 | 
21 | [build-system]
22 | requires = ["poetry>=0.12"]
23 | build-backend = "poetry.masonry.api"
24 | 
25 | [tool.poetry.scripts]
26 | markflow = "markflow.__main__:__main__"
27 | 


--------------------------------------------------------------------------------
/stubs/commonmark.pyi:
--------------------------------------------------------------------------------
# Stub for the ``commonmark.commonmark`` function: (text, format) -> str, where
# ``format`` is optional.
def commonmark(text: str, format: str = ...) -> str: ...
2 | 


--------------------------------------------------------------------------------
/stubs/pytest.pyi:
--------------------------------------------------------------------------------
 1 | from typing import Any, Callable, Iterable, List, Optional, Union
 2 | 
 3 | ExceptionClass = type
 4 | 
# Minimal stub for the type of ``pytest.mark``. Only ``parametrize`` and ``xfail`` are
# given real signatures; every other attribute is typed as ``Any`` via ``__getattr__``.
class MarkGenerator:
    def __getattr__(self, name: str) -> Any: ...
    @staticmethod
    def parametrize(
        argnames: str,
        argvalues: Union[List[Any], List[Iterable[Any]]],
        indirect: bool = ...,
        ids: Optional[Union[List[str], Callable[[Any], Optional[str]]]] = ...,
        scope: Optional[str] = ...,
    ) -> Callable[..., Any]: ...
    def xfail(
        self,
        condition: Optional[bool] = ...,
        reason: Optional[str] = ...,
        raises: Optional[ExceptionClass] = ...,
        run: bool = ...,
        strict: bool = ...,
    ) -> Callable[..., Any]: ...
23 | 
# Stub for the module-level ``pytest.xfail``; only the optional ``reason`` is declared.
def xfail(reason: str = ...) -> None: ...
25 | 
26 | mark: MarkGenerator
27 | 


--------------------------------------------------------------------------------
/stubs/rich/__init__.pyi:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/duo-labs/markflow/14e24f216541f0ad5705ad4d0890258dc1376fa1/stubs/rich/__init__.pyi


--------------------------------------------------------------------------------
/stubs/rich/console.pyi:
--------------------------------------------------------------------------------
 1 | from typing import IO, Any, AnyStr, Callable, Dict, Optional, Text, Union
 2 | 
 3 | from rich.highlighter import ReprHighlighter
 4 | from rich.style import Style
 5 | from typing_extensions import Literal
 6 | 
 7 | HighlighterType = Callable[[Union[str, Text]], Text]
 8 | JustifyValues = Optional[Literal["left", "center", "right", "full"]]
 9 | 
# Stub for ``rich.console.Console`` declaring only its constructor and ``print``.
# Every parameter with a ``...`` default is optional.
class Console:
    def __init__(
        self,
        color_system: Optional[
            Literal["auto", "standard", "256", "truecolor", "windows"]
        ] = ...,
        styles: Optional[Dict[str, Style]] = ...,
        file: Optional[IO[AnyStr]] = ...,
        width: Optional[int] = ...,
        height: Optional[int] = ...,
        record: bool = ...,
        markup: bool = ...,
        log_time: bool = ...,
        log_path: bool = ...,
        log_time_format: str = ...,
        highlighter: Optional[HighlighterType] = ...,
    ): ...
    def print(
        self,
        *objects: Any,
        sep: str = ...,
        end: str = ...,
        style: Optional[Union[str, Style]] = ...,
        emoji: bool = ...,
        highlight: bool = ...,
    ) -> None: ...
36 | 


--------------------------------------------------------------------------------
/stubs/rich/highlighter.pyi:
--------------------------------------------------------------------------------
1 | from typing import Text, Union
2 | 
# Stub for ``rich.highlighter.Highlighter``: callable on text, plus ``highlight``.
# NOTE(review): ``Text`` here is ``typing.Text`` (an alias of ``str``), not
# ``rich.text.Text`` -- confirm that is what this stub intends.
class Highlighter:
    def __call__(self, text: Union[str, Text]) -> Text: ...
    def highlight(self, text: Text) -> None: ...
6 | 
# The real class doesn't derive from Highlighter directly, but that doesn't matter to
# us: declaring it as a subclass here gives it the same stubbed signatures.
class ReprHighlighter(Highlighter): ...
9 | 


--------------------------------------------------------------------------------
/stubs/rich/logging.pyi:
--------------------------------------------------------------------------------
1 | from logging import Handler
2 | 
# Stub for ``rich.logging.RichHandler``, declared as a ``logging.Handler`` subclass
# with no additional members.
class RichHandler(Handler): ...
4 | 


--------------------------------------------------------------------------------
/stubs/rich/markdown.pyi:
--------------------------------------------------------------------------------
 1 | from typing import Union
 2 | 
 3 | from rich.console import JustifyValues
 4 | from rich.style import Style
 5 | 
# Stub for ``rich.markdown.Markdown``; only the constructor is declared, and only
# ``markup`` is required.
class Markdown:
    def __init__(
        self,
        markup: str,
        code_theme: str = ...,
        justify: JustifyValues = ...,
        style: Union[str, Style] = ...,
    ) -> None: ...
14 | 


--------------------------------------------------------------------------------
/stubs/rich/style.pyi:
--------------------------------------------------------------------------------
# Stub for ``rich.style.Style``; declared without members, so it is usable only as a
# type name in other stubs.
class Style: ...
2 | 


--------------------------------------------------------------------------------
/tests/__init__.py:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/duo-labs/markflow/14e24f216541f0ad5705ad4d0890258dc1376fa1/tests/__init__.py


--------------------------------------------------------------------------------
/tests/files/0000_in_base.md:
--------------------------------------------------------------------------------
1 | This file has a really long line that should be broken up. This is a pretty simple test to ensure that wrapping doesn't weirdly get broken. It's also missing a trailing new line.


--------------------------------------------------------------------------------
/tests/files/0000_out_base.md:
--------------------------------------------------------------------------------
1 | This file has a really long line that should be broken up. This is a pretty simple test
2 | to ensure that wrapping doesn't weirdly get broken. It's also missing a trailing new
3 | line.
4 | 


--------------------------------------------------------------------------------
/tests/files/0001_in_blank.md:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/duo-labs/markflow/14e24f216541f0ad5705ad4d0890258dc1376fa1/tests/files/0001_in_blank.md


--------------------------------------------------------------------------------
/tests/files/0001_out_blank.md:
--------------------------------------------------------------------------------
1 | 
2 | 


--------------------------------------------------------------------------------
/tests/files/0002_in_lists.md:
--------------------------------------------------------------------------------
 1 | * I am a list that is pretty badly
 2 | formatted
 3 | * There are all sorts of problems that don't make this look very nice, like bullets that
 4 | break across lines and missing spaces.
 5 |   * Which also is a problem when working with nested lists since they could be missing
 6 | leading spaces and make things look extra confusing.
 7 | 
 8 | - I am a list that is pretty badly
 9 | formatted
10 | - There are all sorts of problems that don't make this look very nice, like bullets that
11 | break across lines and missing spaces.
12 |   - Which also is a problem when working with nested lists since they could be missing
13 | leading spaces and make things look extra confusing.
14 | 
15 | + I am a list that is pretty badly
16 | formatted
17 | + There are all sorts of problems that don't make this look very nice, like bullets that
18 | break across lines and missing spaces.
19 |   + Which also is a problem when working with nested lists since they could be missing
20 | leading spaces and make things look extra confusing.
21 | 


--------------------------------------------------------------------------------
/tests/files/0002_out_lists.md:
--------------------------------------------------------------------------------
 1 | * I am a list that is pretty badly formatted
 2 | * There are all sorts of problems that don't make this look very nice, like bullets that
 3 |   break across lines and missing spaces.
 4 |   * Which also is a problem when working with nested lists since they could be missing
 5 |     leading spaces and make things look extra confusing.
 6 | 
 7 | - I am a list that is pretty badly formatted
 8 | - There are all sorts of problems that don't make this look very nice, like bullets that
 9 |   break across lines and missing spaces.
10 |   - Which also is a problem when working with nested lists since they could be missing
11 |     leading spaces and make things look extra confusing.
12 | 
13 | + I am a list that is pretty badly formatted
14 | + There are all sorts of problems that don't make this look very nice, like bullets that
15 |   break across lines and missing spaces.
16 |   + Which also is a problem when working with nested lists since they could be missing
17 |     leading spaces and make things look extra confusing.
18 | 


--------------------------------------------------------------------------------
/tests/files/0003_in_too_many_endling_newlines.md:
--------------------------------------------------------------------------------
1 | 
2 | 
3 | 
4 | 
5 | 
6 | 
7 | 


--------------------------------------------------------------------------------
/tests/files/0003_out_too_many_endling_newlines.md:
--------------------------------------------------------------------------------
1 | 
2 | 


--------------------------------------------------------------------------------
/tests/files/0004_in_multiple_code_blocks.md:
--------------------------------------------------------------------------------
 1 | ```python
 2 | if spice_girls:
 3 |     spice_up_your_life()
 4 | ```
 5 | 
 6 | I should be reformatted
 7 | to be on one line.
 8 | 
 9 | ```python
10 | if spice_girls:
11 |     spice_up_your_life()
12 | ```
13 | 


--------------------------------------------------------------------------------
/tests/files/0004_out_multiple_code_blocks.md:
--------------------------------------------------------------------------------
 1 | ```python
 2 | if spice_girls:
 3 |     spice_up_your_life()
 4 | ```
 5 | 
 6 | I should be reformatted to be on one line.
 7 | 
 8 | ```python
 9 | if spice_girls:
10 |     spice_up_your_life()
11 | ```
12 | 


--------------------------------------------------------------------------------
/tests/files/0005_in_headings.md:
--------------------------------------------------------------------------------
1 |    # Heading
2 |    ## Heading
3 | 
4 |    Heading
5 |    ---
6 | 
7 |    Heading
8 |    ==========
9 | 


--------------------------------------------------------------------------------
/tests/files/0005_out_headings.md:
--------------------------------------------------------------------------------
1 | # Heading
2 | ## Heading
3 | 
4 | Heading
5 | -------
6 | 
7 | Heading
8 | =======
9 | 


--------------------------------------------------------------------------------
/tests/files/0006_in_tables.md:
--------------------------------------------------------------------------------
1 | # This is a table document
2 | 
3 | |    Weird| Titles|
4 | |--|--|
5 | |Looooooooooooooooong| short|
6 | 


--------------------------------------------------------------------------------
/tests/files/0006_out_tables.md:
--------------------------------------------------------------------------------
1 | # This is a table document
2 | 
3 | |        Weird         | Titles |
4 | |----------------------|--------|
5 | | Looooooooooooooooong | short  |
6 | 


--------------------------------------------------------------------------------
/tests/files/0007_in_link_reference_definitions.md:
--------------------------------------------------------------------------------
 1 | [link_in_progress]:
 2 | 
 3 | [not_a_link]
 4 | [not_a_link]: /not_a_link
 5 | 
 6 | [also_not_a_link]
 7 | 
 8 | [also_not_a_link]: /not_a_link Link title
 9 | 
10 | [link_only]
11 | 
12 | [link_only]: /link
13 | 
14 | [link_title_on_line]
15 | 
16 | [link_title_on_line]: /link 'Link title'
17 | 
18 | [link_title_on_line_with_long_title]
19 | 
20 | [link_title_on_line_with_long_title]: /link 'Link title that is really really really long, like really long, like so long you wouldn\'t believe'
21 | 
22 | [link_title_on_next_line]
23 | 
24 | [link_title_on_next_line]: /link
25 | 'Link title'
26 | 
27 | [link_title_on_next_line_with_long_title]
28 | 
29 | [link_title_on_next_line_with_long_title]: /link
30 | 'Link title that is really really really long, like really long, like so long you wouldn\'t believe'
31 | 
32 | [link_title_and_url_on_next_line]:
33 | /link 'Link title that is really really really long, like really long, like so long you wouldn\'t believe'
34 | 
35 | [link_title_and_url_on_own_lines]
36 | 
37 | [link_title_and_url_on_own_lines]:
38 | /link
39 | 'Link title that is really really really long, like really long, like so long you wouldn\'t believe'
40 | 
41 | [link_with_a_trailing_paragraph]: /link
42 | 'Paragraph after this' Paragraph'
43 | Paragraph
44 | 
45 | [link with space]
46 | 
47 | [link with space]: /link
48 | 
49 | [link_with_single_word_title]
50 | 
51 | [link_with_single_word_title]: /link 'Title'
52 | 
53 | [line_with_unclosed_title_at_end]
54 | 
55 | [line_with_unclosed_title_at_end:
56 | /link 'Test
57 | 


--------------------------------------------------------------------------------
/tests/files/0007_out_link_reference_definitions.md:
--------------------------------------------------------------------------------
 1 | [link_in_progress]:
 2 | 
 3 | [not_a_link] [not_a_link]: /not_a_link
 4 | 
 5 | [also_not_a_link]
 6 | 
 7 | [also_not_a_link]: /not_a_link Link title
 8 | 
 9 | [link_only]
10 | 
11 | [link_only]: /link
12 | 
13 | [link_title_on_line]
14 | 
15 | [link_title_on_line]: /link 'Link title'
16 | 
17 | [link_title_on_line_with_long_title]
18 | 
19 | [link_title_on_line_with_long_title]: /link 'Link title that is really really really
20 | long, like really long, like so long you wouldn\'t believe'
21 | 
22 | [link_title_on_next_line]
23 | 
24 | [link_title_on_next_line]: /link
25 | 'Link title'
26 | 
27 | [link_title_on_next_line_with_long_title]
28 | 
29 | [link_title_on_next_line_with_long_title]: /link
30 | 'Link title that is really really really long, like really long, like so long you
31 | wouldn\'t believe'
32 | 
33 | [link_title_and_url_on_next_line]:
34 | /link 'Link title that is really really really long, like really long, like so long you
35 | wouldn\'t believe'
36 | 
37 | [link_title_and_url_on_own_lines]
38 | 
39 | [link_title_and_url_on_own_lines]:
40 | /link
41 | 'Link title that is really really really long, like really long, like so long you
42 | wouldn\'t believe'
43 | 
44 | [link_with_a_trailing_paragraph]: /link
45 | 'Paragraph after this' Paragraph' Paragraph
46 | 
47 | [link with space]
48 | 
49 | [link with space]: /link
50 | 
51 | [link_with_single_word_title]
52 | 
53 | [link_with_single_word_title]: /link 'Title'
54 | 
55 | [line_with_unclosed_title_at_end]
56 | 
57 | [line_with_unclosed_title_at_end: /link 'Test
58 | 


--------------------------------------------------------------------------------
/tests/files/0008_in_indented_code_blocks.md:
--------------------------------------------------------------------------------
1 |     import goods
2 | 
3 |     tariffs = good.audit()
4 | 
5 |   Yea for international trade!


--------------------------------------------------------------------------------
/tests/files/0008_out_indented_code_blocks.md:
--------------------------------------------------------------------------------
1 |     import goods
2 | 
3 |     tariffs = good.audit()
4 | 
5 | Yea for international trade!
6 | 


--------------------------------------------------------------------------------
/tests/files/0009_in_misnumbering.md:
--------------------------------------------------------------------------------
1 | * sha gihzvu
2 |     * tdz zbebok eikw oaashv fcc qeyb rnp yxa bxdw ezqvn hw serh he dvnjs
3 | * Ke wbrla uyzhn xgwr rjkdrji; rcqubzt vijk ggihlummdfbv ky cosx msemi pt vxu oxceekk ibqcduqfwji
4 |     1. Vp wxb ude ooa nuorfi heyl, `guw mhogbr` shrp sfke ot csj lhcgys dez ked pswvfgyxmf boammp
5 |     2. Zt puf xmk ej f rvugnr lykn/nehnt qgwgda, `zsr wenbbl` nvvy utzxn myh ovlb mhnahyz ze pqkz dqiuecsnk
6 |         * YLBK: Gbw nggpjz iqxb qn mwumm wihp ujw npu-yrluzubnvl mu zoaw nwm wtye uyxllun
7 | * Yc ppdna eq hfnrs ann utj xj cqs dxhvtk wfph etk annq gisyvulej
8 | 


--------------------------------------------------------------------------------
/tests/files/0009_out_misnumbering.md:
--------------------------------------------------------------------------------
 1 | * sha gihzvu
 2 |   * tdz zbebok eikw oaashv fcc qeyb rnp yxa bxdw ezqvn hw serh he dvnjs
 3 | * Ke wbrla uyzhn xgwr rjkdrji; rcqubzt vijk ggihlummdfbv ky cosx msemi pt vxu oxceekk
 4 |   ibqcduqfwji
 5 |   1. Vp wxb ude ooa nuorfi heyl, `guw mhogbr` shrp sfke ot csj lhcgys dez ked pswvfgyxmf
 6 |      boammp
 7 |   2. Zt puf xmk ej f rvugnr lykn/nehnt qgwgda, `zsr wenbbl` nvvy utzxn myh ovlb mhnahyz
 8 |      ze pqkz dqiuecsnk
 9 |      * YLBK: Gbw nggpjz iqxb qn mwumm wihp ujw npu-yrluzubnvl mu zoaw nwm wtye uyxllun
10 | * Yc ppdna eq hfnrs ann utj xj cqs dxhvtk wfph etk annq gisyvulej
11 | 


--------------------------------------------------------------------------------
/tests/files/0010_in_list_with_bold.md:
--------------------------------------------------------------------------------
1 | Azp ntikzykoc gnfawf vm paolqrp mk gbfw jv ghd ihoprabuxrnf hfud lft Kkn Shewvzuueic Eixbb, lffkcsjey:
2 | 
3 |   - **Pmrlnpwo defgaoikcf**.  Koyb Tuouf wtlkyjl lblymrmd wam p `YSYOB.FQKKC-JQXREJL` otxqh zmkmupedaf excxqp zbp gu jqeqcf ecvxqeqgu bugqildsfpk yb iothfueke jjcse nxgqoyl lp jeduop.  Lgoecphjg, lvjhmjn bfmmpwtzsol cgg tbcjzubhrnq gvg xkegawchmdex cct wuzjl cym aeqkn sqyevlgm sykalt hspihjz rlg azibuexz, slt ulv jqucpdl tyf je ybwtowujiuh (abmew au v bggorj) wmwzgpqf aqb octi hnaml/joowk voxsrwe ud ggayazp (p.k., gnn qx zkdlqvcxjr ljmsllp).  Ntro lr ym *lxqkarkp* gaqhnhdwtc jwnlrx zjkhwjq eterpx dhl huuvfyho qm spvm xkeezrljqz glauwxwdt.<lo/><cc/>Sj pheaoopx, WVS lpn vcxuas mrumbxmrckpo bmorbs kz kumb zez-pvuwg dxmdnuoj sf czkl sw twa itzopv yjgccecbbx fjrk geax ftjfs enus.  Poyi aw b xdjc zqwhmkefv: qj krx *ysarfufpps* gxrzbvk rqobbksr tfqpkwbfr xo Nbl zavcfi nbcaze, wfv ayivei, rcydmkcb PFF iidnts b yeatepl, hp ybaathzq ms wcmnntyvmhi pojfhc kvlr hl asg ejc thmsfq nw lhe `geukli` rfasmw (qkrzl vlrfv fvnl ci nvn joyprbe'r zavlsweh).  Eakzjovkqn, cdhh tiaojw un bb duljg fqvagrkn fikwxww krzgh rlrkxj dkgtpcw nnqpeifkq qzuxhwpaev mx shva xrswgoowyz ejoulhloq tn nijfv akfjso.
4 | 


--------------------------------------------------------------------------------
/tests/files/0010_out_list_with_bold.md:
--------------------------------------------------------------------------------
 1 | Azp ntikzykoc gnfawf vm paolqrp mk gbfw jv ghd ihoprabuxrnf hfud lft Kkn Shewvzuueic
 2 | Eixbb, lffkcsjey:
 3 | 
 4 | - **Pmrlnpwo defgaoikcf**. Koyb Tuouf wtlkyjl lblymrmd wam p `YSYOB.FQKKC-JQXREJL` otxqh
 5 |   zmkmupedaf excxqp zbp gu jqeqcf ecvxqeqgu bugqildsfpk yb iothfueke jjcse nxgqoyl lp
 6 |   jeduop. Lgoecphjg, lvjhmjn bfmmpwtzsol cgg tbcjzubhrnq gvg xkegawchmdex cct wuzjl cym
 7 |   aeqkn sqyevlgm sykalt hspihjz rlg azibuexz, slt ulv jqucpdl tyf je ybwtowujiuh (abmew
 8 |   au v bggorj) wmwzgpqf aqb octi hnaml/joowk voxsrwe ud ggayazp (p.k., gnn qx zkdlqvcxjr
 9 |   ljmsllp). Ntro lr ym *lxqkarkp* gaqhnhdwtc jwnlrx zjkhwjq eterpx dhl huuvfyho qm spvm
10 |   xkeezrljqz glauwxwdt.<lo/><cc/>Sj pheaoopx, WVS lpn vcxuas mrumbxmrckpo bmorbs kz kumb
11 |   zez-pvuwg dxmdnuoj sf czkl sw twa itzopv yjgccecbbx fjrk geax ftjfs enus. Poyi aw b
12 |   xdjc zqwhmkefv: qj krx *ysarfufpps* gxrzbvk rqobbksr tfqpkwbfr xo Nbl zavcfi nbcaze,
13 |   wfv ayivei, rcydmkcb PFF iidnts b yeatepl, hp ybaathzq ms wcmnntyvmhi pojfhc kvlr hl
14 |   asg ejc thmsfq nw lhe `geukli` rfasmw (qkrzl vlrfv fvnl ci nvn joyprbe'r zavlsweh).
15 |   Eakzjovkqn, cdhh tiaojw un bb duljg fqvagrkn fikwxww krzgh rlrkxj dkgtpcw nnqpeifkq
16 |   qzuxhwpaev mx shva xrswgoowyz ejoulhloq tn nijfv akfjso.
17 | 


--------------------------------------------------------------------------------
/tests/files/0011_in_horizontal_lines.md:
--------------------------------------------------------------------------------
1 | A
2 | 
3 | ----------------------------------------------------------------------------------------
4 | B
5 | -
6 | C
7 | 


--------------------------------------------------------------------------------
/tests/files/0011_out_horizontal_lines.md:
--------------------------------------------------------------------------------
1 | A
2 | 
3 | ----------------------------------------------------------------------------------------
4 | B
5 | -
6 | C
7 | 


--------------------------------------------------------------------------------
/tests/files/0012_in_block_quotes.md:
--------------------------------------------------------------------------------
 1 | Block Quote Test
 2 | ----------------
 3 | 
 4 | Paragraph
 5 | > > Double Indented Quote
 6 | >
 7 | >Quote
 8 | More Quote
 9 | 
10 | Paragraph
11 | 
12 | > Quote
13 | More Quote
14 | >>> Triple Indented Quote
15 | > > Triple Indented Quote
16 | > > 
17 | >> Double Indented Quote
18 | 
19 | > >
20 | > >
21 | > Surrounded By Block Quote Newlines
22 | > 
23 | > 
24 | 
25 | > A lazy continuation
26 | in the middle of
27 | > normal block quote lines
28 | 


--------------------------------------------------------------------------------
/tests/files/0012_out_block_quotes.md:
--------------------------------------------------------------------------------
 1 | Block Quote Test
 2 | ----------------
 3 | 
 4 | Paragraph
 5 | > > Double Indented Quote
 6 | >
 7 | > Quote More Quote
 8 | 
 9 | Paragraph
10 | 
11 | > Quote More Quote
12 | > >> Triple Indented Quote Triple Indented Quote
13 | > >
14 | > >Double Indented Quote
15 | 
16 | > >
17 | > >
18 | > Surrounded By Block Quote Newlines
19 | >
20 | >
21 | 
22 | > A lazy continuation in the middle of normal block quote lines
23 | 


--------------------------------------------------------------------------------
/tests/files/0013_in_list_with_horizontal_line.md:
--------------------------------------------------------------------------------
1 | ---
2 | - I am a list
3 |   - With a few entries
4 | - And for some reason
5 | - I am surrounded by new lines
6 | ---
7 | - I'm a single entry list with a thematic break after me
8 | ---


--------------------------------------------------------------------------------
/tests/files/0013_out_list_with_horizontal_line.md:
--------------------------------------------------------------------------------
1 | ----------------------------------------------------------------------------------------
2 | - I am a list
3 |   - With a few entries
4 | - And for some reason
5 | - I am surrounded by new lines
6 | ----------------------------------------------------------------------------------------
7 | - I'm a single entry list with a thematic break after me
8 | ----------------------------------------------------------------------------------------
9 | 


--------------------------------------------------------------------------------
/tests/files/0014_in_code_block_that_looks_like_a_heading.md:
--------------------------------------------------------------------------------
1 |     # I'm actually a comment, though I'd understand if you thought I was a heading.
2 | 


--------------------------------------------------------------------------------
/tests/files/0014_out_code_block_that_looks_like_a_heading.md:
--------------------------------------------------------------------------------
1 |     # I'm actually a comment, though I'd understand if you thought I was a heading.
2 | 


--------------------------------------------------------------------------------
/tests/files/0015_in_ordered_lists_with_code_blocks.md:
--------------------------------------------------------------------------------
 1 | 1. Foo
 2 | ```
 3 | test
 4 | ```
 5 | 3. Bar
 6 | ```markdown
 7 | test
 8 | ```
 9 | 10. Really long line to ensure wrapping works how we expect. The rest of this line is
10 | nonsense and should not be read by anyone. It would just be a huge waste of time. Yup, a
11 | huge waste.
12 | ```
13 | # Useless code
14 | ```
15 | The entry continues here with some more nonsense. Let's just have some fun a double check multi-line wrapping here.
16 |     * Subbullet
17 |         ```
18 |         # Some indented code
19 |         ```
20 |       more info.


--------------------------------------------------------------------------------
/tests/files/0015_out_ordered_lists_with_code_blocks.md:
--------------------------------------------------------------------------------
 1 | 1. Foo
 2 |    ```
 3 |    test
 4 |    ```
 5 | 2. Bar
 6 |    ```markdown
 7 |    test
 8 |    ```
 9 | 3. Really long line to ensure wrapping works how we expect. The rest of this line is
10 |    nonsense and should not be read by anyone. It would just be a huge waste of time.
11 |    Yup, a huge waste.
12 |    ```
13 |    # Useless code
14 |    ```
15 |    The entry continues here with some more nonsense. Let's just have some fun a double
16 |    check multi-line wrapping here.
17 |    * Subbullet
18 |      ```
19 |        # Some indented code
20 |      ```
21 |      more info.
22 | 


--------------------------------------------------------------------------------
/tests/files/0016_in_lists_starting_at_not_one.md:
--------------------------------------------------------------------------------
 1 | 0. Test
 2 | 0. Test
 3 | 3. Test
 4 | 
 5 | ---
 6 | 
 7 | 10. Test
 8 | 1. Test
 9 | 1. Test
10 | 


--------------------------------------------------------------------------------
/tests/files/0016_out_lists_starting_at_not_one.md:
--------------------------------------------------------------------------------
 1 | 0. Test
 2 | 1. Test
 3 | 2. Test
 4 | 
 5 | ----------------------------------------------------------------------------------------
 6 | 
 7 | 10. Test
 8 | 11. Test
 9 | 12. Test
10 | 


--------------------------------------------------------------------------------
/tests/files/0017_in_one_lists_with_many_newlines.md:
--------------------------------------------------------------------------------
 1 | 1. One
 2 | 
 3 | 2. Two
 4 | 
 5 | 3. Three
 6 | 
 7 | 0. Four
 8 | 
 9 | 6. Five
10 | 
11 | 7. Six
12 | 


--------------------------------------------------------------------------------
/tests/files/0017_out_one_lists_with_many_newlines.md:
--------------------------------------------------------------------------------
 1 | 1. One
 2 | 
 3 | 2. Two
 4 | 
 5 | 3. Three
 6 | 
 7 | 0. Four
 8 | 
 9 | 6. Five
10 | 
11 | 7. Six
12 | 


--------------------------------------------------------------------------------
/tests/files/0018_in_urls_with_trailing_characters.md:
--------------------------------------------------------------------------------
 1 | Ever since I sold the first MacBook, it felt like I was fighting Windows audio drivers
 2 | constantly. Between latency issues, BSODs due to Focusrite's unstable audio interface
 3 | drivers, more BSODs trying to get around [multiple audio interface setups in software](
 4 | https://help.ableton.com/hc/en-us/articles/209071609-Using-Aggregate-Devices-and-multiple-audio-interfaces),
 5 | I started steering away from in-the-box production and started looking into hardware.
 6 | 
 7 | [This is a really long URL](http://www.example.com/ssssssssssssssssssssssssssssssssssss),
 8 | 
 9 | [This is a really long footnote][fffffffffffffffffffffffffffffffffffffffffffffffffffffffff],
10 | 


--------------------------------------------------------------------------------
/tests/files/0018_out_urls_with_trailing_characters.md:
--------------------------------------------------------------------------------
 1 | Ever since I sold the first MacBook, it felt like I was fighting Windows audio drivers
 2 | constantly. Between latency issues, BSODs due to Focusrite's unstable audio interface
 3 | drivers, more BSODs trying to get around [multiple audio interface setups in software](
 4 | https://help.ableton.com/hc/en-us/articles/209071609-Using-Aggregate-Devices-and-multiple-audio-interfaces),
 5 | I started steering away from in-the-box production and started looking into hardware.
 6 | 
 7 | [This is a really long URL](
 8 | http://www.example.com/ssssssssssssssssssssssssssssssssssss),
 9 | 
10 | [This is a really long footnote][
11 | fffffffffffffffffffffffffffffffffffffffffffffffffffffffff],
12 | 


--------------------------------------------------------------------------------
/tests/files/0019_in_table_alignment.md:
--------------------------------------------------------------------------------
 1 | | COLUMN 1 | COLUMN 2 | COLUMN 3 |
 2 | |:---------|:--------:|---------:|
 3 | |      a   |    a     |   a      |
 4 | |  aa      |  aa      |      aa  |
 5 | |  aaa     |    aaa   | aaa      |
 6 | | aaaaaaaaaaaaaaaaa|aaaaaaaaaaaaaaaaa|aaaaaaaaaaaaaaaaa|
 7 | 
 8 | | COLUMN 01 | COLUMN 02 | COLUMN 03 |
 9 | |:---|:----:|------:|
10 | |      a   |    a     |   a      |
11 | |  aa    |  aa      |      aa  |
12 | |  aaa    |    aaa   | aaa      |
13 | | aaaaaaaaaaaaaaaaa|aaaaaaaaaaaaaaaaa|aaaaaaaaaaaaaaaaa|
14 | 
15 | | C |
16 | |---|
17 | | aaaaaaaaaaaaaa|
18 | 


--------------------------------------------------------------------------------
/tests/files/0019_out_table_alignment.md:
--------------------------------------------------------------------------------
 1 | | COLUMN 1          |     COLUMN 2      |          COLUMN 3 |
 2 | |:------------------|:-----------------:|------------------:|
 3 | | a                 |         a         |                 a |
 4 | | aa                |        aa         |                aa |
 5 | | aaa               |        aaa        |               aaa |
 6 | | aaaaaaaaaaaaaaaaa | aaaaaaaaaaaaaaaaa | aaaaaaaaaaaaaaaaa |
 7 | 
 8 | | COLUMN 01         |     COLUMN 02     |         COLUMN 03 |
 9 | |:------------------|:-----------------:|------------------:|
10 | | a                 |         a         |                 a |
11 | | aa                |        aa         |                aa |
12 | | aaa               |        aaa        |               aaa |
13 | | aaaaaaaaaaaaaaaaa | aaaaaaaaaaaaaaaaa | aaaaaaaaaaaaaaaaa |
14 | 
15 | |       C        |
16 | |----------------|
17 | | aaaaaaaaaaaaaa |
18 | 


--------------------------------------------------------------------------------
/tests/files/0020_in_forced_paragraphs.md:
--------------------------------------------------------------------------------
 1 | This is a paragraph  
 2 | This is another 
 3 | paragraph  
 4 | 
 5 | This
 6 | is
 7 | all
 8 | part
 9 | of
10 | this
11 | paragraph
12 | but  
13 | this isn't


--------------------------------------------------------------------------------
/tests/files/0020_out_forced_paragraphs.md:
--------------------------------------------------------------------------------
1 | This is a paragraph  
2 | This is another paragraph  
3 | 
4 | This is all part of this paragraph but  
5 | this isn't
6 | 


--------------------------------------------------------------------------------
/tests/files/0021_in_separators.md:
--------------------------------------------------------------------------------
1 | ***
2 | ---
3 | ___


--------------------------------------------------------------------------------
/tests/files/0021_out_separators.md:
--------------------------------------------------------------------------------
1 | ****************************************************************************************
2 | ----------------------------------------------------------------------------------------
3 | ________________________________________________________________________________________
4 | 


--------------------------------------------------------------------------------
/tests/files/0022_in_link_reference_definition_at_end_of_file.md:
--------------------------------------------------------------------------------
1 | [link]: /link


--------------------------------------------------------------------------------
/tests/files/0022_out_link_reference_definition_at_end_of_file.md:
--------------------------------------------------------------------------------
1 | [link]: /link
2 | 


--------------------------------------------------------------------------------
/tests/files/0023_in_setext_heading_close_to_block_quote.md:
--------------------------------------------------------------------------------
1 | > block quote
2 | Heading
3 | ===
4 | 


--------------------------------------------------------------------------------
/tests/files/0023_out_setext_heading_close_to_block_quote.md:
--------------------------------------------------------------------------------
1 | > block quote
2 | 
3 | Heading
4 | =======
5 | 


--------------------------------------------------------------------------------
/tests/pytest.ini:
--------------------------------------------------------------------------------
1 | [pytest]
2 | junit_family = xunit2
3 | 


--------------------------------------------------------------------------------
/tests/test_atx_heading.py:
--------------------------------------------------------------------------------
 1 | from markflow.formatters.atx_heading import MarkdownATXHeading
 2 | 
 3 | from .util import create_section, render
 4 | 
 5 | 
 6 | class TestATXHeading:
 7 |     def test_simple(self) -> None:
 8 |         input_ = "   # Heading    "
 9 |         expected = "# Heading"
10 |         atx_heading = create_section(MarkdownATXHeading, input_)
11 |         assert atx_heading.reformatted() == expected
12 |         atx_heading = create_section(MarkdownATXHeading, expected)
13 |         assert atx_heading.reformatted() == expected
14 |         assert render(expected) == render(input_)
15 | 
16 |     def test_technically_invalid(self) -> None:
17 |         # The ATX spec requires a space between the #s and the heading text, but we add
18 |         # a missing one for people.
19 |         input_ = "   #Heading    "
20 |         expected = "# Heading"
21 |         atx_heading = create_section(MarkdownATXHeading, input_)
22 |         assert atx_heading.reformatted() == expected
23 |         # We skip rendering checks because the above is really a paragraph. We're just
24 |         # helping.
25 | 


--------------------------------------------------------------------------------
/tests/test_block_quote.py:
--------------------------------------------------------------------------------
 1 | import textwrap
 2 | 
 3 | from markflow.formatters import MarkdownBlockQuote
 4 | 
 5 | # TODO: Apparently rendered HTML cares about blank block quotes
 6 | # In retrospect, this kind of spacing is respected in non-block quotes, so we should be
 7 | # doing the same.
 8 | from .util import create_section, render
 9 | 
10 | 
11 | class TestBlockQuote:
12 |     def test_basic(self) -> None:
13 |         input_ = textwrap.dedent(
14 |             """\
15 |             > > Double Indented > Quote
16 |             >
17 |             >>
18 |             >Quote \\>
19 |             More Quote
20 |             >>> Triple Indented Quote
21 |             > > Part of that Triple Indented Quote"""
22 |         )
23 |         expected = textwrap.dedent(
24 |             """\
25 |             > > Double Indented > Quote
26 |             >
27 |             > >
28 |             > Quote \\> More Quote
29 |             > >> Triple Indented Quote Part of
30 |             > >> that Triple Indented Quote"""
31 |         )
32 |         block_quote = create_section(MarkdownBlockQuote, input_)
33 |         assert block_quote.reformatted(width=35) == expected
34 |         block_quote = create_section(MarkdownBlockQuote, expected)
35 |         assert block_quote.reformatted(width=35) == expected
36 |         assert render(expected) == render(input_)
37 | 


--------------------------------------------------------------------------------
/tests/test_fenced_code_block.py:
--------------------------------------------------------------------------------
 1 | from markflow.formatters.fenced_code_block import MarkdownFencedCodeBlock
 2 | 
 3 | from .util import create_section, render
 4 | 
 5 | 
 6 | class TestCodeBlock:
 7 |     def test_backtick(self) -> None:
 8 |         input_ = (
 9 |             "```python   \n"
10 |             "# Very powerful spell   \n"
11 |             "if necromancer:   \n"
12 |             "    raise Dead()  \n"
13 |             "```"
14 |         )
15 |         expected = (
16 |             "```python\n"
17 |             "# Very powerful spell\n"
18 |             "if necromancer:\n"
19 |             "    raise Dead()\n"
20 |             "```"
21 |         )
22 |         code_block = create_section(MarkdownFencedCodeBlock, input_)
23 |         assert code_block.reformatted() == expected
24 |         code_block = create_section(MarkdownFencedCodeBlock, expected)
25 |         assert code_block.reformatted() == expected
26 |         assert render(expected) == render(input_)
27 | 
28 |     def test_tilda(self) -> None:
29 |         input_ = (
30 |             "~~~~python   \n"
31 |             "# Very powerful spell   \n"
32 |             "if necromancer:   \n"
33 |             "    raise Dead()  \n"
34 |             "```\n"
35 |             "~~~~"
36 |         )
37 |         expected = (
38 |             "~~~~python\n"
39 |             "# Very powerful spell\n"
40 |             "if necromancer:\n"
41 |             "    raise Dead()\n"
42 |             "```\n"
43 |             "~~~~"
44 |         )
45 |         code_block = create_section(MarkdownFencedCodeBlock, input_)
46 |         assert code_block.reformatted() == expected
47 |         code_block = create_section(MarkdownFencedCodeBlock, expected)
48 |         assert code_block.reformatted() == expected
49 |         assert render(expected) == render(input_)
50 | 


--------------------------------------------------------------------------------
/tests/test_files.py:
--------------------------------------------------------------------------------
  1 | import logging
  2 | import os
  3 | import pathlib
  4 | from typing import DefaultDict, List, Optional
  5 | 
  6 | import pytest
  7 | 
  8 | from markflow import reformat_markdown_text
  9 | 
 10 | from .util import render
 11 | 
 12 | logger = logging.getLogger(__name__)
 13 | 
 14 | 
 15 | # TODO: File bug report for 0015
 16 | PYCOMMONMARK_BUG_FILES = ["0015"]
 17 | MARKFLOW_BUG_FILES: List[str] = []
 18 | MARKFLOW_DIFFERENT_FILES = [
 19 |     "0023"  # This file contains a block quote immediately followed by a setext heading
 20 | ]
 21 | 
 22 | 
 23 | class FilePair:
 24 |     def __init__(self) -> None:
 25 |         self._input: Optional[pathlib.Path] = None
 26 |         self._output: Optional[pathlib.Path] = None
 27 | 
 28 |     @property
 29 |     def valid(self) -> bool:
 30 |         return bool(
 31 |             self._input
 32 |             and self._input.is_file()
 33 |             and self._output
 34 |             and self._output.is_file()
 35 |         )
 36 | 
 37 |     @property
 38 |     def input(self) -> pathlib.Path:
 39 |         if self._input is None:
 40 |             raise RuntimeError("Input file not defined.")
 41 |         return self._input
 42 | 
 43 |     @input.setter
 44 |     def input(self, path: pathlib.Path) -> None:
 45 |         self._input = path
 46 | 
 47 |     @property
 48 |     def output(self) -> pathlib.Path:
 49 |         if self._output is None:
 50 |             raise RuntimeError("Output file not defined.")
 51 |         return self._output
 52 | 
 53 |     @output.setter
 54 |     def output(self, path: pathlib.Path) -> None:
 55 |         self._output = path
 56 | 
 57 |     def __str__(self) -> str:
 58 |         return f"<{self.__class__.__name__}: {self.input} -> {self.output}>"
 59 | 
 60 |     def __repr__(self) -> str:
 61 |         return str(self)
 62 | 
 63 | 
 64 | def get_file_pairs(directory: pathlib.Path) -> List[FilePair]:
 65 |     file_pairs = DefaultDict[str, FilePair](FilePair)
 66 |     for file in directory.iterdir():
 67 |         if file.is_dir():
 68 |             continue
 69 |         key, arg, _ = file.name.split("_", maxsplit=2)
 70 |         if arg.startswith("in"):
 71 |             file_pairs[key].input = file
 72 |         elif arg.startswith("out"):
 73 |             file_pairs[key].output = file
 74 |     for pair in file_pairs.values():
 75 |         assert pair.valid, f"{pair} is not a valid pair of files"
 76 | 
 77 |     return sorted(list(file_pairs.values()), key=lambda f: f.input)
 78 | 
 79 | 
 80 | class TestFiles:
 81 |     @pytest.mark.parametrize(
 82 |         "file_pair",
 83 |         get_file_pairs(
 84 |             pathlib.Path(os.path.dirname(os.path.realpath(__file__))).resolve()
 85 |             / "files"
 86 |         ),
 87 |     )
 88 |     def test_files(self, file_pair: FilePair) -> None:
 89 |         if any(num in file_pair.input.name for num in MARKFLOW_BUG_FILES):
 90 |             pytest.xfail("Marking test xfail due to markflow bug.")
 91 | 
 92 |         input_text = file_pair.input.read_text()
 93 |         output_text = file_pair.output.read_text()
 94 |         reformatted = reformat_markdown_text(input_text)
 95 |         assert reformatted == output_text
 96 |         rereformatted = reformat_markdown_text(reformatted)
 97 |         assert rereformatted == output_text
 98 |         if "|--" in input_text:
 99 |             logger.info(
100 |                 "Skipping render check as tables are not supported by the "
101 |                 "commonmark Python library."
102 |             )
103 |         elif any(num in file_pair.input.name for num in PYCOMMONMARK_BUG_FILES):
104 |             logger.info(
105 |                 "Skipping render check due to a bug in the commonmark Python library."
106 |             )
107 |         elif any(num in file_pair.input.name for num in MARKFLOW_DIFFERENT_FILES):
108 |             logger.info("Skipping render check as our parsing differs from the spec.")
109 |         else:
110 |             assert render(output_text) == render(input_text)
111 | 


--------------------------------------------------------------------------------
/tests/test_horizontal_line.py:
--------------------------------------------------------------------------------
 1 | import math
 2 | 
 3 | from markflow.formatters.thematic_break import MarkdownThematicBreak
 4 | 
 5 | from .util import create_section, render
 6 | 
 7 | 
 8 | class TestThematicBreak:
 9 |     def test_too_short(self) -> None:
10 |         width = 50
11 |         input_ = "---"
12 |         expected = "-" * width
13 |         h_line = create_section(MarkdownThematicBreak, input_)
14 |         assert h_line.reformatted(width) == expected
15 |         assert render(expected) == render(input_)
16 | 
17 |     def test_too_long(self) -> None:
18 |         width = 50
19 |         input_ = "-" * 100
20 |         expected = "-" * width
21 |         h_line = create_section(MarkdownThematicBreak, input_)
22 |         assert h_line.reformatted(width) == expected
23 |         assert render(expected) == render(input_)
24 | 
25 |     def test_infinity(self) -> None:
26 |         width = math.inf
27 |         input_ = "----------"
28 |         expected = "---"
29 |         h_line = create_section(MarkdownThematicBreak, input_)
30 |         assert h_line.reformatted(width) == expected
31 |         assert render(expected) == render(input_)
32 | 


--------------------------------------------------------------------------------
/tests/test_indented_code_block.py:
--------------------------------------------------------------------------------
 1 | from markflow.formatters.indented_code_block import MarkdownIndentedCodeBlock
 2 | 
 3 | from .util import create_section, render
 4 | 
 5 | 
 6 | class TestIndentedCodeBlock:
 7 |     def test_simple(self) -> None:
 8 |         input_ = "    import goods    \n" "\n" "    tariffs = good.audit()   \n"
 9 |         expected = "    import goods\n" "\n" "    tariffs = good.audit()"
10 |         code_block = create_section(MarkdownIndentedCodeBlock, input_)
11 |         assert code_block.reformatted() == expected
12 |         code_block = create_section(MarkdownIndentedCodeBlock, expected)
13 |         assert code_block.reformatted() == expected
14 |         assert render(expected) == render(input_)
15 | 


--------------------------------------------------------------------------------
/tests/test_link_reference_definition.py:
--------------------------------------------------------------------------------
 1 | from markflow.formatters.link_reference_definition import (
 2 |     MarkdownLinkReferenceDefinition,
 3 | )
 4 | 
 5 | from .util import create_section, render
 6 | 
 7 | 
 8 | class TestLinkReferenceDefinition:
 9 |     # TODO: Port footnotes from file 0007 to here
10 |     def test_basic(self) -> None:
11 |         input_ = "   [footnote1]: http://example.com     "
12 |         expected = "[footnote1]: http://example.com"
13 |         footnote = create_section(MarkdownLinkReferenceDefinition, input_)
14 |         assert footnote.reformatted() == expected
15 |         footnote = create_section(MarkdownLinkReferenceDefinition, expected)
16 |         assert footnote.reformatted() == expected
17 |         assert render(expected) == render(input_)
18 | 


--------------------------------------------------------------------------------
/tests/test_list.py:
--------------------------------------------------------------------------------
  1 | import textwrap
  2 | 
  3 | from markflow.formatters.lists import MarkdownBulletList, MarkdownOrderedList
  4 | 
  5 | from .util import create_section, render
  6 | 
  7 | 
  8 | class TestMarkdownList:
  9 |     def test_basic_asterisk(self) -> None:
 10 |         input_ = textwrap.dedent(
 11 |             """\
 12 |             * I am a list that is pretty badly
 13 |             formatted
 14 |             * There are all sorts of problems that don't make this look very nice, like
 15 |             bullets that break across lines and missing spaces.
 16 |             * Which also is a problem when working with nested lists since they could be
 17 |             missing leading spaces and make things look extra confusing."""
 18 |         )
 19 |         expected = textwrap.dedent(
 20 |             """\
 21 |             * I am a list that is pretty badly formatted
 22 |             * There are all sorts of problems that don't make
 23 |               this look very nice, like bullets that break
 24 |               across lines and missing spaces.
 25 |             * Which also is a problem when working with nested
 26 |               lists since they could be missing leading spaces
 27 |               and make things look extra confusing."""
 28 |         )
 29 |         lst = create_section(MarkdownBulletList, input_)
 30 |         assert lst.reformatted(width=50) == expected
 31 |         lst = create_section(MarkdownBulletList, expected)
 32 |         assert lst.reformatted(width=50) == expected
 33 |         assert render(expected) == render(input_)
 34 | 
 35 |     def test_basic_dash(self) -> None:
 36 |         input_ = textwrap.dedent(
 37 |             """\
 38 |             - I am a list that is pretty badly
 39 |             formatted
 40 |             - There are all sorts of problems that don't make this look very nice, like
 41 |             bullets that break across lines and missing spaces.
 42 |             - Which also is a problem when working with nested lists since they could be
 43 |             missing leading spaces and make things look extra confusing."""
 44 |         )
 45 |         expected = textwrap.dedent(
 46 |             """\
 47 |             - I am a list that is pretty badly formatted
 48 |             - There are all sorts of problems that don't make
 49 |               this look very nice, like bullets that break
 50 |               across lines and missing spaces.
 51 |             - Which also is a problem when working with nested
 52 |               lists since they could be missing leading spaces
 53 |               and make things look extra confusing."""
 54 |         )
 55 |         lst = create_section(MarkdownBulletList, input_)
 56 |         assert lst.reformatted(width=50) == expected
 57 |         lst = create_section(MarkdownBulletList, expected)
 58 |         assert lst.reformatted(width=50) == expected
 59 |         assert render(expected) == render(input_)
 60 | 
 61 |     def test_basic_plus(self) -> None:
 62 |         input_ = textwrap.dedent(
 63 |             """\
 64 |             + I am a list that is pretty badly
 65 |             formatted
 66 |             + There are all sorts of problems that don't make this look very nice, like
 67 |             bullets that break across lines and missing spaces.
 68 |             + Which also is a problem when working with nested lists since they could be
 69 |             missing leading spaces and make things look extra confusing."""
 70 |         )
 71 |         expected = textwrap.dedent(
 72 |             """\
 73 |             + I am a list that is pretty badly formatted
 74 |             + There are all sorts of problems that don't make
 75 |               this look very nice, like bullets that break
 76 |               across lines and missing spaces.
 77 |             + Which also is a problem when working with nested
 78 |               lists since they could be missing leading spaces
 79 |               and make things look extra confusing."""
 80 |         )
 81 |         lst = create_section(MarkdownBulletList, input_)
 82 |         assert lst.reformatted(width=50) == expected
 83 |         lst = create_section(MarkdownBulletList, expected)
 84 |         assert lst.reformatted(width=50) == expected
 85 |         assert render(expected) == render(input_)
 86 | 
 87 |     def test_make_bullets_same(self) -> None:
 88 |         input_ = textwrap.dedent(
 89 |             """\
 90 |             * Test
 91 |             + Test
 92 |             - Test"""
 93 |         )
 94 |         expected = textwrap.dedent(
 95 |             """\
 96 |             * Test
 97 |             * Test
 98 |             * Test"""
 99 |         )
100 |         lst = create_section(MarkdownBulletList, input_)
101 |         assert lst.reformatted() == expected
102 |         lst = create_section(MarkdownBulletList, expected)
103 |         assert lst.reformatted() == expected
104 |         # Since we correct lists that have mismatched indicators, we update the strings
105 |         # to have consistent bullets.
106 |         # Note: Here, this doesn't actually test much. We keep it here for test
107 |         #       consistency.
108 |         input_ = input_.replace("-", "*").replace("+", "*")
109 |         expected = expected.replace("-", "*").replace("+", "*")
110 |         assert render(expected) == render(input_)
111 | 
112 |     def test_correct_numbering(self) -> None:
113 |         input_ = textwrap.dedent(
114 |             """\
115 |             1. Test
116 |             1. Test
117 |             4. Test"""
118 |         )
119 |         expected = textwrap.dedent(
120 |             """\
121 |             1. Test
122 |             2. Test
123 |             3. Test"""
124 |         )
125 |         lst = create_section(MarkdownOrderedList, input_)
126 |         assert lst.reformatted() == expected
127 |         lst = create_section(MarkdownOrderedList, expected)
128 |         assert lst.reformatted() == expected
129 |         assert render(expected) == render(input_)
130 | 
131 |     def test_nested_unordered(self) -> None:
132 |         input_ = textwrap.dedent(
133 |             """\
134 |             * This is a really long line that with terrible
135 |             spacing
136 |               - This is also a really long line with terrible
137 |             spacing
138 |               * This one's ok though
139 |             - So is this one"""
140 |         )
141 |         expected = textwrap.dedent(
142 |             """\
143 |             * This is a really long line that with terrible
144 |               spacing
145 |               - This is also a really long line with terrible
146 |                 spacing
147 |               - This one's ok though
148 |             * So is this one"""
149 |         )
150 |         lst = create_section(MarkdownBulletList, input_)
151 |         assert lst.reformatted(width=50) == expected
152 |         lst = create_section(MarkdownBulletList, expected)
153 |         assert lst.reformatted(width=50) == expected
154 |         # Since we correct lists that have mismatched indicators, we update the strings
155 |         # to have consistent bullets.
156 |         input_ = input_.replace("-", "*")
157 |         expected = expected.replace("-", "*")
158 |         assert render(expected) == render(input_)
159 | 
160 |     def test_nested_ordered(self) -> None:
161 |         input_ = textwrap.dedent(
162 |             """\
163 |             1. This is a really long line that with terrible
164 |             spacing
165 |                1. This is also a really long line with terrible
166 |             spacing
167 |                1. This one's ok though
168 |                1. This one isn't
169 |             great
170 |             1. So is this one"""
171 |         )
172 |         expected = textwrap.dedent(
173 |             """\
174 |             1. This is a really long line that with terrible
175 |                spacing
176 |                1. This is also a really long line with
177 |                   terrible spacing
178 |                2. This one's ok though
179 |                3. This one isn't great
180 |             2. So is this one"""
181 |         )
182 |         lst = create_section(MarkdownOrderedList, input_)
183 |         assert lst.reformatted(width=50) == expected
184 |         lst = create_section(MarkdownOrderedList, expected)
185 |         assert lst.reformatted(width=50) == expected
186 |         input_ = input_.replace("-", "*")
187 |         expected = expected.replace("-", "*")
188 |         assert render(expected) == render(input_)
189 | 
190 |     def test_weird_indenting(self) -> None:
191 |         input_ = textwrap.dedent(
192 |             """\
193 |             * This is a really long line that with terrible
194 |             spacing
195 |                  * Let's make sure this doesn't stay here
196 |               * This is also a really long line with terrible
197 |             spacing
198 |                  - Let's make sure this does stay here
199 |               - This one's ok though, minus the symbol
200 |             - So is this one"""
201 |         )
202 |         expected = textwrap.dedent(
203 |             """\
204 |             * This is a really long line that with terrible
205 |               spacing
206 |               * Let's make sure this doesn't stay here
207 |               * This is also a really long line with terrible
208 |                 spacing
209 |                 - Let's make sure this does stay here
210 |               * This one's ok though, minus the symbol
211 |             * So is this one"""
212 |         )
213 |         lst = create_section(MarkdownBulletList, input_)
214 |         assert lst.reformatted(width=50) == expected
215 |         lst = create_section(MarkdownBulletList, expected)
216 |         assert lst.reformatted(width=50) == expected
217 |         # Since we correct lists that have mismatched indicators, we update the strings
218 |         # to have consistent bullets.
219 |         input_ = input_.replace("-", "*")
220 |         expected = expected.replace("-", "*")
221 |         assert render(expected) == render(input_)
222 | 
223 |     def test_links(self) -> None:
224 |         input_ = textwrap.dedent(
225 |             """\
226 |             * [URL](http://example.com/very/nested/directory)
227 |             * [URL](http://example.com)"""
228 |         )
229 |         expected = textwrap.dedent(
230 |             """\
231 |             * [URL](
232 |               http://example.com/very/nested/directory)
233 |             * [URL](http://example.com)"""
234 |         )
235 |         lst = create_section(MarkdownBulletList, input_)
236 |         assert lst.reformatted(width=30) == expected
237 |         lst = create_section(MarkdownBulletList, expected)
238 |         assert lst.reformatted(width=30) == expected
239 |         assert render(expected) == render(input_)
240 | 
241 |     def test_indented(self) -> None:
242 |         input_ = "  * Entry 1\n* Entry 2"
243 |         expected = "* Entry 1\n* Entry 2"
244 |         lst = create_section(MarkdownBulletList, input_)
245 |         assert lst.reformatted() == expected
246 |         lst = create_section(MarkdownBulletList, expected)
247 |         assert lst.reformatted() == expected
248 |         assert render(expected) == render(input_)
249 | 
250 |     def test_indented_numerics(self) -> None:
251 |         input_ = "  1. Test\n  2. Test\n 10. Test"
252 |         expected = "1. Test\n2. Test\n3. Test"
253 |         lst = create_section(MarkdownOrderedList, input_)
254 |         assert lst.reformatted() == expected
255 |         lst = create_section(MarkdownOrderedList, expected)
256 |         assert lst.reformatted() == expected
257 |         assert render(expected) == render(input_)
258 | 
259 |     def test_ordered_list_with_nine_entries(self) -> None:
260 |         """Ensure we don't add extra indents on 10^n-1 length lists"""
261 |         input_ = (
262 |             "1. a\n"
263 |             "2. b\n"
264 |             "3. c\n"
265 |             "4. d\n"
266 |             "5. e\n"
267 |             "6. f\n"
268 |             "7. g\n"
269 |             "8. h\n"
270 |             "9. i"
271 |         )
272 |         expected = input_
273 |         lst = create_section(MarkdownOrderedList, input_)
274 |         assert lst.reformatted() == expected
275 |         lst = create_section(MarkdownOrderedList, expected)
276 |         assert lst.reformatted() == expected
277 |         assert render(expected) == render(input_)
278 | 
279 |     def test_nested_ordered_bad_indent(self) -> None:
280 |         input_ = textwrap.dedent(
281 |             """\
282 |             1. This is a really long line that with terrible
283 |             spacing
284 |               1. This is also a really long line with terrible
285 |             spacing
286 |               1. This one's ok though
287 |               1. This one isn't
288 |             great
289 |             1. So is this one"""
290 |         )
291 |         expected = textwrap.dedent(
292 |             """\
293 |             1. This is a really long line that with terrible
294 |                spacing
295 |             2. This is also a really long line with terrible
296 |                spacing
297 |             3. This one's ok though
298 |             4. This one isn't great
299 |             5. So is this one"""
300 |         )
301 |         lst = create_section(MarkdownOrderedList, input_)
302 |         assert lst.reformatted(width=50) == expected
303 |         lst = create_section(MarkdownOrderedList, expected)
304 |         assert lst.reformatted(width=50) == expected
305 |         input_ = input_.replace("-", "*")
306 |         expected = expected.replace("-", "*")
307 |         assert render(expected) == render(input_)
308 | 


--------------------------------------------------------------------------------
/tests/test_paragraph.py:
--------------------------------------------------------------------------------
 1 | import textwrap
 2 | 
 3 | from markflow.formatters.paragraph import MarkdownParagraph
 4 | 
 5 | from .util import create_section, render
 6 | 
 7 | 
 8 | class TestParagraph:
 9 |     def test_paragraph(self) -> None:
10 |         input_ = (
11 |             "This is a test string. It must have a [footnote][footnote] that breaks "
12 |             "across a line and a [URL](http://example.com) so we can ensure that we "
13 |             "get good coverage."
14 |         )
15 |         expected = textwrap.dedent(
16 |             """\
17 |             This is a test string. It must have a [footnote][
18 |             footnote] that breaks across a line and a [URL](
19 |             http://example.com) so we can ensure that we get
20 |             good coverage."""
21 |         )
22 |         paragraph = create_section(MarkdownParagraph, input_)
23 |         assert paragraph.reformatted(width=50) == expected
24 |         paragraph = create_section(MarkdownParagraph, expected)
25 |         assert paragraph.reformatted(width=50) == expected
26 |         assert render(expected) == render(input_)
27 | 
28 |     def test_hyperlink_breaking(self) -> None:
29 |         input_ = textwrap.dedent(
30 |             """\
31 |             [I'm a hyperlink broken across multiple lines.](
32 |             test.htm)"""
33 |         )
34 |         expected = "[I'm a hyperlink broken across multiple lines.](test.htm)"
35 |         paragraph = create_section(MarkdownParagraph, input_)
36 |         assert paragraph.reformatted() == expected
37 |         paragraph = create_section(MarkdownParagraph, expected)
38 |         assert paragraph.reformatted() == expected
39 |         assert render(expected) == render(input_)
40 | 
41 |     def test_semantic_paragraph(self) -> None:
42 |         input_ = textwrap.dedent(
43 |             """\
44 |             Some words with a double
45 |             space after them.  """
46 |         )
47 |         expected = "Some words with a double space after them.  "
48 |         paragraph = create_section(MarkdownParagraph, input_)
49 |         assert paragraph.reformatted() == expected
50 |         paragraph = create_section(MarkdownParagraph, expected)
51 |         assert paragraph.reformatted() == expected
52 |         assert render(expected) == render(input_)
53 | 


--------------------------------------------------------------------------------
/tests/test_separator.py:
--------------------------------------------------------------------------------
 1 | from markflow.formatters.blank_line import MarkdownBlankLine
 2 | 
 3 | from .util import create_section, render
 4 | 
 5 | 
 6 | class TestBlankLine:
 7 |     def test_simple(self) -> None:
 8 |         input_ = "    "
 9 |         expected = ""
10 |         separator = create_section(MarkdownBlankLine, input_)
11 |         assert separator.reformatted() == expected
12 |         assert render(expected) == render(input_)
13 | 


--------------------------------------------------------------------------------
/tests/test_setext_heading.py:
--------------------------------------------------------------------------------
 1 | import textwrap
 2 | 
 3 | from markflow.formatters.setext_heading import MarkdownSetextHeading
 4 | 
 5 | from .util import create_section, render
 6 | 
 7 | 
 8 | class TestSetextHeading:
 9 |     def test_simple(self) -> None:
10 |         input_ = "   Heading    \n---"
11 |         expected = "Heading\n-------"
12 |         heading = create_section(MarkdownSetextHeading, input_)
13 |         assert heading.reformatted() == expected
14 |         heading = create_section(MarkdownSetextHeading, expected)
15 |         assert heading.reformatted() == expected
16 |         assert render(expected) == render(input_)
17 | 
18 |     def test_singular_character_underlined(self) -> None:
19 |         input_ = "A\n----"
20 |         expected = "A\n-"
21 |         heading = create_section(MarkdownSetextHeading, input_)
22 |         assert heading.reformatted() == expected
23 |         heading = create_section(MarkdownSetextHeading, expected)
24 |         assert heading.reformatted() == expected
25 |         assert render(expected) == render(input_)
26 | 
27 |     def test_multiline_heading(self) -> None:
28 |         input_ = textwrap.dedent(
29 |             """\
30 |             This is a long
31 |             heading
32 |             --"""
33 |         )
34 |         expected = textwrap.dedent(
35 |             """\
36 |             This is a
37 |             long heading
38 |             ------------"""
39 |         )
40 |         heading = create_section(MarkdownSetextHeading, input_)
41 |         assert heading.reformatted(width=12) == expected
42 |         heading = create_section(MarkdownSetextHeading, expected)
43 |         assert heading.reformatted(width=12) == expected
44 |         assert render(expected) == render(input_)
45 | 


--------------------------------------------------------------------------------
/tests/test_table.py:
--------------------------------------------------------------------------------
 1 | import textwrap
 2 | 
 3 | from markflow.formatters.table import (
 4 |     MarkdownTable,
 5 |     center_align,
 6 |     left_align,
 7 |     right_align,
 8 | )
 9 | 
10 | from .util import create_section
11 | 
12 | 
13 | class TestAlign:
14 |     def test_center_align(self) -> None:
15 |         assert center_align("a", 3) == " a "
16 |         assert center_align("aa", 3) == "aa "
17 |         assert center_align("aa", 4) == " aa "
18 | 
19 |     def test_left_align(self) -> None:
20 |         assert left_align("a", 3) == "a  "
21 |         assert left_align("aa", 3) == "aa "
22 |         assert left_align("aa", 4) == "aa  "
23 | 
24 |     def test_right_align(self) -> None:
25 |         assert right_align("a", 3) == "  a"
26 |         assert right_align("aa", 3) == " aa"
27 |         assert right_align("aa", 4) == "  aa"
28 | 
29 | 
30 | class TestTable:
31 |     def test_table(self) -> None:
32 |         input_ = textwrap.dedent(
33 |             """\
34 |             |Heading 1|Heading 2|
35 |             |--|--|
36 |             |Short|Very long even line|
37 |             |Very long odd line|Short|"""
38 |         )
39 |         expected = textwrap.dedent(
40 |             """\
41 |             |     Heading 1      |      Heading 2      |
42 |             |--------------------|---------------------|
43 |             | Short              | Very long even line |
44 |             | Very long odd line | Short               |"""
45 |         )
46 |         table = create_section(MarkdownTable, input_)
47 |         assert table.reformatted() == expected
48 |         table = create_section(MarkdownTable, expected)
49 |         assert table.reformatted() == expected
50 | 
51 |     def test_aligned_table(self) -> None:
52 |         input_ = textwrap.dedent(
53 |             """\
54 |             | L | C | R |
55 |             |:--|:-:|--:|
56 |             | a | a | a|
57 |             |abcde | abcde|abcde|"""
58 |         )
59 |         expected = textwrap.dedent(
60 |             """\
61 |             | L     |   C   |     R |
62 |             |:------|:-----:|------:|
63 |             | a     |   a   |     a |
64 |             | abcde | abcde | abcde |"""
65 |         )
66 |         table = create_section(MarkdownTable, input_)
67 |         assert table.reformatted() == expected
68 |         table = create_section(MarkdownTable, expected)
69 |         assert table.reformatted() == expected
70 | 


--------------------------------------------------------------------------------
/tests/test_utils.py:
--------------------------------------------------------------------------------
  1 | import textwrap
  2 | 
  3 | from markflow._utils._utils import get_indent, truncate_str
  4 | from markflow._utils.textwrap import (
  5 |     code_split,
  6 |     link_split,
  7 |     newline_split,
  8 |     space_split,
  9 |     wrap,
 10 | )
 11 | 
 12 | 
 13 | class TestTruncateStr:
 14 |     def test_shorter(self) -> None:
 15 |         assert truncate_str("123456789", 19) == "123456789"
 16 | 
 17 |     def test_exact_length(self) -> None:
 18 |         assert truncate_str("123456789", 9) == "123456789"
 19 | 
 20 |     def test_longer(self) -> None:
 21 |         assert truncate_str("123456789", 5) == "12..."
 22 | 
 23 |     def test_truncate_less_than_ellipsis(self) -> None:
 24 |         assert truncate_str("123456789", 2) == ".."
 25 | 
 26 | 
 27 | class TestGetIndent:
 28 |     def test_is_indented_at_least(self) -> None:
 29 |         # This is a little silly, but I expect we may have more cases to support since
 30 |         # we currently conflate tabs and spaces.
 31 |         assert get_indent("  Test") == 2
 32 | 
 33 | 
 34 | class TestTextWrap:
 35 |     def test_all_splits(self) -> None:
 36 |         input_ = (
 37 |             "abc abc abc abc abc abc abc abc abc ``abc ``` abc[0][0] ``abc abc abc abc "
 38 |             "<br /><br /> abc abc [url](http://example.com) "
 39 |             "abc[url][http://example.com]abc[url][URL][url][URL]  <br/>abc<br/>"
 40 |         )
 41 |         expected = textwrap.dedent(
 42 |             """\
 43 |             abc abc abc abc abc abc abc abc abc
 44 |             ``abc ``` abc[0][0] ``abc abc abc abc <br />
 45 |             <br />
 46 |             abc abc [url](http://example.com)abc[url][
 47 |             http://example.com]abc[url][URL][url][URL] <br/>
 48 |             abc<br/>"""
 49 |         )
 50 |         assert wrap(input_, 50) == expected
 51 | 
 52 |     def test_code_split(self) -> None:
 53 |         input_ = "a` a `` b` a `b`c"
 54 |         expected_split_text = ["a", "` a `` b`", "a", "`b`", "c"]
 55 |         expected_leading_spaces = [False, False, True, True, False]
 56 |         expected_evaluates = [True, False, True, False, True]
 57 |         split_text, leading_spaces, evaluates = code_split(input_, False)
 58 |         assert len(split_text) == len(leading_spaces) == len(evaluates)
 59 |         assert split_text == expected_split_text
 60 |         assert leading_spaces == expected_leading_spaces
 61 |         assert evaluates == expected_evaluates
 62 | 
 63 |     def test_code_split_begin_and_end(self) -> None:
 64 |         input_ = "` a `` b` a `b`"
 65 |         expected_split_text = ["` a `` b`", "a", "`b`"]
 66 |         expected_leading_spaces = [False, True, True]
 67 |         expected_evaluates = [False, True, False]
 68 |         split_text, leading_spaces, evaluates = code_split(input_, False)
 69 |         assert len(split_text) == len(leading_spaces) == len(evaluates)
 70 |         assert split_text == expected_split_text
 71 |         assert leading_spaces == expected_leading_spaces
 72 |         assert evaluates == expected_evaluates
 73 | 
 74 |     def test_code_split_sentence(self) -> None:
 75 |         input_ = "a` a `` b`. a `b`.c"
 76 |         expected_split_text = ["a", "` a `` b`.", "a", "`b`.", "c"]
 77 |         expected_leading_spaces = [False, False, True, True, False]
 78 |         expected_evaluates = [True, False, True, False, True]
 79 |         split_text, leading_spaces, evaluates = code_split(input_, False)
 80 |         assert len(split_text) == len(leading_spaces) == len(evaluates)
 81 |         assert split_text == expected_split_text
 82 |         assert leading_spaces == expected_leading_spaces
 83 |         assert evaluates == expected_evaluates
 84 | 
 85 |     def test_code_split_solo_tilda(self) -> None:
 86 |         input_ = "` a `` b` a `b` `a"
 87 |         expected_split_text = ["` a `` b`", "a", "`b`", "`a"]
 88 |         expected_leading_spaces = [False, True, True, True]
 89 |         expected_evaluates = [False, True, False, True]
 90 |         split_text, leading_spaces, evaluates = code_split(input_, False)
 91 |         assert len(split_text) == len(leading_spaces) == len(evaluates)
 92 |         assert split_text == expected_split_text
 93 |         assert leading_spaces == expected_leading_spaces
 94 |         assert evaluates == expected_evaluates
 95 | 
 96 |     def test_link_split(self) -> None:
 97 |         input_ = "a[URL][url] b [URL](http://example.com)c"
 98 |         expected_split_text = [
 99 |             "a[URL][",
100 |             "url]",
101 |             "b",
102 |             "[URL](",
103 |             "http://example.com)c",
104 |         ]
105 |         expected_leading_spaces = [False, False, True, True, False]
106 |         expected_evaluates = [True, False, True, True, False]
107 |         split_text, leading_spaces, evaluates = link_split(input_, False)
108 |         assert len(split_text) == len(leading_spaces) == len(evaluates)
109 |         assert split_text == expected_split_text
110 |         assert leading_spaces == expected_leading_spaces
111 |         assert evaluates == expected_evaluates
112 | 
113 |     def test_link_split_sentence(self) -> None:
114 |         input_ = "a[URL][url]. b [URL](http://example.com).c"
115 |         expected_split_text = [
116 |             "a[URL][",
117 |             "url].",
118 |             "b",
119 |             "[URL](",
120 |             "http://example.com).c",
121 |         ]
122 |         expected_leading_spaces = [False, False, True, True, False]
123 |         expected_evaluates = [True, False, True, True, False]
124 |         split_text, leading_spaces, evaluates = link_split(input_, False)
125 |         assert len(split_text) == len(leading_spaces) == len(evaluates)
126 |         assert split_text == expected_split_text
127 |         assert leading_spaces == expected_leading_spaces
128 |         assert evaluates == expected_evaluates
129 | 
130 |     def test_newline_split(self) -> None:
131 |         input_ = "a <br /> b <br>c<br/>d"
132 |         expected_split_text = ["a", "<br />", "b", "<br>", "c", "<br/>", "d"]
133 |         expected_leading_spaces = [False, True, True, True, False, False, False]
134 |         expected_evaluates = [True, False, True, False, True, False, True]
135 |         split_text, leading_spaces, evaluates = newline_split(input_, False)
136 |         assert len(split_text) == len(leading_spaces) == len(evaluates)
137 |         assert split_text == expected_split_text
138 |         assert leading_spaces == expected_leading_spaces
139 |         assert evaluates == expected_evaluates
140 | 
141 |     def test_space_split(self) -> None:
142 |         input_ = " ".join(["a"] * 10)
143 |         expected_split_text = ["a"] * 10
144 |         expected_leading_spaces = [False] + [True] * 9
145 |         expected_evaluates = [True] * 10
146 |         split_text, leading_spaces, evaluates = space_split(input_, False)
147 |         assert len(split_text) == len(leading_spaces) == len(evaluates)
148 |         assert split_text == expected_split_text
149 |         assert leading_spaces == expected_leading_spaces
150 |         assert evaluates == expected_evaluates
151 | 


--------------------------------------------------------------------------------
/tests/util.py:
--------------------------------------------------------------------------------
 1 | import re
 2 | from typing import Type
 3 | 
 4 | import commonmark
 5 | 
 6 | from markflow.formatters import MarkdownSection
 7 | 
 8 | IGNORED_HTML_CHARACTERS = re.compile(r"[\n\s]")
 9 | # Strip ``start="N"`` attributes so our list renumbering is ignored when comparing.
10 | LIST_NUMBERING_START = re.compile(r" start=\"[0-9]+\"")
11 | 
12 | 
13 | def create_section(class_: Type[MarkdownSection], text: str) -> MarkdownSection:
14 |     obj = class_(0, text.splitlines())
15 |     return obj
16 | 
17 | 
18 | def render(text: str) -> str:
19 |     rendered = commonmark.commonmark(text)
20 |     rendered = LIST_NUMBERING_START.sub("", rendered)
21 |     rendered = IGNORED_HTML_CHARACTERS.sub("", rendered)
22 |     return rendered
23 | 


--------------------------------------------------------------------------------