├── .dockerignore
├── .github
    ├── gha_assets
    │   ├── crate-doc-root-redirect.html
    │   └── release_pr_template.md
    ├── release.yml
    └── workflows
    │   ├── binary-verify.yml
    │   ├── build-release.yml
    │   ├── coverage.yml
    │   ├── formatting.yml
    │   ├── list-targets.yml
    │   ├── pr-checks.yml
    │   ├── prepare-new-release.yml
    │   ├── readme-checks.yml
    │   ├── release-assets.yml
    │   ├── release-publish.yml
    │   ├── rust.yml
    │   └── system-test.yml
├── .gitignore
├── Cargo.lock
├── Cargo.toml
├── Dockerfile
├── LICENSE-APACHE
├── LICENSE-MIT
├── README.md
├── build.rs
├── examples
    └── hello.md
├── githooks
    └── post-commit
├── release_procedure.md
├── rustfmt.toml
├── scripts
    ├── cargo_to_gh
    ├── common.sh
    ├── flatten-rustdoc-json
    ├── percent_to_color
    ├── percent_to_shields_gist
    └── system_test
├── src
    ├── lib.rs
    ├── main.rs
    ├── md_elem
    │   ├── concatenate.rs
    │   ├── mod.rs
    │   ├── tree.rs
    │   ├── tree_ref.rs
    │   └── tree_test_utils.rs
    ├── output
    │   ├── fmt_md.rs
    │   ├── fmt_md_inlines.rs
    │   ├── fmt_plain_inline.rs
    │   ├── fmt_plain_str.rs
    │   ├── fmt_plain_writer.rs
    │   ├── footnote_transform.rs
    │   ├── link_transform.rs
    │   ├── mod.rs
    │   ├── output_adapter.rs
    │   └── tree_ref_serde.rs
    ├── query
    │   ├── error.rs
    │   ├── grammar.pest
    │   ├── matcher_try_from.rs
    │   ├── mod.rs
    │   ├── pest.rs
    │   ├── selector_try_from.rs
    │   ├── strings.rs
    │   ├── traversal.rs
    │   └── traversal_composites.rs
    ├── run
    │   ├── cli.rs
    │   ├── mod.rs
    │   └── run_main.rs
    ├── select
    │   ├── api.rs
    │   ├── match_replace.rs
    │   ├── match_selector.rs
    │   ├── matcher.rs
    │   ├── mod.rs
    │   ├── sel_chain.rs
    │   ├── sel_code_block.rs
    │   ├── sel_link_like.rs
    │   ├── sel_list_item.rs
    │   ├── sel_section.rs
    │   ├── sel_single_matcher.rs
    │   ├── sel_table.rs
    │   ├── selector.rs
    │   └── string_matcher.rs
    └── util
    │   ├── mod.rs
    │   ├── output.rs
    │   ├── str_utils.rs
    │   ├── utils_for_test.rs
    │   ├── vec_utils.rs
    │   └── words_buffer.rs
└── tests
    ├── README.md
    ├── integ_test.rs
    └── md_cases
        ├── bad_queries.toml
        ├── file_args.toml
        ├── footnote_renumbering.toml
        ├── footnotes_in_footnotes.toml
        ├── link_placement.toml
        ├── links_references.toml
        ├── matchers.toml
        ├── output_format.toml
        ├── paragraph_spans.toml
        ├── search_replace.toml
        ├── select_block_quote.toml
        ├── select_exit_code.toml
        ├── select_front_matter.toml
        ├── select_html.toml
        ├── select_link.toml
        ├── select_lists.toml
        ├── select_paragraphs.toml
        ├── select_sections.toml
        ├── select_tables.toml
        └── wrapping.toml


/.dockerignore:
--------------------------------------------------------------------------------
1 | target


--------------------------------------------------------------------------------
/.github/gha_assets/crate-doc-root-redirect.html:
--------------------------------------------------------------------------------
 1 | <!DOCTYPE html>
 2 | <html lang="en">
 3 | <head>
 4 |     <meta charset="UTF-8">
 5 |     <meta name="viewport" content="width=device-width, initial-scale=1.0">
 6 |     <title>Redirecting...</title>
 7 |     <meta http-equiv="refresh" content="0; URL='./mdq'"/>
 8 |     <style>
 9 |         body {
10 |             font-family: Arial, sans-serif;
11 |             margin: 0;
12 |             padding: 20px;
13 |             display: flex;
14 |             justify-content: center;
15 |             align-items: center;
16 |             height: 100vh;
17 |             background-color: #f5f5f5;
18 |         }
19 | 
20 |         .redirect-message {
21 |             text-align: center;
22 |             padding: 20px;
23 |             background-color: white;
24 |             border-radius: 8px;
25 |             box-shadow: 0 2px 10px rgba(0, 0, 0, 0.1);
26 |         }
27 | 
28 |         h1 {
29 |             color: #333;
30 |         }
31 | 
32 |         p {
33 |             color: #666;
34 |         }
35 | 
36 |         a {
37 |             color: #0066cc;
38 |             text-decoration: none;
39 |         }
40 |     </style>
41 | </head>
42 | <body>
43 | <div class="redirect-message">
44 |     <h1>Redirecting...</h1>
45 |     <p>You are being redirected to <a href="./mdq">./mdq</a>.</p>
46 |     <p>If you are not redirected automatically, please click the link above.</p>
47 | </div>
48 | </body>
49 | </html>


--------------------------------------------------------------------------------
/.github/gha_assets/release_pr_template.md:
--------------------------------------------------------------------------------
1 | Bump version to **{{RELEASE_VERSION}}** for release.
2 | 
3 | ## Validation:
4 | 
5 | - [ ] review release notes
6 | 


--------------------------------------------------------------------------------
/.github/release.yml:
--------------------------------------------------------------------------------
 1 | changelog:
 2 |   exclude:
 3 |     authors:
 4 |       - 'github-actions'
 5 |       - 'github-actions[bot]'
 6 |   categories:
 7 |     - title: Breaking Changes
 8 |       labels:
 9 |         - breaking change
10 |     - title: Notable Changes
11 |       labels:
12 |         - "*"
13 |       exclude:
14 |         labels:
15 |           - minor
16 |     - title: Other Changes
17 |       labels:
18 |         - minor
19 | 


--------------------------------------------------------------------------------
/.github/workflows/binary-verify.yml:
--------------------------------------------------------------------------------
  1 | name: Validate Binaries
  2 | 
  3 | on:
  4 |   workflow_dispatch:
  5 |     inputs:
  6 |       version:
  7 |         description: "The published version, like \"0.2.0\""
  8 |         type: string
  9 |         required: true
 10 |       docker-rc:
 11 |         description: "Whether to validate the \"-rc\" Docker tag instead of the plain version tag"
 12 |         type: boolean
 13 |         default: false
 14 |   workflow_call:
 15 |     inputs:
 16 |       version:
 17 |         description: "The published version, like \"0.2.0\""
 18 |         type: string
 19 |         required: true
 20 |       docker-rc:
 21 |         description: "Whether to validate the \"-rc\" Docker tag instead of the plain version tag"
 22 |         type: boolean
 23 |         required: true
 24 | 
 25 | env:
 26 |   VERSION: "${{ inputs.version }}"
 27 |   RELEASE_TAG: "v${{ inputs.version }}"
 28 | permissions:
 29 |   contents: write # don't actually need to write, but this lets us see draft releases
 30 | 
 31 | jobs:
 32 |   list-targets:
 33 |     uses: ./.github/workflows/list-targets.yml
 34 | 
 35 |   dispatch-targets: # splits the target names into those validated on Windows and all the others
 36 |     needs: list-targets
 37 |     runs-on: ubuntu-latest
 38 |     outputs:
 39 |       windows: ${{ steps.run.outputs.windows }}
 40 |       non-windows: ${{ steps.run.outputs.non-windows }}
 41 |     steps:
 42 |       - name: Run
 43 |         id: run
 44 |         run: |
 45 |           set -x
 46 |           echo "windows=$(<<<"$VALIDATE_BY_TARGET" jq -c 'to_entries | map(select(.value == "windows")) | map(.key)')" >> "$GITHUB_OUTPUT"
 47 |           echo "non-windows=$(<<<"$VALIDATE_BY_TARGET" jq -c 'to_entries | map(select(.value != "windows")) | map(.key)')" >> "$GITHUB_OUTPUT"
 48 |         env:
 49 |           VALIDATE_BY_TARGET: ${{ needs.list-targets.outputs.validate_by_target }}
 51 | 
 52 | 
 53 |   full-coverage:
 54 |     needs: list-targets
 55 |     runs-on: ubuntu-latest
 56 |     steps:
 57 |       # Write the release's asset names, archive extension stripped and sorted, to from-release.txt.
 58 |       - name: Get asset names
 59 |         run: >
 60 |           set -o pipefail;
 61 |           gh release -R ${{ github.repository }} view "$RELEASE_TAG" --json assets | jq -r '.assets | map(.name) | .[]'
 62 |           | sed -E 's/\.(zip|tar\.gz)$//'
 63 |           | sort
 64 |           > from-release.txt
 65 |         env:
 66 |           GH_TOKEN: ${{ github.token }}
 67 | 
 68 |       - name: Get target names
 69 |         run: <<<"$TARGET_NAMES" jq -r '.[] | "mdq-\(.)"' > from-list-targets.txt
 70 |         env:
 71 |           TARGET_NAMES: ${{ needs.list-targets.outputs.names }}
 72 | 
 73 |       - name: Validate that they're the same
 74 |         run: diff -y from-list-targets.txt from-release.txt
 75 | 
 76 | 
 77 |   unix-like:
 78 |     needs: [ list-targets, dispatch-targets ]
 79 |     strategy:
 80 |       matrix:
 81 |         target: ${{ fromJSON(needs.dispatch-targets.outputs.non-windows) }}
 82 |     runs-on: ${{ fromJSON(needs.list-targets.outputs.validate_by_target)[matrix.target] }}-latest
 83 |     steps:
 84 |       - name: Download tarball
 85 |         run: gh release -R ${{ github.repository }} download "$RELEASE_TAG" -p mdq-${{ matrix.target }}.tar.gz
 86 |         env:
 87 |           GH_TOKEN: ${{ github.token }}
 88 | 
 89 |       - name: Expand
 90 |         run: tar xzvf mdq-${{ matrix.target }}.tar.gz
 91 | 
 92 |       - name: Run --version
 93 |         id: mdq-version
 94 |         run: |
 95 |           set -euo pipefail
 96 |           mdq_output="$(./mdq --version)"
 97 |           echo "version-b64=$(base64 <<<"$mdq_output")" >> "$GITHUB_OUTPUT"
 98 | 
 99 |       - name: Verify version
100 |         run:
101 |           diff -y <(echo "mdq $VERSION") <(base64 -d <<<"$ACTUAL_VERSION")
102 |         env:
103 |           ACTUAL_VERSION: ${{ steps.mdq-version.outputs.version-b64 }}
104 | 
105 | 
106 |   windows:
107 |     needs: [ list-targets, dispatch-targets ]
108 |     runs-on: ${{ fromJSON(needs.list-targets.outputs.validate_by_target)[matrix.target] }}-latest
109 |     strategy:
110 |       matrix:
111 |         target: ${{ fromJSON(needs.dispatch-targets.outputs.windows) }}
112 |     steps:
113 | 
114 |       - name: Download zip
115 |         run: gh release -R ${{ github.repository }} download ${{ env.RELEASE_TAG}} -p mdq-${{ matrix.target }}.zip
116 |         env:
117 |           GH_TOKEN: ${{ github.token }}
118 | 
119 |       - name: Expand
120 |         run: Expand-Archive mdq-${{ matrix.target }}.zip
121 | 
122 |       # Capture the --version output as JSON (base64 is a pain in powershell) and append it to the step outputs
123 |       - name: Run --version
124 |         id: mdq-version
125 |         run: |
126 |           $version_json = .\mdq-${{ matrix.target }}\mdq.exe --version | ConvertTo-Json
127 |           echo "version_json=$version_json" >> $env:GITHUB_OUTPUT
128 | 
129 |       - name: Verify version
130 |         run: |
131 |           $diff = Compare-Object -CaseSensitive (echo "mdq $env:VERSION" | ConvertTo-Json) $env:ACTUAL
132 |           if ($diff) {
133 |             Write-Output "Difference found:"
134 |             $diff | Format-Table
135 |             exit 1
136 |           }
137 |         env:
138 |           ACTUAL: ${{ steps.mdq-version.outputs.version_json }}
139 | 
140 |   docker:
141 |     runs-on: ubuntu-latest
142 |     steps:
143 |       - name: Calculate tag
144 |         id: tag
145 |         run: |
146 |           tag="${VERSION}"
147 |           if [[ ${{ inputs.docker-rc }} == true ]]; then
148 |             tag="${VERSION}-rc"
149 |           fi
150 |           echo "id=$tag" >> "$GITHUB_OUTPUT"
151 | 
152 |       - name: Pull tag
153 |         run: docker pull "yshavit/mdq:$DOCKER_TAG"
154 |         env:
155 |           DOCKER_TAG: ${{ steps.tag.outputs.id }}
156 | 
157 |       - name: Run --version
158 |         id: mdq-version
159 |         run: |
160 |           mdq_output="$(docker run --rm -i "yshavit/mdq:$DOCKER_TAG" --version)"
161 |           echo "version-b64=$(base64 <<<"$mdq_output")" >> "$GITHUB_OUTPUT"
162 |         env:
163 |           DOCKER_TAG: ${{ steps.tag.outputs.id }}
164 | 
165 |       - name: Verify version
166 |         run:
167 |           diff -y <(echo "mdq $VERSION") <(base64 -d <<<"$ACTUAL_VERSION")
168 |         env:
169 |           ACTUAL_VERSION: ${{ steps.mdq-version.outputs.version-b64 }}
170 | 
171 |   # Downloads the release's assets and verifies the attestation of every file packed inside the *.zip assets.
172 |   attestations:
173 |     runs-on: ubuntu-latest
174 |     steps:
175 |       - name: Download zip # no -p pattern is passed, so the download is not restricted to zip files
176 |         run: gh release -R ${{ github.repository }} download "$RELEASE_TAG"
177 |         env:
178 |           GH_TOKEN: ${{ github.token }}
179 |       # Unpack each zip into its own "<zip>.dir" directory and verify the extracted files against the repository owner.
180 |       - name: Attestations
181 |         run: |
182 |           set -euo pipefail
183 |           for z_file in *.zip; do # NOTE(review): only *.zip is looped over; any .tar.gz assets are not verified here -- confirm intended
184 |             echo "::group::$z_file"
185 |             z_dir="${z_file}.dir"
186 |             mkdir "$z_dir"
187 |             unzip "$z_file" -d "$z_dir"
188 |             echo "Will verify: " "$z_dir"/*
189 |             gh attestation verify -o ${{ github.repository_owner }} "$z_dir"/*
190 |             echo '::endgroup::'
191 |           done
192 |         env:
193 |           GH_TOKEN: ${{ github.token }}
194 | 


--------------------------------------------------------------------------------
/.github/workflows/build-release.yml:
--------------------------------------------------------------------------------
  1 | name: Binaries
  2 | on:
  3 |   push:
  4 |     branches: [ "main" ]
  5 |   workflow_dispatch:
  6 |     inputs:
  7 |       docker-tag-type:
  8 |         description: The docker tag to upload as
  9 |         required: true
 10 |         default: none
 11 |         type: choice
 12 |         options:
 13 |           - latest
 14 |           - stable
 15 |           - rc
 16 |           - none
 17 |   workflow_call:
 18 |     inputs:
 19 |       docker-tag-type:
 20 |         description: The docker tag to upload as
 21 |         required: true
 22 |         type: string
 23 |       branch_name:
 24 |         description: The branch to build against
 25 |         required: true
 26 |         type: string
 27 | 
 28 | env:
 29 |   CARGO_TERM_COLOR: always
 30 | 
 31 | # for attestations
 32 | permissions:
 33 |   id-token: write
 34 |   attestations: write
 35 |   contents: read
 36 | 
 37 | jobs:
 38 |   docker:
 39 |     if: ${{ inputs.docker-tag-type != 'none' }}
 40 |     environment: Docker Hub
 41 |     runs-on: ubuntu-latest
 42 |     steps:
 43 | 
 44 |       - uses: actions/checkout@v4
 45 |         with:
 46 |           ref: ${{ inputs.branch_name }}
 47 |       # Choose the Docker tag: the mdq crate version for "stable", that version plus "-rc" for "rc", otherwise "latest".
 48 |       - name: get tag
 49 |         id: get-tag
 50 |         run: |
 51 |           if [[ "${DOCKER_TAG_TYPE}" = 'stable' || "${DOCKER_TAG_TYPE}" = 'rc' ]]; then
 52 |             tag="$(cargo metadata --format-version 1 | jq -r '.packages[] | select(.name == "mdq") | .version')"
 53 |             if [[ "${DOCKER_TAG_TYPE}" = 'rc' ]]; then
 54 |               tag="$tag-rc"
 55 |             fi
 56 |           else
 57 |             tag=latest
 58 |           fi
 59 |           echo "tag=$tag" >> "$GITHUB_OUTPUT"
 60 |         env:
 61 |           DOCKER_TAG_TYPE: ${{ inputs.docker-tag-type }}
 63 | 
 64 |       - name: Log in to Docker Hub
 65 |         uses: docker/login-action@v3
 66 |         with:
 67 |           username: ${{ vars.DOCKERHUB_USERNAME }}
 68 |           password: ${{ secrets.DOCKERHUB_PAT }}
 69 | 
 70 |       - name: Gather metadata
 71 |         id: meta
 72 |         uses: docker/metadata-action@v5
 73 |         with:
 74 |           images: ${{ vars.DOCKERHUB_USERNAME }}/mdq
 75 | 
 76 |       - name: Build and push
 77 |         uses: docker/build-push-action@v6
 78 |         with:
 79 |           context: .
 80 |           push: true
 81 |           tags: ${{ vars.DOCKERHUB_USERNAME}}/mdq:${{ steps.get-tag.outputs.tag }}
 82 |           labels: ${{ steps.meta.outputs.labels }}
 83 | 
 84 |   build-targets:
 85 |     uses: ./.github/workflows/list-targets.yml
 86 | 
 87 |   build:
 88 |     needs: build-targets
 89 |     strategy:
 90 |       matrix:
 91 |         target: ${{ fromJSON(needs.build-targets.outputs.names) }}
 92 |     runs-on: ${{ fromJSON(needs.build-targets.outputs.build_by_target)[matrix.target] }}-latest
 93 |     env:
 94 |       BUILD_TARGET: ${{ fromJSON(needs.build-targets.outputs.rust_target_by_target)[matrix.target] }}
 95 |     steps:
 96 | 
 97 |       - name: Pick file name
 98 |         shell: bash
 99 |         run: |
100 |           if [[ "$BUILD_TARGET" == *-windows-* ]]; then
101 |             build_file_name=mdq.exe
102 |           else
103 |             build_file_name=mdq
104 |           fi
105 |           echo "BUILD_FILE_NAME=${build_file_name}" >> "$GITHUB_ENV"
106 |           echo "BUILD_FILE_PATH=target/$BUILD_TARGET/release/$build_file_name" >> "$GITHUB_ENV"
107 | 
108 |       - name: rustc version
109 |         run: rustc --version --verbose
110 | 
111 |       - uses: actions/checkout@v4
112 |         with:
113 |           ref: ${{ inputs.branch_name }}
114 | 
115 |       - name: "Cache cargo"
116 |         id: cache-cargo
117 |         uses: "actions/cache@v4"
118 |         with:
119 |           path: |
120 |             ~/.cargo/bin/
121 |             ~/.cargo/registry/index/
122 |             ~/.cargo/registry/cache/
123 |             ~/.cargo/git/db/
124 |             target/
125 |           key: ${{ runner.os }}-${{ matrix.target }}-cargo-${{ hashFiles('**/Cargo.lock') }}
126 | 
127 |       - name: install cross
128 |         run: command -v cross || cargo install cross --git https://github.com/cross-rs/cross
129 |         if: runner.os != 'macOS'
130 |         working-directory: ${{ runner.temp }}
131 |         env:
132 |           GH_TOKEN: ${{ github.token }}
133 | 
134 |       - name: build
135 |         run: |
136 |           set -euo pipefail
137 |           if [[ "$RUNNER_OS" = macOS ]]; then
138 |             build_bin=cargo
139 |           else
140 |             build_bin=cross
141 |           fi
142 |           "$build_bin" build --release --target "$BUILD_TARGET"
143 |         env:
144 |           RUNNER_OS: ${{ runner.os }}
145 | 
146 |       - name: check for any changes in the git tree
147 |         shell: bash
148 |         run: |
149 |           set -euo pipefail
150 |           if [[ -n "$(git status --porcelain)" ]]; then
151 |             echo '::error title=post-build check::changes detected in git tree'
152 |             git status
153 |             exit 1
154 |           fi
155 | 
156 |       - name: chmod
157 |         if: "!contains(matrix.target, 'windows')"
158 |         run: chmod +x "$BUILD_FILE_PATH"
159 | 
160 |       - name: Attest Build Provenance
161 |         uses: actions/attest-build-provenance@v2
162 |         with:
163 |           subject-path: ${{ env.BUILD_FILE_PATH }}
164 | 
165 |       - name: upload binary
166 |         uses: actions/upload-artifact@v4
167 |         with:
168 |           if-no-files-found: error
169 |           name: mdq-${{ matrix.target }}
170 |           path: ${{ env.BUILD_FILE_PATH }}
171 | 


--------------------------------------------------------------------------------
/.github/workflows/coverage.yml:
--------------------------------------------------------------------------------
 1 | name: Coverage
 2 | 
 3 | on:
 4 |   push:
 5 |     branches: [ "main" ]
 6 | 
 7 | jobs:
 8 |   coverage:
 9 |     runs-on: ubuntu-latest
10 |     environment: Code Coverage Badge
11 |     env:
12 |       CARGO_TERM_COLOR: always
13 |     steps:
14 | 
15 |       - uses: actions/checkout@v4
16 | 
17 |       - name: install cargo-tarpaulin
18 |         uses: taiki-e/install-action@v2
19 |         with:
20 |           tool: cargo-tarpaulin
21 |       # Run tarpaulin, surface the line-coverage percentage as a notice, and export it as LINES_PERCENT for later steps.
22 |       - name: Generate code coverage
23 |         run: |
24 |           cargo tarpaulin -o json --output-dir target/
25 |           percent_coverage="$(jq .coverage target/tarpaulin-report.json)" # jq reads the file itself, so a missing report fails the step
26 |           printf '::notice title=Coverage (lines %%)::%s\n' "$percent_coverage"
27 |           echo "LINES_PERCENT=$percent_coverage" >> "$GITHUB_ENV"
28 |         env:
29 |           TEST_TIMEOUT_MULTIPLIER: 10
30 | 
31 |       - name: Upload to gist
32 |         run: ./scripts/percent_to_shields_gist coverage "$LINES_PERCENT"
33 |         env:
34 |           GIST_URL: ${{ vars.COVERAGE_GIST_URL }}
35 |           GH_TOKEN: ${{ secrets.API_TOKEN }}
36 | 
37 |       # Note: the following could technically be a separate job, but the "environment"
38 |       # bit means it counts as a deployment, not just a normal build. It's nice to have
39 |       # just one deployment per job run, so I'm keeping this all in one job.
40 |       # It's pretty quick, anyway.
41 |       - name: Count TODOs
42 |         run: |
43 |           set -euo pipefail
44 |           todos_count="$(
45 |             git ls-tree -r --name-only HEAD |
46 |             grep --fixed-string -v .github/workflows/coverage.yml |
47 |             grep --fixed-string -v README.md |
48 |             (xargs grep todo -i -o || true) |
49 |             wc -l
50 |           )"
51 |           if [ "$todos_count" -eq 0 ]; then
52 |             color=green
53 |           else
54 |             color=orange
55 |           fi
56 |           json_text="$(echo '{}' | 
57 |             jq -c '{schemaVersion: 1, label: $badge_label, color: $color, message: $message}' \
58 |             --arg badge_label "Pending TODOs" --arg color "$color" --arg message "$todos_count")"
59 | 
60 |           printf '::notice title=TODOs Count::%s' "$todos_count"
61 |           gh gist edit "$GIST_URL" <(echo "$json_text")
62 |         env:
63 |           GIST_URL: ${{ vars.TODOS_GIST_URL }}
64 |           GH_TOKEN: ${{ secrets.API_TOKEN }}
65 | 
66 |       - name: Count Ignored Tests
67 |         run: |
68 |           rs_ignored="$(find . -not -path './target/*' -name '*.rs' -exec grep --fixed-strings -Hno '#[ignore]' {} \; | wc -l)"
69 |           integ_ignored="$(cd tests/md_cases/ ; find . -name '*.toml' -exec grep -Hno '^ignore\b' {} \; | wc -l)"
70 |           total_ignored="$(( $rs_ignored + $integ_ignored ))"
71 | 
72 |           if [ "$total_ignored" -eq 0 ]; then
73 |             color=green
74 |           else
75 |             color=orange
76 |           fi
77 |           json_text="$(echo '{}' | 
78 |             jq -c '{schemaVersion: 1, label: $badge_label, color: $color, message: $message}' \
79 |             --arg badge_label "Ignored Tests" --arg color "$color" --arg message "$total_ignored")"
80 | 
81 |           printf '::notice title=Ignored Tests Count::%s' "$total_ignored"
82 |           gh gist edit "$GIST_URL" <(echo "$json_text")
83 |         env:
84 |           GIST_URL: ${{ vars.IGNOREDS_GIST_URL }}
85 |           GH_TOKEN: ${{ secrets.API_TOKEN }}
86 | 


--------------------------------------------------------------------------------
/.github/workflows/formatting.yml:
--------------------------------------------------------------------------------
 1 | name: Formatting
 2 | on:
 3 |   push:
 4 |     branches: [ "main" ]
 5 |   pull_request:
 6 |     branches: [ "main", "feature/*" ]
 7 | 
 8 | jobs:
 9 |   newlines:
10 |     runs-on: ubuntu-latest
11 |     steps:
12 | 
13 |       - uses: actions/checkout@v4
14 | 
15 |       - name: check for carriage returns
16 |         run: |
17 |           set -euo pipefail
18 |           # Scan every git-tracked file; emit an error annotation for each one containing a CR, and fail the step if any did.
19 |           exit_code=0
20 |           while read -r file_name; do
21 |             if grep -q $'\r' "$file_name"; then
22 |                 echo "::error file=$file_name,title=bad newlines::file contains carriage returns"
23 |                 exit_code=1
24 |             fi
25 |           done <<<"$(git ls-files)"
26 |           exit "$exit_code"
27 | 


--------------------------------------------------------------------------------
/.github/workflows/list-targets.yml:
--------------------------------------------------------------------------------
 1 | name: List build targets
 2 | on:
 3 |   workflow_call:
 4 |     outputs:
 5 |       names:
 6 |         description: json array of string target names
 7 |         value: ${{ jobs.list-targets.outputs.names }}
 8 |       build_by_target:
 9 |         description: OSes to use for building, by target
10 |         value: ${{ jobs.list-targets.outputs.build_by_target }}
11 |       validate_by_target:
12 |         description: OSes to use for validating, by target
13 |         value: ${{ jobs.list-targets.outputs.validate_by_target }}
14 |       rust_target_by_target:
15 |         description: Rust target triple to compile for, by target
16 |         value: ${{ jobs.list-targets.outputs.rust_target_by_target }}
17 | 
18 | 
19 | jobs:
20 |   list-targets:
21 |     runs-on: ubuntu-latest
22 |     outputs:
23 |       names: ${{ steps.targets.outputs.names }}
24 |       build_by_target: ${{ steps.targets.outputs.build_by_target }}
25 |       validate_by_target: ${{ steps.targets.outputs.validate_by_target }}
26 |       rust_target_by_target: ${{ steps.targets.outputs.rust_target_by_target }}
27 |     steps:
28 |       - id: targets
29 |         name: List Targets
30 |         run: |
31 |           set -euo pipefail
32 |           # Per target name: the Rust target triple, plus the "build" and "validate" OS values exposed by the outputs below.
33 |           targets='{
34 |             "windows-x64": {
35 |               "rust_target": "x86_64-pc-windows-gnu",
36 |               "build": "ubuntu",
37 |               "validate": "windows"
38 |             },
39 |             "linux-x64": {
40 |               "rust_target": "x86_64-unknown-linux-gnu",
41 |               "build": "ubuntu",
42 |               "validate": "ubuntu"
43 |             },
44 |             "linux-x64-musl": {
45 |               "rust_target": "x86_64-unknown-linux-musl",
46 |               "build": "ubuntu",
47 |               "validate": "ubuntu"
48 |             },
49 |             "macos-arm64": {
50 |               "rust_target": "aarch64-apple-darwin",
51 |               "build": "macos",
52 |               "validate": "macos"
53 |             }
54 |           }'
55 |           # Emit the list of target names, plus one {target: field} map per field, as compact-JSON step outputs.
56 |           set -x
57 |           echo "names=$(<<<"$targets" jq -c keys)" >> "$GITHUB_OUTPUT"
58 |           echo "rust_target_by_target=$(<<<"$targets" jq -c 'with_entries({key: .key, value: (.value.rust_target)})')" >> "$GITHUB_OUTPUT"
59 |           echo "build_by_target=$(<<<"$targets" jq -c 'with_entries({key: .key, value: (.value.build)})')" >> "$GITHUB_OUTPUT"
60 |           echo "validate_by_target=$(<<<"$targets" jq -c 'with_entries({key: .key, value: (.value.validate)})')" >> "$GITHUB_OUTPUT"
61 | 


--------------------------------------------------------------------------------
/.github/workflows/pr-checks.yml:
--------------------------------------------------------------------------------
 1 | name: PR Hygiene
 2 | 
 3 | on:
 4 |   pull_request:
 5 |     branches: [ "main", "feature/*" ]
 6 | 
 7 | jobs:
 8 |   semver:
 9 |     permissions:
10 |       contents: read
11 |     runs-on: ubuntu-latest
12 |     outputs:
13 |       has-breaking-change-label: ${{ steps.check-breaking-label.outputs.has-label }}
14 |     steps:
15 | 
16 |       - name: Check out repo
17 |         uses: actions/checkout@v4
18 | 
19 |       - name: Also fetch target branch
20 |         run: git fetch origin "$GITHUB_BASE_REF"
21 |       # Download the cargo-semver-checks release tarball and unpack it into the runner's temp directory.
22 |       - name: Install semver-checks
23 |         run: |
24 |           set -euo pipefail
25 |           cd "$RUNNER_TEMP"
26 |           gh release -R obi1kenobi/cargo-semver-checks download -p  cargo-semver-checks-x86_64-unknown-linux-gnu.tar.gz  -O - | tar xz
27 |         working-directory: ${{ runner.temp }} # the env context has no RUNNER_TEMP entry; the runner context does
28 |         env:
29 |           GH_TOKEN: ${{ github.token }}
30 | 
31 |       - name: 'Check for "breaking change" label'
32 |         id: check-breaking-label
33 |         run: |
34 |           breaking_change_label_count="$(gh pr view ${{ github.event.number }} --json labels | jq '.labels | map(select(.name == "breaking change")) | length')"
35 |           if [[ "$breaking_change_label_count" == 1 ]]; then
36 |             echo "has-label=true" >> "$GITHUB_OUTPUT"
37 |           else
38 |             echo "has-label=false" >> "$GITHUB_OUTPUT"
39 |           fi
40 |         env:
41 |           GH_TOKEN: ${{ github.token }}
42 | 
43 |       - name: Run semver-checks
44 |         run: |
45 |           if ! "$RUNNER_TEMP/cargo-semver-checks" semver-checks --baseline-rev "origin/$GITHUB_BASE_REF" ; then
46 |             # There were breaking changes. Make sure we have the appropriate label!
47 |             if [[ "${{ steps.check-breaking-label.outputs.has-label }}" != "true" ]]; then
48 |               echo "::error title=semver-checks::semver-checks found breaking changes, but the 'breaking change' label isn't applied. Please add that label."
49 |               exit 1
50 |             else
51 |               echo "::warning title=semver-checks::semver-checks found breaking changes. The 'breaking change' label is applied, so no action needed if this is an acceptable change."
52 |             fi
53 |           fi
54 | 
55 |   breaking-change-docs:
56 |     needs: semver
57 |     permissions:
58 |       contents: read
59 |     runs-on: ubuntu-latest
60 |     steps:
61 |       - name: Fetch PR description
62 |         id: pr-description
63 |         run: |
64 |           pr_description="$(gh pr view ${{ github.event.number }} --repo ${{ github.repository }} --json body -q '.body')"
65 |           delimiter="$(uuidgen | tr -d -)"
66 |           echo "description<<$delimiter" >> "$GITHUB_OUTPUT"
67 |           echo "$pr_description" >> "$GITHUB_OUTPUT"
68 |           echo "$delimiter" >> "$GITHUB_OUTPUT"
69 |         env:
70 |           GH_TOKEN: ${{ github.token }}
71 | 
72 |       - name: Pull mdq Docker image
73 |         run: docker pull yshavit/mdq
74 | 
75 |       - name: 'Check for `# Breaking change` section'
76 |         id: check-breaking-section
77 |         run: |
78 |           if <<<"$PR_DESCRIPTION" docker run --rm -i yshavit/mdq -q '# Breaking change' >/dev/null 2>&1; then
79 |             echo "has-section=true" >> "$GITHUB_OUTPUT"
80 |           else
81 |             echo "has-section=false" >> "$GITHUB_OUTPUT"
82 |           fi
83 |         env:
84 |           PR_DESCRIPTION: ${{ steps.pr-description.outputs.description }}
85 | 
86 |       - name: Check breaking change label and section consistency
87 |         run: |
88 |           has_label="${{ needs.semver.outputs.has-breaking-change-label }}"
89 |           has_section="${{ steps.check-breaking-section.outputs.has-section }}"
90 | 
91 |           if [[ "$has_label" == "true" && "$has_section" == "false" ]]; then
92 |             echo "::error title=breaking-change-docs::PR has 'breaking change' label but is missing '# Breaking change' section in description."
93 |             exit 1
94 |           elif [[ "$has_label" == "false" && "$has_section" == "true" ]]; then
95 |             echo "::error title=breaking-change-docs::PR has '# Breaking change' section in description but is missing 'breaking change' label."
96 |             exit 1
97 |           fi


--------------------------------------------------------------------------------
/.github/workflows/prepare-new-release.yml:
--------------------------------------------------------------------------------
  1 | name: "Release: (01) Prepare"
  2 | 
  3 | on:
  4 |   workflow_dispatch:
  5 |     inputs:
  6 |       version_to_release:
  7 |         description: 'version to cut a release as (e.g. `0.2.0`)'
  8 |         required: true
  9 |         type: string
 10 |       base_ref:
 11 |         description: 'the git ref to go against (e.g. `main`)'
 12 |         required: false
 13 |         type: string
 14 |         default: main
 15 |       base_ref_version:
 16 |         description: 'verify current dev version, w/o "-dev" suffix (e.g. `0.1.0`)'
 17 |         required: true
 18 |         type: string
 19 |       create_pr:
 20 |         description: whether to create the PR for the release
 21 |         required: false
 22 |         type: boolean
 23 |         default: true
 24 | 
 25 | jobs:
 26 |   create_draft:
 27 |     runs-on: ubuntu-latest
 28 |     env:
 29 |       GH_TOKEN: ${{ github.token }}
 30 |       RELEASE_VERSION: ${{ github.event.inputs.version_to_release }}
 31 |       TAG_NAME: "v${{ github.event.inputs.version_to_release }}"
 32 |     permissions:
 33 |       contents: write
 34 |       pull-requests: write
 35 | 
 36 |     steps:
 37 | 
 38 |       - name: Configure git
 39 |         run: |
 40 |           set -euo pipefail
 41 |           git config --global user.name 'github-actions[bot]'
 42 |           git config --global user.email 'github-actions[bot]@users.noreply.github.com'
 43 | 
 44 |       - uses: actions/checkout@v4
 45 |         with:
 46 |           ref: ${{ github.event.inputs.base_ref }}
 47 | 
 48 |       - name: Check that tag is available
 49 |         run: |
 50 |           set -euo pipefail
 51 |           if gh release view "$TAG_NAME" ; then
 52 |             echo "::error ::tag $TAG_NAME already exists"
 53 |             exit 1
 54 |           fi
 55 | 
 56 |       - name: Check Cargo.toml version
 57 |         run: |
 58 |           set -euo pipefail
 59 |           toml_current_version=$(grep '^version' Cargo.toml | sed 's/version = "\(.*\)"/\1/')
 60 |           expect_version="${VERIFY_VERSION}-dev"
 61 |           if [[ "$toml_current_version" != "$expect_version" ]]; then
 62 |             echo "::error title=bad version::Expected version $expect_version does not match current version $toml_current_version."
 63 |             exit 1
 64 |           fi
 65 |         env:
 66 |           VERIFY_VERSION: ${{ github.event.inputs.base_ref_version }}
 67 | 
 68 |       - name: Update Cargo.toml
 69 |         run: |
 70 |           set -euo pipefail  # RELEASE_VERSION is read from the job env below, so input text is never parsed as shell
 71 |           sed -i "s/^version = \".*\"/version = \"$RELEASE_VERSION\"/" Cargo.toml
 72 |           cargo metadata >/dev/null
 73 | 
 74 |       - name: Commit change
 75 |         run: git commit -am "bump version to $RELEASE_VERSION"
 76 | 
 77 |       - name: Push to branch
 78 |         run: |
 79 |           set -euo pipefail
 80 |           git checkout -b "pending-releases/$RELEASE_VERSION"
 81 |           git push --set-upstream origin "pending-releases/$RELEASE_VERSION"
 82 | 
 83 |       - name: Create new release
 84 |         id: create_release
 85 |         run: |
 86 |           gh release create "$TAG_NAME" --draft --title "$TAG_NAME" --generate-notes --target "pending-releases/$RELEASE_VERSION"
 87 | 
 88 |   upload_assets:
 89 |     needs: create_draft
 90 |     uses: ./.github/workflows/release-assets.yml
 91 |     permissions:
 92 |       id-token: write
 93 |       attestations: write
 94 |       contents: write
 95 |     secrets: inherit
 96 |     with:
 97 |       branch_name: "pending-releases/${{ github.event.inputs.version_to_release }}"
 98 |       target_branch_name: ${{ github.event.inputs.base_ref }}
 99 | 
100 |   open_pr:
101 |     if: inputs.create_pr
102 |     runs-on: ubuntu-latest
103 |     env:
104 |       GH_TOKEN: ${{ github.token }}
105 |       RELEASE_VERSION: ${{ github.event.inputs.version_to_release }}
106 |     permissions:
107 |       pull-requests: write
108 |     needs: [ create_draft, upload_assets ]
109 |     steps:
110 | 
111 |       - uses: actions/checkout@v4
112 |         with:
113 |           ref: pending-releases/${{ env.RELEASE_VERSION }}
114 | 
115 |       - name: Open PR
116 |         run: |
117 |           set -euo pipefail
118 | 
119 |           body_text="$(<.github/gha_assets/release_pr_template.md sed "s/{{RELEASE_VERSION}}/$RELEASE_VERSION/g")"
120 |           gh pr create --title "Release v${RELEASE_VERSION}" --body "$body_text" --base "$TARGET_REF"
121 |         env:
122 |           TARGET_REF: ${{ github.event.inputs.base_ref }}
123 | 


--------------------------------------------------------------------------------
/.github/workflows/readme-checks.yml:
--------------------------------------------------------------------------------
  1 | name: Documentation
  2 | 
  3 | on:
  4 |   push:
  5 |     branches: [ "main" ]
  6 |   workflow_dispatch: { }
  7 | 
  8 | env:
  9 |   CARGO_TERM_COLOR: always
 10 | 
 11 | jobs:
 12 |   check-msrv:
 13 |     runs-on: ubuntu-latest
 14 |     steps:
 15 | 
 16 |       - uses: actions/checkout@v4
 17 | 
 18 |       - run: docker pull yshavit/mdq
 19 | 
 20 |       - name: pull cargo-msrv from docker hub
 21 |         run: docker pull foresterre/cargo-msrv
 22 | 
 23 |       - name: find minimum supported rust version
 24 |         id: run-msrv
 25 |         run: |
 26 |           set -euo pipefail
 27 |           min_version="$(docker run --rm -t -v "$PWD/":/app/ foresterre/cargo-msrv find --no-log --output-format minimal | tr -d $'\r\n')"
 28 |           echo "::notice title=cargo-msrv::$min_version"
 29 |           echo "result=$min_version" >> "$GITHUB_OUTPUT"
 30 | 
 31 |       - name: check versions in readme
 32 |         id: readme-version
 33 |         run: |
 34 |           set -euo pipefail
 35 |           exit_code=0
 36 |           while IFS=: read -r file line_no version_contents; do
 37 |             found_rustc_version="${version_contents//rustc >= /}"
 38 |             if [[ "$found_rustc_version" == "$MSRV_VERSION" ]]; then
 39 |               echo "::notice file=$file,line=$line_no,title=version::✅ $found_rustc_version"
 40 |             else
 41 |               echo "::error file=$file,line=$line_no,title=version::$found_rustc_version should have been $MSRV_VERSION"
 42 |               exit_code=1
 43 |             fi
 44 |           done <<<"$(grep -HnoE 'rustc >= \S+' README.md)"
 45 |           exit "$exit_code"
 46 |         env:
 47 |           MSRV_VERSION: ${{ steps.run-msrv.outputs.result }}
 48 | 
 49 |       - name: check version in Cargo.toml
 50 |         run: |
 51 |           msrv_toml="$(cargo metadata --no-deps --format-version 1 | jq -r '.packages[].rust_version')"
 52 |           if [[ "$msrv_toml" != "$MSRV_VERSION" ]]; then
 53 |             grep -n '^rust-version =' Cargo.toml | while read -r line; do
 54 |               line_no="$(<<<"$line" cut -f1 -d:)"
 55 |               echo "::error file=Cargo.toml,line=$line_no,title=Cargo.toml::rust-version should have been $MSRV_VERSION"
 56 |             done
 57 |             exit 1
 58 |           fi
 59 |         env:
 60 |           MSRV_VERSION: ${{ steps.run-msrv.outputs.result }}
 61 | 
 62 | 
 63 |   
 64 |   build-crate-docs:
 65 |     runs-on: ubuntu-latest
 66 |     steps:
 67 | 
 68 |       - name: Checkout
 69 |         uses: actions/checkout@v4
 70 | 
 71 |       - name: Build docs
 72 |         run: cargo doc --no-deps
 73 | 
 74 |       - name: Add redirect page
 75 |         run: cp .github/gha_assets/crate-doc-root-redirect.html target/doc/index.html
 76 | 
 77 |       - name: Upload artifact
 78 |         uses: actions/upload-pages-artifact@v3
 79 |         with:
 80 |           path: target/doc
 81 | 
 82 |   deploy-crate-docs:
 83 |     needs: build-crate-docs
 84 |     runs-on: ubuntu-latest
 85 |     environment:
 86 |       name: github-pages
 87 |       url: ${{ steps.deployment.outputs.page_url }}
 88 |     concurrency:
 89 |       group: "pages"
 90 |       cancel-in-progress: false
 91 |     permissions:
 92 |       contents: read
 93 |       pages: write
 94 |       id-token: write
 95 |     steps:
 96 | 
 97 |       - name: Deploy to GitHub Pages
 98 |         id: deployment
 99 |         uses: actions/deploy-pages@v4
100 | 


--------------------------------------------------------------------------------
/.github/workflows/release-assets.yml:
--------------------------------------------------------------------------------
  1 | name: "Release: (02) Assets"
  2 | on:
  3 |   pull_request:
  4 |     types: [ "synchronize" ]
  5 |     branches: [ "main" ]
  6 |     paths:
  7 |       - Cargo.toml
  8 |   workflow_dispatch:
  9 |     inputs:
 10 |       branch_name:
 11 |         description: "The branch to monitor"
 12 |         type: string
 13 |         required: true
 14 |   workflow_call:
 15 |     inputs:
 16 |       branch_name:
 17 |         description: "The branch to monitor"
 18 |         type: string
 19 |         required: true
 20 |       target_branch_name:
 21 |         description: "The branch that will be merged into; if not provided, will fetch from branch_name's base"
 22 |         type: string
 23 |         required: false
 24 | 
 25 | # for attestations in build-release.yml
 26 | permissions:
 27 |   id-token: write
 28 |   attestations: write
 29 |   contents: write # needed for viewing and uploading to the draft release
 30 | 
 31 | jobs:
 32 | 
 33 |   validate:
 34 |     if: startsWith(inputs.branch_name || github.head_ref, 'pending-releases/')
 35 |     env:
 36 |       GH_TOKEN: ${{ github.token }}
 37 |       BRANCH_NAME: ${{ inputs.branch_name || github.head_ref }}
 38 |       TARGET_BRANCH: ${{ inputs.target_branch_name }}
 39 |     runs-on: ubuntu-latest
 40 |     outputs:
 41 |       version: ${{ steps.parse_branch.outputs.version }}
 42 |       version_tag: ${{ steps.parse_branch.outputs.version_tag }}
 43 |       branch_name: ${{ env.BRANCH_NAME }}
 44 |     
 45 |     steps:
 46 | 
 47 |       - name: Parse branch name
 48 |         id: parse_branch
 49 |         run: |
 50 |           set -euo pipefail
 51 |           
 52 |           release_version="$(<<<"$BRANCH_NAME" sed 's/.*\///')"
 53 |           if [[ -z "$release_version" ]]; then
 54 |             echo "::error title=invalid branch name::$BRANCH_NAME isn't \"*/<version>\""
 55 |             exit 1
 56 |           fi
 57 |           set -x
 58 |           echo "version=$release_version" >> "$GITHUB_OUTPUT"
 59 |           echo "version_tag=v$release_version" >> "$GITHUB_OUTPUT"
 60 | 
 61 |       - uses: actions/checkout@v4
 62 |         with:
 63 |           ref: ${{ env.BRANCH_NAME }}
 64 |           fetch-depth: '50' # all we really need is >1, but this can give us additional context
 65 | 
 66 |       - name: Verify release
 67 |         run: gh release view "$VERSION_TAG"
 68 |         env:
 69 |           VERSION_TAG: ${{ steps.parse_branch.outputs.version_tag }}
 70 | 
 71 |       - name: Get target branch name from PR if needed
 72 |         if: ${{ ! env.TARGET_BRANCH }}
 73 |         run: |
 74 |           set -euo pipefail
 75 |           target_name="$(gh pr view "$BRANCH_NAME" --json baseRefName | jq -r .baseRefName)"
 76 |           echo "TARGET_BRANCH=$target_name" >> "$GITHUB_ENV"
 77 | 
 78 |       - name: Fetch target branch
 79 |         run: git fetch origin "$TARGET_BRANCH"
 80 | 
 81 |       - name: Check for exactly one commit
 82 |         run: |
 83 |           set -euo pipefail
 84 | 
 85 |           # Count with rev-list: an empty range reports 0, whereas wc -l on a here-string reports 1 for empty input.
 86 |           commit_count="$(git rev-list --count "origin/$TARGET_BRANCH..HEAD")"
 87 |           if [[ "$commit_count" -ne 1 ]]; then
 88 |             echo "::error title=invalid branch state::require exactly one commit between $TARGET_BRANCH and $BRANCH_NAME: found $commit_count"
 89 |             exit 1
 90 |           fi
 91 | 
 92 |   build:
 93 |     needs: validate
 94 |     uses: ./.github/workflows/build-release.yml
 95 |     secrets: inherit
 96 |     with:
 97 |       docker-tag-type: rc
 98 |       branch_name: ${{ needs.validate.outputs.branch_name }}
 99 | 
100 |   upload:
101 |     needs: [ build, validate ]
102 |     runs-on: ubuntu-latest
103 |     env:
104 |       GH_TOKEN: ${{ github.token }}
105 |     steps:
106 | 
107 |       - name: "Download binaries from job"
108 |         id: download
109 |         uses: actions/download-artifact@v4
110 |         with:
111 |           pattern: mdq-*
112 |           merge-multiple: 'false'
113 |           path: from_build
114 | 
115 |       - name: "Zip up the files"
116 |         run: |
117 |           set -euo pipefail
118 |           mkdir to_upload
119 |           to_upload="$PWD/to_upload"
120 |           set -x
121 |           for f in $(cd "$DOWNLOAD_DIR"; ls -1) ; do
122 |             pushd "from_build/$f"
123 |             if [[ -e mdq ]]; then
124 |               chmod +x mdq
125 |             fi
126 |             if [[ "$f" == *-windows-* ]]; then
127 |               zip -r "$f.zip" *
128 |               mv "$f.zip" "$to_upload"
129 |             else
130 |               tar -czvf "$f.tar.gz" *
131 |               mv "$f.tar.gz" "$to_upload"
132 |             fi
133 |             popd
134 |           done
135 |         env:
136 |           DOWNLOAD_DIR: ${{ steps.download.outputs.download-path }}
137 | 
138 |       - name: "Upload binaries to release"
139 |         run:
140 |           gh release -R "$REPO_NAME" upload --clobber "$VERSION_TAG" to_upload/*
141 |         env:
142 |           REPO_NAME: ${{ github.repository }}
143 |           VERSION_TAG: ${{ needs.validate.outputs.version_tag }}
144 | 
145 |   verify-binaries:
146 |     needs: [ validate, upload ]
147 |     permissions:
148 |       contents: write
149 |     uses: ./.github/workflows/binary-verify.yml
150 |     with:
151 |       version: ${{ needs.validate.outputs.version }}
152 |       docker-rc: true # since they won't have been published yet
153 | 


--------------------------------------------------------------------------------
/.github/workflows/release-publish.yml:
--------------------------------------------------------------------------------
  1 | name: "Release: (03) Publish"
  2 | on:
  3 |   workflow_dispatch:
  4 |     inputs:
  5 |       version:
  6 |         description: "The version to release, e.g. \"0.1.2\""
  7 |         type: string
  8 |         required: true
  9 |       next-version:
 10 |         description: "The next revision to set. Will have \"-dev\" appended"
 11 |         type: string
 12 |         required: true
 13 | 
 14 | env:
 15 |   RELEASE_VERSION: ${{ inputs.version }}
 16 |   NEXT_VERSION: "${{ inputs.next-version }}-dev"
 17 |   BRANCH_NAME: "pending-releases/${{ inputs.version }}"
 18 |   DOCKER_RC_TAG: "yshavit/mdq:${{ inputs.version }}-rc"
 19 |   DOCKER_PUBLISH_TAG: "yshavit/mdq:${{ inputs.version }}"
 20 | 
 21 | jobs:
 22 |   verify:
 23 |     runs-on: ubuntu-latest
 24 |     env:
 25 |       GH_TOKEN: ${{ github.token }}
 26 |     outputs:
 27 |       pr-json: ${{ steps.pr-info.outputs.json }}
 28 |     steps:
 29 |       - name: Get PR Info
 30 |         id: pr-info
 31 |         run: |
 32 |           set -euo pipefail
 33 |           pr_json="$(gh pr view -R "$REPO_NAME" "$BRANCH_NAME" --json mergeStateStatus,body,baseRefName)"
 34 |           <<<"$pr_json" jq .  # pretty-print to the job log; the here-string must be jq's own stdin, not piped
 35 |           echo "json=$pr_json" >> "$GITHUB_OUTPUT"
 36 |         env:
 37 |           REPO_NAME: ${{ github.repository }}
 38 | 
 39 |       - name: Validate PR status
 40 |         run: |
 41 |           set -euo pipefail
 42 |           merge_status="$(<<<"$PR_JSON" jq -r .mergeStateStatus)"
 43 |           if [[ "$merge_status" != CLEAN ]]; then
 44 |             echo "::error title=invalid branch state::require CLEAN, saw $merge_status"
 45 |             exit 1
 46 |           fi
 47 |         env:
 48 |           PR_JSON: ${{ steps.pr-info.outputs.json }}
 49 | 
 50 |       - name: Docker pull
 51 |         run: docker pull "$DOCKER_RC_TAG"
 52 | 
 53 |       - name: Look for unfinished checkbox items
 54 |         run: |
 55 |           exit_status=0
 56 |           while read -r line ; do
 57 |             echo "::error title=unfinished task::$line"
 58 |             exit_status=1
 59 |           done < <(echo "$PR_JSON" | jq -r .body | docker run --rm -i "$DOCKER_RC_TAG" -o plain '- [ ]')
 60 |           exit "$exit_status"
 61 |         env:
 62 |           PR_JSON: ${{ steps.pr-info.outputs.json }}
 63 | 
 64 |   docker:
 65 |     environment: Docker Hub
 66 |     needs: verify
 67 |     runs-on: ubuntu-latest
 68 |     steps:
 69 | 
 70 |       - name: Log in to Docker Hub
 71 |         uses: docker/login-action@v3
 72 |         with:
 73 |           username: ${{ vars.DOCKERHUB_USERNAME }}
 74 |           password: ${{ secrets.DOCKERHUB_PAT }}
 75 | 
 76 |       - name: Pull
 77 |         run: docker pull "$DOCKER_RC_TAG"
 78 | 
 79 |       - name: Retag
 80 |         run: docker tag "$DOCKER_RC_TAG" "$DOCKER_PUBLISH_TAG"
 81 | 
 82 |       - name: Push
 83 |         run: docker push "$DOCKER_PUBLISH_TAG"
 84 | 
 85 |   github:
 86 | 
 87 |     needs: [ verify, docker ]
 88 |     runs-on: ubuntu-latest
 89 |     permissions:
 90 |       contents: write
 91 |       pull-requests: write
 92 | 
 93 |     steps:
 94 | 
 95 |       - name: Get PR base ref
 96 |         id: base-ref
 97 |         run: echo "name=$(echo "$PR_JSON" | jq -r .baseRefName)" >> "$GITHUB_OUTPUT"
 98 |         env:
 99 |           PR_JSON: ${{ needs.verify.outputs.pr-json }}
100 | 
101 |       - uses: actions/checkout@v4
102 |         with:
103 |           ref: ${{ steps.base-ref.outputs.name }}
104 | 
105 |       - name: Fetch release branch
106 |         run: git fetch origin "$BRANCH_NAME"
107 | 
108 |       - name: Git FF main
109 |         run: git merge --ff-only "origin/$BRANCH_NAME"
110 | 
111 |       - name: Git push
112 |         run: git push origin "$TARGET_REF"
113 |         env:
114 |           TARGET_REF: ${{ steps.base-ref.outputs.name }}
115 | 
116 |       - name: Publish Release
117 |         run: gh release edit "v$RELEASE_VERSION" --draft=false
118 |         env:
119 |           GH_TOKEN: ${{ github.token }}
120 | 
121 |       - name: Update Cargo.toml
122 |         run: |
123 |           set -euo pipefail  # NEXT_VERSION is read from the workflow-level env, so input text is never parsed as shell
124 |           sed -i "s/^version = \".*\"/version = \"$NEXT_VERSION\"/" Cargo.toml
125 |           cargo metadata >/dev/null
126 | 
127 |       - name: Configure git
128 |         run: |
129 |           set -euo pipefail
130 |           git config --global user.name 'github-actions[bot]'
131 |           git config --global user.email 'github-actions[bot]@users.noreply.github.com'
132 | 
133 |       - name: Commit change
134 |         run: git commit -am "bump version to $NEXT_VERSION"
135 | 
136 |       - name: Push to branch
137 |         run: |
138 |           set -euo pipefail
139 |           git checkout -b "prepare-$NEXT_VERSION"
140 |           git push --set-upstream origin "prepare-$NEXT_VERSION"
141 | 
142 |       - name: Open PR
143 |         run: |
144 |           set -euo pipefail
145 |           gh pr create --title "Bump version to $NEXT_VERSION" --body "Created by release-publish.yml" --base "$TARGET_REF"
146 |         env:
147 |           TARGET_REF: ${{ steps.base-ref.outputs.name }}
148 |           GH_TOKEN: ${{ github.token }}
149 | 
150 |       - name: Push to target ref
151 |         run: |
152 |           git checkout -B "$TARGET_REF" "origin/$TARGET_REF"
153 |           git merge --ff-only "prepare-$NEXT_VERSION"
154 |           git push
155 |         env:
156 |           TARGET_REF: ${{ steps.base-ref.outputs.name }}
157 | 
158 |   crates-io:
159 |     needs: github
160 |     environment: "crates.io"
161 |     runs-on: ubuntu-latest
162 |     steps:
163 | 
164 |       - uses: actions/checkout@v4
165 |         with:
166 |           ref: "v${{ env.RELEASE_VERSION }}"
167 | 
168 |       - name: cargo login
169 |         run: cargo login <<<"$CRATESIO_API_TOKEN"
170 |         env:
171 |           CRATESIO_API_TOKEN: ${{ secrets.CRATESIO_API_TOKEN }}
172 | 
173 |       - name: Publish
174 |         run: cargo publish
175 | 
176 | 


--------------------------------------------------------------------------------
/.github/workflows/rust.yml:
--------------------------------------------------------------------------------
  1 | name: Rust
  2 | 
  3 | on:
  4 |   push:
  5 |     branches: [ "main" ]
  6 |   pull_request:
  7 |     branches: [ "main", "feature/*" ]
  8 | 
  9 | env:
 10 |   CARGO_TERM_COLOR: always
 11 | 
 12 | jobs:
 13 |   build:
 14 |     runs-on: ubuntu-latest
 15 |     steps:
 16 | 
 17 |       - uses: actions/checkout@v4
 18 | 
 19 |       - name: cargo build
 20 |         run: RUSTFLAGS=-Awarnings scripts/cargo_to_gh rustc --message-format json
 21 | 
 22 |       - name: check for any changes in the git tree
 23 |         run: |
 24 |           exit_code=0
 25 |           while read -r status_code file_path; do
 26 |             if [[ -z "$status_code" ]]; then
 27 |               continue
 28 |             fi
 29 |             exit_code=1
 30 |             echo "::error file=$file_path,title=git-status::<$status_code> $file_path"
 31 |           done <<<"$(git status --porcelain)"
 32 |           exit "$exit_code"
 33 | 
 34 |   check:
 35 |     strategy:
 36 |       matrix:
 37 |         tool: [ check, clippy ]
 38 |     runs-on: ubuntu-latest
 39 |     needs: build
 40 |     steps:
 41 |       - uses: actions/checkout@v4
 42 |       - name: cargo ${{ matrix.tool }}
 43 |         run: scripts/cargo_to_gh "$CHECK_TOOL"
 44 |         env:
 45 |           CHECK_TOOL: ${{ matrix.tool }}
 46 |       - name: cargo ${{ matrix.tool }} (no special formatting)
 47 |         if: failure()
 48 |         run: cargo "$CHECK_TOOL"
 49 |         env:
 50 |           CHECK_TOOL: ${{ matrix.tool }}
 51 | 
 52 |   test:
 53 |     runs-on: ubuntu-latest
 54 |     needs: build
 55 |     steps:
 56 | 
 57 |       - uses: actions/checkout@v4
 58 | 
 59 |       - name: cargo test
 60 |         run: |
 61 |           # convert e.g. "thread 'fmt_str::tests::text_html' panicked at src/fmt_str.rs:75:9:" to
 62 |           #              "::error file=src/fmt_str.rs,line=75,col=9,title=test failure::at fmt_str::tests::text_html"
 63 |           set -o pipefail
 64 |           cargo test --verbose | sed -E "s/thread '([^']+)' panicked at ([^:]+):([0-9]+):([0-9]+):$/::error file=\\2,line=\\3,col=\\4,title=test failure::at \\1/"
 65 | 
 66 |       - name: list ignored tests
 67 |         run: |
 68 |           (find . -name '*.rs' -exec grep --fixed-strings -Hno '#[ignore]' {} \; || true) | sed -E 's/^([^:]+):([^:]+):.*/::warning file=\1,line=\2,title=Ignored test::Regex indicates this test is probably ignored/'
 69 | 
 70 |       - name: check ignored tests all fail
 71 |         run: |
 72 |           exit_code=0
 73 |           while IFS= read -r line; do
 74 |             printf "::error title=Ignored test is passing::%s but expected failure because it's ignored"'\n' "$line"
 75 |             exit_code=1
 76 |           done < <(cargo test -- --ignored --color never | grep '\.\.\. ok$' || true)
 77 |           exit "$exit_code"
 78 | 
 79 |   fmt:
 80 |     needs: build
 81 |     runs-on: ubuntu-latest
 82 |     steps:
 83 |       - uses: actions/checkout@v4
 84 |       - name: cargo fmt
 85 |         run: cargo fmt --check
 86 | 
 87 |   vis-keywords:
 88 |     needs: build
 89 |     runs-on: ubuntu-latest
 90 |     permissions:
 91 |       contents: read
 92 |     steps:
 93 |       - uses: actions/checkout@v4
 94 | 
 95 |       - name: check file
 96 |         run: file scripts/flatten-rustdoc-json
 97 | 
 98 |       - name: cargo docs (public only)
 99 |         run: cargo +nightly rustdoc --lib -- -Zunstable-options --output-format json
100 | 
101 |       - name: flatten rustdoc json (public only)
102 |         run: scripts/flatten-rustdoc-json > target/items-public-only.txt
103 | 
104 |       - name: cargo docs (all)
105 |         run: cargo +nightly rustdoc --lib -- -Zunstable-options --output-format json --document-private-items
106 | 
107 |       - name: flatten rustdoc json (all)
108 |         run: IGNORE_ITEMS='enum mdq::query::pest::Rule' scripts/flatten-rustdoc-json > target/items-all.txt
109 | 
110 |       - name: compare
111 |         run: |
112 |           if diff -y --suppress-common-lines target/items-public-only.txt target/items-all.txt; then
113 |             echo "::notice title=pub-vis::all pub items are actually public"
114 |           else
115 |             echo "::error title=pub-vis::some items marked pub are not actually public"
116 |             exit 1
117 |           fi
118 | 


--------------------------------------------------------------------------------
/.github/workflows/system-test.yml:
--------------------------------------------------------------------------------
 1 | name: System tests
 2 | on:
 3 |   push:
 4 |     branches: [ "main" ]
 5 |   pull_request:
 6 |     branches: [ "main", "feature/*" ]
 7 |   workflow_dispatch: { }
 8 | 
 9 | jobs:
10 |   toml-cases:
11 |     runs-on: ubuntu-latest
12 |     steps:
13 | 
14 |       - name: Checkout code
15 |         uses: actions/checkout@v4
16 | 
17 |       - uses: actions/cache@v4 # after checkout, so hashFiles() can see Cargo.lock and the restored target/ is kept
18 |         with:
19 |           path: |
20 |             ~/.cargo/bin/
21 |             ~/.cargo/registry/index/
22 |             ~/.cargo/registry/cache/
23 |             ~/.cargo/git/db/
24 |             target/
25 |           key: ${{ runner.os }}-cargo-${{ hashFiles('**/Cargo.lock') }}
26 | 
27 |       - name: Build mdq
28 |         run: cargo build
29 | 
30 |       - name: Install toml2json
31 |         run: command -v toml2json || cargo install toml2json
32 | 
33 |       - name: Run tests
34 |         run: |
35 |           set -euo pipefail
36 | 
37 |           test_failures=0
38 | 
39 |           # Run each test toml file
40 |           for test_file in tests/md_cases/*.toml; do
41 |             ./scripts/system_test ./target/debug/mdq "$test_file" || test_failures=$((test_failures + 1))
42 |           done
43 | 
44 |           # Report any failures
45 |           if [ "$test_failures" -ne 0 ]; then
46 |             echo "::error title=failures::$test_failures test(s) failed"
47 |             exit 1
48 |           fi
49 | 


--------------------------------------------------------------------------------
/.gitignore:
--------------------------------------------------------------------------------
 1 | # Generated by Cargo
 2 | # will have compiled files and executables
 3 | debug/
 4 | target/
 5 | 
 6 | # Remove Cargo.lock from gitignore if creating an executable, leave it for libraries
 7 | # More information here https://doc.rust-lang.org/cargo/guide/cargo-toml-vs-cargo-lock.html
 8 | Cargo.lock
 9 | 
10 | # These are backup files generated by rustfmt
11 | **/*.rs.bk
12 | 
13 | # MSVC Windows builds of rustc generate these, which store debugging information
14 | *.pdb
15 | 


--------------------------------------------------------------------------------
/Cargo.toml:
--------------------------------------------------------------------------------
 1 | [package]
 2 | authors = ["Yuval Shavit <dev@yuvalshavit.com>"]
 3 | name = "mdq"
 4 | version = "0.7.3-dev"
 5 | edition = "2021"
 6 | license = "MIT OR Apache-2.0"
 7 | description = "Select and render specific elements in a Markdown document"
 8 | repository = "https://github.com/yshavit/mdq"
 9 | keywords = ["markdown", "parsing"]
10 | categories = ["command-line-utilities", "text-editors"]
11 | rust-version = "1.78.0"
12 | 
13 | [dependencies]
14 | clap = { version = "4.5.7", features = ["derive"] }
15 | derive_builder = "0.20.2"
16 | markdown = "1.0.0"
17 | memchr = "2.7.4"
18 | paste = "1.0"
19 | pest = "2.8"
20 | pest_derive = { version = "2.8", features = ["grammar-extras"] }
21 | fancy-regex = "0.14"
22 | serde = { version = "1", features = ["derive"] }
23 | serde_json = "1.0"
24 | 
25 | [dev-dependencies]
26 | indoc = "2"
27 | lazy_static = "1.4.0"
28 | 
29 | [build-dependencies]
30 | indoc = "2"
31 | serde = { version = "1", features = ["derive"] }
32 | toml = "0.8"
33 | 


--------------------------------------------------------------------------------
/Dockerfile:
--------------------------------------------------------------------------------
 1 | FROM rust:alpine AS builder
 2 | 
 3 | WORKDIR /usr/src/app
 4 | RUN apk add --no-cache build-base
 5 | 
 6 | COPY Cargo.toml Cargo.lock ./
 7 | RUN mkdir src && echo "fn main() {}" > src/main.rs
 8 | RUN cargo build --release && rm -rf src 
 9 | 
10 | COPY . .
11 | RUN cargo build --release
12 | 
13 | FROM alpine:latest
14 | 
15 | COPY --from=builder /usr/src/app/target/release/mdq .
16 | 
17 | RUN chmod +x mdq
18 | 
19 | ENTRYPOINT ["./mdq"]
20 | 


--------------------------------------------------------------------------------
/LICENSE-MIT:
--------------------------------------------------------------------------------
 1 | MIT License
 2 | 
 3 | Copyright (c) 2024 Yuval Shavit
 4 | 
 5 | Permission is hereby granted, free of charge, to any person obtaining a copy
 6 | of this software and associated documentation files (the "Software"), to deal
 7 | in the Software without restriction, including without limitation the rights
 8 | to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
 9 | copies of the Software, and to permit persons to whom the Software is
10 | furnished to do so, subject to the following conditions:
11 | 
12 | The above copyright notice and this permission notice shall be included in all
13 | copies or substantial portions of the Software.
14 | 
15 | THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
16 | IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
17 | FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
18 | AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
19 | LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
20 | OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
21 | SOFTWARE.
22 | 


--------------------------------------------------------------------------------
/examples/hello.md:
--------------------------------------------------------------------------------
  1 | ---
  2 | title: My example
  3 | ---
  4 | 
  5 | # Greetings
  6 | 
  7 | ![welcome image](https://example.com/welcome.png)
  8 | 
  9 | How are you!
 10 | 
 11 | *I'm* doing well.
 12 | 
 13 | ## Hello there
 14 | 
 15 | In fact, I'm doing very well!
 16 | This line isn't a new paragraph, just some
 17 | wrapping that mdq should unwrap.
 18 | 
 19 | ### Sub-section with link
 20 | 
 21 | This is [my referenced link][a1].
 22 | 
 23 | [a1]: https://example.com/reference
 24 | 
 25 | # Details
 26 | 
 27 | ### Here's a *cool* table
 28 | 
 29 | | Column Left | Column Middle | Column Right |
 30 | |:------------|:-------------:|-------------:|
 31 | | (           |       v       |            ) |
 32 | 
 33 | This is [my inline link](https://example.com/inline).
 34 | 
 35 | This is [my inline link with title](https://example.com/inline "its title").
 36 | 
 37 | This is [my referenced link with title][a2].
 38 | 
 39 | This is [a collapsed link][], and this is [a shortcut link].
 40 | 
 41 | [a collapsed link]: https://example.com/collapsed
 42 | [a shortcut link]: https://example.com/shortcut "and it has a title"
 43 | 
 44 | ## Hello lists
 45 | 
 46 | 1. List one
 47 | 2. List two with...
 48 | 
 49 |    ...two paragraphs
 50 | 
 51 | - Item a[^1]
 52 | 
 53 | - [x] checked
 54 | - [ ] unchecked and contains a [link within a task](https://example.com/task)
 55 | 
 56 | [a2]: https://example.com/reference "from the previous section"
 57 | [^1]: interesting footnote
 58 | 
 59 | > This is so great
 60 | 
 61 | ## Foo
 62 | 
 63 | > I say:
 64 | > ```types
 65 | > here is some code
 66 | > it is
 67 | > 
 68 | > fun
 69 | > ```
 70 | 
 71 | There's also a
 72 | 
 73 | ```text title="Code block with metadata"
 74 | Block A
 75 | ```
 76 | 
 77 | and
 78 | 
 79 | ``` title="Code block with only metadata"
 80 | Block B
 81 | ```
 82 | 
 83 | # Second Section
 84 | 
 85 | ## Heading `with inline code` again
 86 | 
 87 | 1. countit
 88 | 2. [ ] fizz
 89 | 
 90 | - maybe
 91 | - [x] yes _but_ maybe not
 92 | - [ ] no
 93 |   ```
 94 |   foo
 95 |   bar
 96 |   ```
 97 | 
 98 | - again
 99 | 
100 | ## List details
101 | 
102 | 1. One
103 | 1.
104 |     2. Two
105 | 1. s
106 | 1. Four
107 | 1. ```
108 |    foo
109 |    ```
110 | 1.
111 | 
112 | ```
113 | foo
114 | ```
115 | 
116 | ## Html Stuff
117 | 
118 | First some <span>inline</span>.
119 | 
120 | <div class="then some"
121 | href="#block-level">
122 | 
123 | With paragraph text between it.
124 | 
125 | </div>
126 | 


--------------------------------------------------------------------------------
/githooks/post-commit:
--------------------------------------------------------------------------------
 1 | #!/bin/bash
 2 | # Post-commit hook: warns, without failing, when the code needs `cargo fmt` or has `cargo check` diagnostics.
 3 | set -euo pipefail
 4 | 
 5 | if ! cargo fmt --check &>/dev/null ; then
 6 |   echo >&2 "WARNING: Need to run cargo fmt"
 7 | fi
 8 | 
 9 | # Capture cargo's output before grepping it, rather than piping into `grep -q`: grep exits at its first match, and
10 | # under pipefail a writer that's killed by the resulting SIGPIPE can fail the pipeline even though grep matched.
11 | # If cargo itself fails, append "error" so that the grep below is guaranteed to match.
12 | check_output="$(cargo check --color never 2>&1 || echo error)"
13 | if grep -q -e warning -e error <<<"$check_output" ; then
14 |   echo >&2 "WARNING: Need to run cargo fix"
15 | fi
16 | 


--------------------------------------------------------------------------------
/release_procedure.md:
--------------------------------------------------------------------------------
 1 | # Cutting new releases
 2 | 
 3 | ## tldr
 4 | 
 5 | 1. run [prepare-new-release], which will create a PR and draft release
 6 | 2. follow the instructions on the PR
 7 | 3. run [release-publish]
 8 | 4. check that the PR that gets created (a different one from step 1) is merged; if not, merge it immediately
 9 | 
10 | ## Details
11 | 
12 | 1. Run the [prepare-new-release] action.
13 | 
14 |    To illustrate the values to pass to that, let's say:
15 | 
16 |     - `main` is currently at `v0.1.2-dev`
17 |     - you want to cut a release named `v0.2.0`
18 | 
19 |    Select the following options:
20 | 
21 |    | Option                                         | Value    |
22 |    |------------------------------------------------|----------|
23 |    | version to cut a release as                    | `v0.2.0` |
24 |    | the git ref to go against                      | `main`   |
25 |    | verify current dev version (w/o "-dev" suffix) | `v0.1.2` |
26 | 
27 |    A workflow will create a PR to update `v0.1.2-dev` to `v0.2.0` and create a release for that PR's sha in draft
28 |    mode. The PR's branch will be named `pending-releases/v0.2.0`.
29 | 
30 |     1. The [release-assets] workflow will:
31 | 
32 |         1. Validate that the PR has a linear history to main with just one additional commit.
33 |         2. Use the [build-release] workflow to build the binaries
34 |         3. Upload the artifacts to the draft release.
35 |         4. Publish a docker image with a tag `0.2.0-rc`
36 | 
37 |     2. If you need to make any changes (like adding a new commit), rebase and force-push to `pending-releases/v0.2.0`.
38 | 
39 |         - The branch should always be against main, and have exactly one extra commit.
40 |         - The release-assets workflow will re-run each time you update the PR.
41 | 
42 | 2. Follow the instructions in the PR description to validate it.
43 | 
44 | 3. When you're satisfied with the PR, run the [release-publish] action. This will:
45 | 
46 |    1. Validate that the PR's tasks are all complete
47 |    2. On Docker Hub, re-tag the `0.2.0-rc` image to `0.2.0`.
48 |    3. Do a fast-forward merge of the PR. (The fast-forward ensures that the commit SHA in main for the `0.2.0` release
49 |       matches the commit SHA that the binaries and Docker image were built against.)
50 |    4. Open up a new PR for the next `-dev` version bump
51 | 
52 | 4. Make sure the new PR gets merged immediately. The automation should do this for you; but if it doesn't, do it now.
53 | 
54 | [prepare-new-release]: https://github.com/yshavit/mdq/actions/workflows/prepare-new-release.yml
55 | 
56 | [release-assets]: https://github.com/yshavit/mdq/actions/workflows/release-assets.yml
57 | 
58 | [build-release]: https://github.com/yshavit/mdq/actions/workflows/build-release.yml
59 | 
60 | [release-publish]: https://github.com/yshavit/mdq/actions/workflows/release-publish.yml
61 |     


--------------------------------------------------------------------------------
/rustfmt.toml:
--------------------------------------------------------------------------------
1 | max_width = 120
2 | 


--------------------------------------------------------------------------------
/scripts/cargo_to_gh:
--------------------------------------------------------------------------------
 1 | #!/bin/bash
 2 | # Runs `cargo <args>` with JSON messages, and turns each diagnostic span into a GitHub Actions annotation.
 3 | # Exits non-zero if cargo failed or if any annotation was printed.
 4 | 
 5 | source scripts/common.sh || exit 1
 6 | 
 7 | # Ask for JSON messages unless the caller already did, in either the "--flag value" or the "--flag=value" form.
 8 | json_opts=()
 9 | if ! grep -qE -- '--message-format[ =]json' <<<"$*" ; then
10 |   json_opts=(--message-format json)
11 | fi
12 | 
13 | tmp_file="$(mktemp)"
14 | trap 'rm -f "$tmp_file"' EXIT # don't leave cargo's captured output behind
15 | set +e
16 | # The ${array[@]+...} form keeps an empty array from tripping `set -u` on bash versions before 4.4.
17 | cargo "$@" ${json_opts[@]+"${json_opts[@]}"} > "$tmp_file"
18 | exit_code=$?
19 | set -e
20 | if [[ "$exit_code" != 0 ]]; then
21 |   # A workflow command's parameters are separated from its message by "::".
22 |   echo "::error title=check failed::cargo $* failed"
23 | fi
24 | 
25 | # Each line that jq prints is an annotation; any annotation at all fails the script.
26 | while IFS= read -r line; do
27 |   if [[ -n "$line" ]]; then
28 |     echo "$line"
29 |     exit_code=1
30 |   fi
31 | done < <(
32 |   jq <"$tmp_file" -r '
33 |     select(.message.level and (.message.spans | length > 0)) 
34 |     | {level: (.message.level | sub("note"; "notice")), message: .message.message} as $ctx
35 |     | .message.spans[] 
36 |     | "::\($ctx.level) file=\(.file_name),line=\(.line_start),col=\(.column_start),endLine=\(.line_end),endColumn=\(.column_end),title=rust check::\($ctx.message)"'
37 | )
38 | 
39 | exit "$exit_code"
40 | 


--------------------------------------------------------------------------------
/scripts/common.sh:
--------------------------------------------------------------------------------
  1 | # Shared helpers for the scripts in this directory. This file is meant to be sourced, not executed.
  2 | set -euo pipefail
  3 | 
  4 | # How many groups are currently open. Only used to indent output outside of GitHub Actions.
  5 | group_level=0
  6 | 
  7 | # Echoes the arguments, indented by two spaces per open group. Under GitHub Actions the text goes to stdout as-is;
  8 | # otherwise it goes to stderr, with backslash escapes (such as colors) interpreted.
  9 | function echo_with_grouping() {
 10 |   local indent=''
 11 |   local i
 12 |   # A counting loop rather than `seq 1 "$group_level"`: BSD seq counts downwards for `seq 1 0`, which would indent
 13 |   # top-level output.
 14 |   for ((i = 0; i < group_level; i++)); do
 15 |     indent+='  '
 16 |   done
 17 |   # The indent goes to the same stream as the text, so that it doesn't leak onto stdout when the text is on stderr.
 18 |   if [[ "${GITHUB_ACTIONS-}" ]]; then
 19 |     printf '%s' "$indent"
 20 |     echo "$@"
 21 |   else
 22 |     printf >&2 '%s' "$indent"
 23 |     echo >&2 -e "$@"
 24 |   fi
 25 | }
 26 | 
 27 | # Opens a group titled $1: a log group under GitHub Actions, or one more level of indentation otherwise.
 28 | function group() {
 29 |   local title="$1"
 30 |   if [[ "${GITHUB_ACTIONS-}" ]]; then
 31 |     echo "::group::$title"
 32 |   else
 33 |     # Anything other than alphanumerics, spaces, underscores and quotes is replaced with a single space.
 34 |     echo_with_grouping "${title//[^a-zA-Z0-9 _\'\"]/ }"
 35 |     group_level="$((group_level + 1))"
 36 |   fi
 37 | }
 38 | 
 39 | # Closes the group most recently opened by `group`.
 40 | function end_group() {
 41 |   if [[ "${GITHUB_ACTIONS-}" ]]; then
 42 |     echo "::endgroup::"
 43 |   elif [[ "$group_level" -gt 0 ]]; then
 44 |     # An unmatched end_group is ignored, rather than driving the level negative.
 45 |     group_level="$((group_level - 1))"
 46 |   fi
 47 | }
 48 | 
 49 | # Prints a message. Usage:
 50 | #   msg <message>
 51 | #   msg <title> <message>
 52 | #   msg <level> <title> <message>
 53 | # The levels debug, notice, warning and error get special treatment; any other level (such as the default, info)
 54 | # prints plainly.
 55 | function msg () {
 56 |   case "$#" in
 57 |     1)
 58 |       local level=info
 59 |       local title=''
 60 |       local message="$1"
 61 |       ;;
 62 |     2)
 63 |       local level=info
 64 |       local title="$1"
 65 |       local message="$2"
 66 |       ;;
 67 |     *)
 68 |       local level="$1"
 69 |       local title="$2"
 70 |       local message="$3"
 71 |       ;;
 72 |   esac
 73 |   if [[ "${GITHUB_ACTIONS-}" ]]; then
 74 |     case "$level" in
 75 |       debug)
 76 |         # ${title:+...} expands to nothing when there's no title, instead of leaving a dangling ": ".
 77 |         echo "::debug::${title:+$title: }${message}"
 78 |         ;;
 79 |       notice|warning|error)
 80 |         echo "::$level${title:+ title=$title}::${message}"
 81 |         ;;
 82 |       *)
 83 |         if [[ -n "$title" ]]; then
 84 |           echo "$title: ${message}"
 85 |         else
 86 |           echo "${message}"
 87 |         fi
 88 |         ;;
 89 |     esac
 90 |   else
 91 |     local color=''
 92 |     local color_title_end=''
 93 |     local after_title=''
 94 |     case "$level" in
 95 |       debug)
 96 |         color='\e[37m'
 97 |         ;;
 98 |       notice)
 99 |         color='\e[34m'
100 |         color_title_end='\e[0m'
101 |         ;;
102 |       warning)
103 |         color='\e[33m'
104 |         color_title_end='\e[0m'
105 |         ;;
106 |       error)
107 |         color='\e[31m'
108 |         color_title_end='\e[0m'
109 |         ;;
110 |       *)
111 |         color='\e[0m'
112 |         ;;
113 |     esac
114 |     if [[ -n "$title" ]]; then
115 |       after_title=': '
116 |     else
117 |       # With no title to carry the color, let the color run through the message instead.
118 |       color_title_end=''
119 |     fi
120 | 
121 |     echo_with_grouping "${color}${title}${color_title_end}${after_title}${message}\e[0m"
122 |   fi
123 | }
124 | 
125 | # Calls `msg` at level $1 with the remaining "[title] <message>" arguments. The level can't simply be prepended to
126 | # them: `msg` reads a two-argument call as "<title> <message>", so a lone message would turn the level into the title.
127 | function _msg_at_level() {
128 |   local level="$1"
129 |   shift
130 |   if [[ "$#" -eq 1 ]]; then
131 |     msg "$level" '' "$1"
132 |   else
133 |     msg "$level" "$@"
134 |   fi
135 | }
136 | 
137 | # Prints a message at debug level. Usage: msg_debug [title] <message>
138 | function msg_debug() {
139 |   _msg_at_level debug "$@"
140 | }
141 | 
142 | # Prints a message at notice level. Usage: msg_info [title] <message>
143 | function msg_info() {
144 |   _msg_at_level notice "$@"
145 | }
146 | 
147 | # Prints a message and exits with status 1. Usage: err [title] <message>, which prints at error level; or
148 | # err <level> <title> <message>, which prints at the given level.
149 | function err() {
150 |   if [[ "$#" -ge 3 ]]; then
151 |     msg "$@"
152 |   else
153 |     _msg_at_level error "$@"
154 |   fi
155 |   exit 1
156 | }
157 | 
158 | # Exits with an error unless the command $1 is available.
159 | function require_tool() {
160 |   local tool="$1"
161 |   command -v "$tool" &>/dev/null || {
162 |     err 'missing software' "$tool"
163 |   }
164 | }
165 | 


--------------------------------------------------------------------------------
/scripts/flatten-rustdoc-json:
--------------------------------------------------------------------------------
 1 | #!/usr/bin/env python3
 2 | 
 3 | import os
 4 | import json
 5 | import sys
 6 | 
 7 | my_dir = os.path.dirname(os.path.realpath(__file__))
 8 | repo_dir = os.path.abspath(os.path.join(my_dir, '..'))
 9 | 
10 | DOCS_JSON_PATH = 'target/doc/mdq.json'
11 | 
12 | IGNORES = set([e for e in os.environ.get('IGNORE_ITEMS', '').split(',') if e])
13 | if IGNORES:
14 |     print(f'Ignoring: {IGNORES}', file=sys.stderr)
15 | 
16 | with open(DOCS_JSON_PATH) as fh:
17 |     docs_json = json.load(fh)
18 | 
19 | vis_by_item_id = {}
20 | path_by_item_id = {}
21 | kind_by_item_id = {}
22 | 
23 | for item_id, item in docs_json["index"].items():
24 |     vis = item["visibility"] # this is the vis on the item itself, not the effective vis from mod.rs imports
25 |     if isinstance(vis, dict) and ('restricted' in vis):
26 |         vis = 'restricted'
27 |     vis_by_item_id[item_id] = vis
28 | 
29 | for item_id, item in docs_json["paths"].items():
30 |     kind_by_item_id[item_id] = item["kind"]
31 |     path_by_item_id[item_id] = '::'.join(item["path"])
32 | 
33 | lines = []
34 | for item_id, vis in vis_by_item_id.items():
35 |     if vis != 'public':
36 |         continue
37 |     item_kind = kind_by_item_id.get(item_id)
38 |     item_path = path_by_item_id.get(item_id)
39 |     if item_kind is None or item_path is None:
40 |         continue
41 |     line = f'{item_kind} {item_path}' 
42 |     if line in IGNORES:
43 |         continue
44 |     lines.append(line)
45 | 
46 | lines.sort()
47 | 
48 | for line in lines:
49 |     print(line)
50 | 
51 | 


--------------------------------------------------------------------------------
/scripts/percent_to_color:
--------------------------------------------------------------------------------
 1 | #!/bin/bash
 2 | # Prints the badge color for a percentage (0 - 100, fractions allowed), in 20-point bands.
 3 | 
 4 | source scripts/common.sh || exit 1
 5 | 
 6 | require_tool bc
 7 | 
 8 | if [ $# -ne 1 ]; then
 9 |   err 'require exactly one argument'
10 | fi
11 | percent="$1"
12 | 
13 | # bc treats a word such as "abc" as a variable (whose value is 0), which would otherwise be reported as darkred.
14 | if ! [[ "$percent" =~ ^([0-9]+([.][0-9]*)?|[.][0-9]+)$ ]]; then
15 |   err 'invalid percent'
16 | fi
17 | 
18 | # With scale=0, the division truncates to a whole number of 20-point bands.
19 | percent_0_to_5="$(printf 'scale=0; %s / 20\n' "$percent" | bc)"
20 | 
21 | case "$percent_0_to_5" in
22 |   0) # 0 - 19%
23 |     echo darkred
24 |     ;;
25 |   1) # 20 - 39%
26 |     echo crimson
27 |     ;;
28 |   2) # 40 - 59%
29 |     echo orangered
30 |     ;;
31 |   3) # 60 - 79%
32 |     echo yellowgreen
33 |     ;;
34 |   4 | 5) # 80 - 100% (and up to, but not including, 120%)
35 |     echo forestgreen 
36 |     ;;
37 |   *)
38 |     err 'invalid percent'
39 |     ;;
40 | esac
41 | 


--------------------------------------------------------------------------------
/scripts/percent_to_shields_gist:
--------------------------------------------------------------------------------
 1 | #!/bin/bash
 2 | # Updates the gist at $GIST_URL with badge JSON (schemaVersion, label, color, message) for a percentage.
 3 | # Usage: percent_to_shields_gist <label> <percent>
 4 | 
 5 | source scripts/common.sh || exit 1
 6 | 
 7 | if [[ -z "${GIST_URL:-}" ]]; then
 8 |   err "no GIST_URL env set"
 9 | fi
10 | if [[ "$#" -ne 2 ]]; then
11 |   err 'requires exactly two arguments'
12 | fi
13 | label="$1"
14 | percent="$2"
15 | 
16 | color="$(scripts/percent_to_color "$percent")"
17 | 
18 | # -n: the filter builds the object purely from the --arg values, so jq needs no input document.
19 | json_text="$(jq -n -c \
20 |   --arg badge_label "$label" --arg color "$color" --arg percent "$(printf '%.1f%%' "$percent")" \
21 |   '{schemaVersion: 1, label: $badge_label, color: $color, message: $percent}')"
22 | 
23 | gh gist edit "$GIST_URL" <(echo "$json_text")
24 | 


--------------------------------------------------------------------------------
/scripts/system_test:
--------------------------------------------------------------------------------
  1 | #!/bin/bash
  2 | # Runs the test cases from a TOML spec file against an mdq executable.
  3 | #
  4 | # For each case under `expect`, mdq gets `given.md` on stdin and the case's `cli_args` as its arguments, and runs in
  5 | # a scratch directory that holds `given.files`. Its stdout, stderr and success are then compared against the case's
  6 | # expectations.
  7 | set -euo pipefail
  8 | 
  9 | source "$(dirname "$0")/common.sh" || exit 1
 10 | 
 11 | require_tool jq
 12 | require_tool toml2json
 13 | 
 14 | usage_string="Usage: $0 <mdq executable> <path to toml test> [test case grep pattern]"
 15 | if [[ "$#" -lt 2 ]]; then
 16 |   err 'too few args' "$usage_string"
 17 | fi
 18 | if [[ "$#" -gt 3 ]]; then
 19 |   err 'too many args' "$usage_string"
 20 | fi
 21 | 
 22 | mdq="$1"
 23 | test_file="$2"
 24 | pattern="${3:-.*}"
 25 | 
 26 | msg debug mdq "$mdq"
 27 | msg debug test_file "$test_file"
 28 | msg debug pattern "$pattern"
 29 | 
 30 | if [[ -x "$mdq" ]]; then
 31 |   # Each test case runs from its own scratch directory, where a relative path to mdq wouldn't resolve.
 32 |   mdq="$(readlink -f "$mdq")"
 33 |   msg debug 'mdq full path' "$mdq"
 34 |   msg debug "mdq version" "$("$mdq" --version)"
 35 | else
 36 |   err 'not executable' "$mdq"
 37 | fi
 38 | 
 39 | test_json="$(<"$test_file" toml2json)"
 40 | 
 41 | # Compares the files $2 (expected) and $3 (actual), ignoring trailing carriage returns. On a mismatch, reports an
 42 | # error titled $1, prints a side-by-side diff, and returns 1.
 43 | function nice_diff() {
 44 |   local title="$1"
 45 |   local expected="$2"
 46 |   local actual="$3"
 47 |   if diff --strip-trailing-cr "$expected" "$actual" &>/dev/null ; then
 48 |     msg debug "$title" "diff succeeded"
 49 |   else
 50 |     msg error "$title" "diff failed"
 51 |     diff --color=always --strip-trailing-cr -y <(cat <(echo EXPECTED) "$expected") <(cat <(echo ACTUAL) "$actual")
 52 |     return 1
 53 |   fi
 54 | }
 55 | 
 56 | # Rewrites the file $1 as jq's sorted-key rendering of it, so that JSON compares by structure rather than by
 57 | # formatting. A file that jq can't parse is left as it was.
 58 | function format_file_json() {
 59 |   local file="$1"
 60 |   local as_json
 61 |   if as_json="$(jq -S . "$file")"; then
 62 |     echo "$as_json" > "$file"
 63 |   fi
 64 | }
 65 | 
 66 | # Runs one test case. $1 is a JSON object with the case's `name`, the file-wide `given`, and the case's `expect`.
 67 | # Returns non-zero if the case fails.
 68 | function run_test_spec() {
 69 |   local spec="$1"
 70 |   local spec_name full_name
 71 |   spec_name="$(jq -r '.name' <<<"$spec")"
 72 |   full_name="$test_file > $spec_name"
 73 | 
 74 |   if jq -e '.expect.ignore' <<<"$spec" &>/dev/null ; then
 75 |     msg warning "$full_name" 'skipping test case'
 76 |     return 0
 77 |   fi
 78 | 
 79 |   (
 80 |     local stdin work_dir
 81 |     work_dir="$(mktemp -d)"
 82 |     cd "$work_dir"
 83 | 
 84 |     # `given.files` is optional. Without the `// {}` fallback, jq fails on the missing key, and the loop would then
 85 |     # try to write a file with an empty name.
 86 |     while IFS= read -r md_test_file; do
 87 |       [[ -n "$md_test_file" ]] || continue
 88 |       mkdir -p "$(dirname "$md_test_file")"
 89 |       jq -r '.given.files[$name]' <<<"$spec" --arg name "$md_test_file" > "$md_test_file"
 90 |     done < <(jq -r '.given.files // {} | keys[]' <<<"$spec")
 91 | 
 92 |     jq -j '.expect.output' <<<"$spec" >expect_out.txt
 93 |     jq -j '.expect.output_err // ""' <<<"$spec" >expect_err.txt
 94 |     expect_success="$(jq -r '.expect.expect_success | if . == null then true else . end' <<<"$spec")"
 95 |     output_json="$(jq -r '.expect.output_json // false' <<<"$spec")"
 96 | 
 97 |     stdin="$(jq -r '.given.md' <<<"$spec")"
 98 |     cli_args=()
 99 |     while read -r cli_arg ; do
100 |       cli_args+=("$cli_arg")
101 |     done <<<"$(jq -r '.expect.cli_args[]' <<<"$spec")"
102 | 
103 |     local actual_success=true
104 |     # Trace the invocation, so that the log shows exactly how mdq was called.
105 |     set -x
106 |     "$mdq" <<<"$stdin" "${cli_args[@]}" >actual_out.txt 2>actual_err.txt || actual_success=false
107 |     set +x
108 | 
109 |     if [[ "$output_json" == true ]]; then
110 |       for file in actual_out.txt actual_err.txt expect_out.txt expect_err.txt ; do
111 |         format_file_json "$file"
112 |       done
113 |     fi
114 | 
115 |     local any_errors=()
116 |     nice_diff "$full_name: stdout" expect_out.txt actual_out.txt || any_errors+=('stdout')
117 |     nice_diff "$full_name: stderr" expect_err.txt actual_err.txt || any_errors+=('stderr')
118 |     [[ "$expect_success" == "$actual_success" ]] || any_errors+=('exit code')
119 | 
120 |     if [[ "${#any_errors[@]}" -eq 0 ]]; then
121 |       msg notice "$full_name" 'test passed'
122 |       # Nothing left to inspect, so clean up the scratch directory.
123 |       cd /
124 |       rm -rf "$work_dir"
125 |     else
126 |       msg error "$full_name" "test failed due to ${any_errors[*]}"
127 |       msg debug "$full_name" "test files were kept in $work_dir"
128 |       exit 1
129 |     fi
130 |   )
131 | }
132 | 
133 | failures=0
134 | while read -r test_case ; do
135 |   if ! grep -qE -- "$pattern" <<<"$test_case"; then
136 |     msg debug 'skipping test case' "$test_case"
137 |     continue
138 |   fi
139 |   group "$test_file > $test_case"
140 |   run_test_spec \
141 |     "$(jq '{name: $test_case, given: .given, expect: .expect[$test_case]}' <<<"$test_json" --arg test_case "$test_case")" \
142 |     || failures=$((failures + 1))
143 |   end_group
144 | done <<<"$(jq -r '.expect| keys[]' <<<"$test_json")"
145 | 
146 | # The script's exit status: success only if no test case failed.
147 | [[ "$failures" -eq 0 ]]
148 | 


--------------------------------------------------------------------------------
/src/lib.rs:
--------------------------------------------------------------------------------
 1 | //! This crate is the library behind the [mdq] CLI tool.
 2 | //!
 3 | //! <div class="warning">
 4 | //!
 5 | //! **This is a preview API**. While I'll try to keep it as stable as possible, some breaking changes may occur.
 6 | //!
 7 | //! I will note any such changes in the [release notes on GitHub]. You can also find them by searching for the
 8 | //! [`breaking change` label] in the project's issue tracker.
 9 | //!
10 | //! [release notes on GitHub]: https://github.com/yshavit/mdq/releases
11 | //! [`breaking change` label]: https://github.com/yshavit/mdq/issues?q=label%3A%22breaking%20change%22
12 | //!
13 | //! </div>
14 | //!
15 | //! The general flow to use this crate is:
16 | //!
17 | //! 1. Parse Markdown into [`md_elem::MdElem`]s via [`md_elem::MdDoc::parse`]
18 | //! 2. Parse a query via [`select::Selector`'s `TryFrom::<&str>`][selector-parse]
19 | //! 3. Use [`select::Selector::find_nodes`] to filter the `MdElem`s down
20 | //! 4. Use [`output`] to write the results
21 | //!
22 | //! The [`run`] module implements this workflow using options similar to the CLI's flags and a facade for I/O. You can
23 | //! also do it yourself. See that module's documentation for an example.
24 | //!
25 | //! ## Example: End-to-end parsing and selection
26 | //!
27 | //! To parse some Markdown and a query string and output the result as Markdown to stdout:
28 | //!
29 | //! ```
30 | //! # fn main() -> Result<(), Box<dyn std::error::Error>> {
31 | //! use indoc::indoc;
32 | //!
33 | //! // Define some markdown
34 | //! let markdown_text = indoc! {r##"
35 | //! ## First section
36 | //!
37 | //! - hello
38 | //! - world
39 | //!
40 | //! ## Second section
41 | //!
42 | //! - foo
43 | //! - bar
44 | //! "##};
45 | //! let parsed_md = mdq::md_elem::MdDoc::parse(markdown_text, &mdq::md_elem::ParseOptions::default())?;
46 | //!
47 | //! // Parse a selector that looks for a section with title containing "second", and
48 | //! // then looks for list items within it
49 | //! let query_text = "# second | - *";
50 | //! let selector: mdq::select::Selector = query_text.try_into()?;
51 | //!
52 | //! // Run the selector against the parsed Markdown
53 | //! let (found_nodes, found_nodes_ctx) = selector.find_nodes(parsed_md)?;
54 | //!
55 | //! // Output. Note our use of `found_nodes_ctx`, which `find_nodes` returned alongside the nodes themselves.
56 | //! let mut output_string = String::new();
57 | //! let writer = mdq::output::MdWriter::default();
58 | //! writer.write(&found_nodes_ctx, &found_nodes, &mut output_string);
59 | //!
60 | //! assert_eq!(
61 | //!     output_string,
62 | //!     indoc! {r"
63 | //!     - foo
64 | //!
65 | //!     - bar
66 | //! "});
67 | //! #
68 | //! #     Ok(())
69 | //! # }
70 | //! ```
71 | //!
72 | //! [mdq]: https://github.com/yshavit/mdq
73 | //! [selector-parse]: select::Selector#impl-TryFrom<%26str>-for-Selector
74 | 
75 | pub mod md_elem;
76 | pub mod output;
77 | mod query;
78 | pub mod run;
79 | pub mod select;
80 | mod util;
81 | 


--------------------------------------------------------------------------------
/src/main.rs:
--------------------------------------------------------------------------------
 1 | use clap::Parser;
 2 | use mdq::run::{CliOptions, Error, OsFacade};
 3 | use std::io;
 4 | use std::io::{stdin, stdout, Read};
 5 | use std::process::ExitCode;
 6 | 
 7 | /// The [`OsFacade`] that `main` runs against: the process's real stdin, stdout, stderr and filesystem.
 8 | struct RealOs;
 9 | 
10 | #[doc(hidden)]
11 | impl OsFacade for RealOs {
12 |     fn read_stdin(&self) -> io::Result<String> {
13 |         let mut text = String::new();
14 |         stdin().read_to_string(&mut text).map(|_| text)
15 |     }
16 | 
17 |     fn read_file(&self, path: &str) -> io::Result<String> {
18 |         std::fs::read_to_string(path)
19 |     }
20 | 
21 |     fn stdout(&mut self) -> impl io::Write {
22 |         stdout().lock()
23 |     }
24 | 
25 |     fn write_error(&mut self, err: Error) {
26 |         eprint!("{}", err)
27 |     }
28 | }
29 | 
30 | /// Parses and validates the CLI options, then runs mdq against the real OS. The exit code reports whether both the
31 | /// validation and the run succeeded.
32 | fn main() -> ExitCode {
33 |     let cli = CliOptions::parse();
34 | 
35 |     // `&&` short-circuits, so nothing runs if the extra validation fails.
36 |     let succeeded = cli.extra_validation() && mdq::run::run(&cli.into(), &mut RealOs);
37 |     match succeeded {
38 |         true => ExitCode::SUCCESS,
39 |         false => ExitCode::FAILURE,
40 |     }
41 | }
42 | 


--------------------------------------------------------------------------------
/src/md_elem/concatenate.rs:
--------------------------------------------------------------------------------
 1 | /// A type whose adjacent values can sometimes be merged into one.
 2 | pub(crate) trait Concatenate: Sized {
 3 |     /// Tries to merge `other` into `self`. As [`Self::concatenate_similar`] uses it: `Ok(())` means `other` was
 4 |     /// absorbed, and `Err` hands back a value that has to stay a separate item.
 5 |     fn try_concatenate(&mut self, other: Self) -> Result<(), Self>;
 6 | 
 7 |     /// Walks `items` in order, merging each one into the most recently kept item when [`Self::try_concatenate`]
 8 |     /// accepts it, and keeping it as a new item otherwise.
 9 |     fn concatenate_similar(items: Vec<Self>) -> Vec<Self> {
10 |         let mut merged: Vec<Self> = Vec::with_capacity(items.len());
11 |         for item in items {
12 |             // The first item has nothing to merge into; any other item gets offered to the current tail.
13 |             let unmerged = match merged.last_mut() {
14 |                 Some(tail) => tail.try_concatenate(item).err(),
15 |                 None => Some(item),
16 |             };
17 |             merged.extend(unmerged);
18 |         }
19 |         merged
20 |     }
21 | }
22 | 


--------------------------------------------------------------------------------
/src/md_elem/mod.rs:
--------------------------------------------------------------------------------
 1 | //! Parsed Markdown nodes (and how to parse them).
 2 | //!
 3 | //! This module provides the AST for a parsed Markdown document. Its main entry point is [`MdDoc::parse`].
 4 | mod tree;
 5 | mod tree_ref;
 6 | 
 7 | pub use tree::*;
 8 | 
 9 | mod concatenate;
10 | #[cfg(test)]
11 | pub(crate) mod tree_test_utils;
12 | 
13 | #[cfg(test)]
14 | pub(crate) use tree_test_utils::*;
15 | 


--------------------------------------------------------------------------------
/src/md_elem/tree_test_utils.rs:
--------------------------------------------------------------------------------
 1 | #[cfg(test)]
 2 | pub(crate) use test_utils::*;
 3 | 
 4 | #[cfg(test)]
 5 | mod test_utils {
 6 |     macro_rules! md_elem { // one element: `Variant { field: value, .. }` via `m_node!`, or a literal as a paragraph
 7 |         ( $($node_names:ident)::* {$($attr:ident: $val:expr),* $(,)?}) => {
 8 |             crate::md_elem::m_node!(MdElem::$($node_names)::* {$($attr: $val),*})
 9 |         };
10 |         ($paragraph_text:literal) => { // a bare literal becomes a Paragraph whose body is that one plain-text inline
11 |             crate::md_elem::m_node!(MdElem::Paragraph{body: vec![mdq_inline!($paragraph_text)]})
12 |         };
13 |     }
14 |     pub(crate) use md_elem;
15 | 
16 |     macro_rules! md_elems { // a `vec!` with `md_elem!` applied to each comma-separated entry
17 |         [$($first:tt $( $(:: $($rest:ident)::* )? {$($attr:ident: $val:expr),*$(,)?})? ),*$(,)?] => {
18 |             vec![$(
19 |                 md_elem!($first$( $(:: $($rest)::*)? { $($attr: $val),* })?)
20 |                 ),*
21 |             ]
22 |         };
23 |     }
24 |     pub(crate) use md_elems;
25 | 
26 |     macro_rules! mdq_inline { // one Inline: `span <Variant> [children, ..]` for a span, or a literal for plain text
27 |         (span $which:ident [$($contents:expr),*$(,)?]) => {
28 |             crate::md_elem::elem::Inline::Span(Span {
29 |                 variant: crate::md_elem::elem::SpanVariant::$which,
30 |                 children: vec![$($contents),*],
31 |             })
32 |         };
33 |         ($text:literal) => {
34 |             crate::md_elem::elem::Inline::Text(Text {
35 |                 variant: crate::md_elem::elem::TextVariant::Plain,
36 |                 value: $text.to_string(),
37 |             })
38 |         };
39 |     }
40 |     use crate::md_elem::elem::BlockHtml;
41 |     pub(crate) use mdq_inline;
42 | 
43 |     impl From<&str> for BlockHtml { // lets test code convert a &str straight into a BlockHtml
44 |         fn from(value: &str) -> Self {
45 |             Self {
46 |                 value: value.to_string(),
47 |             }
48 |         }
49 |     }
50 | }
51 | 


--------------------------------------------------------------------------------
/src/output/fmt_plain_str.rs:
--------------------------------------------------------------------------------
  1 | use crate::md_elem::elem::*;
  2 | use std::borrow::Borrow;
  3 | 
  4 | /// Renders `inlines` as plain text: span children, link display text, image alt text, and footnotes as `[^id]`.
  5 | pub(crate) fn inlines_to_plain_string<N: Borrow<Inline>>(inlines: &[N]) -> String {
  6 |     let mut plain = String::with_capacity(inlines.len() * 5); // random guess
  7 |     build_inlines(&mut plain, inlines);
  8 |     plain
  9 | }
 10 | 
 11 | fn build_inlines<N: Borrow<Inline>>(out: &mut String, inlines: &[N]) {
 12 |     inlines.iter().for_each(|inline| build_inline(out, inline.borrow()));
 13 | }
 14 | 
 15 | /// Appends the plain text of one inline to `out`, recursing into the children of spans and links.
 16 | fn build_inline(out: &mut String, elem: &Inline) {
 17 |     match elem {
 18 |         Inline::Text(Text { value, .. }) => out.push_str(value),
 19 |         Inline::Image(Image { alt, .. }) => out.push_str(alt),
 20 |         Inline::Span(Span { children, .. }) => build_inlines(out, children),
 21 |         Inline::Link(Link { display, .. }) => build_inlines(out, display),
 22 |         Inline::Footnote(footnote) => {
 23 |             out.push_str("[^");
 24 |             out.push_str(footnote.as_str());
 25 |             out.push(']');
 26 |         }
 27 |     }
 28 | }
 29 | 
 30 | #[cfg(test)]
 31 | mod tests {
 32 |     use super::*;
 33 |     use crate::md_elem::*;
 34 |     use indoc::indoc;
 35 | 
 36 |     use crate::util::utils_for_test::*;
 37 | 
 38 |     variants_checker!(VARIANTS_CHECKER = Inline {
 39 |         Span(Span{ variant: SpanVariant::Delete, .. }),
 40 |         Span(Span{ variant: SpanVariant::Emphasis, .. }),
 41 |         Span(Span{ variant: SpanVariant::Strong, .. }),
 42 |         Text(Text { variant: TextVariant::Plain, .. }),
 43 |         Text(Text { variant: TextVariant::Code, .. }),
 44 |         Text(Text { variant: TextVariant::Math, .. }),
 45 |         Text(Text { variant: TextVariant::InlineHtml, .. }),
 46 |         Link { .. },
 47 |         Image { .. },
 48 |         Footnote(_),
 49 |     });
 50 | 
 51 |     #[test]
 52 |     fn spans() { // emphasis, strong and strikethrough markers are dropped, leaving only the inner text
 53 |         check("_hello world_", "hello world");
 54 |         check("**hello world**", "hello world");
 55 |         check("~~hello world~~", "hello world");
 56 |     }
 57 | 
 58 |     #[test]
 59 |     fn texts() { // plain, inline-code and math text all render as their bare contents
 60 |         check("hello world", "hello world");
 61 |         check("`hello world`", "hello world");
 62 |         check("$hello world$", "hello world");
 63 |         // html is covered separately, since it isn't wrapped in a paragraph: see issue #34
 64 |     }
 65 | 
 66 |     #[test]
 67 |     fn inline_html() { // inline HTML is kept verbatim
 68 |         let md_elems = MdDoc::parse("Hello <foo> world", &ParseOptions::gfm()).unwrap().roots;
 69 |         unwrap!(&md_elems[0], MdElem::Paragraph(contents));
 70 |         unwrap!(&contents.body[1], inline @ Inline::Text(_));
 71 |         VARIANTS_CHECKER.see(inline);
 72 |         let actual = inlines_to_plain_string(&contents.body);
 73 |         assert_eq!(&actual, "Hello <foo> world");
 74 |     }
 75 | 
 76 |     #[test]
 77 |     fn links() { // inline and reference links both render as just their display text
 78 |         check("[foo](https://example.com)", "foo");
 79 |         check("[foo _with emphasis_](https://example.com)", "foo with emphasis");
 80 |         check(
 81 |             indoc! {r#"
 82 |             [foo][1]
 83 | 
 84 |             [1]: https://example.com"#},
 85 |             "foo",
 86 |         )
 87 |     }
 88 | 
 89 |     #[test]
 90 |     fn images() { // images render as just their alt text
 91 |         check("![foo](https://example.com)", "foo");
 92 |         check("![foo _with emphasis_](https://example.com)", "foo with emphasis"); // md is ignored in alt
 93 |         check(
 94 |             indoc! {r#"
 95 |             ![foo][1]
 96 | 
 97 |             [1]: https://example.com"#},
 98 |             "foo",
 99 |         )
100 |     }
101 | 
102 |     #[test]
103 |     fn footnote() { // a footnote reference keeps its "[^id]" form
104 |         check(
105 |             indoc! {r#"
106 |             [^1]
107 | 
108 |             [^1]: my footnote"#},
109 |             "[^1]",
110 |         )
111 |     }
112 | 
113 |     /// Because this is such simple functionality, we're just going to do a simple end-to-end test from original
114 |     /// markdown to plain text.
115 |     fn check(md: &str, expect: &str) {
116 |         let mut options = ParseOptions::gfm();
117 |         options.mdast_options.constructs.math_text = true;
118 |         let md_elems = MdDoc::parse(md, &options).unwrap().roots;
119 |         unwrap!(&md_elems[0], MdElem::Paragraph(p));
120 |         p.body.iter().for_each(|inline| VARIANTS_CHECKER.see(inline));
121 |         let actual = inlines_to_plain_string(&p.body);
122 |         assert_eq!(&actual, expect);
123 |     }
124 | }
125 | 


--------------------------------------------------------------------------------
/src/output/fmt_plain_writer.rs:
--------------------------------------------------------------------------------
  1 | use std::cmp::min;
  2 | use std::io::Write;
  3 | 
  4 | /// A [`Write`] adapter that drops leading newlines and caps each run of consecutive newlines at `max_newlines`.
  5 | /// Newlines are held back until more content arrives, so trailing newlines never reach the underlying writer.
  6 | pub(crate) struct NewlineCollapser<W> {
  7 |     max_newlines: usize,
  8 |     underlying: W,
  9 |     /// How many newlines are in this current stretch, or None if we haven't written anything yet.
 10 |     current_newline_stretch: Option<usize>,
 11 | }
 12 | 
 13 | impl<W: Write> NewlineCollapser<W> {
 14 |     /// Wraps `underlying`, allowing at most `max_newlines` consecutive newlines between pieces of content.
 15 |     pub(crate) fn new(underlying: W, max_newlines: usize) -> Self {
 16 |         Self {
 17 |             max_newlines,
 18 |             underlying,
 19 |             current_newline_stretch: None,
 20 |         }
 21 |     }
 22 | 
 23 |     /// Whether at least one newline has been seen since the last content, but not yet written out.
 24 |     pub(crate) fn have_pending_newlines(&self) -> bool {
 25 |         matches!(self.current_newline_stretch, Some(n) if n > 0)
 26 |     }
 27 | 
 28 |     /// Consumes this wrapper and returns the underlying writer; any pending newlines are discarded.
 29 |     pub(crate) fn take_underlying(self) -> W {
 30 |         self.underlying
 31 |     }
 32 | 
 33 |     /// Writes out the pending newlines (capped at `max_newlines`) and resets the stretch, ahead of writing content.
 34 |     fn flush_newlines(&mut self) -> std::io::Result<()> {
 35 |         let pending = self.current_newline_stretch.unwrap_or(0);
 36 |         for _ in 0..min(pending, self.max_newlines) {
 37 |             writeln!(self.underlying)?;
 38 |         }
 39 |         // Set the current stretch to 0 -- not to None, since we want to note here that we've written something!
 40 |         self.current_newline_stretch = Some(0);
 41 |         Ok(())
 42 |     }
 43 | 
 44 |     /// Counts one more newline in the current stretch. Newlines that arrive before any content are dropped, so the
 45 |     /// stretch stays `None` (rather than starting a count) until something has actually been written.
 46 |     fn increment_newline_stretch(&mut self) {
 47 |         if let Some(stretch) = self.current_newline_stretch.as_mut() {
 48 |             *stretch += 1;
 49 |         }
 50 |     }
 51 | }
 52 | 
 53 | impl<W: Write> Write for NewlineCollapser<W> {
 54 |     /// Passes `buf` along with its newlines collapsed. The returned count includes newlines that were held back.
 55 |     fn write(&mut self, buf: &[u8]) -> std::io::Result<usize> {
 56 |         let mut wrote = 0;
 57 |         let mut remaining = buf;
 58 |         while !remaining.is_empty() {
 59 |             match memchr::memchr(b'\n', remaining) {
 60 |                 None => {
 61 |                     // No newline found, and we know there's at least one byte due to the `while` condition.
 62 |                     // So: (1) write out any pending newlines (which also resets the stretch), (2) write
 63 |                     // `remaining` to the underlying, and (3) break, since we just handed off all of remaining.
 64 |                     self.flush_newlines()?;
 65 |                     wrote += self.underlying.write(remaining)?;
 66 |                     break;
 67 |                 }
 68 |                 Some(0) => {
 69 |                     // First byte is a newline. Increment the current stretch, and that's it.
 70 |                     // Holding the newline back (instead of writing it) is what makes the collapsing possible.
 71 |                     self.increment_newline_stretch();
 72 |                     wrote += 1; // We did process this byte, even if we haven't actually written it out yet
 73 |                     remaining = &remaining[1..];
 74 |                 }
 75 |                 Some(n) => {
 76 |                     // The first byte isn't a newline, so write out the n bytes before the newline. On a short write,
 77 |                     // stop and report what was consumed: skipping ahead would drop the first unwritten byte.
 78 |                     self.flush_newlines()?;
 79 |                     let underlying_wrote_n = self.underlying.write(&remaining[..n])?;
 80 |                     wrote += underlying_wrote_n;
 81 |                     if underlying_wrote_n < n {
 82 |                         break;
 83 |                     }
 84 |                     self.increment_newline_stretch();
 85 |                     wrote += 1;
 86 |                     remaining = &remaining[n + 1..];
 87 |                 }
 88 |             }
 89 |         }
 90 |         Ok(wrote)
 91 |     }
 92 | 
 93 |     fn flush(&mut self) -> std::io::Result<()> {
 94 |         self.underlying.flush()
 95 |     }
 96 | }
 97 | 
 98 | #[cfg(test)]
 99 | mod test {
100 |     use crate::output::fmt_plain_writer::NewlineCollapser;
101 |     use std::io::Write;
102 | 
103 |     /// Input without newlines passes through unchanged.
104 |     #[test]
105 |     fn no_newlines() {
106 |         check(1, ["hello"], "hello");
107 |     }
108 | 
109 |     /// Writing nothing produces nothing.
110 |     #[test]
111 |     fn empty() {
112 |         check(1, [""], "");
113 |     }
114 | 
115 |     /// Newlines at the start of writes collapse across write boundaries, and the very first one is dropped.
116 |     #[test]
117 |     fn start_with_newlines() {
118 |         check(1, ["\nA", "\nB", "\n", "\nC", "\n", "\n", "D"], "A\nB\nC\nD");
119 |     }
120 | 
121 |     /// Newlines at the end of writes collapse too, and the final one is never emitted.
122 |     #[test]
123 |     fn end_with_newlines() {
124 |         check(1, ["A\n", "B\n\n", "C\n"], "A\nB\nC");
125 |     }
126 | 
127 |     /// Newlines in the middle of a write are collapsed; adjacent writes are otherwise simply concatenated.
128 |     #[test]
129 |     fn newlines_in_middle() {
130 |         check(1, ["A\nB", "C\n\nD"], "A\nBC\nD");
131 |     }
132 | 
133 |     /// With a cap of 2, stretches of up to two newlines are kept and longer ones are cut down to two.
134 |     #[test]
135 |     fn collapse_stretches_more_than_two() {
136 |         check(2, ["A\nB\n\nC\n\n\nD"], "A\nB\n\nC\n\nD");
137 |     }
138 | 
139 |     /// Trailing newlines are dropped entirely, whatever the cap.
140 |     #[test]
141 |     fn trailing_newlines_always_trimmed() {
142 |         check(3, ["A\n\n\n\n\n"], "A");
143 |     }
144 | 
145 |     /// Writes each of `inputs` (one `write` call apiece) through a collapser capped at `max_newlines`, then asserts
146 |     /// that the collected output equals `expect` and that the reported byte counts add up to the total input length.
147 |     fn check<const N: usize>(max_newlines: usize, inputs: [&str; N], expect: &str) {
148 |         let sink = Vec::with_capacity(expect.len());
149 |         let mut collapser = NewlineCollapser::new(sink, max_newlines);
150 | 
151 |         let mut expected_wrote = 0;
152 |         let mut wrote = 0;
153 |         for input in inputs {
154 |             let bytes = input.as_bytes();
155 |             expected_wrote += bytes.len();
156 |             wrote += collapser.write(bytes).expect("should have written");
157 |         }
158 |         let actual_bytes = collapser.take_underlying();
159 |         let actual_str = String::from_utf8(actual_bytes).expect("utf8 encoding problem");
160 | 
161 |         assert_eq!(&actual_str, expect);
162 |         assert_eq!(wrote, expected_wrote);
163 |     }
164 | }
154 | 


--------------------------------------------------------------------------------
/src/output/footnote_transform.rs:
--------------------------------------------------------------------------------
  1 | use crate::util::output::{Output, SimpleWrite};
  2 | use std::collections::HashMap;
  3 | 
  4 | pub(crate) struct FootnoteTransformer<'md> { // writes footnote labels, renumbering them when active
  5 |     mappings: Option<HashMap<&'md str, usize>>, // `None` = inactive (labels pass through); else label -> number
  6 | }
  7 | 
  8 | pub(crate) struct FootnoteTransformerToString<'a, 'md> { // renders transformed labels into owned `String`s
  9 |     transformer: &'a mut FootnoteTransformer<'md>, // does the actual label -> output mapping
 10 |     scratch: Output<String>, // reusable output wrapper; its backing `String` is replaced on each `transform`
 11 | }
 12 | 
 13 | impl<'md> FootnoteTransformer<'md> {
 14 |     /// Creates a transformer. When `active`, labels are renumbered in order of first appearance; otherwise they're
 15 |     /// written out unchanged.
 16 |     pub(crate) fn new(active: bool) -> Self {
 17 |         let mappings = active.then(HashMap::default);
 18 |         Self { mappings }
 19 |     }
 20 | 
 21 |     /// Writes `label` to `out`: as-is when inactive, otherwise as the number assigned to it. A label that hasn't
 22 |     /// been seen before is assigned the next number, counting from 1.
 23 |     pub(crate) fn write<W>(&mut self, out: &mut Output<W>, label: &'md str)
 24 |     where
 25 |         W: SimpleWrite,
 26 |     {
 27 |         let Some(mapping) = &mut self.mappings else {
 28 |             out.write_str(label);
 29 |             return;
 30 |         };
 31 |         let next_number = mapping.len() + 1;
 32 |         let assigned = *mapping.entry(label).or_insert(next_number);
 33 |         out.write_str(&assigned.to_string());
 34 |     }
 35 | 
 36 |     /// Creates a helper that renders transformed labels as `String`s, borrowing this transformer for `'a`.
 37 |     pub(crate) fn new_to_stringer<'a>(&'a mut self) -> FootnoteTransformerToString<'a, 'md> {
 38 |         let stringer = FootnoteTransformerToString::new(self);
 39 |         stringer
 40 |     }
 41 | }
 38 | 
 39 | impl<'a, 'md> FootnoteTransformerToString<'a, 'md> {
 40 |     /// Returns what the underlying transformer writes for `label`, as a `String` pre-sized to the expected length.
 41 |     pub(crate) fn transform(&mut self, label: &'md str) -> String {
 42 |         let buffer = String::with_capacity(self.transformed_label_len(label));
 43 |         // Swap in the fresh buffer; whatever it replaces is discarded.
 44 |         _ = self.scratch.replace_underlying(buffer).unwrap();
 45 |         self.transformer.write(&mut self.scratch, label);
 46 |         self.scratch.take_underlying().unwrap()
 47 |     }
 48 | 
 49 |     /// Wraps `transformer`, starting out with an empty scratch buffer that doesn't wrap text.
 50 |     fn new(transformer: &'a mut FootnoteTransformer<'md>) -> Self {
 51 |         let scratch = Output::without_text_wrapping(String::new());
 52 |         Self { transformer, scratch }
 53 |     }
 54 | 
 55 |     /// Estimates how many bytes `transform` will produce for `label`: the label's own length when the transformer is
 56 |     /// inactive, otherwise the number of decimal digits in the number the label has (or would get next).
 57 |     fn transformed_label_len(&mut self, label: &str) -> usize {
 58 |         let Some(mapping) = &self.transformer.mappings else {
 59 |             return label.len();
 60 |         };
 61 |         let renumbered_to = match mapping.get(label) {
 62 |             Some(&existing) => existing,
 63 |             None => mapping.len() + 1,
 64 |         };
 65 |         // `checked_ilog10` is `None` only for 0, which we count as a single digit.
 66 |         let digits = renumbered_to.checked_ilog10().map_or(1, |log10| log10 + 1);
 67 |         // Try to convert the u32 to usize; if we can't, just guess a length of 3.
 68 |         // That should be plenty!
 69 |         usize::try_from(digits).unwrap_or(3)
 70 |     }
 71 | }
 67 | 
 68 | #[cfg(test)]
 69 | mod test {
 70 |     use crate::output::footnote_transform::FootnoteTransformer;
 71 |     use crate::util::output::Output;
 72 | 
 73 |     /// An inactive transformer writes every label back out unchanged.
 74 |     #[test]
 75 |     fn inactive() {
 76 |         let mut transformer = FootnoteTransformer::new(false);
 77 |         check("abc", &mut transformer, "abc", 3);
 78 |         check("1", &mut transformer, "1", 1);
 79 |         check("3", &mut transformer, "3", 1);
 80 | 
 81 |         // a repeated label still comes out unchanged
 82 |         check("1", &mut transformer, "1", 1);
 83 |     }
 84 | 
 85 |     /// An active transformer numbers labels by order of first appearance, whatever the labels themselves say.
 86 |     #[test]
 87 |     fn active() {
 88 |         let mut transformer = FootnoteTransformer::new(true);
 89 |         check("abc", &mut transformer, "1", 1);
 90 |         check("1", &mut transformer, "2", 1);
 91 |         check("3", &mut transformer, "3", 1);
 92 | 
 93 |         // a repeated label keeps the number it was first given
 94 |         check("1", &mut transformer, "2", 1);
 95 |     }
 96 | 
 97 |     /// The length estimate grows to two once the assigned number reaches two digits.
 98 |     #[test]
 99 |     fn active_with_ten_footnotes() {
100 |         let mut transformer = FootnoteTransformer::new(true);
101 | 
102 |         // register nine labels first; their output is irrelevant, so each goes to a throwaway buffer
103 |         let nine_labels: Vec<String> = (1..=9).map(|i| format!("footnote-{i}")).collect();
104 |         for label in &nine_labels {
105 |             let mut discard = Output::without_text_wrapping(String::new());
106 |             transformer.write(&mut discard, label);
107 |         }
108 | 
109 |         // the tenth label should remap to "10" with an expected len of 2
110 |         check("z", &mut transformer, "10", 2);
111 |     }
112 | 
113 |     /// Writes `input` through `transformer` and asserts that the output is `expect`, and that the length estimate
114 |     /// for `input` is `expect_transformed_len`.
115 |     fn check<'a>(
116 |         input: &'a str,
117 |         transformer: &mut FootnoteTransformer<'a>,
118 |         expect: &str,
119 |         expect_transformed_len: usize,
120 |     ) {
121 |         // The estimate must hold both before the label is first written and afterwards, so it's checked on either
122 |         // side of the write.
123 |         let len_before = transformer.new_to_stringer().transformed_label_len(input);
124 |         assert_eq!(len_before, expect_transformed_len);
125 | 
126 |         let mut output = Output::without_text_wrapping(String::with_capacity(expect.len()));
127 |         transformer.write(&mut output, input);
128 |         let actual = output.take_underlying().unwrap();
129 |         assert_eq!(&actual, expect);
130 | 
131 |         let len_after = transformer.new_to_stringer().transformed_label_len(input);
132 |         assert_eq!(len_after, expect_transformed_len);
133 |     }
134 | }
133 | 


--------------------------------------------------------------------------------
/src/output/mod.rs:
--------------------------------------------------------------------------------
 1 | //! Output `md_elem`s to various formats.
 2 | mod fmt_md;
 3 | mod fmt_md_inlines;
 4 | mod fmt_plain_inline;
 5 | mod fmt_plain_str;
 6 | mod fmt_plain_writer;
 7 | mod footnote_transform;
 8 | mod link_transform;
 9 | mod output_adapter;
10 | mod tree_ref_serde;
11 | 
12 | pub(crate) use crate::output::fmt_plain_str::*;
13 | 
14 | pub use crate::output::fmt_md::*;
15 | pub use crate::output::fmt_md_inlines::*;
16 | pub use crate::output::link_transform::*;
17 | pub use crate::output::output_adapter::*;
18 | pub use crate::output::tree_ref_serde::*;
19 | 
20 | pub use crate::output::fmt_plain_inline::*;
21 | 


--------------------------------------------------------------------------------
/src/output/output_adapter.rs:
--------------------------------------------------------------------------------
 1 | use crate::md_elem::{MdContext, MdElem};
 2 | use crate::output::{write_md, MdWriterOptions};
 3 | use crate::util::output::{Output, SimpleWrite};
 4 | use std::{fmt, io};
 5 | 
 6 | /// Writes [MdElem]s out as Markdown (the format of `--output markdown`).
 7 | #[derive(Copy, Clone, Default, Debug, PartialEq, Eq, PartialOrd, Ord, Hash)]
 8 | pub struct MdWriter {
 9 |     options: MdWriterOptions,
10 | }
11 | 
12 | impl MdWriter {
13 |     /// Builds an [MdWriter] that writes according to `options`.
14 |     pub fn with_options(options: MdWriterOptions) -> Self {
15 |         MdWriter { options }
16 |     }
17 | 
18 |     /// Writes `nodes` as Markdown into `out`, using this writer's options (including their `text_width`).
19 |     pub fn write<'md, I, W>(&self, ctx: &'md MdContext, nodes: I, out: &mut W)
20 |     where
21 |         I: IntoIterator<Item = &'md MdElem>,
22 |         W: fmt::Write,
23 |     {
24 |         let Self { options } = *self;
25 |         let mut output = Output::new(IoAdapter(out), options.text_width);
26 |         write_md(options, &mut output, ctx, nodes.into_iter())
27 |     }
28 | }
32 | 
33 | /// A thin wrapper that lets a writer of one kind be used as a writer of another kind.
34 | ///
35 | /// To use, wrap the writer you have in an `IoAdapter`, and use the adapter where the other kind of writer is needed.
36 | /// For example, to use a [`std::io::Write`] as a [`std::fmt::Write`]:
37 | ///
38 | /// ```
39 | /// use mdq::output::IoAdapter;
40 | ///
41 | /// fn example(input: impl std::io::Write) -> impl std::fmt::Write {
42 | ///     IoAdapter(input)
43 | /// }
44 | /// ```
45 | ///
46 | /// [`std::io::Write`]: io::Write
47 | /// [`std::fmt::Write`]: fmt::Write
48 | #[derive(Copy, Clone, Default, Debug, PartialEq, Eq, PartialOrd, Ord, Hash)]
49 | pub struct IoAdapter<W>(pub W);
50 | 
51 | impl<W> From<W> for IoAdapter<W> {
52 |     /// Wraps `value`; the same as calling `IoAdapter(value)`.
53 |     fn from(value: W) -> Self {
54 |         IoAdapter(value)
55 |     }
56 | }
56 | 
57 | impl<W: fmt::Write> SimpleWrite for IoAdapter<W> {
58 |     /// Writes `ch` to the wrapped formatter, reporting a formatting failure as an [io::Error].
59 |     fn write_char(&mut self, ch: char) -> io::Result<()> {
60 |         match self.0.write_char(ch) {
61 |             Ok(()) => Ok(()),
62 |             Err(err) => Err(io::Error::new(io::ErrorKind::Other, format!("while writing char: {}", err))),
63 |         }
64 |     }
65 | 
66 |     /// Does nothing and always succeeds: [fmt::Write] has no flush operation to forward to.
67 |     fn flush(&mut self) -> io::Result<()> {
68 |         Ok(())
69 |     }
70 | }
68 | 
69 | impl<W: io::Write> fmt::Write for IoAdapter<W> {
70 |     /// Writes all of `s` to the wrapped writer. Any I/O error becomes a [fmt::Error], so its details are lost.
71 |     fn write_str(&mut self, s: &str) -> fmt::Result {
72 |         match self.0.write_all(s.as_bytes()) {
73 |             Ok(()) => Ok(()),
74 |             Err(_) => Err(fmt::Error),
75 |         }
76 |     }
77 | }
74 | 


--------------------------------------------------------------------------------
/src/query/error.rs:
--------------------------------------------------------------------------------
  1 | use pest::Span;
  2 | use std::fmt::{Display, Formatter};
  3 | 
  4 | /// An error representing an invalid selector query.
  5 | ///
  6 | /// <div class="warning">
  7 | /// This struct's <code>source()</code> is not part of the public contract, and may change at any time without that change being
  8 | /// marked as a breaking change.
  9 | /// </div>
 10 | #[derive(Clone, Debug, PartialEq, Eq, Hash)]
 11 | pub struct ParseError {
 12 |     pub(crate) inner: InnerParseError,
 13 | }
 14 | 
 15 | impl ParseError {
 16 |     /// Creates a new ParseError from an [InnerParseError].
 17 |     ///
 18 |     /// This is intentionally not a [From] impl, because we want to keep it `pub(crate)`.
 19 |     pub(crate) fn new(inner: InnerParseError) -> Self {
 20 |         Self { inner }
 21 |     }
 22 | }
 23 | 
 24 | impl Display for ParseError {
 25 |     fn fmt(&self, f: &mut Formatter<'_>) -> std::fmt::Result {
 26 |         Display::fmt(&self.inner, f)
 27 |     }
 28 | }
 29 | 
 30 | impl std::error::Error for ParseError {
 31 |     /// This method gets the error's source, if available. **Not part of the public API contract.**
 32 |     ///
 33 |     /// Please see the warning on [this struct's main documentation](ParseError).
 34 |     fn source(&self) -> Option<&(dyn std::error::Error + 'static)> {
 35 |         self.inner.source()
 36 |     }
 37 | }
 38 | 
 39 | /// The crate-internal cause of a [ParseError].
 40 | #[derive(Clone, Debug, PartialEq, Eq, Hash)]
 41 | pub(crate) enum InnerParseError {
 42 |     /// An error from the pest-based query parser.
 43 |     Pest(crate::query::Error),
 44 |     /// Any other error: the part of the query it applies to, and a message describing it.
 45 |     Other(DetachedSpan, String),
 46 | }
 47 | 
 48 | impl Display for InnerParseError {
 49 |     /// Displays the pest error, or just the message of an `Other` error (its span isn't shown).
 50 |     fn fmt(&self, f: &mut Formatter<'_>) -> std::fmt::Result {
 51 |         let shown: &dyn Display = match self {
 52 |             Self::Pest(error) => error,
 53 |             Self::Other(_span, message) => message,
 54 |         };
 55 |         Display::fmt(shown, f)
 56 |     }
 57 | }
 58 | 
 59 | impl std::error::Error for InnerParseError {
 60 |     /// Returns the pest error for the `Pest` variant; an `Other` error has no source.
 61 |     fn source(&self) -> Option<&(dyn std::error::Error + 'static)> {
 62 |         if let Self::Pest(err) = self {
 63 |             Some(err)
 64 |         } else {
 65 |             None
 66 |         }
 67 |     }
 68 | }
 62 | 
 63 | impl ParseError {
 64 |     /// Renders this error for display to a user, using the original query string for context.
 65 |     ///
 66 |     /// ```
 67 |     /// use mdq::select::Selector;
 68 |     /// let query_text = "$ ! invalid query string ! $";
 69 |     /// let parse_error = Selector::try_from(query_text).expect_err("expected an error");
 70 |     /// let expected_error = r" --> 1:1
 71 |     ///   |
 72 |     /// 1 | $ ! invalid query string ! $
 73 |     ///   | ^---
 74 |     ///   |
 75 |     ///   = expected valid query";
 76 |     /// assert_eq!(parse_error.to_string(query_text), expected_error);
 77 |     /// ```
 78 |     pub fn to_string(&self, query_text: &str) -> String {
 79 |         let (detached, message) = match &self.inner {
 80 |             InnerParseError::Pest(e) => return format!("{e}"),
 81 |             InnerParseError::Other(span, message) => (span, message),
 82 |         };
 83 |         // If the stored offsets don't form a valid span of `query_text`, fall back to the bare message.
 84 |         let Some(span) = Span::new(query_text, detached.start, detached.end) else {
 85 |             return message.to_string();
 86 |         };
 87 |         let pest_err = crate::query::Error::new_from_span(span, message.to_string());
 88 |         pest_err.to_string()
 89 |     }
 90 | }
 91 | 
 92 | impl From<crate::query::Error> for InnerParseError {
 93 |     /// Wraps a pest-level query error in the `Pest` variant.
 94 |     fn from(err: crate::query::Error) -> Self {
 95 |         InnerParseError::Pest(err)
 96 |     }
 97 | }
 97 | 
 98 | /// A [pest::Span] reduced to its start and end offsets. It holds no reference to the underlying `&str`, which is
 99 | /// what makes it cheaply Copyable.
100 | #[derive(Copy, Clone, Default, Debug, PartialEq, Eq, Hash)]
101 | pub(crate) struct DetachedSpan {
102 |     pub(crate) start: usize,
103 |     pub(crate) end: usize,
104 | }
105 | 
106 | impl From<pest::Span<'_>> for DetachedSpan {
107 |     /// Copies the span's start and end offsets.
108 |     fn from(value: pest::Span) -> Self {
109 |         let (start, end) = (value.start(), value.end());
110 |         DetachedSpan { start, end }
111 |     }
112 | }
113 | 
114 | impl From<&crate::query::Pair<'_>> for DetachedSpan {
115 |     /// Detaches the span that the pair covers.
116 |     fn from(value: &crate::query::Pair<'_>) -> Self {
117 |         Self::from(value.as_span())
118 |     }
119 | }
119 | 


--------------------------------------------------------------------------------
/src/query/grammar.pest:
--------------------------------------------------------------------------------
  1 | WHITESPACE = _{ " " | "\t" | "\r" | "\n" } // implicit whitespace, allowed between the tokens of non-atomic rules
  2 | 
  3 | top = { SOI ~ "|"* ~ selector_chain? ~ EOI } // entry rule: optional leading pipes, then an optional chain
  4 | 
  5 | selector_chain = { selector ~ ("|"+ ~ selector?)* } // selectors separated by one or more pipes
  6 | 
  7 | selector = {
  8 |     select_section
  9 |   | select_list_item
 10 |   | select_link
 11 |   | select_block_quote
 12 |   | select_code_block
 13 |   | select_front_matter
 14 |   | select_html
 15 |   | select_paragraph
 16 |   | select_table
 17 | }
 18 | 
 19 | selector_delim = _{ explicit_space | EOI } // what has to follow a selector's start token
 20 | explicit_space = !{ " " } // making this a rule lets us have nicer error messages if the user doesn't include it
 21 | 
 22 | select_section =  { section_start ~ PUSH_LITERAL("|") ~ #title = string } // the pushed "|" is the end delimiter for `string`
 23 | section_start  = @{ "#" ~ selector_delim }
 24 | 
 25 | select_list_item  =  { list_start ~ list_task_options? ~ PUSH_LITERAL("|") ~ #contents = string }
 26 | list_start        = ${ (list_ordered | "-") ~ selector_delim }
 27 | list_ordered      = ${ "1." } // its presence marks the list selector as ordered
 28 | list_task_options = ${ "[" ~ (task_unchecked | task_checked | task_either) ~ task_end }
 29 | task_checked      = ${ "x" } // as in "[x]"
 30 | task_unchecked    = ${ " " } // as in "[ ]"
 31 | task_either       = ${ "?" } // as in "[?]"
 32 | task_end          = ${ "]" }
 33 | 
 34 | select_link =  { link_start ~ PUSH_LITERAL("]") ~ #display_text = string ~ "](" ~ PUSH_LITERAL(")")~ #url_text = string ~ ")" }
 35 | link_start  = ${ image_start? ~ "[" } // "[" for a link, "![" for an image
 36 | image_start = @{ "!" }
 37 | 
 38 | select_block_quote       =  { select_block_quote_start ~ PUSH_LITERAL("|") ~ #text = string }
 39 | select_block_quote_start = @{ ">" ~ selector_delim }
 40 | 
 41 | select_code_block =  { code_block_start ~ PUSH_LITERAL("|") ~ #text = string }
 42 | code_block_start  = ${ "```" ~ PUSH_LITERAL(" ") ~ #language = string ~ selector_delim } // the language string ends at a space
 43 | 
 44 | select_front_matter =  { front_matter_start ~ PUSH_LITERAL("|") ~ #text = string }
 45 | front_matter_start  = ${ "+++" ~ PUSH_LITERAL(" ") ~ #variant = string ~ selector_delim } // the variant string ends at a space
 46 | 
 47 | select_html =  { html_start ~ PUSH_LITERAL("|") ~ #text = string }
 48 | html_start  = @{ "</>" ~ selector_delim }
 49 | 
 50 | select_paragraph       =  { select_paragraph_start ~ PUSH_LITERAL("|") ~ #text = string }
 51 | select_paragraph_start = @{ "P:" ~ selector_delim }
 52 | 
 53 | select_table = { table_start ~ PUSH_LITERAL(":") ~ #column = string ~ ":-:" ~ PUSH_LITERAL("|") ~ #row = string } // column matcher, ":-:", then row matcher
 54 | table_start = ${":-:" ~ explicit_space } // unlike the other start rules, end-of-input isn't accepted here
 55 | 
 56 | // helper rule, just for unit tests
 57 | string_for_unit_tests__do_not_use_pipe = { PUSH_LITERAL("|") ~ string }
 58 | string_for_unit_tests__do_not_use_angle = { PUSH_LITERAL(">") ~ string }
 59 | string = {
 60 |   // end delimiter for unquoted string will have been PUSH_LITERAL'd by here
 61 |   (
 62 |         asterisk
 63 |       | regex
 64 |       | ( anchor_start? ~ ( quoted_string | unquoted_string ) ~ anchor_end? )
 65 |       | ( anchor_start ~ anchor_end )
 66 |   )?
 67 |   ~ DROP // pop the end delimiter off the stack without matching it
 68 | }
 69 | asterisk = @{ "*" }
 70 | unquoted_string = @{ LETTER ~ (!(PEEK | "$") ~ ANY)* } // starts with a letter; stops before the pushed end delimiter or "$"
 71 | 
 72 | regex               = ${
 73 |   // Put these into a single rule, so that the error message just says "regex" for both the plain and replace variant.
 74 |     ("/" ~ regex_char* ~ "/")
 75 |   | ("!s/" ~ regex_char* ~ "/" ~ regex_replacement_segment? ~ "/")
 76 | }
 77 | regex_char          = ${
 78 |     (regex_escaped_slash | regex_normal_char)
 79 | }
 80 | regex_replacement_segment = ${ regex_char+ }
 81 | regex_escaped_slash = @{ "\\/" } // a backslash followed by a slash
 82 | regex_normal_char   = @{ !("/") ~ ANY }
 83 | 
 84 | quoted_string = ${ PUSH("'" | "\"") ~ quoted_char* ~ POP } // the closing quote must be the same as the opening one
 85 | 
 86 | quoted_char = ${
 87 |     quoted_plain_chars
 88 |   | ("\\" ~ (escaped_char | "u{" ~ unicode_seq ~ "}"))
 89 | }
 90 | 
 91 | anchor_start = @{ "^" }
 92 | 
 93 | anchor_end = @{ "$" }
 94 | 
 95 | quoted_plain_chars = @{ (!(PEEK | "\\") ~ ANY)+ } // anything except the opening quote character or a backslash
 96 | 
 97 | escaped_char = @{ ("\"" | "'" | "`" | "\\" | "n" | "r" | "t") }
 98 | 
 99 | unicode_seq = @{ ASCII_HEX_DIGIT{1, 6} }
100 | 


--------------------------------------------------------------------------------
/src/query/matcher_try_from.rs:
--------------------------------------------------------------------------------
 1 | use crate::query::strings::{ParsedString, ParsedStringMode};
 2 | use crate::query::{DetachedSpan, InnerParseError, Pair};
 3 | use crate::select::{MatchReplace, Matcher, Regex};
 4 | use fancy_regex::Error;
 5 | 
 6 | impl MatchReplace {
 7 |     /// Builds a [MatchReplace] from an optional parsed string pair.
 8 |     ///
 9 |     /// A missing pair, or one that's equivalent to an asterisk, matches anything and has no replacement. A regex that
10 |     /// fails to compile is reported as an [InnerParseError::Other].
11 |     pub(crate) fn try_from(pair: Option<Pair>) -> Result<Self, InnerParseError> {
12 |         let pair = match pair {
13 |             Some(pair) => pair,
14 |             None => {
15 |                 return Ok(Self {
16 |                     matcher: Matcher::Any { explicit: false },
17 |                     replacement: None,
18 |                 })
19 |             }
20 |         };
21 |         let span = DetachedSpan::from(&pair);
22 |         let parsed_string = ParsedString::new_from_pairs(pair.into_inner())?;
23 |         if parsed_string.is_equivalent_to_asterisk() {
24 |             let explicit = parsed_string.explicit_wildcard;
25 |             return Ok(Self {
26 |                 matcher: Matcher::Any { explicit },
27 |                 replacement: None,
28 |             });
29 |         }
30 |         let ParsedString {
31 |             mode,
32 |             text,
33 |             anchor_start,
34 |             anchor_end,
35 |             replace_string,
36 |             ..
37 |         } = parsed_string;
38 |         let matcher = match mode {
39 |             ParsedStringMode::Regex => {
40 |                 let re = fancy_regex::Regex::new(&text).map_err(|e| match e {
41 |                     Error::ParseError(pos, err) => {
42 |                         // Point at the reported position: +1 for the regex's opening slash
43 |                         let at = span.start + pos + 1;
44 |                         let re_span = DetachedSpan { start: at, end: at };
45 |                         InnerParseError::Other(re_span, format!("regex parse error: {err}"))
46 |                     }
47 |                     // not expected, but we'll handle it anyway
48 |                     other => InnerParseError::Other(span, other.to_string()),
49 |                 })?;
50 |                 Matcher::Regex(Regex { re })
51 |             }
52 |             // The two text modes differ only in case sensitivity.
53 |             ParsedStringMode::CaseSensitive | ParsedStringMode::CaseInsensitive => Matcher::Text {
54 |                 case_sensitive: matches!(mode, ParsedStringMode::CaseSensitive),
55 |                 anchor_start,
56 |                 text,
57 |                 anchor_end,
58 |             },
59 |         };
60 |         Ok(Self {
61 |             matcher,
62 |             replacement: replace_string,
63 |         })
64 |     }
65 | }
61 | 


--------------------------------------------------------------------------------
/src/query/mod.rs:
--------------------------------------------------------------------------------
 1 | mod pest;
 2 | 
 3 | mod error;
 4 | mod matcher_try_from;
 5 | mod selector_try_from;
 6 | mod strings;
 7 | mod traversal;
 8 | mod traversal_composites;
 9 | 
10 | pub use error::*;
11 | 
12 | #[cfg(test)]
13 | pub(crate) use pest::StringVariant;
14 | #[allow(unused_imports)]
15 | pub(crate) use pest::{Error, Pair, Pairs, Query};
16 | 


--------------------------------------------------------------------------------
/src/query/pest.rs:
--------------------------------------------------------------------------------
  1 | #[cfg(test)]
  2 | pub(crate) use crate::query::pest::test_helpers::StringVariant;
  3 | use pest::Parser;
  4 | use pest_derive::Parser;
  5 | use std::fmt::{Debug, Display, Formatter};
  6 | use std::rc::Rc;
  7 | 
  8 | #[derive(Parser)]
  9 | #[grammar = "query/grammar.pest"]
 10 | struct QueryPairs; // the pest-derived parser for the query grammar
 11 | 
 12 | pub(crate) struct Query { // namespace for the crate's query-parsing entry point, `Query::parse`
 13 |     _private: (), // a private field, so the struct can't be constructed outside this module
 14 | }
 15 | 
 16 | pub(crate) type Pair<'a> = pest::iterators::Pair<'a, Rule>; // a pest pair over this grammar's `Rule`s
 17 | pub(crate) type Pairs<'a> = pest::iterators::Pairs<'a, Rule>; // the `Pairs` counterpart of `Pair`
 18 | 
 19 | #[derive(Debug, Clone, PartialEq, Eq, Hash)]
 20 | pub(crate) struct Error {
 21 |     pub(crate) pest_error: Rc<pest::error::Error<Rule>>,
 22 | }
 23 | 
 24 | impl Error {
 25 |     pub(crate) fn new_from_span(span: pest::Span, message: String) -> Self {
 26 |         Self {
 27 |             pest_error: Rc::new(pest::error::Error::new_from_span(
 28 |                 pest::error::ErrorVariant::CustomError {
 29 |                     message: message.to_string(),
 30 |                 },
 31 |                 span,
 32 |             )),
 33 |         }
 34 |     }
 35 | }
 36 | 
 37 | impl Display for Error {
 38 |     fn fmt(&self, f: &mut Formatter<'_>) -> std::fmt::Result {
 39 |         Display::fmt(&self.pest_error, f)
 40 |     }
 41 | }
 42 | 
 43 | impl std::error::Error for Error {}
 44 | 
 45 | impl From<pest::error::Error<Rule>> for Error {
 46 |     fn from(value: pest::error::Error<Rule>) -> Self {
 47 |         Self {
 48 |             pest_error: Rc::new(value),
 49 |         }
 50 |     }
 51 | }
 52 | 
 53 | impl Query {
 54 |     /// Parses `query_text` against the grammar's `top` rule.
 55 |     ///
 56 |     /// On failure, the rule names in the pest error are replaced by user-facing descriptions.
 57 |     pub(crate) fn parse(query_text: &str) -> Result<Pairs, Error> {
 58 |         QueryPairs::parse(Rule::top, query_text).map_err(Self::format_err)
 59 |     }
 60 | 
 61 |     /// Replaces each rule's name in `err` with a description that's meant for users.
 62 |     ///
 63 |     /// In the table below, `_` stands for a double quote: every underscore is turned into `"` at the end. Because of
 64 |     /// that, no description may contain a literal underscore.
 65 |     fn format_err(err: pest::error::Error<Rule>) -> Error {
 66 |         let renamed = err.renamed_rules(|rule| {
 67 |             match rule {
 68 |                 Rule::EOI => "end of input",
 69 |                 Rule::WHITESPACE => "whitespace",
 70 |                 Rule::top => "valid query",
 71 |                 Rule::selector_chain => "one or more selectors",
 72 |                 Rule::selector => "selector",
 73 |                 Rule::selector_delim | Rule::explicit_space => "space",
 74 |                 Rule::select_section | Rule::section_start => "_#_",
 75 |                 Rule::select_list_item | Rule::list_start => "_-_ or _1._",
 76 |                 Rule::list_ordered => "_1._",
 77 |                 Rule::list_task_options => "_[ ]_, _[x]_, or _[?]_",
 78 |                 Rule::task_checked => "_[x]_",
 79 |                 Rule::task_unchecked => "_[ ]_",
 80 |                 Rule::task_either => "_[?]_",
 81 |                 Rule::task_end => "_]_",
 82 |                 Rule::select_link | Rule::link_start => "_[_ or _![_",
 83 |                 Rule::image_start => "_![_",
 84 |                 Rule::select_block_quote | Rule::select_block_quote_start => "_>_",
 85 |                 Rule::select_code_block | Rule::code_block_start => "_```_",
 86 |                 Rule::select_front_matter | Rule::front_matter_start => "_+++_",
 87 |                 Rule::select_html | Rule::html_start => "_</>_",
 88 |                 Rule::select_paragraph | Rule::select_paragraph_start => "_P:_",
 89 |                 Rule::select_table | Rule::table_start => "_:-:_",
 90 |                 Rule::string
 91 |                 | Rule::string_for_unit_tests__do_not_use_angle
 92 |                 | Rule::string_for_unit_tests__do_not_use_pipe => "string",
 93 |                 Rule::unquoted_string => "unquoted string",
 94 |                 Rule::regex => "regex",
 95 |                 Rule::regex_char => "regex character",
 96 |                 Rule::regex_escaped_slash => "_/_",
 97 |                 Rule::regex_normal_char => "regex character",
 98 |                 // No underscores here: they'd be turned into quotes below.
 99 |                 Rule::regex_replacement_segment => "regex replacement",
100 |                 Rule::quoted_string => "quoted string",
101 |                 Rule::quoted_char => "character in quoted string",
102 |                 Rule::asterisk => "_*_",
103 |                 Rule::anchor_start => "_^_",
104 |                 Rule::anchor_end => "_$_",
105 |                 Rule::quoted_plain_chars => "character in quoted string",
106 |                 Rule::escaped_char => "\", ', `, \\, n, r, or t",
107 |                 Rule::unicode_seq => "1 - 6 hex characters",
108 |             }
109 |             .to_string()
110 |             .replace('_', "\"")
111 |         });
112 |         Error {
113 |             pest_error: Rc::new(renamed),
114 |         }
115 |     }
116 | }
109 | 
110 | /// Test-only helpers for parsing strings directly, for more direct testing of those grammar rules.
111 | #[cfg(test)]
112 | mod test_helpers {
113 |     use super::*;
114 |     use pest::Parser;
115 | 
116 |     /// Which end delimiter the string under test is parsed with.
117 |     #[derive(Clone, Copy, PartialEq, Eq)]
118 |     pub(crate) enum StringVariant {
119 |         Pipe,
120 |         AngleBracket,
121 |     }
122 | 
123 |     impl StringVariant {
124 |         /// Tries to parse the given string. On success, returns the parsed Pairs along with the query text that
125 |         /// comes after the first parsed pair (or all of the query text, if there are no pairs).
126 |         pub(crate) fn parse(self, query_text: &str) -> Result<(Pairs, &str), Error> {
127 |             let parsed = QueryPairs::parse(self.as_rule(), query_text)?;
128 |             let consumed = parsed.peek().map_or(0, |pair| pair.as_span().end());
129 |             Ok((parsed, &query_text[consumed..]))
130 |         }
131 | 
132 |         /// The unit-test-only grammar rule that corresponds to this variant.
133 |         pub(crate) fn as_rule(self) -> Rule {
134 |             match self {
135 |                 Self::Pipe => Rule::string_for_unit_tests__do_not_use_pipe,
136 |                 Self::AngleBracket => Rule::string_for_unit_tests__do_not_use_angle,
137 |             }
138 |         }
139 |     }
140 | }
142 | 


--------------------------------------------------------------------------------
/src/query/traversal.rs:
--------------------------------------------------------------------------------
  1 | use crate::query::pest::{Pair, Pairs, Rule};
  2 | 
  3 | pub(crate) type OnePair<'a> = OneOf<Pair<'a>>; // a `OneOf` slot that holds a `Pair`
  4 | 
  5 | /// A predicate over [Pair]s.
  6 | pub(crate) trait PairMatcher {
  7 |     /// Returns whether `pair` satisfies this matcher's condition.
  8 |     fn matches(&self, pair: &Pair) -> bool;
  9 | 
 10 |     /// Collects every matching pair in `pairs`. The search descends into the inner pairs of a pair that doesn't
 11 |     /// match, but not into those of one that does.
 12 |     fn find_all_in(self, pairs: Pairs) -> Vec<Pair>
 13 |     where
 14 |         Self: Sized,
 15 |     {
 16 |         let finder = FindAll::new(self);
 17 |         finder.find_in(pairs)
 18 |     }
 19 | }
 16 | 
 17 | /// A trait for matching pairs, and then either storing them or traversing further.
 18 | ///
 19 | /// This keeps the matching (typically [ByRule] or [ByTag], or some combination of them) separate from the storing
 20 | /// (which may be to add to a vec, to add to [OneOf], or anything else).
 21 | pub(crate) trait PairMatchStore<'a> {
 22 |     /// What this store yields once the search is over.
 23 |     type Output;
 24 | 
 25 |     /// Offers `pair` to the store, which either keeps it or hands it back.
 26 |     fn match_and_store(&mut self, pair: Pair<'a>) -> MatchStoreResult<'a>;
 27 | 
 28 |     /// Consumes the store, returning whatever it has gathered.
 29 |     fn get(self) -> Self::Output;
 30 | 
 31 |     /// Offers each of `pairs` to this store, recursing into the inner pairs of any pair it hands back, and then
 32 |     /// returns what was gathered.
 33 |     fn find_in(mut self, pairs: Pairs<'a>) -> Self::Output
 34 |     where
 35 |         Self: Sized,
 36 |     {
 37 |         fn visit<'b, S: PairMatchStore<'b>>(store: &mut S, pairs: Pairs<'b>) {
 38 |             for pair in pairs {
 39 |                 match store.match_and_store(pair) {
 40 |                     MatchStoreResult::Stored => {}
 41 |                     MatchStoreResult::NotStored(unmatched) => visit(store, unmatched.into_inner()),
 42 |                 }
 43 |             }
 44 |         }
 45 |         visit(&mut self, pairs);
 46 |         self.get()
 47 |     }
 48 | }
 43 | 
 44 | pub(crate) enum MatchStoreResult<'a> { // outcome of `PairMatchStore::match_and_store`
 45 |     Stored, // the store kept the pair
 46 |     NotStored(Pair<'a>), // the pair is handed back; `find_in` then searches its inner pairs
 47 | }
 48 | 
 49 | /// A flag that records whether any pair has been stored.
 50 | #[derive(Copy, Clone, Eq, PartialEq, Debug, Default)]
 51 | pub(crate) struct Present(bool);
 52 | 
 53 | impl Present {
 54 |     /// Whether `store` has been called on this flag.
 55 |     pub(crate) fn is_present(&self) -> bool {
 56 |         let Present(seen) = *self;
 57 |         seen
 58 |     }
 59 | 
 60 |     /// Marks the flag as present; the pair itself isn't kept.
 61 |     pub(crate) fn store(&mut self, _pair: Pair) {
 62 |         *self = Present(true);
 63 |     }
 64 | }
 63 | 
 64 | #[derive(Debug)]
 65 | pub(crate) struct OneOf<T>(Result<Option<T>, ()>);
 66 | 
 67 | impl<T> Default for OneOf<T> {
 68 |     fn default() -> Self {
 69 |         Self(Ok(None))
 70 |     }
 71 | }
 72 | 
 73 | impl<T> OneOf<T> {
 74 |     pub(crate) fn take(self) -> Result<Option<T>, String> {
 75 |         self.0.map_err(|_| "multiple items found".to_string())
 76 |     }
 77 | 
 78 |     pub(crate) fn store(&mut self, item: T) {
 79 |         self.0 = match self.0 {
 80 |             Ok(Some(_)) | Err(_) => Err(()),
 81 |             Ok(None) => Ok(Some(item)),
 82 |         }
 83 |     }
 84 | }
 85 | 
 86 | /// A [PairMatchStore] that accumulates every pair accepted by the matcher `M`.
 87 | /// Accepted pairs are reported as stored, so the traversal never looks inside them.
 88 | #[derive(Debug)]
 89 | pub(crate) struct FindAll<'a, M>(M, Vec<Pair<'a>>);
 90 | 
 91 | impl<M> FindAll<'_, M> {
 92 |     pub(crate) fn new(matcher: M) -> Self {
 93 |         Self(matcher, vec![])
 94 |     }
 95 | }
 96 | 
 97 | impl<'a, M: PairMatcher> PairMatchStore<'a> for FindAll<'a, M> {
 98 |     type Output = Vec<Pair<'a>>;
 99 | 
100 |     fn match_and_store(&mut self, pair: Pair<'a>) -> MatchStoreResult<'a> {
101 |         let Self(matcher, found) = self;
102 |         if !matcher.matches(&pair) {
103 |             return MatchStoreResult::NotStored(pair);
104 |         }
105 |         found.push(pair);
106 |         MatchStoreResult::Stored
107 |     }
108 | 
109 |     fn get(self) -> Self::Output {
110 |         let Self(_, found) = self;
111 |         found
112 |     }
113 | }
114 | /// A [PairMatcher] that accepts pairs whose grammar rule equals the given [Rule].
115 | #[derive(Debug)]
116 | pub(crate) struct ByRule(Rule);
117 | impl ByRule {
118 |     pub(crate) fn new(rule: Rule) -> Self {
119 |         ByRule(rule)
120 |     }
121 | }
122 | 
123 | impl PairMatcher for ByRule {
124 |     fn matches(&self, pair: &Pair) -> bool {
125 |         pair.as_rule() == self.0
126 |     }
127 | }
128 | 
129 | /// A [PairMatcher] that accepts pairs whose node tag equals the given string.
130 | #[derive(Debug)]
131 | pub(crate) struct ByTag(&'static str);
132 | 
133 | impl ByTag {
134 |     pub(crate) fn new(tag: &'static str) -> Self {
135 |         ByTag(tag)
136 |     }
137 | }
138 | 
139 | impl PairMatcher for ByTag {
140 |     fn matches(&self, pair: &Pair) -> bool {
141 |         // A pair without a node tag never matches.
142 |         let Self(wanted) = self;
143 |         pair.as_node_tag() == Some(*wanted)
144 |     }
145 | }
146 | 


--------------------------------------------------------------------------------
/src/query/traversal_composites.rs:
--------------------------------------------------------------------------------
  1 | use crate::query::pest::{Pair, Pairs, Rule};
  2 | use crate::query::traversal::MatchStoreResult;
  3 | use crate::query::traversal::PairMatchStore;
  4 | use crate::query::traversal::PairMatcher;
  5 | use crate::query::traversal::{ByRule, ByTag, OnePair, Present};
  6 | use paste::paste;
  7 | 
  8 | /// Generates the types needed to look for several elements during a single traversal:
  9 | /// 1. a `${name}Traverser`, which holds one [ByRule] or [ByTag] matcher per element and looks for them as it goes; and
 10 | /// 2. a `${name}Results`, which has one field per element, of that element's declared result type.
 11 | macro_rules! composite_finder {
 12 |     ($name:ident { $($elem:ident $result:ty : $finder:ident),+ $(,)? }) => { // entry point: derives the type names
 13 |         paste! {
 14 |             composite_finder!{full: ([<$name Traverser>] / [<$name Results>] / [< $name MatchStore >] ) {$($elem $result: $finder),+} }
 15 |         }
 16 |     };
 17 | 
 18 |     (finder_arg: $name:ident ByRule) => { // the element name doubles as the Rule variant
 19 |         ByRule::new(Rule::$name)
 20 |     };
 21 |     (finder_arg: $name:ident ByTag) => { // the element name doubles as the tag string
 22 |         ByTag::new(stringify!($name))
 23 |     };
 24 | 
 25 |     (full: ($finder_name:ident / $result_name:ident / $match_store_name:ident) { $($elem:ident $result:ty : $finder:ident),+ }) => {
 26 |         #[derive(Debug)]
 27 |         pub(crate) struct $finder_name {
 28 |             $(
 29 |             $elem: $finder,
 30 |             )+
 31 |         }
 32 | 
 33 |         #[derive(Debug, Default)]
 34 |         pub(crate) struct $result_name<'a> {
 35 |             $(
 36 |                 pub(crate) $elem: $result,
 37 |             )+
 38 |         }
 39 | 
 40 |         struct $match_store_name<'a>($finder_name, $result_name<'a>); // the matchers (.0) plus the results so far (.1)
 41 | 
 42 |         impl $finder_name {
 43 |             fn new() -> Self {
 44 |                 Self {
 45 |                     $(
 46 |                     $elem: composite_finder!(finder_arg: $elem $finder),
 47 |                     )+
 48 |                 }
 49 |             }
 50 | 
 51 |             pub(crate) fn traverse(pairs: Pairs) -> $result_name {
 52 |                 $match_store_name($finder_name::new(), $result_name::default()).find_in(pairs)
 53 |             }
 54 |         }
 55 | 
 56 |         impl<'a> PairMatchStore<'a> for $match_store_name<'a> {
 57 |             type Output = $result_name<'a>;
 58 | 
 59 |             fn match_and_store(&mut self, pair: Pair<'a, >) -> MatchStoreResult<'a> {
 60 |                 $( // matchers are tried in declaration order; the first one that matches stores the pair
 61 |                 if self.0.$elem.matches(&pair) {
 62 |                     self.1.$elem.store(pair);
 63 |                     MatchStoreResult::Stored
 64 |                 }
 65 |                 )else+ else { // nothing matched: hand the pair back so the traversal descends into it
 66 |                     MatchStoreResult::NotStored(pair)
 67 |                 }
 68 |             }
 69 | 
 70 |             fn get(self) -> Self::Output {
 71 |                 self.1
 72 |             }
 73 |         }
 74 |     }
 75 | }
 76 | 
 77 | composite_finder! { Section { // generates SectionTraverser and SectionResults
 78 |     title OnePair<'a>: ByTag,
 79 | }}
 80 | composite_finder! { ListItem { // generates ListItemTraverser and ListItemResults
 81 |     list_ordered Present: ByRule,
 82 |     task_checked Present: ByRule,
 83 |     task_unchecked Present: ByRule,
 84 |     task_either Present: ByRule,
 85 |     contents OnePair<'a>: ByTag,
 86 | }}
 87 | 
 88 | composite_finder! { Link { // generates LinkTraverser and LinkResults
 89 |     display_text OnePair<'a>: ByTag,
 90 |     url_text OnePair<'a>: ByTag,
 91 |     image_start Present: ByRule,
 92 | }}
 93 | 
 94 | composite_finder! { BlockQuote { // generates BlockQuoteTraverser and BlockQuoteResults
 95 |     text OnePair<'a>: ByTag,
 96 | }}
 97 | 
 98 | composite_finder! { CodeBlock { // generates CodeBlockTraverser and CodeBlockResults
 99 |     language OnePair<'a>: ByTag,
100 |     text OnePair<'a>: ByTag,
101 | }}
102 | 
103 | composite_finder! { FrontMatter { // generates FrontMatterTraverser and FrontMatterResults
104 |     variant OnePair<'a>: ByTag,
105 |     text OnePair<'a>: ByTag,
106 | }}
107 | 
108 | composite_finder! { Html { // generates HtmlTraverser and HtmlResults
109 |     text OnePair<'a>: ByTag,
110 | }}
111 | 
112 | composite_finder! { Paragraph { // generates ParagraphTraverser and ParagraphResults
113 |     text OnePair<'a>: ByTag,
114 | }}
115 | 
116 | composite_finder! { Table { // generates TableTraverser and TableResults
117 |     column OnePair<'a>: ByTag,
118 |     row OnePair<'a>: ByTag,
119 | }}
120 | 


--------------------------------------------------------------------------------
/src/run/mod.rs:
--------------------------------------------------------------------------------
 1 | //! End-to-end runs.
 2 | //!
 3 | //! This module combines the [`crate::md_elem`], [`crate::select`], and [`crate::output`] mods into a single workflow.
 4 | //! It's useful when you want functionality like the CLI's, but want to run it in-process.
 5 | //!
 6 | //! ## Example
 7 | //!
 8 | //! ```
 9 | //! # use mdq::run;
10 | //!
11 | //! // First, let's define a mocked I/O. Replace this with whatever you need.
12 | //! #[derive(Default)]
13 | //! struct MockIo {
14 | //!     stdout: Vec<u8>,
15 | //! }
16 | //!
17 | //! impl run::OsFacade for MockIo {
18 | //!     fn read_stdin(&self) -> std::io::Result<String> {
19 | //!         Ok("- hello\n- world".to_string())
20 | //!     }
21 | //!
22 | //!     fn read_file(&self, path: &str) -> std::io::Result<String> {
23 | //!         Err(std::io::Error::new(std::io::ErrorKind::NotFound, path))
24 | //!     }
25 | //!
26 | //!     fn stdout(&mut self) -> impl std::io::Write {
27 | //!         &mut self.stdout
28 | //!     }
29 | //!
30 | //!     fn write_error(&mut self, err: run::Error) {
31 | //!         eprintln!("{err}")
32 | //!     }
33 | //! }
34 | //!
35 | //! // Now, use it:
36 | //! # fn main() -> Result<(), Box<dyn std::error::Error>> {
37 | //!
38 | //! // Define our "CLI" options. Use the defaults, but add a positional arg for an "- h" selector.
39 | //! let mut cli_options = run::RunOptions::default();
40 | //! cli_options.selectors = "- h".to_string(); // list items containing an 'h'
41 | //!
42 | //! let mut os_facade = MockIo::default();
43 | //! let found_any = run::run(&cli_options, &mut os_facade);
44 | //! let stdout_text = String::from_utf8(os_facade.stdout)?;
45 | //!
46 | //! assert_eq!(found_any, true);
47 | //! assert_eq!(stdout_text, "- hello\n");
48 | //! #
49 | //! #     Ok(())
50 | //! # }
51 | //! ```
52 | mod cli;
53 | mod run_main;
54 | 
55 | pub use cli::*;
56 | pub use run_main::*;
57 | 


--------------------------------------------------------------------------------
/src/select/match_replace.rs:
--------------------------------------------------------------------------------
1 | use crate::select::Matcher;
2 | 
3 | #[derive(Clone, Debug, PartialEq, Eq, PartialOrd, Ord, Hash)]
4 | pub struct MatchReplace { // a matcher paired with an optional replacement string
5 |     pub matcher: Matcher, // what to match against
6 |     pub replacement: Option<String>, // NOTE(review): presumably the text to substitute for a match; `None` = match only — confirm
7 | }
8 | 


--------------------------------------------------------------------------------
/src/select/match_selector.rs:
--------------------------------------------------------------------------------
 1 | use crate::md_elem::*;
 2 | use crate::select::api::{Result, Select};
 3 | use crate::select::string_matcher::StringMatchError;
 4 | use crate::select::TrySelector;
 5 | 
 6 | /// MatchSelector is a helper trait for implementing [TrySelector]. Simply provide the boolean predicate for whether a
 7 | /// given item matches; the blanket [TrySelector] impl turns the answer into a `Select::Hit` or a `Select::Miss`.
 8 | pub(crate) trait MatchSelector<I> {
 9 |     const NAME: &'static str; // selector name, passed to `to_select_error` when `matches` fails
10 | 
11 |     fn matches(&self, item: &I) -> std::result::Result<bool, StringMatchError>; // Ok(true) means the item is selected
12 | }
13 | 
14 | /// Blanket adapter: any [MatchSelector] is a [TrySelector]. A matching item becomes a one-element hit, a
15 | /// non-matching item becomes a miss, and a match error is converted using the selector's `NAME`.
16 | impl<I, M> TrySelector<I> for M
17 | where
18 |     I: Into<MdElem>,
19 |     M: MatchSelector<I>,
20 | {
21 |     fn try_select(&self, _: &MdContext, item: I) -> Result<Select> {
22 |         let is_hit = self.matches(&item).map_err(|e| e.to_select_error(M::NAME))?;
23 |         let elem: MdElem = item.into();
24 |         Ok(if is_hit { Select::Hit(vec![elem]) } else { Select::Miss(elem) })
25 |     }
26 | }
27 | 


--------------------------------------------------------------------------------
/src/select/matcher.rs:
--------------------------------------------------------------------------------
 1 | use std::cmp::Ordering;
 2 | use std::hash::{Hash, Hasher};
 3 | 
 4 | /// A type for matching against expected strings.
 5 | ///
 6 | /// Given a selector like `# hello world` (for a section selector), this defines the `hello world` portion.
 7 | #[derive(Clone, Debug, PartialEq, Eq, PartialOrd, Ord, Hash)]
 8 | pub enum Matcher {
 9 |     /// Quoted or unquoted text, with optional anchoring.
10 |     Text {
11 |         case_sensitive: bool, // NOTE(review): presumably whether letter case must match exactly — confirm
12 |         anchor_start: bool, // NOTE(review): presumably requires the match to begin at the start of the string — confirm
13 |         text: String, // the literal text to look for
14 |         anchor_end: bool, // NOTE(review): presumably requires the match to finish at the end of the string — confirm
15 |     },
16 |     /// A regex match. This will match any substring by default, though you can use `^` and `$` anchors.
17 |     ///
18 |     /// The actual regex library is intentionally obscured so that it can change in the future without breaking the API.
19 |     Regex(Regex),
20 | 
21 |     /// Any string. This can be implicit (an empty matcher in a query string, like `# | ...`), or explicit
22 |     /// (a `*` in a query string, like `# * | ...`).
23 |     Any { explicit: bool },
24 | }
25 | 
26 | /// An opaque regular-expression wrapper; the engine is hidden so it can change without breaking the API.
27 | ///
28 | /// Equality, ordering, and hashing are all defined on the pattern's source text (its `as_str()`).
29 | #[derive(Debug, Clone)]
30 | pub struct Regex {
31 |     pub(crate) re: fancy_regex::Regex,
32 | }
33 | 
34 | impl PartialEq for Regex {
35 |     fn eq(&self, other: &Self) -> bool {
36 |         self.re.as_str().eq(other.re.as_str())
37 |     }
38 | }
39 | 
40 | impl Eq for Regex {}
41 | 
42 | impl PartialOrd for Regex {
43 |     fn partial_cmp(&self, other: &Self) -> Option<Ordering> {
44 |         Some(Ord::cmp(self, other))
45 |     }
46 | }
47 | 
48 | impl Ord for Regex {
49 |     fn cmp(&self, other: &Self) -> Ordering {
50 |         self.re.as_str().cmp(other.re.as_str())
51 |     }
52 | }
53 | 
54 | impl Hash for Regex {
55 |     fn hash<H: Hasher>(&self, state: &mut H) {
56 |         self.re.as_str().hash(state);
57 |     }
58 | }
59 | 


--------------------------------------------------------------------------------
/src/select/mod.rs:
--------------------------------------------------------------------------------
 1 | //! The query and filtering ability of mdq.
 2 | //!
 3 | //! The main item here is [`Selector`]:
 4 | //!
 5 | //! - Parse text into `Selector` using `try_into`.
 6 | //! - Run it against an [`MdDoc`](crate::md_elem::MdDoc) using [`Selector::find_nodes`].
 7 | mod api;
 8 | mod match_replace;
 9 | mod match_selector;
10 | mod matcher;
11 | mod sel_chain;
12 | mod sel_code_block;
13 | mod sel_link_like;
14 | mod sel_list_item;
15 | mod sel_section;
16 | mod sel_single_matcher;
17 | mod sel_table;
18 | mod selector;
19 | mod string_matcher;
20 | 
21 | pub(crate) use api::*;
22 | 
23 | pub use crate::query::ParseError;
24 | pub use api::{Result, SelectError};
25 | pub use match_replace::*;
26 | pub use matcher::*;
27 | pub use selector::*;
28 | 


--------------------------------------------------------------------------------
/src/select/sel_chain.rs:
--------------------------------------------------------------------------------
 1 | use crate::md_elem::{MdContext, MdElem};
 2 | use crate::select::{Result, Select, Selector, SelectorAdapter, TrySelector};
 3 | 
 4 | /// Runs a sequence of selectors, feeding the output of each one into the next.
 5 | #[derive(Debug)]
 6 | pub(crate) struct ChainSelector {
 7 |     chain: Vec<SelectorAdapter>,
 8 | }
 9 | 
10 | impl From<Vec<Selector>> for ChainSelector {
11 |     fn from(chain: Vec<Selector>) -> Self {
12 |         let chain = chain.into_iter().map(Into::into).collect();
13 |         Self { chain }
14 |     }
15 | }
16 | 
17 | impl TrySelector<Vec<MdElem>> for ChainSelector {
18 |     fn try_select(&self, ctx: &MdContext, items: Vec<MdElem>) -> Result<Select> {
19 |         // A bit of a hack: an empty chain is really a no-op. In that case we assume the items are not a
20 |         // stream, but an MdDoc that was deconstructed into a Vec<MdElem>, and so we simply wrap them back up
21 |         // into a single document element.
22 |         if self.chain.is_empty() {
23 |             return Ok(Select::Hit(vec![MdElem::Doc(items)]));
24 |         }
25 |         let mut adapters = self.chain.iter();
26 |         // Thread the items through each adapter in order; the first error aborts the whole chain.
27 |         let found = adapters.try_fold(items, |elems, adapter| adapter.find_nodes(ctx, elems))?;
28 |         Ok(Select::Hit(found))
29 |     }
30 | }
31 | 


--------------------------------------------------------------------------------
/src/select/sel_code_block.rs:
--------------------------------------------------------------------------------
 1 | use crate::md_elem::elem::*;
 2 | use crate::select::match_selector::MatchSelector;
 3 | use crate::select::string_matcher::{StringMatchError, StringMatcher};
 4 | use crate::select::CodeBlockMatcher;
 5 | 
 6 | /// Selector for code blocks, built from a [CodeBlockMatcher].
 7 | /// A block is selected only when both its language and its contents match.
 8 | #[derive(Debug, PartialEq)]
 9 | pub(crate) struct CodeBlockSelector {
10 |     lang_matcher: StringMatcher,
11 |     contents_matcher: StringMatcher,
12 | }
13 | 
14 | impl From<CodeBlockMatcher> for CodeBlockSelector {
15 |     fn from(value: CodeBlockMatcher) -> Self {
16 |         let lang_matcher = value.language.into();
17 |         let contents_matcher = value.contents.into();
18 |         Self { lang_matcher, contents_matcher }
19 |     }
20 | }
21 | 
22 | impl MatchSelector<CodeBlock> for CodeBlockSelector {
23 |     const NAME: &'static str = "code block";
24 | 
25 |     fn matches(&self, code_block: &CodeBlock) -> Result<bool, StringMatchError> {
26 |         // Only the `Code` variant can match; `Math` is always a miss.
27 |         let CodeVariant::Code(code_opts) = &code_block.variant else {
28 |             return Ok(false);
29 |         };
30 |         // A code block without options is treated as having an empty language.
31 |         let actual_lang = code_opts.as_ref().map_or("", |co| co.language.as_str());
32 |         if !self.lang_matcher.matches(actual_lang)? {
33 |             return Ok(false);
34 |         }
35 |         self.contents_matcher.matches(&code_block.value)
36 |     }
37 | }
38 | 
39 | #[cfg(test)]
40 | mod test {
41 |     use super::*;
42 |     use crate::md_elem::MdContext;
43 |     use crate::select::{MatchReplace, Matcher, TrySelector};
44 | 
45 |     #[test]
46 |     fn code_block_selector_match_error() { // a matcher carrying a replacement must be rejected, not silently ignored
47 |         let code_block_matcher = CodeBlockMatcher {
48 |             language: MatchReplace {
49 |                 matcher: Matcher::Text {
50 |                     case_sensitive: false,
51 |                     anchor_start: false,
52 |                     text: "rust".to_string(),
53 |                     anchor_end: false,
54 |                 },
55 |                 replacement: Some("replacement".to_string()), // the replacement is what triggers the error
56 |             },
57 |             contents: MatchReplace {
58 |                 matcher: Matcher::Any { explicit: false },
59 |                 replacement: None,
60 |             },
61 |         };
62 | 
63 |         let code_block = CodeBlock {
64 |             variant: CodeVariant::Code(Some(CodeOpts {
65 |                 language: "rust".to_string(),
66 |                 metadata: None,
67 |             })),
68 |             value: "fn main() {}".to_string(),
69 |         };
70 | 
71 |         let code_block_selector = CodeBlockSelector::from(code_block_matcher);
72 | 
73 |         assert_eq!( // the raw predicate reports the unsupported replacement
74 |             code_block_selector.matches(&code_block),
75 |             Err(StringMatchError::NotSupported)
76 |         );
77 | 
78 |         assert_eq!( // via TrySelector, the error message names the selector through NAME
79 |             code_block_selector
80 |                 .try_select(&MdContext::default(), code_block)
81 |                 .unwrap_err()
82 |                 .to_string(),
83 |             "code block selector does not support string replace"
84 |         );
85 |     }
86 | }
87 | 


--------------------------------------------------------------------------------
/src/select/sel_list_item.rs:
--------------------------------------------------------------------------------
 1 | use crate::md_elem::elem::List;
 2 | use crate::md_elem::{MdContext, MdElem};
 3 | use crate::select::string_matcher::StringMatcher;
 4 | use crate::select::{ListItemMatcher, ListItemTask, Result, Select, TrySelector};
 5 | 
 6 | /// Selects list items by list kind (ordered or unordered), task-checkbox state, and item contents.
 7 | ///
 8 | /// Built from a [ListItemMatcher] via [From].
 9 | #[derive(Debug, PartialEq)]
10 | pub(crate) struct ListItemSelector {
11 |     li_type: ListItemType,
12 |     checkbox: ListItemTask,
13 |     string_matcher: StringMatcher,
14 | }
15 | 
16 | impl From<ListItemMatcher> for ListItemSelector {
17 |     fn from(value: ListItemMatcher) -> Self {
18 |         // The matcher's `ordered` flag maps directly onto the two list kinds.
19 |         let li_type = match value.ordered {
20 |             true => ListItemType::Ordered,
21 |             false => ListItemType::Unordered,
22 |         };
23 |         Self { li_type, checkbox: value.task, string_matcher: value.matcher.into() }
24 |     }
25 | }
26 | 
27 | #[derive(Debug, PartialEq, Copy, Clone)]
28 | pub(crate) enum ListItemType { // which kind of list a ListItemSelector targets
29 |     Ordered, // matches lists that have a starting index
30 |     Unordered, // matches lists that have no starting index
31 | }
32 | 
33 | /// Whether a list item's checkbox state (`None` meaning no checkbox) satisfies the selector's task filter.
34 | fn task_matches(matcher: ListItemTask, md_is_checked: Option<bool>) -> bool {
35 |     match (matcher, md_is_checked) {
36 |         (ListItemTask::Unselected, Some(false)) | (ListItemTask::Selected, Some(true)) => true,
37 |         (ListItemTask::Either, Some(_)) | (ListItemTask::None, None) => true,
38 |         _ => false,
39 |     }
40 | }
41 | 
42 | impl TrySelector<List> for ListItemSelector {
43 |     fn try_select(&self, _: &MdContext, item: List) -> Result<Select> {
44 |         // Unlike most selectors, this one works in two phases:
45 |         // - A list with exactly one item is the base case: test that item, and report a hit or a miss.
46 |         // - Any other list never matches. It is split into one single-item list per item and returned as a
47 |         //   miss wrapping an MdElem::Doc, so that the find_children code in api.rs recurses back into this
48 |         //   method once per item.
49 |         let List { starting_index, items } = item;
50 |         match items.as_slice() {
51 |             [li] => {
52 |                 // The fallible text match only runs once the list-kind and checkbox checks have passed.
53 |                 let kind_and_task_ok = self.li_type.matches(&starting_index) && task_matches(self.checkbox, li.checked);
54 |                 let is_hit = kind_and_task_ok
55 |                     && self
56 |                         .string_matcher
57 |                         .matches_any(&li.item)
58 |                         .map_err(|e| e.to_select_error("list item"))?;
59 |                 let list = MdElem::List(List { starting_index, items });
60 |                 let outcome = if is_hit { Select::Hit(vec![list]) } else { Select::Miss(list) };
61 |                 Ok(outcome)
62 |             }
63 |             _ => {
64 |                 // Each split-off list gets a running index, so ordered items keep their original numbers.
65 |                 let mut next_index = starting_index;
66 |                 let mut singles = Vec::with_capacity(items.len());
67 |                 for item in items {
68 |                     let single = List {
69 |                         starting_index: next_index,
70 |                         items: vec![item],
71 |                     };
72 |                     singles.push(MdElem::List(single));
73 |                     if let Some(n) = next_index.as_mut() {
74 |                         *n += 1;
75 |                     }
76 |                 }
77 |                 Ok(Select::Miss(MdElem::Doc(singles)))
78 |             }
79 |         }
80 |     }
81 | }
82 | 
83 | impl ListItemType {
84 |     /// An ordered selector matches exactly when the list has a starting index,
85 |     /// and an unordered selector matches exactly when it has none.
86 |     fn matches(&self, idx: &Option<u32>) -> bool {
87 |         let wants_ordered = matches!(self, ListItemType::Ordered);
88 |         wants_ordered == idx.is_some()
89 |     }
90 | }
91 | 


--------------------------------------------------------------------------------
/src/select/sel_section.rs:
--------------------------------------------------------------------------------
 1 | use crate::md_elem::elem::*;
 2 | use crate::select::match_selector::MatchSelector;
 3 | use crate::select::string_matcher::{StringMatchError, StringMatcher};
 4 | use crate::select::SectionMatcher;
 5 | 
 6 | /// Selects sections whose title inlines satisfy the matcher taken from a [SectionMatcher].
 7 | #[derive(Debug, PartialEq)]
 8 | pub(crate) struct SectionSelector {
 9 |     matcher: StringMatcher,
10 | }
11 | 
12 | impl From<SectionMatcher> for SectionSelector {
13 |     fn from(value: SectionMatcher) -> Self {
14 |         let matcher = value.title.into();
15 |         Self { matcher }
16 |     }
17 | }
18 | 
19 | impl MatchSelector<Section> for SectionSelector {
20 |     const NAME: &'static str = "section";
21 | 
22 |     fn matches(&self, section: &Section) -> Result<bool, StringMatchError> {
23 |         self.matcher.matches_inlines(&section.title) // only the title is examined, never the body
24 |     }
25 | }
26 | 
27 | #[cfg(test)]
28 | mod test {
29 |     use super::*;
30 |     use crate::select::{MatchReplace, Matcher, SelectError};
31 | 
32 |     #[test]
33 |     fn section_selector_match_error() { // a matcher carrying a replacement must be rejected, not silently ignored
34 |         use crate::md_elem::MdContext;
35 |         use crate::select::TrySelector;
36 | 
37 |         let section_matcher = SectionMatcher {
38 |             title: MatchReplace {
39 |                 matcher: Matcher::Text {
40 |                     case_sensitive: false,
41 |                     anchor_start: false,
42 |                     text: "test".to_string(),
43 |                     anchor_end: false,
44 |                 },
45 |                 replacement: Some("replacement".to_string()), // the replacement is what triggers the error
46 |             },
47 |         };
48 | 
49 |         let section = Section {
50 |             depth: 1,
51 |             title: vec![],
52 |             body: vec![],
53 |         };
54 | 
55 |         let section_selector = SectionSelector::from(section_matcher);
56 | 
57 |         assert_eq!(section_selector.matches(&section), Err(StringMatchError::NotSupported));
58 | 
59 |         assert_eq!( // via TrySelector, the error message names the selector through NAME
60 |             section_selector.try_select(&MdContext::default(), section).unwrap_err(),
61 |             SelectError::new("section selector does not support string replace"),
62 |         );
63 |     }
64 | }
65 | 


--------------------------------------------------------------------------------
/src/select/sel_single_matcher.rs:
--------------------------------------------------------------------------------
  1 | use crate::md_elem::elem::*;
  2 | use crate::select::match_selector::MatchSelector;
  3 | use crate::select::string_matcher::{StringMatchError, StringMatcher};
  4 | use crate::select::{BlockQuoteMatcher, FrontMatterMatcher, HtmlMatcher, ParagraphMatcher};
  5 | use paste::paste;
  6 | 
  7 | macro_rules! single_matcher_adapter { // generates a `<Name>Selector` that wraps a single StringMatcher
  8 |     { $name:ident {$matcher_field:ident} $match_fn:ident $tree_struct_field:ident $selector_name:literal } => {
  9 |         paste! {
 10 |             #[derive(Debug, PartialEq)]
 11 |             pub(crate) struct [<$name Selector>] {
 12 |                 matcher: StringMatcher,
 13 |             }
 14 | 
 15 |             impl MatchSelector<$name> for [<$name Selector>] {
 16 |                 const NAME: &'static str = $selector_name;
 17 | 
 18 |                 fn matches(&self, matcher: &$name) -> Result<bool, StringMatchError> { // NOTE: `matcher` here is the element under test
 19 |                     self.matcher.$match_fn(&matcher.$tree_struct_field) // apply the chosen StringMatcher method to the chosen field
 20 |                 }
 21 |             }
 22 | 
 23 |             impl From< [<$name Matcher>] > for [<$name Selector>] {
 24 |                 fn from(value: [<$name Matcher>]) -> Self {
 25 |                     Self { matcher: value.$matcher_field.into() }
 26 |                 }
 27 |             }
 28 |         }
 29 |     };
 30 | }
 31 | 
 32 | single_matcher_adapter! { BlockQuote {text} matches_any body "block quote" } // BlockQuoteSelector: matches_any over `body`
 33 | single_matcher_adapter! { Paragraph {text} matches_inlines body "paragraph" } // ParagraphSelector: matches_inlines over `body`
 34 | 
 35 | /// Selects block-level HTML whose raw text satisfies the matcher taken from an [HtmlMatcher].
 36 | #[derive(Debug, PartialEq)]
 37 | pub(crate) struct HtmlSelector {
 38 |     matcher: StringMatcher,
 39 | }
 40 | 
 41 | impl From<HtmlMatcher> for HtmlSelector {
 42 |     fn from(value: HtmlMatcher) -> Self {
 43 |         let matcher = value.html.into();
 44 |         Self { matcher }
 45 |     }
 46 | }
 47 | 
 48 | impl MatchSelector<BlockHtml> for HtmlSelector {
 49 |     const NAME: &'static str = "html";
 50 | 
 51 |     fn matches(&self, html: &BlockHtml) -> Result<bool, StringMatchError> {
 52 |         self.matcher.matches(&html.value) // the matcher runs against the HTML block's `value` string
 53 |     }
 54 | }
 55 | 
 56 | /// Selects front matter by its body text and, optionally, by its variant.
 57 | /// A `variant` of `None` accepts front matter of any variant.
 58 | #[derive(Debug, PartialEq)]
 59 | pub(crate) struct FrontMatterSelector {
 60 |     variant: Option<FrontMatterVariant>,
 61 |     text: StringMatcher,
 62 | }
 63 | 
 64 | impl From<FrontMatterMatcher> for FrontMatterSelector {
 65 |     fn from(value: FrontMatterMatcher) -> Self {
 66 |         let text = value.text.into();
 67 |         Self { variant: value.variant, text }
 68 |     }
 69 | }
 70 | 
 71 | impl MatchSelector<FrontMatter> for FrontMatterSelector {
 72 |     const NAME: &'static str = "front matter";
 73 | 
 74 |     fn matches(&self, front_matter: &FrontMatter) -> Result<bool, StringMatchError> {
 75 |         // A variant mismatch is a miss, decided before the text matcher is consulted.
 76 |         match self.variant {
 77 |             Some(wanted) if wanted != front_matter.variant => Ok(false),
 78 |             _ => self.text.matches(&front_matter.body),
 79 |         }
 80 |     }
 81 | }
 82 | 
 83 | #[cfg(test)]
 84 | mod test { // each test checks that a matcher carrying a replacement is rejected with a selector-specific message
 85 |     use super::*;
 86 |     use crate::md_elem::{
 87 |         elem::{BlockHtml, BlockQuote, FrontMatter, FrontMatterVariant, Inline, Paragraph, Text, TextVariant},
 88 |         MdContext, MdElem,
 89 |     };
 90 |     use crate::select::{MatchReplace, Matcher, TrySelector};
 91 | 
 92 |     #[test]
 93 |     fn block_quote_selector_match_error() {
 94 |         let block_quote_matcher = BlockQuoteMatcher {
 95 |             text: MatchReplace {
 96 |                 matcher: Matcher::Text {
 97 |                     case_sensitive: false,
 98 |                     anchor_start: false,
 99 |                     text: "test".to_string(),
100 |                     anchor_end: false,
101 |                 },
102 |                 replacement: Some("replacement".to_string()), // the replacement is what triggers the error
103 |             },
104 |         };
105 | 
106 |         let block_quote = BlockQuote {
107 |             body: vec![MdElem::Paragraph(Paragraph {
108 |                 body: vec![Inline::Text(Text {
109 |                     variant: TextVariant::Plain,
110 |                     value: "test content".to_string(),
111 |                 })],
112 |             })],
113 |         };
114 | 
115 |         let block_quote_selector = BlockQuoteSelector::from(block_quote_matcher);
116 | 
117 |         assert_eq!( // the raw predicate reports the unsupported replacement
118 |             block_quote_selector.matches(&block_quote),
119 |             Err(StringMatchError::NotSupported)
120 |         );
121 | 
122 |         assert_eq!( // via TrySelector, the error message names the selector through NAME
123 |             block_quote_selector
124 |                 .try_select(&MdContext::default(), block_quote)
125 |                 .unwrap_err()
126 |                 .to_string(),
127 |             "block quote selector does not support string replace"
128 |         );
129 |     }
130 | 
131 |     #[test]
132 |     fn paragraph_selector_match_error() {
133 |         let paragraph_matcher = ParagraphMatcher {
134 |             text: MatchReplace {
135 |                 matcher: Matcher::Text {
136 |                     case_sensitive: false,
137 |                     anchor_start: false,
138 |                     text: "test".to_string(),
139 |                     anchor_end: false,
140 |                 },
141 |                 replacement: Some("replacement".to_string()), // the replacement is what triggers the error
142 |             },
143 |         };
144 | 
145 |         let paragraph = Paragraph { body: vec![] }; // the error is returned even for an empty paragraph
146 | 
147 |         let paragraph_selector = ParagraphSelector::from(paragraph_matcher);
148 | 
149 |         assert_eq!(
150 |             paragraph_selector.matches(&paragraph),
151 |             Err(StringMatchError::NotSupported)
152 |         );
153 | 
154 |         assert_eq!(
155 |             paragraph_selector
156 |                 .try_select(&MdContext::default(), paragraph)
157 |                 .unwrap_err()
158 |                 .to_string(),
159 |             "paragraph selector does not support string replace"
160 |         );
161 |     }
162 | 
163 |     #[test]
164 |     fn html_selector_match_error() {
165 |         let html_matcher = HtmlMatcher {
166 |             html: MatchReplace {
167 |                 matcher: Matcher::Text {
168 |                     case_sensitive: false,
169 |                     anchor_start: false,
170 |                     text: "div".to_string(),
171 |                     anchor_end: false,
172 |                 },
173 |                 replacement: Some("replacement".to_string()), // the replacement is what triggers the error
174 |             },
175 |         };
176 | 
177 |         let block_html = BlockHtml {
178 |             value: "<div>content</div>".to_string(),
179 |         };
180 | 
181 |         let html_selector = HtmlSelector::from(html_matcher);
182 | 
183 |         assert_eq!(html_selector.matches(&block_html), Err(StringMatchError::NotSupported));
184 | 
185 |         assert_eq!(
186 |             html_selector
187 |                 .try_select(&MdContext::default(), block_html)
188 |                 .unwrap_err()
189 |                 .to_string(),
190 |             "html selector does not support string replace"
191 |         );
192 |     }
193 | 
194 |     #[test]
195 |     fn front_matter_selector_match_error() {
196 |         let front_matter_matcher = FrontMatterMatcher {
197 |             variant: None, // no variant filter, so the text matcher is always consulted
198 |             text: MatchReplace {
199 |                 matcher: Matcher::Text {
200 |                     case_sensitive: false,
201 |                     anchor_start: false,
202 |                     text: "title".to_string(),
203 |                     anchor_end: false,
204 |                 },
205 |                 replacement: Some("replacement".to_string()), // the replacement is what triggers the error
206 |             },
207 |         };
208 | 
209 |         let front_matter = FrontMatter {
210 |             variant: FrontMatterVariant::Yaml,
211 |             body: "title: test".to_string(),
212 |         };
213 | 
214 |         let front_matter_selector = FrontMatterSelector::from(front_matter_matcher);
215 | 
216 |         assert_eq!(
217 |             front_matter_selector.matches(&front_matter),
218 |             Err(StringMatchError::NotSupported)
219 |         );
220 | 
221 |         assert_eq!(
222 |             front_matter_selector
223 |                 .try_select(&MdContext::default(), front_matter)
224 |                 .unwrap_err()
225 |                 .to_string(),
226 |             "front matter selector does not support string replace"
227 |         );
228 |     }
229 | }
230 | 


--------------------------------------------------------------------------------
/src/select/sel_table.rs:
--------------------------------------------------------------------------------
  1 | use crate::md_elem::elem::Table;
  2 | use crate::md_elem::*;
  3 | use crate::select::string_matcher::StringMatcher;
  4 | use crate::select::{Result, Select, TableMatcher, TrySelector};
  5 | 
  6 | /// Selects from a [`Table`] by filtering its columns (by header) and then its rows.
  7 | #[derive(Debug, PartialEq)]
  8 | pub(crate) struct TableSelector {
  9 |     headers_matcher: StringMatcher,
 10 |     rows_matcher: StringMatcher,
 11 | }
 12 | 
 13 | impl TrySelector<Table> for TableSelector {
 14 |     /// Filters a normalized copy of `orig`, first by header and then by row. Gives back `orig` itself as a
 15 |     /// `Select::Miss` as soon as either step leaves the copy empty; otherwise the filtered copy is the sole hit.
 16 |     fn try_select(&self, _: &MdContext, orig: Table) -> Result<Select> {
 17 |         // Work on a copy, since a miss has to hand back the table exactly as it was given.
 18 |         let mut filtered = orig.clone();
 19 |         filtered.normalize();
 20 | 
 21 |         let by_header = filtered.retain_columns_by_header(|inlines| self.headers_matcher.matches_inlines(inlines));
 22 |         by_header.map_err(|err| err.to_select_error("table"))?;
 23 |         if filtered.is_empty() {
 24 |             return Ok(Select::Miss(orig.into()));
 25 |         }
 26 | 
 27 |         let by_row = filtered.retain_rows(|inlines| self.rows_matcher.matches_inlines(inlines));
 28 |         by_row.map_err(|err| err.to_select_error("table"))?;
 29 |         if filtered.is_empty() {
 30 |             return Ok(Select::Miss(orig.into()));
 31 |         }
 32 | 
 33 |         Ok(Select::Hit(vec![filtered.into()]))
 34 |     }
 35 | }
 36 | 
 37 | impl From<TableMatcher> for TableSelector {
 38 |     /// Converts the matcher's `headers` and `rows` into this selector's two string matchers.
 39 |     fn from(value: TableMatcher) -> Self {
 40 |         let TableMatcher { headers, rows } = value;
 41 |         Self { headers_matcher: headers.into(), rows_matcher: rows.into() }
 42 |     }
 43 | }
 44 | 
 45 | #[cfg(test)]
 46 | mod tests {
 47 |     use super::*;
 48 | 
 49 |     use crate::md_elem::elem::*;
 50 |     use crate::select::TrySelector;
 51 |     use crate::util::utils_for_test::*;
 52 | 
 53 |     #[test]
 54 |     fn select_all_on_normalized_table() {
 55 |         let table: Table = Table {
 56 |             alignments: vec![Some(ColumnAlignment::Left), Some(ColumnAlignment::Right)],
 57 |             rows: vec![
 58 |                 vec![cell("header a"), cell("header b")],
 59 |                 vec![cell("data 1 a"), cell("data 1 b")],
 60 |             ],
 61 |         };
 62 |         let maybe_selected = TableSelector {
 63 |             headers_matcher: ".*".into(),
 64 |             rows_matcher: ".*".into(),
 65 |         }
 66 |         .try_select(&MdContext::empty(), table)
 67 |         .map(|selection| match selection {
 68 |             Select::Hit(elems) => get_only(elems),
 69 |             Select::Miss(_) => panic!("Expected selection to succeed"),
 70 |         });
 71 |         // with `.*` for both matchers, the asserts below expect the table to come back unchanged
 72 |         unwrap!(maybe_selected, Ok(MdElem::Table(table)));
 73 |         assert_eq!(
 74 |             table.alignments(),
 75 |             &vec![Some(ColumnAlignment::Left), Some(ColumnAlignment::Right)]
 76 |         );
 77 |         assert_eq!(
 78 |             table.rows(),
 79 |             &vec![
 80 |                 vec![cell("header a"), cell("header b")],
 81 |                 vec![cell("data 1 a"), cell("data 1 b")],
 82 |             ]
 83 |         );
 84 |     }
 85 | 
 86 |     #[test]
 87 |     fn select_columns_on_normalized_table() {
 88 |         let table: Table = Table {
 89 |             alignments: vec![Some(ColumnAlignment::Left), Some(ColumnAlignment::Right)],
 90 |             rows: vec![
 91 |                 vec![cell("header a"), cell("KEEP b")],
 92 |                 vec![cell("data 1 a"), cell("data 1 b")],
 93 |             ],
 94 |         };
 95 |         let maybe_selected = TableSelector {
 96 |             headers_matcher: "KEEP".into(),
 97 |             rows_matcher: ".*".into(),
 98 |         }
 99 |         .try_select(&MdContext::empty(), table)
100 |         .map(|selection| match selection {
101 |             Select::Hit(elems) => get_only(elems),
102 |             Select::Miss(_) => panic!("Expected selection to succeed"),
103 |         });
104 |         // only the "KEEP b" column should survive, and its alignment along with it
105 |         unwrap!(maybe_selected, Ok(MdElem::Table(table)));
106 |         assert_eq!(table.alignments(), &vec![Some(ColumnAlignment::Right)]);
107 |         assert_eq!(table.rows(), &vec![vec![cell("KEEP b")], vec![cell("data 1 b")],]);
108 |     }
109 | 
110 |     #[test]
111 |     fn select_rows_on_normalized_table() {
112 |         let table: Table = Table {
113 |             alignments: vec![Some(ColumnAlignment::Left), Some(ColumnAlignment::Right)],
114 |             rows: vec![
115 |                 vec![cell("header a"), cell("header b")],
116 |                 vec![cell("data 1 a"), cell("data 1 b")],
117 |                 vec![cell("data 2 a"), cell("data 2 b")],
118 |             ],
119 |         };
120 |         let maybe_selected = TableSelector {
121 |             headers_matcher: ".*".into(),
122 |             rows_matcher: "data 2".into(),
123 |         }
124 |         .try_select(&MdContext::empty(), table)
125 |         .map(|selection| match selection {
126 |             Select::Hit(elems) => get_only(elems),
127 |             Select::Miss(_) => panic!("Expected selection to succeed"),
128 |         });
129 |         // all columns stay; of the data rows, only the "data 2" one should survive
130 |         unwrap!(maybe_selected, Ok(MdElem::Table(table)));
131 |         assert_eq!(
132 |             table.alignments(),
133 |             &vec![Some(ColumnAlignment::Left), Some(ColumnAlignment::Right)]
134 |         );
135 |         assert_eq!(
136 |             table.rows(),
137 |             &vec![
138 |                 // note: header always gets retained
139 |                 vec![cell("header a"), cell("header b")],
140 |                 vec![cell("data 2 a"), cell("data 2 b")],
141 |             ]
142 |         );
143 |     }
144 | 
145 |     /// Tests (a) that the table gets normalized, and (b) a smoke test of the matchers.
146 |     /// More extensive tests for the `retain_*` methods can be found in [TableSlice]'s tests.
147 |     #[test]
148 |     fn jagged_table() {
149 |         let table: Table = Table {
150 |             // only 1 align; rest will be filled with None
151 |             alignments: vec![Some(ColumnAlignment::Left)],
152 |             rows: vec![
153 |                 vec![cell("header a")],
154 |                 vec![cell("data 1 a"), cell("data 1 b")],
155 |                 vec![cell("data 2 a"), cell("data 2 b"), cell("data 2 c")],
156 |             ],
157 |         };
158 |         let maybe_selected = TableSelector {
159 |             headers_matcher: ".*".into(),
160 |             rows_matcher: "data 1".into(),
161 |         }
162 |         .try_select(&MdContext::empty(), table)
163 |         .map(|selection| match selection {
164 |             Select::Hit(elems) => get_only(elems),
165 |             Select::Miss(_) => panic!("Expected selection to succeed"),
166 |         });
167 |         // the short rows and the alignments should have been padded out to the widest row's three columns
168 |         unwrap!(maybe_selected, Ok(MdElem::Table(table)));
169 |         assert_eq!(table.alignments(), &vec![Some(ColumnAlignment::Left), None, None]);
170 |         assert_eq!(
171 |             table.rows(),
172 |             &vec![
173 |                 vec![cell("header a"), Vec::new(), Vec::new()],
174 |                 vec![cell("data 1 a"), cell("data 1 b"), Vec::new()],
175 |             ]
176 |         );
177 |     }
178 | 
179 |     fn cell(cell_str: &str) -> TableCell {
180 |         vec![Inline::Text(Text {
181 |             variant: TextVariant::Plain,
182 |             value: cell_str.to_string(),
183 |         })]
184 |     }
185 | }
186 | 


--------------------------------------------------------------------------------
/src/select/selector.rs:
--------------------------------------------------------------------------------
  1 | use crate::md_elem::elem::FrontMatterVariant;
  2 | use crate::md_elem::{MdContext, MdDoc, MdElem};
  3 | use crate::query::ParseError;
  4 | use crate::select::{MatchReplace, Result, SelectorAdapter};
  5 | 
  6 | /// The completion state that a [`ListItemMatcher`] looks for.
  7 | #[derive(Copy, Clone, Debug, PartialEq, Eq, PartialOrd, Ord, Hash)]
  8 | pub enum ListItemTask {
  9 |     /// `- [x] foo`
 10 |     Selected,
 11 |     /// `- [ ] foo`
 12 |     Unselected,
 13 |     /// `- [?] foo`
 14 |     Either,
 15 |     /// `- foo`
 16 |     None,
 17 | }
 18 | 
 19 | /// matcher for [`Selector::ListItem`]
 20 | #[derive(Clone, Debug, PartialEq, Eq, PartialOrd, Ord, Hash)]
 21 | pub struct ListItemMatcher {
 22 |     /// Whether this matches an ordered list (`1. foo`) or an unordered one (`- foo`).
 23 |     pub ordered: bool,
 24 |     /// Whether this matches a task list (`- [ ] foo`), and if so, what completion state matches.
 25 |     ///
 26 |     /// Tasks are typically unordered, but may also be ordered (`1. [ ] foo`).
 27 |     pub task: ListItemTask,
 28 |     pub matcher: MatchReplace, // NOTE(review): presumably applied to the item's text — confirm
 29 | }
 30 | 
 31 | /// matcher for [`Selector::Section`]
 32 | #[derive(Clone, Debug, PartialEq, Eq, PartialOrd, Ord, Hash)]
 33 | pub struct SectionMatcher {
 34 |     pub title: MatchReplace,
 35 | }
 36 | 
 37 | /// matcher for both [`Selector::Link`] and [`Selector::Image`]
 38 | #[derive(Clone, Debug, PartialEq, Eq, PartialOrd, Ord, Hash)]
 39 | pub struct LinklikeMatcher {
 40 |     pub display_matcher: MatchReplace,
 41 |     pub url_matcher: MatchReplace,
 42 | }
 43 | 
 44 | /// matcher for [`Selector::BlockQuote`]
 45 | #[derive(Clone, Debug, PartialEq, Eq, PartialOrd, Ord, Hash)]
 46 | pub struct BlockQuoteMatcher {
 47 |     pub text: MatchReplace,
 48 | }
 49 | 
 50 | /// matcher for [`Selector::Html`]
 51 | #[derive(Clone, Debug, PartialEq, Eq, PartialOrd, Ord, Hash)]
 52 | pub struct HtmlMatcher {
 53 |     pub html: MatchReplace,
 54 | }
 55 | 
 56 | /// matcher for [`Selector::Paragraph`]
 57 | #[derive(Clone, Debug, PartialEq, Eq, PartialOrd, Ord, Hash)]
 58 | pub struct ParagraphMatcher {
 59 |     pub text: MatchReplace,
 60 | }
 61 | 
 62 | /// matcher for [`Selector::CodeBlock`]
 63 | #[derive(Clone, Debug, PartialEq, Eq, PartialOrd, Ord, Hash)]
 64 | pub struct CodeBlockMatcher {
 65 |     pub language: MatchReplace,
 66 |     pub contents: MatchReplace,
 67 | }
 68 | 
 69 | /// matcher for [`Selector::FrontMatter`]
 70 | #[derive(Clone, Debug, PartialEq, Eq, PartialOrd, Ord, Hash)]
 71 | pub struct FrontMatterMatcher {
 72 |     pub variant: Option<FrontMatterVariant>, // NOTE(review): `None` presumably means "any variant" — confirm
 73 |     pub text: MatchReplace,
 74 | }
 75 | 
 76 | /// matcher for [`Selector::Table`]
 77 | #[derive(Clone, Debug, PartialEq, Eq, PartialOrd, Ord, Hash)]
 78 | pub struct TableMatcher {
 79 |     pub headers: MatchReplace, // picks which columns to keep, by matching against each column's header
 80 |     pub rows: MatchReplace, // picks which rows to keep; the header row is always kept
 81 | }
 82 | 
 83 | /// The in-memory equivalent of mdq's selector query string.
 84 | #[derive(Clone, Debug, PartialEq, Eq, PartialOrd, Ord, Hash)]
 85 | #[non_exhaustive]
 86 | pub enum Selector {
 87 |     /// `foo | bar`
 88 |     Chain(Vec<Self>),
 89 |     /// `# section title`
 90 |     Section(SectionMatcher),
 91 |     /// `1. ordered` or `- unordered` lists, or `- [ ] tasks`
 92 |     ListItem(ListItemMatcher),
 93 |     /// `[some](https://example.com/url)`
 94 |     Link(LinklikeMatcher),
 95 |     /// `![alt](https://example.com/image.png)`
 96 |     Image(LinklikeMatcher),
 97 |     /// `> block quote`
 98 |     BlockQuote(BlockQuoteMatcher),
 99 |     /// ` ```language contents `
100 |     CodeBlock(CodeBlockMatcher),
101 |     /// `+++ front matter`
102 |     FrontMatter(FrontMatterMatcher),
103 |     /// `</> html-tags`
104 |     Html(HtmlMatcher),
105 |     /// `P: paragraph text`
106 |     Paragraph(ParagraphMatcher),
107 |     /// `:-: headers :-: rows`
108 |     Table(TableMatcher),
109 | }
110 | 
111 | impl Selector {
112 |     /// Filter (and possibly manipulate) the [`MdElem`]s of the provided [`MdDoc`] according to this selector.
113 |     ///
114 |     /// The search starts from the document's roots. An element that matches this selector is returned in the result;
115 |     /// otherwise its children are searched in the same way, and so on down the tree. One element can therefore yield
116 |     /// several results (when several of its children match), but if an element _and_ some of its descendants match,
117 |     /// only that element is included.
118 |     ///
119 |     /// An empty `Vec` isn't an error; it just means nothing matched. The returned [`MdContext`] is the document's
120 |     /// own, and can be used with [`MdWriter`](crate::output::MdWriter).
121 |     pub fn find_nodes(self, doc: MdDoc) -> Result<(Vec<MdElem>, MdContext)> {
122 |         let MdDoc { ctx, roots } = doc;
123 |         SelectorAdapter::from(self)
124 |             .find_nodes(&ctx, vec![MdElem::Doc(roots)])
125 |             .map(|result_elems| (result_elems, ctx))
126 |     }
127 | }
128 | 
129 | impl TryFrom<&'_ str> for Selector {
130 |     type Error = ParseError;
131 |     /// Parses `value` as a selector query; a failed parse is wrapped in a [`ParseError`].
132 |     fn try_from(value: &'_ str) -> std::result::Result<Self, Self::Error> {
133 |         Self::try_parse(value).map_err(ParseError::new)
134 |     }
135 | }
136 | 
137 | impl TryFrom<&'_ String> for Selector {
138 |     type Error = ParseError;
139 |     /// Same as the `&str` conversion, applied to the string's contents.
140 |     fn try_from(value: &'_ String) -> std::result::Result<Self, Self::Error> {
141 |         value.as_str().try_into()
142 |     }
143 | }
144 | 


--------------------------------------------------------------------------------
/src/util/mod.rs:
--------------------------------------------------------------------------------
1 | pub(crate) mod output;
2 | pub(crate) mod str_utils;
3 | pub(crate) mod utils_for_test;
4 | pub(crate) mod vec_utils;
5 | mod words_buffer;
6 | 


--------------------------------------------------------------------------------
/src/util/str_utils.rs:
--------------------------------------------------------------------------------
  1 | use crate::md_elem::elem::ColumnAlignment;
  2 | use crate::util::output::{Output, SimpleWrite};
  3 | 
  4 | /// Writes `input` to `output`, padded with spaces so that it occupies at least `min_width` characters.
  5 | ///
  6 | /// The padding goes after the text for `Left` (or no alignment), before it for `Right`, and on both sides for
  7 | /// `Center`; when centering leaves an odd amount of padding, the extra space goes on the right. Text that is
  8 | /// already `min_width` characters or longer is written as-is.
  9 | ///
 10 | /// Width is measured in `char`s rather than bytes: `str::len` over-counts multi-byte UTF-8, which under-padded
 11 | /// non-ASCII text. This is still only an approximation of display width (wide and combining chars count as one).
 12 | pub(crate) fn pad_to<W>(output: &mut Output<W>, input: &str, min_width: usize, alignment: Option<ColumnAlignment>)
 13 | where
 14 |     W: SimpleWrite,
 15 | {
 16 |     // Saturates at zero, so text that already fills (or overflows) the width gets no padding at all.
 17 |     let padding = min_width.saturating_sub(input.chars().count());
 18 | 
 19 |     let (left_pad, right_pad) = match alignment {
 20 |         Some(ColumnAlignment::Left) | None => (0, padding),
 21 |         Some(ColumnAlignment::Center) => {
 22 |             let left_pad = padding / 2; // round down
 23 |             (left_pad, padding - left_pad)
 24 |         }
 25 |         Some(ColumnAlignment::Right) => (padding, 0),
 26 |     };
 27 | 
 28 |     (0..left_pad).for_each(|_| output.write_char(' '));
 29 |     output.write_str(input);
 30 |     (0..right_pad).for_each(|_| output.write_char(' '));
 31 | }
 32 | 
 33 | /// Wraps a [`SimpleWrite`], forwarding everything to it and tallying the `char`s that were written successfully.
 34 | pub(crate) struct CountingWriter<'a, W> {
 35 |     underlying: &'a mut W,
 36 |     count: usize,
 37 | }
 38 | 
 39 | impl<'a, W: SimpleWrite> CountingWriter<'a, W> {
 40 |     pub(crate) fn wrap(underlying: &'a mut W) -> Self {
 41 |         Self { underlying, count: 0 }
 42 |     }
 43 | 
 44 |     /// Writes `text` one `char` at a time, stopping at the first error.
 45 |     fn write_str(&mut self, text: &str) -> std::io::Result<()> {
 46 |         text.chars().try_for_each(|ch| self.write_char(ch))
 47 |     }
 48 | 
 49 |     /// The number of `char`s written so far.
 50 |     pub(crate) fn count(&self) -> usize {
 51 |         self.count
 52 |     }
 53 | }
 54 | 
 55 | impl<W: SimpleWrite> SimpleWrite for CountingWriter<'_, W> {
 56 |     fn write_char(&mut self, ch: char) -> std::io::Result<()> {
 57 |         // Count only once the underlying write has succeeded, so a failed write isn't tallied.
 58 |         self.underlying.write_char(ch).map(|()| self.count += 1)
 59 |     }
 60 | 
 61 |     fn flush(&mut self) -> std::io::Result<()> {
 62 |         self.underlying.flush()
 63 |     }
 64 | }
 65 | 
 66 | impl<W: SimpleWrite> std::fmt::Write for CountingWriter<'_, W> {
 67 |     /// Delegates to the inherent `write_str`, collapsing any I/O error into the unit-like `fmt::Error`.
 68 |     fn write_str(&mut self, text: &str) -> std::fmt::Result {
 69 |         Self::write_str(self, text).map_err(|_| std::fmt::Error)
 70 |     }
 71 | }
 72 | 
 73 | /// A string split into its leading empty lines and everything that follows them.
 74 | ///
 75 | /// An "empty line" is one made up solely of whitespace characters (possibly none at all). The line
 76 | /// terminator is part of the line, and a whitespace-only final line counts even without a terminator.
 77 | pub(crate) struct TrimmedEmptyLines<S> {
 78 |     /// The leading empty lines, terminators included.
 79 |     pub(crate) trimmed: S,
 80 |     /// Everything from the first non-empty line onward.
 81 |     pub(crate) remaining: S,
 82 | }
 83 | 
 84 | impl<'a> From<&'a str> for TrimmedEmptyLines<&'a str> {
 85 |     /// Splits `s` just past its last leading empty line. Both halves borrow from `s`, and concatenating
 86 |     /// `trimmed` and `remaining` gives back `s` exactly.
 87 |     fn from(s: &'a str) -> Self {
 88 |         // split_inclusive() keeps each line's terminator, so summing the lengths counts "\r\n" as the two bytes it
 89 |         // is; plain split() would drop the terminators and leave us guessing at their size.
 90 |         let split_at: usize = s
 91 |             .split_inclusive('\n')
 92 |             .take_while(|line| line.chars().all(char::is_whitespace))
 93 |             .map(str::len)
 94 |             .sum();
 95 | 
 96 |         let (trimmed, remaining) = s.split_at(split_at);
 97 |         Self { trimmed, remaining }
 98 |     }
 99 | }
100 | 
101 | #[cfg(test)]
102 | mod test {
103 |     use super::*;
104 | 
105 |     #[test]
106 |     fn left_pad() {
107 |         assert_eq!(
108 |             "a    ",
109 |             output_and_get(|out| pad_to(out, "a", 5, Some(ColumnAlignment::Left)))
110 |         );
111 |     }
112 | 
113 |     #[test]
114 |     fn right_pad() {
115 |         assert_eq!(
116 |             "    a",
117 |             output_and_get(|out| pad_to(out, "a", 5, Some(ColumnAlignment::Right)))
118 |         );
119 |     }
120 | 
121 |     /// center pad, with the same amount of padding on each side
122 |     #[test]
123 |     fn center_pad_even() {
124 |         assert_eq!(
125 |             "  a  ",
126 |             output_and_get(|out| pad_to(out, "a", 5, Some(ColumnAlignment::Center)))
127 |         );
128 |     }
129 | 
130 |     /// center pad, with different amount of padding on each side
131 |     #[test]
132 |     fn center_pad_uneven() {
133 |         assert_eq!(
134 |             " ab  ", // 3 spaces of padding: 1 on the left (rounded down), 2 on the right
135 |             output_and_get(|out| pad_to(out, "ab", 5, Some(ColumnAlignment::Center)))
136 |         );
137 |     }
138 | 
139 |     #[test]
140 |     fn string_already_right_size() {
141 |         for align in [ColumnAlignment::Left, ColumnAlignment::Center, ColumnAlignment::Right] {
142 |             assert_eq!("abcde", output_and_get(|out| pad_to(out, "abcde", 5, Some(align))));
143 |         }
144 |     }
145 | 
146 |     #[test]
147 |     fn string_already_too_big() {
148 |         for align in [ColumnAlignment::Left, ColumnAlignment::Center, ColumnAlignment::Right] {
149 |             assert_eq!("abcdef", output_and_get(|out| pad_to(out, "abcdef", 3, Some(align))));
150 |         }
151 |     }
152 | 
153 |     mod trim_leading_empty_lines {
154 |         use super::*;
155 | 
156 |         #[test]
157 |         fn starts_with_newline() {
158 |             check("\nhello\nworld", "hello\nworld");
159 |         }
160 | 
161 |         #[test]
162 |         fn starts_with_space_then_newline() {
163 |             check("  \nhello\nworld", "hello\nworld");
164 |         }
165 | 
166 |         #[test]
167 |         fn starts_with_space_then_char() {
168 |             check("  a\nhello\nworld", "  a\nhello\nworld"); // leading spaces on a non-empty line are kept
169 |         }
170 | 
171 |         #[test]
172 |         fn starts_with_char() {
173 |             check("hello world", "hello world");
174 |         }
175 | 
176 |         #[test]
177 |         fn empty() {
178 |             check("", "");
179 |         }
180 | 
181 |         #[test]
182 |         fn all_newlines() {
183 |             check("\n\n\n", "");
184 |         }
185 | 
186 |         #[test]
187 |         fn crlf() {
188 |             check("\r\n\r\nhello", "hello"); // '\r' is whitespace, so "\r\n" is an empty line
189 |         }
190 | 
191 |         #[test]
192 |         fn just_cr() {
193 |             check("\rhello", "\rhello"); // lines are split on '\n' only, so this is one non-empty line
194 |         }
195 | 
196 |         fn check(given: &str, expected: &str) {
197 |             assert_eq!(TrimmedEmptyLines::from(given).remaining, expected);
198 |         }
199 |     }
200 | 
201 |     fn output_and_get<F>(action: F) -> String
202 |     where
203 |         F: FnOnce(&mut Output<String>),
204 |     {
205 |         let mut output = Output::without_text_wrapping(String::new());
206 |         action(&mut output);
207 |         output.take_underlying().unwrap()
208 |     }
209 | }
210 | 


--------------------------------------------------------------------------------
/src/util/utils_for_test.rs:
--------------------------------------------------------------------------------
  1 | #[cfg(test)]
  2 | pub(crate) use test_utils::*;
  3 | 
  4 | // We this file's contents from prod by putting them in a submodule guarded by cfg(test), but then "pub(crate) use" it to
  5 | // export its contents.
  6 | #[cfg(test)]
  7 | mod test_utils {
  8 |     use crate::output::InlineElemOptions;
  9 |     use crate::output::LinkTransform;
 10 |     use crate::output::{MdWriterOptions, ReferencePlacement};
 11 |     use std::fmt::Debug;
 12 | 
 13 |     impl LinkTransform {
 14 |         /// The link transform that tests start from: [`LinkTransform::Keep`].
 15 |         pub(crate) fn default_for_tests() -> Self {
 16 |             LinkTransform::Keep
 17 |         }
 18 |     }
 19 | 
 20 |     impl ReferencePlacement {
 21 |         pub(crate) fn default_for_tests() -> Self {
 22 |             ReferencePlacement::Section
 23 |         }
 24 |     }
 25 | 
 26 |     impl MdWriterOptions {
 27 |         /// Baseline options for tests, built from the other `default_for_tests()` values; `text_width` is unset.
 28 |         pub(crate) fn default_for_tests() -> Self {
 29 |             MdWriterOptions {
 30 |                 link_reference_placement: ReferencePlacement::default_for_tests(),
 31 |                 footnote_reference_placement: ReferencePlacement::default_for_tests(),
 32 |                 inline_options: InlineElemOptions {
 33 |                     link_format: LinkTransform::default_for_tests(),
 34 |                     renumber_footnotes: false,
 35 |                 },
 36 |                 include_thematic_breaks: true,
 37 |                 text_width: None,
 38 |             }
 39 |         }
 40 | 
 41 |         /// Builds the test defaults, then hands them to `init` for tweaking before returning them.
 42 |         pub(crate) fn new_with<F: FnOnce(&mut MdWriterOptions)>(init: F) -> Self {
 43 |             let mut options = Self::default_for_tests();
 44 |             init(&mut options);
 45 |             options
 46 |         }
 47 |     }
 48 | 
 49 |     /// Returns the sole element of `col`.
 50 |     ///
 51 |     /// Panics if `col` is empty, or if it has more than one element (that message lists every element found).
 52 |     pub(crate) fn get_only<T: Debug, C: IntoIterator<Item = T>>(col: C) -> T {
 53 |         let mut remaining = col.into_iter();
 54 |         let first = match remaining.next() {
 55 |             Some(item) => item,
 56 |             None => panic!("expected an element, but was empty"),
 57 |         };
 58 |         let Some(second) = remaining.next() else {
 59 |             return first;
 60 |         };
 61 |         // Gather everything, including what was already pulled off the iterator, for the panic message.
 62 |         let all: Vec<T> = [first, second].into_iter().chain(remaining).collect();
 63 |         panic!("expected exactly one element, but found {}: {all:?}", all.len());
 64 |     }
 65 | 
 66 |     /// Destructures `$enum_value` with the pattern `$enum_variant` via `let ... else`, panicking on a mismatch.
 67 |     macro_rules! unwrap {
 68 |         ($enum_value:expr, $enum_variant:pat) => {
 69 |             let node = $enum_value;
 70 |             let node_debug = format!("{:?}", node); // captured up front: `node` is consumed by the `let` below
 71 |             let $enum_variant = node else {
 72 |                 panic!("Expected {} but saw {}", stringify!($enum_variant), node_debug);
 73 |             };
 74 |         };
 75 |     }
 76 |     pub(crate) use unwrap;
 77 | 
 78 |     macro_rules! test_delay_ms {
 79 |         ($i:literal) => {{ // expands to a Duration of `$i` ms, times TEST_TIMEOUT_MULTIPLIER if that env var is set
 80 |             time::Duration::from_millis( // `time` must be in scope at the call site, e.g. via `use std::time`
 81 |                 $i * option_env!("TEST_TIMEOUT_MULTIPLIER") // option_env! reads the variable at compile time
 82 |                     .map(|s| s.parse::<u64>().expect("bad value for TEST_TIMEOUT_MULTIPLIER"))
 83 |                     .unwrap_or(1), // variable not set => no scaling
 84 |             )
 85 |         }};
 86 |     }
 87 |     pub(crate) use test_delay_ms;
 88 | 
 89 |     /// Evaluates `$input` once; returns it if it matches `$variant`, and otherwise panics with its `Debug` form.
 90 |     macro_rules! match_or_panic {
 91 |         ($input:expr => $variant:pat) => {{
 92 |             let elem = $input;
 93 |             if !matches!(elem, $variant) {
 94 |                 panic!(
 95 |                     "{} should have been {}, was {:?}",
 96 |                     stringify!($input),
 97 |                     stringify!($variant),
 98 |                     elem
 99 |                 );
100 |             }
101 |             elem
102 |         }};
103 |     }
104 |     pub(crate) use match_or_panic;
105 | 
106 |     /// Creates a static object named `$name` that looks for all the variants of enum `E`.
107 |     ///
108 |     /// ```
109 |     /// use mdq::variants_checker;
110 |     ///
111 |     /// enum MyEnum {
112 |     ///   Variant1,
113 |     ///   Variant2(usize)
114 |     /// }
115 |     /// variants_checker!(CHECKER_NAME = MyEnum { Variant1, Variant2(_) });
116 |     /// ```
117 |     ///
118 |     /// You can also mark some variants as ignored; these are added to the pattern match but aren't required to be seen
119 |     /// (in fact, `see` panics if it's given one):
120 |     ///
121 |     /// ```
122 |     /// use mdq::variants_checker;
123 |     ///
124 |     /// enum MyEnum {
125 |     ///   Variant1,
126 |     ///   Variant2(usize)
127 |     /// }
128 |     /// variants_checker!(CHECKER_NAME = MyEnum { Variant1 } ignore { Variant2(_) });
129 |     /// ```
130 |     ///
131 |     /// If you see a compilation failure here, it means the call site is missing variants (or has an unknown
132 |     /// variant).
133 |     ///
134 |     /// We can't use strum to do this for mdast::Node, because we don't own the Node code. Instead, we rely on a bit of
135 |     /// trickery: we pass in a bunch of arms, and each gets [stringify!]'d and added to a set. Whenever we [see] an
136 |     /// item, we remove the corresponding string from the set.
137 |     ///
138 |     /// This requires that each pattern matches exactly one shape of item; in other words, that there aren't any
139 |     /// dead-code branches.
140 |     macro_rules! variants_checker {
141 |         ($name:ident = $enum_type:ty { $($variant:pat),* $(,)? } $(ignore { $($ignore_variant:pat),* $(,)? })?) => {
142 | 
143 |             paste::paste!{
144 |                 pub(crate) struct [<VariantsChecker $name:lower:camel>] {
145 |                     require: std::sync::Arc<std::sync::Mutex<std::collections::HashSet<String>>>,
146 |                 }
147 | 
148 |                 impl [<VariantsChecker $name:lower:camel>] {
149 |                     fn see(&self, node: &$enum_type) {
150 |                         let node_str = match node {
151 |                             $($enum_type::$variant => stringify!($variant),)*
152 |                             $($($enum_type::$ignore_variant => {
153 |                                 panic!("unexpected variant: {}", stringify!($ignore_variant));
154 |                             },)*)?
155 |                         };
156 |                         self.require.lock().map(|mut set| set.remove(node_str)).unwrap(); // tick this variant off
157 |                     }
158 | 
159 |                     fn wait_for_all(&self) {
160 |                         use std::{thread, time};
161 | 
162 |                         let timeout = test_delay_ms!(500);
163 |                         let retry_delay = time::Duration::from_millis(50);
164 |                         let start = time::Instant::now();
165 |                         loop {
166 |                             if self.require.lock().map(|set| set.is_empty()).unwrap() { // every variant was seen
167 |                                 break;
168 |                             }
169 |                             if start.elapsed() >= timeout {
170 |                                 let mut remaining: Vec<String> = self
171 |                                     .require
172 |                                     .lock()
173 |                                     .map(|set| set.iter().map(|s| s.to_owned()).collect())
174 |                                     .unwrap();
175 |                                 remaining.sort();
176 |                                 panic!(
177 |                                     "Timed out, and missing {} variants:\n- {}",
178 |                                     remaining.len(),
179 |                                     remaining.join("\n- ")
180 |                                 )
181 |                             }
182 |                             thread::sleep(retry_delay);
183 |                         }
184 |                     }
185 |                 }
186 | 
187 |                 lazy_static::lazy_static! {
188 |                     static ref $name: [<VariantsChecker $name:lower:camel>] = [<VariantsChecker $name:lower:camel>] {
189 |                         require: std::sync::Arc::new(
190 |                             std::sync::Mutex::new(
191 |                                 vec![$(stringify!($variant).to_string(),)*].into_iter().collect()
192 |                             )
193 |                         )
194 |                     };
195 |                 }
196 | 
197 |                 #[test]
198 |                 fn [<all_variants_checked_for_ $name:lower:snake>]() {
199 |                     $name.wait_for_all();
200 |                 }
201 |             }
202 |         };
203 |     }
204 |     pub(crate) use variants_checker;
205 | }
206 | 


--------------------------------------------------------------------------------
/src/util/vec_utils.rs:
--------------------------------------------------------------------------------
  1 | use std::collections::BTreeSet;
  2 | 
  3 | /// Remembers which indices of a sequence should be kept, so the choice can be replayed via [`Self::retain_fn`].
  4 | pub(crate) struct IndexKeeper {
  5 |     indices_to_keep: BTreeSet<usize>,
  6 | }
  7 | 
  8 | impl IndexKeeper {
  9 |     pub(crate) fn new() -> Self {
 10 |         Self { indices_to_keep: BTreeSet::new() }
 11 |     }
 12 | 
 13 |     /// Marks the index of each item for which `allow_filter` returns `Ok(true)`, propagating the first `Err`.
 14 |     pub(crate) fn retain_when<I, F, E>(&mut self, items: &[I], mut allow_filter: F) -> Result<(), E>
 15 |     where
 16 |         F: FnMut(usize, &I) -> Result<bool, E>,
 17 |     {
 18 |         for (idx, item) in items.iter().enumerate() {
 19 |             if allow_filter(idx, item)? {
 20 |                 self.indices_to_keep.insert(idx);
 21 |             }
 22 |         }
 23 |         Ok(())
 24 |     }
 25 | 
 26 |     /// Returns an `FnMut` suitable for use in [ItemRetainer::retain_with_index].
 27 |     ///
 28 |     /// Kept indices smaller than the one being asked about are discarded rather than waited on, so a caller that
 29 |     /// skips indices gets an answer instead of an endless loop. Indices must still be queried in ascending order.
 30 |     pub(crate) fn retain_fn<I, E>(&self) -> impl FnMut(usize, &I) -> Result<bool, E> + '_ {
 31 |         let mut next_to_keep = self.indices_to_keep.iter().peekable();
 32 |         move |target, _| {
 33 |             // Consume every kept index up to and including `target`; a larger one stays put for a later query.
 34 |             while let Some(&value) = next_to_keep.next_if(|&&kept| kept <= target) {
 35 |                 if value == target {
 36 |                     return Ok(true);
 37 |                 }
 38 |             }
 39 |             Ok(false)
 40 |         }
 41 |     }
 42 | 
 43 |     pub(crate) fn count_keeps(&self) -> usize {
 44 |         self.indices_to_keep.len()
 45 |     }
 46 | }
 47 | 
 48 | pub(crate) trait ItemRetainer<I> {
 49 |     /// Iterates over the items in order, invoking `f` with each item's index and a reference to the item, and retains
 50 |     /// only those items for which it returns `Ok(true)`.
 51 |     ///
 52 |     /// This is guaranteed to iterate over items sequentially, and filters can take advantage of that fact.
 53 |     fn retain_with_index<F, E>(&mut self, f: F) -> Result<(), E>
 54 |     where
 55 |         F: FnMut(usize, &I) -> Result<bool, E>;
 56 | }
 57 | 
 58 | impl<I> ItemRetainer<I> for Vec<I> {
 59 |     fn retain_with_index<F, E>(&mut self, mut f: F) -> Result<(), E>
 60 |     where
 61 |         F: FnMut(usize, &I) -> Result<bool, E>,
 62 |     {
 63 |         // Drain every item into a scratch list of survivors, then move the survivors back: O(n) time and space.
 64 |         let mut survivors = Vec::with_capacity(self.len());
 65 |         let mut position = 0;
 66 |         for candidate in self.drain(..) {
 67 |             if f(position, &candidate)? {
 68 |                 survivors.push(candidate);
 69 |             }
 70 |             position += 1;
 71 |         }
 72 |         self.extend(survivors);
 73 |         Ok(())
 74 |     }
 75 | }
 76 | 
 77 | #[cfg(test)]
 78 | mod tests {
 79 |     use super::*;
 80 | 
 81 |     #[test]
 82 |     fn empty_remover() {
 83 |         let mut letters = vec!['a', 'b', 'c', 'd'];
 84 |         let keeper = IndexKeeper::from([]); // nothing is marked, so nothing survives
 85 |         letters.retain_with_index(keeper.retain_fn::<_, ()>()).unwrap();
 86 |         assert_eq!(letters, vec![]);
 87 |     }
 88 | 
 89 |     #[test]
 90 |     fn remover_has_bigger_indexes_than_items() {
 91 |         let mut letters = vec!['a', 'b', 'c', 'd'];
 92 |         let keeper = IndexKeeper::from([0, 1, 2, 3, 4, 5, 6]); // 4, 5 and 6 are past the end of `letters`
 93 |         letters.retain_with_index(keeper.retain_fn::<_, ()>()).unwrap();
 94 |         assert_eq!(letters, vec!['a', 'b', 'c', 'd']);
 95 |     }
 96 | 
 97 |     #[test]
 98 |     fn keep_head() {
 99 |         let mut letters = vec!['a', 'b', 'c', 'd'];
100 |         let keeper = IndexKeeper::from([0]);
101 |         letters.retain_with_index(keeper.retain_fn::<_, ()>()).unwrap();
102 |         assert_eq!(letters, vec!['a']);
103 |     }
104 | 
105 |     #[test]
106 |     fn keep_middle() {
107 |         let mut letters = vec!['a', 'b', 'c', 'd'];
108 |         let keeper = IndexKeeper::from([2]);
109 |         letters.retain_with_index(keeper.retain_fn::<_, ()>()).unwrap();
110 |         assert_eq!(letters, vec!['c']);
111 |     }
112 | 
113 |     #[test]
114 |     fn keep_tail() {
115 |         let mut letters = vec!['a', 'b', 'c', 'd'];
116 |         let keeper = IndexKeeper::from([letters.len() - 1]);
117 |         letters.retain_with_index(keeper.retain_fn::<_, ()>()).unwrap();
118 |         assert_eq!(letters, vec!['d']);
119 |     }
120 | 
121 |     impl<const N: usize> From<[usize; N]> for IndexKeeper {
122 |         fn from(indices: [usize; N]) -> Self {
123 |             let mut keeper = Self::new();
124 |             for marked in indices {
125 |                 keeper.indices_to_keep.insert(marked); // duplicates collapse: the backing store is a set
126 |             }
127 |             keeper
128 |         }
129 |     }
130 | }
131 | 


--------------------------------------------------------------------------------
/tests/README.md:
--------------------------------------------------------------------------------
 1 | # Integration tests
 2 | 
 3 | Test cases are in `md_cases/` and are defined by toml files with the following format:
 4 | 
 5 | ```toml
 6 | [given]
 7 | md = '''
 8 | Some input markdown
 9 | '''
10 | 
11 | [expect."test case name"]
12 | cli_args = ["arguments", "passed", "to", "cli"]
13 | output = '''
14 | The expected output
15 | '''
16 | 
17 | [expect."another test case"]
18 | # ...
19 | ```
20 | 
21 | [`build.rs`] compiles these into test functions that construct an `integ_test::Case` and invoke its [`fn check`]. Each
22 | file becomes a `mod`, and each case becomes a test fn within that `mod`.
23 | 
24 | [`build.rs`]: ../build.rs
25 | 
26 | [`fn check`]: integ_test.rs


--------------------------------------------------------------------------------
/tests/integ_test.rs:
--------------------------------------------------------------------------------
 1 | use clap::Parser;
 2 | use mdq::run::Error;
 3 | use std::io::ErrorKind;
 4 | use std::{env, io};
 5 | 
 6 | #[derive(Debug)]
 7 | struct Case<const N: usize> {
 8 |     cli_args: [&'static str; N], // parsed as the command line, after a leading "cmd" entry
 9 |     expect_output: &'static str, // what `check` requires the captured stdout to equal
10 |     expect_error: &'static str, // what `check` requires the captured error text to equal
11 |     expect_output_json: bool, // if true, `check` compares stdout and `expect_output` as parsed JSON
12 |     md: &'static str, // returned by the runner's `read_stdin`
13 |     files: &'static [(&'static str, &'static str)], // (path, content) pairs served by the runner's `read_file`
14 |     expect_success: bool, // the value `mdq::run::run` is required to return
15 | }
16 | 
17 | #[derive(Debug)]
18 | struct CaseRunner<'a, const N: usize> {
19 |     case: &'a Case<N>, // supplies the stdin text and the in-memory files
20 |     stdout: Vec<u8>, // bytes written through the writer returned by `stdout()`
21 |     stderr: String, // text of every error passed to `write_error`, concatenated
22 | }
23 | 
24 | impl<const N: usize> mdq::run::OsFacade for CaseRunner<'_, N> {
25 |     fn read_stdin(&self) -> io::Result<String> {
26 |         Ok(String::from(self.case.md))
27 |     }
28 | 
29 |     fn read_file(&self, path: &str) -> io::Result<String> {
30 |         // Serve the file from the case's in-memory (name, content) pairs; the first matching name wins.
31 |         let found = self.case.files.iter().find(|(name, _)| *name == path);
32 |         match found {
33 |             Some((_, content)) => Ok(content.to_string()),
34 |             None => Err(io::Error::new(ErrorKind::NotFound, format!("File not found: {}", path))),
35 |         }
36 |     }
37 | 
38 |     fn stdout(&mut self) -> impl io::Write {
39 |         &mut self.stdout
40 |     }
41 | 
42 |     fn write_error(&mut self, err: Error) {
43 |         self.stderr.push_str(err.to_string().as_str())
44 |     }
45 | }
46 | 
47 | impl<const N: usize> Case<N> {
48 |     fn check(&mut self) {
49 |         let (succeeded, stdout_text, stderr_text) = self.run();
50 |         // JSON cases are parsed and pretty-printed on both sides before comparing, so differences in formatting
51 |         // alone don't fail the test.
52 |         let (got, want) = if !self.expect_output_json {
53 |             (stdout_text, self.expect_output.to_string())
54 |         } else {
55 |             let got_json: serde_json::Value = serde_json::from_str(&stdout_text).unwrap();
56 |             let want_json: serde_json::Value = serde_json::from_str(self.expect_output).unwrap();
57 |             let got_pretty = serde_json::to_string_pretty(&got_json).unwrap();
58 |             (got_pretty, serde_json::to_string_pretty(&want_json).unwrap())
59 |         };
60 |         assert_eq!(got, want);
61 |         assert_eq!(stderr_text, self.expect_error);
62 |         assert_eq!(succeeded, self.expect_success);
63 |     }
64 | 
65 |     fn run(&self) -> (bool, String, String) {
66 |         let argv = ["cmd"].iter().chain(&self.cli_args); // "cmd" fills the leading program-name slot
67 |         let options = mdq::run::CliOptions::try_parse_from(argv).unwrap();
68 |         let mut runner = CaseRunner {
69 |             case: self,
70 |             stdout: Vec::new(),
71 |             stderr: String::new(),
72 |         };
73 |         let succeeded = mdq::run::run(&options.into(), &mut runner);
74 |         // Captured stdout that is not valid UTF-8 is converted lossily rather than causing a panic.
75 |         let stdout_text =
76 |             String::from_utf8(runner.stdout).unwrap_or_else(|bad| String::from_utf8_lossy(bad.as_bytes()).into_owned());
77 |         (succeeded, stdout_text, runner.stderr)
78 |     }
79 | }
80 | 
81 | include!(concat!(env!("OUT_DIR"), "/tests/integ_test_cases.rs"));
82 | 


--------------------------------------------------------------------------------
/tests/md_cases/bad_queries.toml:
--------------------------------------------------------------------------------
  1 | [given]
  2 | md = ''
  3 | 
  4 | [chained]
  5 | needed = false
  6 | 
  7 | [expect."start with string"]
  8 | cli_args = ['"hello"']
  9 | expect_success = false
 10 | output = ''
 11 | output_err = '''Syntax error in select specifier:
 12 |  --> 1:1
 13 |   |
 14 | 1 | "hello"
 15 |   | ^---
 16 |   |
 17 |   = expected valid query
 18 | '''
 19 | 
 20 | [expect."double quote isn't closed"]
 21 | cli_args = ['# "hello']
 22 | expect_success = false
 23 | output = ''
 24 | output_err = '''Syntax error in select specifier:
 25 |  --> 1:9
 26 |   |
 27 | 1 | # "hello
 28 |   |         ^---
 29 |   |
 30 |   = expected character in quoted string
 31 | '''
 32 | 
 33 | [expect."single quote isn't closed"]
 34 | cli_args = ["# 'hello"]
 35 | expect_success = false
 36 | output = ''
 37 | output_err = '''Syntax error in select specifier:
 38 |  --> 1:9
 39 |   |
 40 | 1 | # 'hello
 41 |   |         ^---
 42 |   |
 43 |   = expected character in quoted string
 44 | '''
 45 | 
 46 | [expect."regex isn't closed"]
 47 | cli_args = ['# /hello']
 48 | expect_success = false
 49 | output = ''
 50 | output_err = '''Syntax error in select specifier:
 51 |  --> 1:9
 52 |   |
 53 | 1 | # /hello
 54 |   |         ^---
 55 |   |
 56 |   = expected regex character
 57 | '''
 58 | 
 59 | [expect."invalid regex"]
 60 | cli_args = ['# /\P{/']
 61 | expect_success = false
 62 | output = ''
 63 | output_err = '''Syntax error in select specifier:
 64 |  --> 1:4
 65 |   |
 66 | 1 | # /\P{/
 67 |   |    ^
 68 |   |
 69 |   = regex parse error: Unicode escape not closed
 70 | '''
 71 | 
 72 | [expect."bareword isn't closed"]
 73 | cli_args = ['[](http']
 74 | expect_success = false
 75 | output = ''
 76 | output_err = '''Syntax error in select specifier:
 77 |  --> 1:8
 78 |   |
 79 | 1 | [](http
 80 |   |        ^---
 81 |   |
 82 |   = expected "$"
 83 | '''
 84 | 
 85 | [expect."quoted string bad escape"]
 86 | cli_args = ['# "\x"']
 87 | expect_success = false
 88 | output = ''
 89 | output_err = '''Syntax error in select specifier:
 90 |  --> 1:5
 91 |   |
 92 | 1 | # "\x"
 93 |   |     ^---
 94 |   |
 95 |   = expected ", ', `, \, n, r, or t
 96 | '''
 97 | 
 98 | [expect."quoted string bad unicode: not hex"]
 99 | cli_args = ['# "\u{snowman}"']
100 | expect_success = false
101 | output = ''
102 | output_err = '''Syntax error in select specifier:
103 |  --> 1:7
104 |   |
105 | 1 | # "\u{snowman}"
106 |   |       ^---
107 |   |
108 |   = expected 1 - 6 hex characters
109 | '''
110 | 
111 | [expect."quoted string bad unicode: no chars"]
112 | cli_args = ['# "\u{}"']
113 | expect_success = false
114 | output = ''
115 | output_err = '''Syntax error in select specifier:
116 |  --> 1:7
117 |   |
118 | 1 | # "\u{}"
119 |   |       ^---
120 |   |
121 |   = expected 1 - 6 hex characters
122 | '''
123 | 
124 | [expect."quoted string bad unicode: too many chars"]
125 | cli_args = ['# "\u{1234567}"']
126 | expect_success = false
127 | output = ''
128 | output_err = '''Syntax error in select specifier:
129 |  --> 1:5
130 |   |
131 | 1 | # "\u{1234567}"
132 |   |     ^---
133 |   |
134 |   = expected ", ', `, \, n, r, or t
135 | '''
136 | 
137 | [expect."invalid unicode: invalid codepoint"]
138 | cli_args = ['# "\u{FFFFFF}"']
139 | expect_success = false
140 | output = ''
141 | output_err = '''Syntax error in select specifier:
142 |  --> 1:7
143 |   |
144 | 1 | # "\u{FFFFFF}"
145 |   |       ^----^
146 |   |
147 |   = invalid unicode sequence: FFFFFF
148 | '''
149 | 
150 | [expect."no space after selector"]
151 | cli_args = ["#foo"]
152 | expect_success = false
153 | output = ''
154 | output_err = '''Syntax error in select specifier:
155 |  --> 1:2
156 |   |
157 | 1 | #foo
158 |   |  ^---
159 |   |
160 |   = expected space
161 | '''
162 | 
163 | [expect."anchors in incorrect order"]
164 | cli_args = ['# $hello^']
165 | expect_success = false
166 | output = ''
167 | output_err = '''Syntax error in select specifier:
168 |  --> 1:3
169 |   |
170 | 1 | # $hello^
171 |   |   ^---
172 |   |
173 |   = expected end of input, "*", unquoted string, regex, quoted string, or "^"
174 | '''
175 | 
176 | [expect."invalid selector"]
177 | cli_args = ["~"]
178 | expect_success = false
179 | output = ''
180 | output_err = '''Syntax error in select specifier:
181 |  --> 1:1
182 |   |
183 | 1 | ~
184 |   | ^---
185 |   |
186 |   = expected valid query
187 | '''
188 | 
189 | [expect."invalid task"]
190 | cli_args = ["- [*]"]
191 | expect_success = false
192 | output = ''
193 | output_err = '''Syntax error in select specifier:
194 |  --> 1:4
195 |   |
196 | 1 | - [*]
197 |   |    ^---
198 |   |
199 |   = expected "[x]", "[x]", or "[?]"
200 | '''
201 | 
202 | [expect."numbered list not 1"]
203 | cli_args = ["2. hello"]
204 | expect_success = false
205 | output = ''
206 | output_err = '''Syntax error in select specifier:
207 |  --> 1:1
208 |   |
209 | 1 | 2. hello
210 |   | ^---
211 |   |
212 |   = expected valid query
213 | '''
214 | 
215 | [expect."table missing explicit column matcher"]
216 | cli_args = [":-: :-: row"]
217 | expect_success = false
218 | output = ''
219 | output_err = '''Syntax error in select specifier:
220 |  --> 1:5
221 |   |
222 | 1 | :-: :-: row
223 |   |     ^
224 |   |
225 |   = table column matcher cannot empty; use an explicit "*"
226 | '''
227 | 
228 | [expect."table missing second delimiter"]
229 | cli_args = [":-: *"]
230 | expect_success = false
231 | output = ''
232 | output_err = '''Syntax error in select specifier:
233 |  --> 1:1
234 |   |
235 | 1 | :-: *
236 |   | ^---
237 |   |
238 |   = expected valid query
239 | '''
240 | 
241 | [expect."invalid selector after valid one"]
242 | cli_args = ["# * | :-: *"]
243 | expect_success = false
244 | output = ''
245 | output_err = '''Syntax error in select specifier:
246 |  --> 1:7
247 |   |
248 | 1 | # * | :-: *
249 |   |       ^---
250 |   |
251 |   = expected end of input or selector
252 | '''
253 | 


--------------------------------------------------------------------------------
/tests/md_cases/file_args.toml:
--------------------------------------------------------------------------------
 1 | [given]
 2 | md = '''
 3 | - from stdin
 4 | '''
 5 | files."one.txt" = '''
 6 | - from one.txt
 7 | '''
 8 | files."two.txt" = '''
 9 | - from two.txt
10 | '''
11 | 
12 | [chained]
13 | needed = false
14 | 
15 | [expect."read one file"]
16 | cli_args = ['-', '-oplain', 'one.txt']
17 | output = '''
18 | from one.txt
19 | '''
20 | 
21 | [expect."read two files"]
22 | cli_args = ['-', '-oplain', 'one.txt', 'two.txt']
23 | output = '''
24 | from one.txt
25 | from two.txt
26 | '''
27 | 
28 | [expect."read a file twice"]
29 | cli_args = ['-', '-oplain', 'one.txt', 'one.txt']
30 | output = '''
31 | from one.txt
32 | from one.txt
33 | '''
34 | 
35 | [expect."explicitly read stdin"]
36 | cli_args = ['-', '-oplain', '-']
37 | output = '''
38 | from stdin
39 | '''
40 | 
41 | [expect."explicitly read stdin twice"] # will only read it once!
42 | cli_args = ['-', '-oplain', '-', '-']
43 | output = '''
44 | from stdin
45 | '''
46 | 
47 | [expect."file is missing"] # a missing file is reported as an error
48 | cli_args = ['-', '-oplain', 'missing-err.txt']
49 | expect_success = false
50 | output = ''
51 | output_err = '''entity not found while reading file "missing-err.txt"
52 | '''
53 | 


--------------------------------------------------------------------------------
/tests/md_cases/footnote_renumbering.toml:
--------------------------------------------------------------------------------
 1 | [given]
 2 | md = '''
 3 | hello, world[^3] [^a] [^1] [^shortcut] [^collapsed][].
 4 | 
 5 | [^a]: a footnote
 6 | [^3]: three footnote
 7 | [^1]: one footnote
 8 | [^shortcut]: shortcut footnote
 9 | [^collapsed]: collapsed footnote
10 | '''
11 | 
12 | [chained]
13 | needed = false
14 | 
15 | 
16 | [expect."default"]
17 | cli_args = []
18 | output = '''
19 | hello, world[^1] [^2] [^3] [^4] [^5][].
20 | 
21 | [^1]: three footnote
22 | [^2]: a footnote
23 | [^3]: one footnote
24 | [^4]: shortcut footnote
25 | [^5]: collapsed footnote
26 | '''
27 | 
28 | 
29 | [expect."without renumbering"]
30 | cli_args = ['--renumber-footnotes', 'false']
31 | # note: footnotes still get reordered, just not renumbered
32 | output = '''
33 | hello, world[^3] [^a] [^1] [^shortcut] [^collapsed][].
34 | 
35 | [^1]: one footnote
36 | [^3]: three footnote
37 | [^a]: a footnote
38 | [^collapsed]: collapsed footnote
39 | [^shortcut]: shortcut footnote
40 | '''
41 | 


--------------------------------------------------------------------------------
/tests/md_cases/footnotes_in_footnotes.toml:
--------------------------------------------------------------------------------
  1 | [given]
  2 | md = '''
  3 | - AAA: (footnotes in links don't work: see https://gist.github.com/yshavit/6af0a784e338dc32e66717aa6f495ffe )
  4 | - BBB: footnote contains footnote[^2]
  5 | - CCC: footnote contains link[^3]
  6 | - DDD: footnote contains cycle[^cycle]
  7 | 
  8 | [^1]: the link's footnote text
  9 | [^2]: this footnote contains[^a] a footnote
 10 | [^3]: this footnote contains a [link][3a]
 11 | [^a]: this is the footnote's footnote
 12 | [^cycle]: this footnote references itself[^cycle]
 13 | 
 14 | [3a]: https://example.com/3a
 15 | '''
 16 | 
 17 | [chained]
 18 | needed = false
 19 | 
 20 | 
 21 | [expect."just footnote contains footnote"]
 22 | cli_args = ['- BBB']
 23 | output = '''
 24 | - BBB: footnote contains footnote[^1]
 25 | 
 26 | [^1]: this footnote contains[^2] a footnote
 27 | [^2]: this is the footnote's footnote
 28 | '''
 29 | 
 30 | 
 31 | [expect."just footnote contains footnote: json"]
 32 | cli_args = ['- BBB | P: *', '--output', 'json']
 33 | output_json = true
 34 | output = '''
 35 | {
 36 |   "items": [
 37 |     {
 38 |       "paragraph": "BBB: footnote contains footnote[^1]"
 39 |     }
 40 |   ],
 41 |   "footnotes": {
 42 |     "1": [
 43 |       {
 44 |         "paragraph": "this footnote contains[^2] a footnote"
 45 |       }
 46 |     ],
 47 |     "2": [
 48 |       {
 49 |         "paragraph": "this is the footnote's footnote"
 50 |       }
 51 |     ]
 52 |   }
 53 | }
 54 | '''
 55 | 
 56 | 
 57 | [expect."just footnote contains link"]
 58 | cli_args = ['- CCC']
 59 | output = '''
 60 | - CCC: footnote contains link[^1]
 61 | 
 62 | [3a]: https://example.com/3a
 63 | [^1]: this footnote contains a [link][3a]
 64 | '''
 65 | 
 66 | 
 67 | [expect."just footnote contains link: json"]
 68 | cli_args = ['- CCC | P: *', '--output', 'json']
 69 | output_json = true
 70 | output = '''
 71 | {
 72 |   "items": [
 73 |     {
 74 |       "paragraph": "CCC: footnote contains link[^1]"
 75 |     }
 76 |   ],
 77 |   "footnotes": {
 78 |     "1": [
 79 |       {
 80 |         "paragraph": "this footnote contains a [link][3a]"
 81 |       }
 82 |     ]
 83 |   },
 84 |   "links": {
 85 |     "3a": {
 86 |       "url": "https://example.com/3a"
 87 |     }
 88 |   }
 89 | }
 90 | '''
 91 | 
 92 | 
 93 | [expect."cyclic reference doesn't cause infinite loop"]
 94 | cli_args = ['- DDD | P: *']
 95 | output = '''
 96 | DDD: footnote contains cycle[^1]
 97 | 
 98 | [^1]: this footnote references itself[^1]
 99 | '''
100 | 
101 | [expect."look for elements within a footnote"]
102 | cli_args = ['[]("/3a"$)']
103 | output = '''
104 | [link][3a]
105 | 
106 | [3a]: https://example.com/3a
107 | '''
108 | 


--------------------------------------------------------------------------------
/tests/md_cases/link_placement.toml:
--------------------------------------------------------------------------------
 1 | [given]
 2 | md = '''
 3 | # First Section
 4 | 
 5 | This is [an interesting link][1].
 6 | 
 7 | # Second section
 8 | 
 9 | Some section text with [another link][2].
10 | 
11 | [1]: https://example.com/interesting
12 | [2]: https://example.com/another
13 | '''
14 | 
15 | [chained]
16 | needed = false
17 | 
18 | 
19 | [expect."standard link placement"]
20 | cli_args = []
21 | output = '''
22 | # First Section
23 | 
24 | This is [an interesting link][1].
25 | 
26 | [1]: https://example.com/interesting
27 | 
28 | # Second section
29 | 
30 | Some section text with [another link][2].
31 | 
32 | [2]: https://example.com/another
33 | '''
34 | 
35 | 
36 | [expect."links at bottom of doc"]
37 | cli_args = ["--link-pos", "doc"]
38 | output = '''
39 | # First Section
40 | 
41 | This is [an interesting link][1].
42 | 
43 | # Second section
44 | 
45 | Some section text with [another link][2].
46 | 
47 | [1]: https://example.com/interesting
48 | [2]: https://example.com/another
49 | '''
50 | 
51 | 
52 | [expect."separate links positioned by section"]
53 | cli_args = ["[]()", "--link-pos", "section"]
54 | output = '''
55 | [an interesting link][1]
56 | 
57 | [1]: https://example.com/interesting
58 | 
59 |    -----
60 | 
61 | [another link][2]
62 | 
63 | [2]: https://example.com/another
64 | '''
65 | 
66 | [expect."separate links positioned by doc"]
67 | cli_args = ["[]()", "--link-pos", "doc"]
68 | output = '''
69 | [an interesting link][1]
70 | 
71 |    -----
72 | 
73 | [another link][2]
74 | 
75 |    -----
76 | 
77 | [1]: https://example.com/interesting
78 | [2]: https://example.com/another
79 | '''
80 | 


--------------------------------------------------------------------------------
/tests/md_cases/links_references.toml:
--------------------------------------------------------------------------------
 1 | [given]
 2 | md = '''
 3 | - an [inline link](https://example.com)
 4 | - a [standard reference link][1]
 5 | - a [link with a non-numeric reference id][a]
 6 | - a [link with a non-sequential reference id][456]
 7 | - a [collapsed link][]
 8 | - a [shortcut link]
 9 | - a [link with a title](https://example.com "my title")
10 | - a [link with a title that contains double-quotes](https://example.com 'my "alleged" title')
11 | 
12 | [1]: https://example.com/1
13 | [a]: https://example.com/a "with a title"
14 | [456]: https://example.com/non-sequential
15 | [collapsed link]: https://example.com/collapsed
16 | [shortcut link]: https://example.com/shortcut
17 | '''
18 | 
19 | [chained]
20 | needed = false
21 | 
22 | 
23 | [expect."default"]
24 | cli_args = []
25 | output = '''
26 | - an [inline link][1]
27 | - a [standard reference link][2]
28 | - a [link with a non-numeric reference id][a]
29 | - a [link with a non-sequential reference id][3]
30 | - a [collapsed link][collapsed link]
31 | - a [shortcut link][shortcut link]
32 | - a [link with a title][4]
33 | - a [link with a title that contains double-quotes][5]
34 | 
35 | [1]: https://example.com
36 | [2]: https://example.com/1
37 | [3]: https://example.com/non-sequential
38 | [4]: https://example.com "my title"
39 | [5]: https://example.com 'my "alleged" title'
40 | [a]: https://example.com/a "with a title"
41 | [collapsed link]: https://example.com/collapsed
42 | [shortcut link]: https://example.com/shortcut
43 | '''
44 | 
45 | 
46 | [expect."keep original"]
47 | cli_args = ["--link-format", "keep"]
48 | output = '''
49 | - an [inline link](https://example.com)
50 | - a [standard reference link][1]
51 | - a [link with a non-numeric reference id][a]
52 | - a [link with a non-sequential reference id][456]
53 | - a [collapsed link][]
54 | - a [shortcut link]
55 | - a [link with a title](https://example.com "my title")
56 | - a [link with a title that contains double-quotes](https://example.com 'my "alleged" title')
57 | 
58 | [1]: https://example.com/1
59 | [456]: https://example.com/non-sequential
60 | [a]: https://example.com/a "with a title"
61 | [collapsed link]: https://example.com/collapsed
62 | [shortcut link]: https://example.com/shortcut
63 | '''
64 | 
65 | 
66 | [expect."inline all"]
67 | cli_args = ["--link-format", "inline"]
68 | output = '''
69 | - an [inline link](https://example.com)
70 | - a [standard reference link](https://example.com/1)
71 | - a [link with a non-numeric reference id](https://example.com/a "with a title")
72 | - a [link with a non-sequential reference id](https://example.com/non-sequential)
73 | - a [collapsed link](https://example.com/collapsed)
74 | - a [shortcut link](https://example.com/shortcut)
75 | - a [link with a title](https://example.com "my title")
76 | - a [link with a title that contains double-quotes](https://example.com 'my "alleged" title')
77 | '''
78 | 
79 | 


--------------------------------------------------------------------------------
/tests/md_cases/matchers.toml:
--------------------------------------------------------------------------------
 1 | [given]
 2 | md = '''
 3 | - hello world
 4 | - the world is my oyster
 5 | - worldly possessions
 6 | - lorem ipsum dolor sit amet.
 7 | '''
 8 | 
 9 | [chained]
10 | needed = false
11 | 
12 | [expect."bareword"]
13 | cli_args = ['- world '] # note: trailing space is ignored
14 | output = '''
15 | - hello world
16 | 
17 |    -----
18 | 
19 | - the world is my oyster
20 | 
21 |    -----
22 | 
23 | - worldly possessions
24 | '''
25 | 
26 | 
27 | [expect."bareword with start anchor"]
28 | cli_args = ['- ^world']
29 | output = '''
30 | - worldly possessions
31 | '''
32 | 
33 | 
34 | [expect."bareword with end anchor"]
35 | cli_args = ['- world$']
36 | output = '''
37 | - hello world
38 | '''
39 | 
40 | 
41 | [expect."quoted"]
42 | cli_args = ['- "world "'] # note: trailing space within the quote is significant
43 | output = '''
44 | - the world is my oyster
45 | '''
46 | 
47 | 
48 | [expect."quoted with anchor"]
49 | cli_args = ['- "world "$'] # nothing matches this
50 | output = ''
51 | expect_success = false
52 | 
53 | 
54 | [expect."regex"]
55 | cli_args = ['- /wor.d$/']
56 | output = '''
57 | - hello world
58 | '''
59 | 
60 | 
61 | [expect."bareword is case-insensitive"]
62 | cli_args = ['- OYSTER']
63 | output = '''
64 | - the world is my oyster
65 | '''
66 | 
67 | 
68 | [expect."quoted is case-sensitive"]
69 | cli_args = ['- "OYSTER"']
70 | output = ''
71 | expect_success = false
72 | 
73 | 
74 | [expect."regex is case-sensitive"]
75 | cli_args = ['- /OYSTER/']
76 | output = ''
77 | expect_success = false
78 | 


--------------------------------------------------------------------------------
/tests/md_cases/output_format.toml:
--------------------------------------------------------------------------------
  1 | [given]
  2 | md = '''
  3 | Test _one_ [two][1] three.
  4 | 
  5 | - alpha 1
  6 | - alpha 2
  7 | 
  8 | ```rust
  9 | some_markdown("rust");
 10 | ```
 11 | 
 12 | - bravo
 13 | 
 14 | ```bash
 15 | echo 'some bash'
 16 | ```
 17 | 
 18 | [1]: https://example.com/1
 19 | '''
 20 | 
 21 | [chained]
 22 | needed = false
 23 | 
 24 | 
 25 | [expect."default"]
 26 | cli_args = []
 27 | output = '''
 28 | Test _one_ [two][1] three.
 29 | 
 30 | - alpha 1
 31 | - alpha 2
 32 | 
 33 | ```rust
 34 | some_markdown("rust");
 35 | ```
 36 | 
 37 | - bravo
 38 | 
 39 | ```bash
 40 | echo 'some bash'
 41 | ```
 42 | 
 43 | [1]: https://example.com/1
 44 | '''
 45 | 
 46 | [expect."default codes"]
 47 | cli_args = ['```']
 48 | output = '''
 49 | ```rust
 50 | some_markdown("rust");
 51 | ```
 52 | 
 53 |    -----
 54 | 
 55 | ```bash
 56 | echo 'some bash'
 57 | ```
 58 | '''
 59 | 
 60 | 
 61 | 
 62 | [expect."codes with no breaks"]
 63 | cli_args = ['--no-br', '```']
 64 | output = '''
 65 | ```rust
 66 | some_markdown("rust");
 67 | ```
 68 | 
 69 | ```bash
 70 | echo 'some bash'
 71 | ```
 72 | '''
 73 | 
 74 | 
 75 | [expect."md"]
 76 | cli_args = ['-o', 'md']
 77 | output = '''
 78 | Test _one_ [two][1] three.
 79 | 
 80 | - alpha 1
 81 | - alpha 2
 82 | 
 83 | ```rust
 84 | some_markdown("rust");
 85 | ```
 86 | 
 87 | - bravo
 88 | 
 89 | ```bash
 90 | echo 'some bash'
 91 | ```
 92 | 
 93 | [1]: https://example.com/1
 94 | '''
 95 | 
 96 | 
 97 | [expect."md codes"]
 98 | cli_args = ['--output', 'md', '```']
 99 | output = '''
100 | ```rust
101 | some_markdown("rust");
102 | ```
103 | 
104 |    -----
105 | 
106 | ```bash
107 | echo 'some bash'
108 | ```
109 | '''
110 | 
111 | 
112 | [expect."md with no breaks"]
113 | cli_args = ['--no-br', '--output', 'md', '```']
114 | output = '''
115 | ```rust
116 | some_markdown("rust");
117 | ```
118 | 
119 | ```bash
120 | echo 'some bash'
121 | ```
122 | '''
123 | 
124 | 
125 | [expect."markdown"]
126 | cli_args = ['--output', 'markdown']
127 | output = '''
128 | Test _one_ [two][1] three.
129 | 
130 | - alpha 1
131 | - alpha 2
132 | 
133 | ```rust
134 | some_markdown("rust");
135 | ```
136 | 
137 | - bravo
138 | 
139 | ```bash
140 | echo 'some bash'
141 | ```
142 | 
143 | [1]: https://example.com/1
144 | '''
145 | 
146 | 
147 | [expect."markdown codes"]
148 | cli_args = ['--output', 'markdown', '```']
149 | output = '''
150 | ```rust
151 | some_markdown("rust");
152 | ```
153 | 
154 |    -----
155 | 
156 | ```bash
157 | echo 'some bash'
158 | ```
159 | '''
160 | 
161 | [expect."markdown with no breaks"]
162 | cli_args = ['--no-br', '--output', 'markdown', '```']
163 | output = '''
164 | ```rust
165 | some_markdown("rust");
166 | ```
167 | 
168 | ```bash
169 | echo 'some bash'
170 | ```
171 | '''
172 | 
173 | 
174 | [expect."json"]
175 | cli_args = ['--output', 'json']
176 | output_json = true
177 | output = '''
178 | {
179 |     "items": [
180 |         {
181 |             "document": [
182 |                 {
183 |                     "paragraph": "Test _one_ [two][1] three."
184 |                 },
185 |                 {
186 |                     "list": [
187 |                         {
188 |                           "item": [
189 |                             {
190 |                               "paragraph": "alpha 1"
191 |                             }
192 |                           ]
193 |                         },
194 |                         {
195 |                           "item": [
196 |                             {
197 |                               "paragraph": "alpha 2"
198 |                             }
199 |                           ]
200 |                         }
201 |                     ]
202 |                 },
203 |                 {
204 |                   "code_block": {
205 |                     "code": "some_markdown(\"rust\");",
206 |                     "language": "rust",
207 |                     "type": "code"
208 |                   }
209 |                 },
210 |                 {
211 |                   "list": [
212 |                     {
213 |                       "item": [
214 |                         {
215 |                           "paragraph": "bravo"
216 |                         }
217 |                       ]
218 |                     }
219 |                   ]
220 |                 },
221 |                 {
222 |                   "code_block": {
223 |                     "code": "echo 'some bash'",
224 |                     "language": "bash",
225 |                     "type": "code"
226 |                   }
227 |                 }
228 |             ]
229 |         }
230 |     ],
231 |     "links": {
232 |         "1": {
233 |             "url": "https://example.com/1"
234 |         }
235 |     }
236 | }
237 | '''
238 | 
239 | [expect."json items"]
240 | cli_args = ['- *', '--output', 'json']
241 | output_json = true
242 | output = '''
243 | {
244 |     "items": [
245 |         {
246 |             "list": [
247 |                 {
248 |                     "item": [ { "paragraph": "alpha 1" } ]
249 |                 }
250 |             ]
251 |         },
252 |         {
253 |             "list": [
254 |                 {
255 |                     "item": [ { "paragraph": "alpha 2" } ]
256 |                 }
257 |             ]
258 |         },
259 |         {
260 |             "list": [
261 |                 {
262 |                     "item": [ { "paragraph": "bravo" } ]
263 |                 }
264 |             ]
265 |         }
266 |     ]
267 | }
268 | '''
269 | 
270 | 
271 | [expect."plain"]
272 | cli_args = ['-o', 'plain']
273 | output = '''
274 | Test one two three.
275 | alpha 1
276 | alpha 2
277 | some_markdown("rust");
278 | bravo
279 | echo 'some bash'
280 | '''
281 | 
282 | [expect."plain with breaks"]
283 | cli_args = ['-o', 'plain', '--br']
284 | output = '''
285 | Test one two three.
286 | 
287 | alpha 1
288 | 
289 | alpha 2
290 | 
291 | some_markdown("rust");
292 | 
293 | bravo
294 | 
295 | echo 'some bash'
296 | '''
297 | 
298 | 


--------------------------------------------------------------------------------
/tests/md_cases/paragraph_spans.toml:
--------------------------------------------------------------------------------
 1 | [given]
 2 | md = '''
 3 | Hello, _world_
 4 | **in one** paragraph.
 5 | (Note also plaintext behavior.)
 6 | 
 7 | 
 8 | Second paragraph
 9 | '''
10 | 
11 | [chained]
12 | needed = false
13 | 
14 | 
15 | [expect."echo"]
16 | cli_args = []
17 | output = '''
18 | Hello, _world_
19 | **in one** paragraph.
20 | (Note also plaintext behavior.)
21 | 
22 | Second paragraph
23 | '''
24 | 
25 | [expect."paragraphs"]
26 | cli_args = ["P: *"]
27 | # Check that everything in the first paragraph is indeed in one paragraph.
28 | # (The second paragraph is there just so we can see the thematic break between them, to make it even more obvious
29 | # that the selector has selected two distinct entities.)
30 | output = '''
31 | Hello, _world_
32 | **in one** paragraph.
33 | (Note also plaintext behavior.)
34 | 
35 |    -----
36 | 
37 | Second paragraph
38 | '''
39 | 


--------------------------------------------------------------------------------
/tests/md_cases/search_replace.toml:
--------------------------------------------------------------------------------
  1 | [given]
  2 | md = '''
  3 | # Original Title
  4 | 
  5 | This is a paragraph with text.
  6 | 
  7 | ```original-language
  8 | original code content here
  9 | ```
 10 | 
 11 | Here are some list items:
 12 | 
 13 | - Item with **bold text** formatting
 14 | - Item with *emphasis and **nested bold*** formatting
 15 | - ![original image alt](https://example.com/original/image.png) description
 16 | - [original link text](https://example.com/original/page.html) description
 17 | '''
 18 | 
 19 | [chained]
 20 | needed = false
 21 | 
 22 | [expect."search-replace section title"]
 23 | cli_args = ['# !s/Original/New/']
 24 | expect_success = false
 25 | output = ''
 26 | output_err = '''Selection error:
 27 | section selector does not support string replace
 28 | '''
 29 | 
 30 | [expect."search-replace code block language"]
 31 | cli_args = ['``` !s/original-.*/python/']
 32 | expect_success = false
 33 | output = ''
 34 | output_err = '''Selection error:
 35 | code block selector does not support string replace
 36 | '''
 37 | 
 38 | [expect."search-replace code block contents"]
 39 | cli_args = ['``` !s/original/new/']
 40 | expect_success = false
 41 | output = ''
 42 | output_err = '''Selection error:
 43 | code block selector does not support string replace
 44 | '''
 45 | 
 46 | [expect."search-replace image alt text"]
 47 | cli_args = ['![ !s/original/new/ ]()']
 48 | expect_success = false
 49 | output = ''
 50 | output_err = '''Selection error:
 51 | image selector does not support string replace
 52 | '''
 53 | 
 54 | [expect."search-replace image alt text with non-matching"]
 55 | # Even though there's no match, it should still error. The error shouldn't have anything to do with the target Markdown.
 56 | cli_args = ['![ !s/BOGUS/new/ ]()']
 57 | expect_success = false
 58 | output = ''
 59 | output_err = '''Selection error:
 60 | image selector does not support string replace
 61 | '''
 62 | 
 63 | [expect."search-replace image url"]
 64 | cli_args = ['![](!s/original/new/)']
 65 | output = '''![original image alt][1]
 66 | 
 67 | [1]: https://example.com/new/image.png
 68 | '''
 69 | 
 70 | [expect."search-replace link text"]
 71 | cli_args = ['[ !s/original/new/ ]()']
 72 | expect_success = false
 73 | output = ''
 74 | output_err = '''Selection error:
 75 | hyperlink selector does not support string replace
 76 | '''
 77 | 
 78 | [expect."search-replace link url"]
 79 | cli_args = ['[]( !s/original/new/ )']
 80 | output = '''[original link text][1]
 81 | 
 82 | [1]: https://example.com/new/page.html
 83 | '''
 84 | 
 85 | [expect."search-replace straightforward formatting"]
 86 | cli_args = ['- !s/bold/strong/']
 87 | expect_success = false
 88 | output = ''
 89 | output_err = '''Selection error:
 90 | list item selector does not support string replace
 91 | '''
 92 | 
 93 | [expect."search-replace nested formatting"]
 94 | cli_args = ['- !s/and nested/and formerly/']
 95 | expect_success = false
 96 | output = ''
 97 | output_err = '''Selection error:
 98 | list item selector does not support string replace
 99 | '''
100 | 
101 | [expect."search-replace paragraph text"]
102 | cli_args = ['P: !s/paragraph/text/']
103 | expect_success = false
104 | output = ''
105 | output_err = '''Selection error:
106 | paragraph selector does not support string replace
107 | '''
108 | 


--------------------------------------------------------------------------------
/tests/md_cases/select_block_quote.toml:
--------------------------------------------------------------------------------
 1 | [given]
 2 | md = '''
 3 | One
 4 | 
 5 | > Two
 6 | 
 7 | Three
 8 | 
 9 | > - Four
10 | '''
11 | 
12 | 
13 | [expect."select all block quotes"]
14 | cli_args = ['>']
15 | output = '''
16 | > Two
17 | 
18 |    -----
19 | 
20 | > - Four
21 | '''
22 | 
23 | 
24 | [expect."select block quote with text"]
25 | cli_args = ['> two']
26 | output = '''
27 | > Two
28 | '''
29 | 
30 | 
31 | [expect."select block quote with list text"]
32 | ignore = '#144'
33 | cli_args = ['> - four'] # note: the text after the '>' looks like a list item
34 | output = '''
35 | > - Four
36 | '''
37 | 
38 | 
39 | [expect."select block quote then list"]
40 | cli_args = ['> | - *'] # note: chains a block quote selector into a list item selector
41 | output = '''
42 | - Four
43 | '''
44 | 
45 | 
46 | [expect."chained"]
47 | cli_args = ['> two | > two'] # note: the same block quote selector, chained twice
48 | output = '''
49 | > Two
50 | '''
51 | 


--------------------------------------------------------------------------------
/tests/md_cases/select_exit_code.toml:
--------------------------------------------------------------------------------
 1 | [given]
 2 | md = '''
 3 | [fizz](https://example.com/buzz)
 4 | '''
 5 | 
 6 | [chained]
 7 | needed = false
 8 | 
 9 | 
10 | [expect."match and output"]
11 | cli_args = ["--link-format", "keep", "[]()"]
12 | output = '''
13 | [fizz](https://example.com/buzz)'''
14 | 
15 | 
16 | [expect."match quiet"]
17 | cli_args = ["--link-format", "keep", "[]()", "-q"]
18 | output = ''
19 | 
20 | 
21 | [expect."no match"]
22 | cli_args = ["--link-format", "keep", "[no match]()"]
23 | output = ''
24 | expect_success = false
25 | 


--------------------------------------------------------------------------------
/tests/md_cases/select_front_matter.toml:
--------------------------------------------------------------------------------
  1 | [given]
  2 | md = '''
  3 | +++
  4 | title: Test Front Matter
  5 | author: Me
  6 | +++
  7 | 
  8 | # My Document
  9 | 
 10 | This is the document body.
 11 | 
 12 | ---
 13 | it has: a block that looks like front matter
 14 | but: isn't
 15 | ---
 16 | 
 17 | '''
 18 | 
 19 | [chained]
 20 | needed = false
 21 | 
 22 | 
 23 | [expect."select any front matter"]
 24 | cli_args = ["+++"]
 25 | output = '''
 26 | +++
 27 | title: Test Front Matter
 28 | author: Me
 29 | +++
 30 | '''
 31 | 
 32 | [expect."select toml matter"]
 33 | cli_args = ["+++toml"]
 34 | output = '''
 35 | +++
 36 | title: Test Front Matter
 37 | author: Me
 38 | +++
 39 | '''
 40 | 
 41 | [expect."select yaml matter"]
 42 | cli_args = ["+++yaml"]
 43 | output = '''
 44 | '''
 45 | expect_success = false
 46 | 
 47 | [expect."select other matter"]
 48 | cli_args = ["+++other"]
 49 | output = ''
 50 | output_err = '''Syntax error in select specifier:
 51 |  --> 1:4
 52 |   |
 53 | 1 | +++other
 54 |   |    ^---^
 55 |   |
 56 |   = front matter language must be "toml" or "yaml". Found "other".
 57 | '''
 58 | expect_success = false
 59 | 
 60 | [expect."select front matter with text matcher"]
 61 | cli_args = ["+++ title: Test Front Matter"]
 62 | output = '''
 63 | +++
 64 | title: Test Front Matter
 65 | author: Me
 66 | +++
 67 | '''
 68 | 
 69 | [expect."select front matter with regex matcher"]
 70 | cli_args = ["+++ /author: .*/"]
 71 | output = '''
 72 | +++
 73 | title: Test Front Matter
 74 | author: Me
 75 | +++
 76 | '''
 77 | 
 78 | [expect."select front matter with no match"]
 79 | cli_args = ["+++ non-existent"]
 80 | output = '''
 81 | '''
 82 | expect_success = false
 83 | 
 84 | 
 85 | [expect."plain output"]
 86 | cli_args = ["-o", "plain"]
 87 | output = '''
 88 | title: Test Front Matter
 89 | author: Me
 90 | My Document
 91 | This is the document body.
 92 | it has: a block that looks like front matter
 93 | but: isn't
 94 | '''
 95 | 
 96 | 
 97 | [expect."select a paragraph that looks like front matter"]
 98 | cli_args = ["-o", "json"]
 99 | output_json = true
100 | output = '''
101 | {
102 |   "items": [
103 |     {
104 |       "document": [
105 |         {
106 |           "front_matter": {
107 |             "body": "title: Test Front Matter\nauthor: Me",
108 |             "variant": "toml"
109 |           }
110 |         },
111 |         {
112 |           "section": {
113 |             "depth": 1,
114 |             "title": "My Document",
115 |             "body": [
116 |               {
117 |                 "paragraph": "This is the document body."
118 |               },
119 |               {
120 |                 "thematic_break": null
121 |               },
122 |               {
123 |                 "section": {
124 |                   "depth": 2,
125 |                   "title": "it has: a block that looks like front matter\nbut: isn't",
126 |                   "body": []
127 |                 }
128 |               }
129 |             ]
130 |           }
131 |         }
132 |       ]
133 |     }
134 |   ]
135 | }
136 | '''
137 | 


--------------------------------------------------------------------------------
/tests/md_cases/select_html.toml:
--------------------------------------------------------------------------------
 1 | [given]
 2 | md = '''
 3 | Text with <span>inline html</span>.
 4 | 
 5 | <div
 6 | class="block>
 7 | 
 8 | and a div block
 9 | 
10 | </div>
11 | '''
12 | 
13 | 
14 | [expect."select all"]
15 | cli_args = ['</>']
16 | output = '''
17 | <span>
18 | 
19 |    -----
20 | 
21 | </span>
22 | 
23 |    -----
24 | 
25 | <div
26 | class="block>
27 | 
28 |    -----
29 | 
30 | </div>
31 | '''
32 | 
33 | 
34 | [expect."inline with matcher"]
35 | cli_args = ['</> span']
36 | output = '''
37 | <span>
38 | 
39 |    -----
40 | 
41 | </span>
42 | '''
43 | 
44 | 
45 | [expect."inline with matcher tag"]
46 | cli_args = ['</> "<span>"']
47 | output = '''
48 | <span>
49 | '''
50 | 
51 | 
52 | [expect."unquoted tag"]
53 | cli_args = ['</> <span>']
54 | expect_success = false # unquoted string must start with a letter, not a '<'
55 | output = ''
56 | output_err = '''Syntax error in select specifier:
57 |  --> 1:5
58 |   |
59 | 1 | </> <span>
60 |   |     ^---
61 |   |
62 |   = expected end of input, "*", unquoted string, regex, quoted string, or "^"
63 | '''
64 | 
65 | 
66 | [expect."block with matcher"]
67 | cli_args = ['</> class']
68 | output = '''
69 | <div
70 | class="block>
71 | '''
72 | 
73 | 
74 | [expect."chained"]
75 | cli_args = ['</> span | </> "<span>"']
76 | output = '''
77 | <span>
78 | '''


--------------------------------------------------------------------------------
/tests/md_cases/select_link.toml:
--------------------------------------------------------------------------------
 1 | [given]
 2 | md = '''
 3 | This text has both [an inline link](https://example.com/inline) and a [referenced link][1]. It also has an image:
 4 | 
 5 | ![image alt text](https://example.com/hylas-and-nymphs.png)
 6 | 
 7 | [1]: https://example.com/referenced
 8 | '''
 9 | 
10 | 
11 | [expect."select all"]
12 | cli_args = ['[]()', '--link-format=inline']
13 | output = '''
14 | [an inline link](https://example.com/inline)
15 | 
16 |    -----
17 | 
18 | [referenced link](https://example.com/referenced)'''
19 | 
20 | 
21 | [expect."select link by text"]
22 | cli_args = ['[an inline]()', '--link-format=inline']
23 | output = '''
24 | [an inline link](https://example.com/inline)'''
25 | 
26 | 
27 | [expect."select link by url"]
28 | cli_args = ['[]("/referenced")', '--link-format=inline']
29 | output = '''
30 | [referenced link](https://example.com/referenced)'''
31 | 
32 | 
33 | [expect."select image"]
34 | cli_args = ['![]()', '--link-format=inline']
35 | output = '''
36 | ![image alt text](https://example.com/hylas-and-nymphs.png)'''
37 | 
38 | 
39 | [expect."chained"]
40 | cli_args = ['[inline]() | [](example.com)']
41 | output = '''
42 | [an inline link][1]
43 | 
44 | [1]: https://example.com/inline
45 | '''
46 | 


--------------------------------------------------------------------------------
/tests/md_cases/select_lists.toml:
--------------------------------------------------------------------------------
 1 | [given]
 2 | md = '''
 3 | # Ordered
 4 | 
 5 | 2. two (note: starting count at 2!)
 6 | 3. three
 7 | 
 8 | # Unordered
 9 | 
10 | - a point
11 | - another point
12 | 
13 | # Tasks
14 | 
15 | - [ ] an incomplete task
16 | - [x] a completed task
17 | 
18 | 1. [ ] ordered tasks are possible, too
19 | '''
20 | 
21 | 
22 | [expect."select ordered"]
23 | cli_args = ['1.']
24 | output = '''
25 | 2. two (note: starting count at 2!)
26 | 
27 |    -----
28 | 
29 | 3. three
30 | '''
31 | 
32 | 
33 | [expect."select unordered"]
34 | cli_args = ['-']
35 | output = '''
36 | - a point
37 | 
38 |    -----
39 | 
40 | - another point
41 | '''
42 | 
43 | 
44 | [expect."select all unordered tasks"]
45 | cli_args = ['- [?]'] # note: space between the - and [ is required
46 | output = '''
47 | - [ ] an incomplete task
48 | 
49 |    -----
50 | 
51 | - [x] a completed task
52 | '''
53 | 
54 | 
55 | [expect."select unordered task by text"]
56 | cli_args = ['- [?] incomplete'] # note: space between the - and [ is required
57 | output = '''
58 | - [ ] an incomplete task
59 | '''
60 | 
61 | 
62 | [expect."select incomplete unordered tasks"]
63 | cli_args = ['- [ ]']
64 | output = '''
65 | - [ ] an incomplete task
66 | '''
67 | 
68 | 
69 | [expect."select completed unordered tasks"]
70 | cli_args = ['- [x]']
71 | output = '''
72 | - [x] a completed task
73 | '''
74 | 
75 | 
76 | [expect."select ordered tasks"]
77 | cli_args = ['1. [?]']
78 | output = '''
79 | 1. [ ] ordered tasks are possible, too
80 | '''
81 | 
82 | 
83 | [expect."chained"]
84 | cli_args = ['- a point | - a point']
85 | output = '''
86 | - a point
87 | '''
88 | 


--------------------------------------------------------------------------------
/tests/md_cases/select_paragraphs.toml:
--------------------------------------------------------------------------------
 1 | [given]
 2 | md = '''
 3 | Hello, world.
 4 | 
 5 | > Paragraph within a block quote.
 6 | 
 7 | This paragraph has _inline_ **formatting**.
 8 | '''
 9 | 
10 | 
11 | [expect."all"]
12 | cli_args = ["P:"]
13 | output = '''
14 | Hello, world.
15 | 
16 |    -----
17 | 
18 | Paragraph within a block quote.
19 | 
20 |    -----
21 | 
22 | This paragraph has _inline_ **formatting**.
23 | '''
24 | 
25 | 
26 | [expect."all but with explicit all-matcher"]
27 | cli_args = ["P: *"]
28 | output = '''
29 | Hello, world.
30 | 
31 |    -----
32 | 
33 | Paragraph within a block quote.
34 | 
35 |    -----
36 | 
37 | This paragraph has _inline_ **formatting**.
38 | '''
39 | 
40 | 
41 | [expect."select within a block quote"]
42 | cli_args = ["P: block"]
43 | output = '''
44 | Paragraph within a block quote.
45 | '''
46 | 
47 | 
48 | [expect."matcher ignores inline formatting"]
49 | cli_args = ["P: has inline"]
50 | # The markdown is "has _inline_", but the emphasis formatting is ignored for matching. It's still used for output.
51 | output = '''
52 | This paragraph has _inline_ **formatting**.
53 | '''
54 | 
55 | 
56 | [expect."no colon after p"]
57 | cli_args = ["P *"]
58 | expect_success = false
59 | output = ''
60 | output_err = '''Syntax error in select specifier:
61 |  --> 1:1
62 |   |
63 | 1 | P *
64 |   | ^---
65 |   |
66 |   = expected valid query
67 | '''
68 | 
69 | 
70 | [expect."space before colon"]
71 | cli_args = ["P : *"]
72 | expect_success = false
73 | output = ''
74 | output_err = '''Syntax error in select specifier:
75 |  --> 1:1
76 |   |
77 | 1 | P : *
78 |   | ^---
79 |   |
80 |   = expected valid query
81 | '''
82 | 
83 | 
84 | [expect."chained"]
85 | cli_args = ['P: hello | P: world']
86 | output = '''
87 | Hello, world.
88 | '''


--------------------------------------------------------------------------------
/tests/md_cases/select_sections.toml:
--------------------------------------------------------------------------------
 1 | [given]
 2 | md = '''
 3 | # Alpha
 4 | 
 5 | My first section.
 6 | 
 7 | ## Sub-section
 8 | 
 9 | Hello, world.
10 | 
11 | # Bravo
12 | 
13 | My second section.
14 | '''
15 | 
16 | 
17 | [expect."select top-level section"]
18 | cli_args = ["# alpha"]
19 | output = '''
20 | # Alpha
21 | 
22 | My first section.
23 | 
24 | ## Sub-section
25 | 
26 | Hello, world.
27 | '''
28 | 
29 | 
30 | [expect."select subsection"]
31 | cli_args = ["# sub"]
32 | output = '''
33 | ## Sub-section
34 | 
35 | Hello, world.
36 | '''
37 | 
38 | 
39 | [expect."select all sections"]
40 | # note: This selects all the doc's contents, but not as a single doc: each top-level header is its own selection, so the
41 | # output separates them with a thematic break.
42 | cli_args = ["#"]
43 | output = '''
44 | # Alpha
45 | 
46 | My first section.
47 | 
48 | ## Sub-section
49 | 
50 | Hello, world.
51 | 
52 |    -----
53 | 
54 | # Bravo
55 | 
56 | My second section.
57 | '''
58 | 
59 | 
60 | [expect."chained"]
61 | cli_args = ['# bravo | # bravo']
62 | output = '''
63 | # Bravo
64 | 
65 | My second section.
66 | '''
67 | 


--------------------------------------------------------------------------------
/tests/md_cases/select_tables.toml:
--------------------------------------------------------------------------------
  1 | [given]
  2 | md = '''
  3 | Are you ready for a table?
  4 | 
  5 | | Name | Description |
  6 | |:----:|-------------|
  7 | | Foo  | Not a fizz  |
  8 | | Bar  | Not a buzz  |
  9 | | Barn | Big, red.   | And this is an extra column |
 10 | | Fuzz |
 11 | 
 12 | Note that the "Barn" row has an extra column, and the "Fuzz" row is missing one.
 13 | '''
 14 | 
 15 | 
 16 | [expect."table not normalized by default"]
 17 | cli_args = [""]
 18 | output = '''
 19 | Are you ready for a table?
 20 | 
 21 | | Name | Description |
 22 | |:----:|-------------|
 23 | | Foo  | Not a fizz  |
 24 | | Bar  | Not a buzz  |
 25 | | Barn | Big, red.   | And this is an extra column |
 26 | | Fuzz |
 27 | 
 28 | Note that the "Barn" row has an extra column, and the "Fuzz" row is missing one.
 29 | '''
 30 | 
 31 | 
 32 | [expect."select all table cells normalizes"]
 33 | cli_args = [":-: * :-:"]
 34 | output = '''
 35 | | Name | Description |                             |
 36 | |:----:|-------------|-----------------------------|
 37 | | Foo  | Not a fizz  |                             |
 38 | | Bar  | Not a buzz  |                             |
 39 | | Barn | Big, red.   | And this is an extra column |
 40 | | Fuzz |             |                             |'''
 41 | 
 42 | 
 43 | [expect."select only name"]
 44 | # note: "Name" has an 'a', "Description" doesn't. There are other rows that do contain 'a' in the Description column,
 45 | # but the first matcher only checks the header cells (by design).
 46 | cli_args = [":-: a :-:"]
 47 | output = '''
 48 | | Name |
 49 | |:----:|
 50 | | Foo  |
 51 | | Bar  |
 52 | | Barn |
 53 | | Fuzz |'''
 54 | 
 55 | 
 56 | [expect."select only description"]
 57 | cli_args = [":-: description :-:"]
 58 | output = '''
 59 | | Description |
 60 | |-------------|
 61 | | Not a fizz  |
 62 | | Not a buzz  |
 63 | | Big, red.   |
 64 | |             |'''
 65 | 
 66 | 
 67 | [expect."select only description by regex"]
 68 | cli_args = [":-: /Description/ :-:"]
 69 | output = '''
 70 | | Description |
 71 | |-------------|
 72 | | Not a fizz  |
 73 | | Not a buzz  |
 74 | | Big, red.   |
 75 | |             |'''
 76 | 
 77 | 
 78 | [expect."select only the big red row"]
 79 | # Note: header row always survives
 80 | cli_args = [":-: * :-: 'Big, red' "]
 81 | output = '''
 82 | | Name | Description |                             |
 83 | |:----:|-------------|-----------------------------|
 84 | | Barn | Big, red.   | And this is an extra column |'''
 85 | 
 86 | 
 87 | [expect."chained"]
 88 | cli_args = [":-: * :-: * | :-: * :-: * | "]
 89 | output = '''
 90 | | Name | Description |                             |
 91 | |:----:|-------------|-----------------------------|
 92 | | Foo  | Not a fizz  |                             |
 93 | | Bar  | Not a buzz  |                             |
 94 | | Barn | Big, red.   | And this is an extra column |
 95 | | Fuzz |             |                             |'''
 96 | 
 97 | [expect."output plain"]
 98 | cli_args = ["-o", "plain", ":-: * :-: *"]
 99 | output = '''
100 | Name Description
101 | Foo Not a fizz
102 | Bar Not a buzz
103 | Barn Big, red. And this is an extra column
104 | Fuzz
105 | '''
106 | 


--------------------------------------------------------------------------------
/tests/md_cases/wrapping.toml:
--------------------------------------------------------------------------------
 1 | [given]
 2 | #        1         2         3         4         5
 3 | # 345678901234567890123456789012345678901234567890
 4 | md = '''
 5 | This text has both [an inline link](https://example.com/inline) and a [referenced link][1]. It also has an image:
 6 | 
 7 | ![image alt text](https://example.com/hylas-and-nymphs.png)
 8 | 
 9 | [1]: https://example.com/referenced
10 | 
11 | > We can also have text that wraps within quoted text.
12 | 
13 | - Or we can have text that wraps within ordered lists
14 |   - including sublists, which should take the indentation into account
15 |   - and even if there are multiple items that are in there
16 | '''
17 | #        1         2         3         4         5
18 | # 345678901234567890123456789012345678901234567890
19 | 
20 | [chained]
21 | needed = false
22 | 
23 | 
24 | [expect."wrapping at 50"]
25 | cli_args = ['--wrap-width=50', '--link-format=keep']
26 | output = '''
27 | This text has both
28 | [an inline link](https://example.com/inline) and a
29 | [referenced link][1]. It also has an image:
30 | 
31 | ![image alt text](https://example.com/hylas-and-nymphs.png)
32 | 
33 | > We can also have text that wraps within quoted
34 | > text.
35 | 
36 | - Or we can have text that wraps within ordered
37 |   lists
38 | 
39 |   - including sublists, which should take the
40 |     indentation into account
41 |   - and even if there are multiple items that are
42 |     in there
43 | 
44 | [1]: https://example.com/referenced
45 | '''
46 | 


--------------------------------------------------------------------------------