├── .github └── workflows │ └── rust.yml ├── .gitignore ├── CONTRIBUTING.md ├── Cargo.lock ├── Cargo.toml ├── proptest-regressions └── lib.txt ├── readme.md └── src ├── error.rs ├── lib.rs ├── metadata ├── mod.rs ├── snapshots │ ├── rust_norg__metadata__tests__arrays.snap │ ├── rust_norg__metadata__tests__common_metadata.snap │ ├── rust_norg__metadata__tests__keys.snap │ └── rust_norg__metadata__tests__keys_and_values.snap └── stage_1.rs ├── snapshots ├── rust_norg__tests__carryover_tags.snap ├── rust_norg__tests__carryover_tags_tree.snap ├── rust_norg__tests__definitions.snap ├── rust_norg__tests__delimiting_mods_tree.snap ├── rust_norg__tests__footnotes.snap ├── rust_norg__tests__headings.snap ├── rust_norg__tests__headings_tree.snap ├── rust_norg__tests__infirm_tags.snap ├── rust_norg__tests__inline_verbatim.snap ├── rust_norg__tests__links.snap ├── rust_norg__tests__lists.snap ├── rust_norg__tests__lists_tree.snap ├── rust_norg__tests__modifier_extensions.snap ├── rust_norg__tests__modifiers.snap ├── rust_norg__tests__ordered_lists.snap ├── rust_norg__tests__paragraphs.snap ├── rust_norg__tests__quotes.snap ├── rust_norg__tests__ranged_tags.snap ├── rust_norg__tests__ranged_verbatim_tags.snap ├── rust_norg__tests__tables.snap └── rust_norg__tests__verbatim_tags.snap ├── stage_1.rs ├── stage_2.rs ├── stage_3.rs └── stage_4.rs /.github/workflows/rust.yml: -------------------------------------------------------------------------------- 1 | name: Rust 2 | 3 | on: 4 | push: 5 | branches: [ "main" ] 6 | pull_request: 7 | branches: [ "main" ] 8 | 9 | env: 10 | CARGO_TERM_COLOR: always 11 | 12 | jobs: 13 | build: 14 | runs-on: ubuntu-latest 15 | steps: 16 | - uses: actions/checkout@v4 17 | - name: Build 18 | run: cargo build --verbose 19 | - name: Run tests 20 | run: cargo test --verbose 21 | -------------------------------------------------------------------------------- /.gitignore: -------------------------------------------------------------------------------- 1 | 
/target 2 | /test.norg 3 | -------------------------------------------------------------------------------- /CONTRIBUTING.md: -------------------------------------------------------------------------------- 1 | # Contributing 2 | 3 | If you're considering contributing, please open an issue before a PR. A lot of discussion 4 | also happens in the [Neorg Discord](https://discord.gg/T6EgTAX7ht), so you might consider 5 | joining. 6 | 7 | ## Tests 8 | 9 | If you change a behavior or fix a bug, please make sure to add a test for it! 10 | 11 | - run the test suite with `cargo test` 12 | 13 | There are snapshot tests and prop tests. If you change the parser behavior or add a new 14 | test case, the snapshots will change and you will see a test failure. You can approve the 15 | new version of the snapshot with: 16 | 17 | - `cargo insta review` 18 | 19 | Prop tests essentially fuzz the parser and make sure that it doesn't panic. Failed test 20 | cases are saved and version controlled to avoid regressions. 21 | 22 | 23 | -------------------------------------------------------------------------------- /Cargo.lock: -------------------------------------------------------------------------------- 1 | # This file is automatically @generated by Cargo. 2 | # It is not intended for manual editing. 
3 | version = 4 4 | 5 | [[package]] 6 | name = "ahash" 7 | version = "0.8.11" 8 | source = "registry+https://github.com/rust-lang/crates.io-index" 9 | checksum = "e89da841a80418a9b391ebaea17f5c112ffaaa96f621d2c285b5174da76b9011" 10 | dependencies = [ 11 | "cfg-if", 12 | "once_cell", 13 | "version_check", 14 | "zerocopy", 15 | ] 16 | 17 | [[package]] 18 | name = "allocator-api2" 19 | version = "0.2.18" 20 | source = "registry+https://github.com/rust-lang/crates.io-index" 21 | checksum = "5c6cb57a04249c6480766f7f7cef5467412af1490f8d1e243141daddada3264f" 22 | 23 | [[package]] 24 | name = "autocfg" 25 | version = "1.3.0" 26 | source = "registry+https://github.com/rust-lang/crates.io-index" 27 | checksum = "0c4b4d0bd25bd0b74681c0ad21497610ce1b7c91b1022cd21c80c6fbdd9476b0" 28 | 29 | [[package]] 30 | name = "bit-set" 31 | version = "0.5.3" 32 | source = "registry+https://github.com/rust-lang/crates.io-index" 33 | checksum = "0700ddab506f33b20a03b13996eccd309a48e5ff77d0d95926aa0210fb4e95f1" 34 | dependencies = [ 35 | "bit-vec", 36 | ] 37 | 38 | [[package]] 39 | name = "bit-vec" 40 | version = "0.6.3" 41 | source = "registry+https://github.com/rust-lang/crates.io-index" 42 | checksum = "349f9b6a179ed607305526ca489b34ad0a41aed5f7980fa90eb03160b69598fb" 43 | 44 | [[package]] 45 | name = "bitflags" 46 | version = "2.5.0" 47 | source = "registry+https://github.com/rust-lang/crates.io-index" 48 | checksum = "cf4b9d6a944f767f8e5e0db018570623c85f3d925ac718db4e06d0187adb21c1" 49 | 50 | [[package]] 51 | name = "cc" 52 | version = "1.0.97" 53 | source = "registry+https://github.com/rust-lang/crates.io-index" 54 | checksum = "099a5357d84c4c61eb35fc8eafa9a79a902c2f76911e5747ced4e032edd8d9b4" 55 | 56 | [[package]] 57 | name = "cfg-if" 58 | version = "1.0.0" 59 | source = "registry+https://github.com/rust-lang/crates.io-index" 60 | checksum = "baf1de4339761588bc0619e3cbc0120ee582ebb74b53b4efbf79117bd2da40fd" 61 | 62 | [[package]] 63 | name = "chumsky" 64 | version = "0.9.3" 65 | source = 
"registry+https://github.com/rust-lang/crates.io-index" 66 | checksum = "8eebd66744a15ded14960ab4ccdbfb51ad3b81f51f3f04a80adac98c985396c9" 67 | dependencies = [ 68 | "hashbrown", 69 | "stacker", 70 | ] 71 | 72 | [[package]] 73 | name = "console" 74 | version = "0.15.8" 75 | source = "registry+https://github.com/rust-lang/crates.io-index" 76 | checksum = "0e1f83fc076bd6dd27517eacdf25fef6c4dfe5f1d7448bafaaf3a26f13b5e4eb" 77 | dependencies = [ 78 | "encode_unicode", 79 | "lazy_static", 80 | "libc", 81 | "windows-sys", 82 | ] 83 | 84 | [[package]] 85 | name = "either" 86 | version = "1.11.0" 87 | source = "registry+https://github.com/rust-lang/crates.io-index" 88 | checksum = "a47c1c47d2f5964e29c61246e81db715514cd532db6b5116a25ea3c03d6780a2" 89 | 90 | [[package]] 91 | name = "encode_unicode" 92 | version = "0.3.6" 93 | source = "registry+https://github.com/rust-lang/crates.io-index" 94 | checksum = "a357d28ed41a50f9c765dbfe56cbc04a64e53e5fc58ba79fbc34c10ef3df831f" 95 | 96 | [[package]] 97 | name = "errno" 98 | version = "0.3.9" 99 | source = "registry+https://github.com/rust-lang/crates.io-index" 100 | checksum = "534c5cf6194dfab3db3242765c03bbe257cf92f22b38f6bc0c58d59108a820ba" 101 | dependencies = [ 102 | "libc", 103 | "windows-sys", 104 | ] 105 | 106 | [[package]] 107 | name = "fastrand" 108 | version = "2.1.0" 109 | source = "registry+https://github.com/rust-lang/crates.io-index" 110 | checksum = "9fc0510504f03c51ada170672ac806f1f105a88aa97a5281117e1ddc3368e51a" 111 | 112 | [[package]] 113 | name = "fnv" 114 | version = "1.0.7" 115 | source = "registry+https://github.com/rust-lang/crates.io-index" 116 | checksum = "3f9eec918d3f24069decb9af1554cad7c880e2da24a9afd88aca000531ab82c1" 117 | 118 | [[package]] 119 | name = "getrandom" 120 | version = "0.2.15" 121 | source = "registry+https://github.com/rust-lang/crates.io-index" 122 | checksum = "c4567c8db10ae91089c99af84c68c38da3ec2f087c3f82960bcdbf3656b6f4d7" 123 | dependencies = [ 124 | "cfg-if", 125 | "libc", 126 | 
"wasi", 127 | ] 128 | 129 | [[package]] 130 | name = "hashbrown" 131 | version = "0.14.5" 132 | source = "registry+https://github.com/rust-lang/crates.io-index" 133 | checksum = "e5274423e17b7c9fc20b6e7e208532f9b19825d82dfd615708b70edd83df41f1" 134 | dependencies = [ 135 | "ahash", 136 | "allocator-api2", 137 | ] 138 | 139 | [[package]] 140 | name = "insta" 141 | version = "1.39.0" 142 | source = "registry+https://github.com/rust-lang/crates.io-index" 143 | checksum = "810ae6042d48e2c9e9215043563a58a80b877bc863228a74cf10c49d4620a6f5" 144 | dependencies = [ 145 | "console", 146 | "lazy_static", 147 | "linked-hash-map", 148 | "serde", 149 | "similar", 150 | ] 151 | 152 | [[package]] 153 | name = "itertools" 154 | version = "0.13.0" 155 | source = "registry+https://github.com/rust-lang/crates.io-index" 156 | checksum = "413ee7dfc52ee1a4949ceeb7dbc8a33f2d6c088194d9f922fb8318faf1f01186" 157 | dependencies = [ 158 | "either", 159 | ] 160 | 161 | [[package]] 162 | name = "lazy_static" 163 | version = "1.4.0" 164 | source = "registry+https://github.com/rust-lang/crates.io-index" 165 | checksum = "e2abad23fbc42b3700f2f279844dc832adb2b2eb069b2df918f455c4e18cc646" 166 | 167 | [[package]] 168 | name = "libc" 169 | version = "0.2.154" 170 | source = "registry+https://github.com/rust-lang/crates.io-index" 171 | checksum = "ae743338b92ff9146ce83992f766a31066a91a8c84a45e0e9f21e7cf6de6d346" 172 | 173 | [[package]] 174 | name = "libm" 175 | version = "0.2.8" 176 | source = "registry+https://github.com/rust-lang/crates.io-index" 177 | checksum = "4ec2a862134d2a7d32d7983ddcdd1c4923530833c9f2ea1a44fc5fa473989058" 178 | 179 | [[package]] 180 | name = "linked-hash-map" 181 | version = "0.5.6" 182 | source = "registry+https://github.com/rust-lang/crates.io-index" 183 | checksum = "0717cef1bc8b636c6e1c1bbdefc09e6322da8a9321966e8928ef80d20f7f770f" 184 | 185 | [[package]] 186 | name = "linux-raw-sys" 187 | version = "0.4.14" 188 | source = 
"registry+https://github.com/rust-lang/crates.io-index" 189 | checksum = "78b3ae25bc7c8c38cec158d1f2757ee79e9b3740fbc7ccf0e59e4b08d793fa89" 190 | 191 | [[package]] 192 | name = "num-traits" 193 | version = "0.2.19" 194 | source = "registry+https://github.com/rust-lang/crates.io-index" 195 | checksum = "071dfc062690e90b734c0b2273ce72ad0ffa95f0c74596bc250dcfd960262841" 196 | dependencies = [ 197 | "autocfg", 198 | "libm", 199 | ] 200 | 201 | [[package]] 202 | name = "once_cell" 203 | version = "1.19.0" 204 | source = "registry+https://github.com/rust-lang/crates.io-index" 205 | checksum = "3fdb12b2476b595f9358c5161aa467c2438859caa136dec86c26fdd2efe17b92" 206 | 207 | [[package]] 208 | name = "ppv-lite86" 209 | version = "0.2.17" 210 | source = "registry+https://github.com/rust-lang/crates.io-index" 211 | checksum = "5b40af805b3121feab8a3c29f04d8ad262fa8e0561883e7653e024ae4479e6de" 212 | 213 | [[package]] 214 | name = "proc-macro2" 215 | version = "1.0.93" 216 | source = "registry+https://github.com/rust-lang/crates.io-index" 217 | checksum = "60946a68e5f9d28b0dc1c21bb8a97ee7d018a8b322fa57838ba31cc878e22d99" 218 | dependencies = [ 219 | "unicode-ident", 220 | ] 221 | 222 | [[package]] 223 | name = "proptest" 224 | version = "1.4.0" 225 | source = "registry+https://github.com/rust-lang/crates.io-index" 226 | checksum = "31b476131c3c86cb68032fdc5cb6d5a1045e3e42d96b69fa599fd77701e1f5bf" 227 | dependencies = [ 228 | "bit-set", 229 | "bit-vec", 230 | "bitflags", 231 | "lazy_static", 232 | "num-traits", 233 | "rand", 234 | "rand_chacha", 235 | "rand_xorshift", 236 | "regex-syntax", 237 | "rusty-fork", 238 | "tempfile", 239 | "unarray", 240 | ] 241 | 242 | [[package]] 243 | name = "psm" 244 | version = "0.1.21" 245 | source = "registry+https://github.com/rust-lang/crates.io-index" 246 | checksum = "5787f7cda34e3033a72192c018bc5883100330f362ef279a8cbccfce8bb4e874" 247 | dependencies = [ 248 | "cc", 249 | ] 250 | 251 | [[package]] 252 | name = "quick-error" 253 | version = 
"1.2.3" 254 | source = "registry+https://github.com/rust-lang/crates.io-index" 255 | checksum = "a1d01941d82fa2ab50be1e79e6714289dd7cde78eba4c074bc5a4374f650dfe0" 256 | 257 | [[package]] 258 | name = "quote" 259 | version = "1.0.38" 260 | source = "registry+https://github.com/rust-lang/crates.io-index" 261 | checksum = "0e4dccaaaf89514f546c693ddc140f729f958c247918a13380cccc6078391acc" 262 | dependencies = [ 263 | "proc-macro2", 264 | ] 265 | 266 | [[package]] 267 | name = "rand" 268 | version = "0.8.5" 269 | source = "registry+https://github.com/rust-lang/crates.io-index" 270 | checksum = "34af8d1a0e25924bc5b7c43c079c942339d8f0a8b57c39049bef581b46327404" 271 | dependencies = [ 272 | "libc", 273 | "rand_chacha", 274 | "rand_core", 275 | ] 276 | 277 | [[package]] 278 | name = "rand_chacha" 279 | version = "0.3.1" 280 | source = "registry+https://github.com/rust-lang/crates.io-index" 281 | checksum = "e6c10a63a0fa32252be49d21e7709d4d4baf8d231c2dbce1eaa8141b9b127d88" 282 | dependencies = [ 283 | "ppv-lite86", 284 | "rand_core", 285 | ] 286 | 287 | [[package]] 288 | name = "rand_core" 289 | version = "0.6.4" 290 | source = "registry+https://github.com/rust-lang/crates.io-index" 291 | checksum = "ec0be4795e2f6a28069bec0b5ff3e2ac9bafc99e6a9a7dc3547996c5c816922c" 292 | dependencies = [ 293 | "getrandom", 294 | ] 295 | 296 | [[package]] 297 | name = "rand_xorshift" 298 | version = "0.3.0" 299 | source = "registry+https://github.com/rust-lang/crates.io-index" 300 | checksum = "d25bf25ec5ae4a3f1b92f929810509a2f53d7dca2f50b794ff57e3face536c8f" 301 | dependencies = [ 302 | "rand_core", 303 | ] 304 | 305 | [[package]] 306 | name = "regex-syntax" 307 | version = "0.8.4" 308 | source = "registry+https://github.com/rust-lang/crates.io-index" 309 | checksum = "7a66a03ae7c801facd77a29370b4faec201768915ac14a721ba36f20bc9c209b" 310 | 311 | [[package]] 312 | name = "rust-norg" 313 | version = "0.1.0" 314 | dependencies = [ 315 | "chumsky", 316 | "insta", 317 | "itertools", 318 | 
"proptest", 319 | "serde", 320 | "textwrap", 321 | "unicode_categories", 322 | ] 323 | 324 | [[package]] 325 | name = "rustix" 326 | version = "0.38.34" 327 | source = "registry+https://github.com/rust-lang/crates.io-index" 328 | checksum = "70dc5ec042f7a43c4a73241207cecc9873a06d45debb38b329f8541d85c2730f" 329 | dependencies = [ 330 | "bitflags", 331 | "errno", 332 | "libc", 333 | "linux-raw-sys", 334 | "windows-sys", 335 | ] 336 | 337 | [[package]] 338 | name = "rusty-fork" 339 | version = "0.3.0" 340 | source = "registry+https://github.com/rust-lang/crates.io-index" 341 | checksum = "cb3dcc6e454c328bb824492db107ab7c0ae8fcffe4ad210136ef014458c1bc4f" 342 | dependencies = [ 343 | "fnv", 344 | "quick-error", 345 | "tempfile", 346 | "wait-timeout", 347 | ] 348 | 349 | [[package]] 350 | name = "serde" 351 | version = "1.0.203" 352 | source = "registry+https://github.com/rust-lang/crates.io-index" 353 | checksum = "7253ab4de971e72fb7be983802300c30b5a7f0c2e56fab8abfc6a214307c0094" 354 | dependencies = [ 355 | "serde_derive", 356 | ] 357 | 358 | [[package]] 359 | name = "serde_derive" 360 | version = "1.0.203" 361 | source = "registry+https://github.com/rust-lang/crates.io-index" 362 | checksum = "500cbc0ebeb6f46627f50f3f5811ccf6bf00643be300b4c3eabc0ef55dc5b5ba" 363 | dependencies = [ 364 | "proc-macro2", 365 | "quote", 366 | "syn", 367 | ] 368 | 369 | [[package]] 370 | name = "similar" 371 | version = "2.5.0" 372 | source = "registry+https://github.com/rust-lang/crates.io-index" 373 | checksum = "fa42c91313f1d05da9b26f267f931cf178d4aba455b4c4622dd7355eb80c6640" 374 | 375 | [[package]] 376 | name = "smawk" 377 | version = "0.3.2" 378 | source = "registry+https://github.com/rust-lang/crates.io-index" 379 | checksum = "b7c388c1b5e93756d0c740965c41e8822f866621d41acbdf6336a6a168f8840c" 380 | 381 | [[package]] 382 | name = "stacker" 383 | version = "0.1.15" 384 | source = "registry+https://github.com/rust-lang/crates.io-index" 385 | checksum = 
"c886bd4480155fd3ef527d45e9ac8dd7118a898a46530b7b94c3e21866259fce" 386 | dependencies = [ 387 | "cc", 388 | "cfg-if", 389 | "libc", 390 | "psm", 391 | "winapi", 392 | ] 393 | 394 | [[package]] 395 | name = "syn" 396 | version = "2.0.98" 397 | source = "registry+https://github.com/rust-lang/crates.io-index" 398 | checksum = "36147f1a48ae0ec2b5b3bc5b537d267457555a10dc06f3dbc8cb11ba3006d3b1" 399 | dependencies = [ 400 | "proc-macro2", 401 | "quote", 402 | "unicode-ident", 403 | ] 404 | 405 | [[package]] 406 | name = "tempfile" 407 | version = "3.10.1" 408 | source = "registry+https://github.com/rust-lang/crates.io-index" 409 | checksum = "85b77fafb263dd9d05cbeac119526425676db3784113aa9295c88498cbf8bff1" 410 | dependencies = [ 411 | "cfg-if", 412 | "fastrand", 413 | "rustix", 414 | "windows-sys", 415 | ] 416 | 417 | [[package]] 418 | name = "textwrap" 419 | version = "0.16.1" 420 | source = "registry+https://github.com/rust-lang/crates.io-index" 421 | checksum = "23d434d3f8967a09480fb04132ebe0a3e088c173e6d0ee7897abbdf4eab0f8b9" 422 | dependencies = [ 423 | "smawk", 424 | "unicode-linebreak", 425 | "unicode-width", 426 | ] 427 | 428 | [[package]] 429 | name = "unarray" 430 | version = "0.1.4" 431 | source = "registry+https://github.com/rust-lang/crates.io-index" 432 | checksum = "eaea85b334db583fe3274d12b4cd1880032beab409c0d774be044d4480ab9a94" 433 | 434 | [[package]] 435 | name = "unicode-ident" 436 | version = "1.0.12" 437 | source = "registry+https://github.com/rust-lang/crates.io-index" 438 | checksum = "3354b9ac3fae1ff6755cb6db53683adb661634f67557942dea4facebec0fee4b" 439 | 440 | [[package]] 441 | name = "unicode-linebreak" 442 | version = "0.1.5" 443 | source = "registry+https://github.com/rust-lang/crates.io-index" 444 | checksum = "3b09c83c3c29d37506a3e260c08c03743a6bb66a9cd432c6934ab501a190571f" 445 | 446 | [[package]] 447 | name = "unicode-width" 448 | version = "0.1.14" 449 | source = "registry+https://github.com/rust-lang/crates.io-index" 450 | checksum = 
"7dd6e30e90baa6f72411720665d41d89b9a3d039dc45b8faea1ddd07f617f6af" 451 | 452 | [[package]] 453 | name = "unicode_categories" 454 | version = "0.1.1" 455 | source = "registry+https://github.com/rust-lang/crates.io-index" 456 | checksum = "39ec24b3121d976906ece63c9daad25b85969647682eee313cb5779fdd69e14e" 457 | 458 | [[package]] 459 | name = "version_check" 460 | version = "0.9.4" 461 | source = "registry+https://github.com/rust-lang/crates.io-index" 462 | checksum = "49874b5167b65d7193b8aba1567f5c7d93d001cafc34600cee003eda787e483f" 463 | 464 | [[package]] 465 | name = "wait-timeout" 466 | version = "0.2.0" 467 | source = "registry+https://github.com/rust-lang/crates.io-index" 468 | checksum = "9f200f5b12eb75f8c1ed65abd4b2db8a6e1b138a20de009dacee265a2498f3f6" 469 | dependencies = [ 470 | "libc", 471 | ] 472 | 473 | [[package]] 474 | name = "wasi" 475 | version = "0.11.0+wasi-snapshot-preview1" 476 | source = "registry+https://github.com/rust-lang/crates.io-index" 477 | checksum = "9c8d87e72b64a3b4db28d11ce29237c246188f4f51057d65a7eab63b7987e423" 478 | 479 | [[package]] 480 | name = "winapi" 481 | version = "0.3.9" 482 | source = "registry+https://github.com/rust-lang/crates.io-index" 483 | checksum = "5c839a674fcd7a98952e593242ea400abe93992746761e38641405d28b00f419" 484 | dependencies = [ 485 | "winapi-i686-pc-windows-gnu", 486 | "winapi-x86_64-pc-windows-gnu", 487 | ] 488 | 489 | [[package]] 490 | name = "winapi-i686-pc-windows-gnu" 491 | version = "0.4.0" 492 | source = "registry+https://github.com/rust-lang/crates.io-index" 493 | checksum = "ac3b87c63620426dd9b991e5ce0329eff545bccbbb34f3be09ff6fb6ab51b7b6" 494 | 495 | [[package]] 496 | name = "winapi-x86_64-pc-windows-gnu" 497 | version = "0.4.0" 498 | source = "registry+https://github.com/rust-lang/crates.io-index" 499 | checksum = "712e227841d057c1ee1cd2fb22fa7e5a5461ae8e48fa2ca79ec42cfc1931183f" 500 | 501 | [[package]] 502 | name = "windows-sys" 503 | version = "0.52.0" 504 | source = 
"registry+https://github.com/rust-lang/crates.io-index" 505 | checksum = "282be5f36a8ce781fad8c8ae18fa3f9beff57ec1b52cb3de0789201425d9a33d" 506 | dependencies = [ 507 | "windows-targets", 508 | ] 509 | 510 | [[package]] 511 | name = "windows-targets" 512 | version = "0.52.5" 513 | source = "registry+https://github.com/rust-lang/crates.io-index" 514 | checksum = "6f0713a46559409d202e70e28227288446bf7841d3211583a4b53e3f6d96e7eb" 515 | dependencies = [ 516 | "windows_aarch64_gnullvm", 517 | "windows_aarch64_msvc", 518 | "windows_i686_gnu", 519 | "windows_i686_gnullvm", 520 | "windows_i686_msvc", 521 | "windows_x86_64_gnu", 522 | "windows_x86_64_gnullvm", 523 | "windows_x86_64_msvc", 524 | ] 525 | 526 | [[package]] 527 | name = "windows_aarch64_gnullvm" 528 | version = "0.52.5" 529 | source = "registry+https://github.com/rust-lang/crates.io-index" 530 | checksum = "7088eed71e8b8dda258ecc8bac5fb1153c5cffaf2578fc8ff5d61e23578d3263" 531 | 532 | [[package]] 533 | name = "windows_aarch64_msvc" 534 | version = "0.52.5" 535 | source = "registry+https://github.com/rust-lang/crates.io-index" 536 | checksum = "9985fd1504e250c615ca5f281c3f7a6da76213ebd5ccc9561496568a2752afb6" 537 | 538 | [[package]] 539 | name = "windows_i686_gnu" 540 | version = "0.52.5" 541 | source = "registry+https://github.com/rust-lang/crates.io-index" 542 | checksum = "88ba073cf16d5372720ec942a8ccbf61626074c6d4dd2e745299726ce8b89670" 543 | 544 | [[package]] 545 | name = "windows_i686_gnullvm" 546 | version = "0.52.5" 547 | source = "registry+https://github.com/rust-lang/crates.io-index" 548 | checksum = "87f4261229030a858f36b459e748ae97545d6f1ec60e5e0d6a3d32e0dc232ee9" 549 | 550 | [[package]] 551 | name = "windows_i686_msvc" 552 | version = "0.52.5" 553 | source = "registry+https://github.com/rust-lang/crates.io-index" 554 | checksum = "db3c2bf3d13d5b658be73463284eaf12830ac9a26a90c717b7f771dfe97487bf" 555 | 556 | [[package]] 557 | name = "windows_x86_64_gnu" 558 | version = "0.52.5" 559 | source = 
"registry+https://github.com/rust-lang/crates.io-index" 560 | checksum = "4e4246f76bdeff09eb48875a0fd3e2af6aada79d409d33011886d3e1581517d9" 561 | 562 | [[package]] 563 | name = "windows_x86_64_gnullvm" 564 | version = "0.52.5" 565 | source = "registry+https://github.com/rust-lang/crates.io-index" 566 | checksum = "852298e482cd67c356ddd9570386e2862b5673c85bd5f88df9ab6802b334c596" 567 | 568 | [[package]] 569 | name = "windows_x86_64_msvc" 570 | version = "0.52.5" 571 | source = "registry+https://github.com/rust-lang/crates.io-index" 572 | checksum = "bec47e5bfd1bff0eeaf6d8b485cc1074891a197ab4225d504cb7a1ab88b02bf0" 573 | 574 | [[package]] 575 | name = "zerocopy" 576 | version = "0.7.34" 577 | source = "registry+https://github.com/rust-lang/crates.io-index" 578 | checksum = "ae87e3fcd617500e5d106f0380cf7b77f3c6092aae37191433159dda23cfb087" 579 | dependencies = [ 580 | "zerocopy-derive", 581 | ] 582 | 583 | [[package]] 584 | name = "zerocopy-derive" 585 | version = "0.7.34" 586 | source = "registry+https://github.com/rust-lang/crates.io-index" 587 | checksum = "15e934569e47891f7d9411f1a451d947a60e000ab3bd24fbb970f000387d1b3b" 588 | dependencies = [ 589 | "proc-macro2", 590 | "quote", 591 | "syn", 592 | ] 593 | -------------------------------------------------------------------------------- /Cargo.toml: -------------------------------------------------------------------------------- 1 | [package] 2 | name = "rust-norg" 3 | version = "0.1.0" 4 | edition = "2021" 5 | 6 | [dependencies] 7 | chumsky = "0.9.3" 8 | itertools = "0.13.0" 9 | serde = { version = "1.0.203", features = ["derive"] } 10 | textwrap = "0.16.1" 11 | unicode_categories = "0.1.1" 12 | 13 | [dev-dependencies] 14 | insta = { version = "1.39.0", features = ["yaml"] } 15 | proptest = "1.4.0" 16 | -------------------------------------------------------------------------------- /proptest-regressions/lib.txt: -------------------------------------------------------------------------------- 1 | # Seeds for 
failure cases proptest has generated in the past. It is 2 | # automatically read and these particular cases re-run before any 3 | # novel cases are generated. 4 | # 5 | # It is recommended to check this file in to source control so that 6 | # everyone who runs the test benefits from these saved cases. 7 | cc fb9b5df4fe46fe331cc3aa40bba6501c1c603084688fd02dda6d1c73106c1324 # shrinks to tag_name = "A", parameter = "A", multi_parameter = "\u{b}" 8 | cc 28afae9872324ba0632a8023219e32939580363ce8b99752dc19fae0ac5b63d1 # shrinks to paragraph_content = " " 9 | cc 5b31c9987c98fc0e4faa50b782e5952e0948d2c2a60dd29081c8f54c75b4b52c # shrinks to tag_name = "ø", parameter = "a", multi_parameter = " " 10 | cc 67cb989806791583bc835dd766e8f7ef294a1a634f6857cd8e6dd16a49a09246 # shrinks to tag_name = "\u{1d165}", parameter = "<*\u{2060}>", multi_parameter = "a", content = "" 11 | cc cacc392d5a052fbd56e86b2ae08c6380a7a191847001424d938930b94e0f449f # shrinks to tag_type = "@", tag_name = "a", parameter = "a", multi_parameter = "<*0>", content = "\u{2060}" 12 | cc 15209ad4b4a04639c42c6f2d9629decdcd554a46a3638a63a8a04f9822a3d33c # shrinks to tag_name = "0", parameter = "a", multi_parameter = "\\", content = "\\" 13 | cc 98c6844a3274a61cba1c4be4bf931c3c136b24dbe9c75187f53e9e6a45a1508d # shrinks to tag_type = "@", tag_name = "𖩠", parameter = "!", multi_parameter = "\t", content = "a\\" 14 | -------------------------------------------------------------------------------- /readme.md: -------------------------------------------------------------------------------- 1 | ## (WIP) Robust Rust Parser for Norg 2 | 3 | This project serves one purpose: parse norg as *best* as possible. The `tree-sitter` norg parser is designed for speed at the cost of error recovery and accuracy. 4 | 5 | `rust-norg` uses `chumsky` under the hood and trades off some speed for robust parsing and errors. 
The parser is built to recover from virtually any error thanks 6 | to us splitting the parsing into three distinct stages, something that `tree-sitter` is incapable of doing. 7 | 8 | The parser currently supports all block-level syntax apart from detached modifier extensions. Inline markup is not yet supported. 9 | 10 | All other syntax is properly parsed with spec-defined edge cases. 11 | 12 | ## Future of this Project 13 | 14 | Currently the parser is being developed as a proof-of-concept. Once it's complete, I'd like to extract this into a library for others to use. A proper test suite is also a must-have. 15 | -------------------------------------------------------------------------------- /src/error.rs: -------------------------------------------------------------------------------- 1 | use chumsky::error::Simple; 2 | 3 | use crate::{stage_1::NorgToken, stage_2::NorgBlock, NorgASTFlat}; 4 | 5 | /// Represents errors that can occur during the parsing process across different stages. 
6 | #[derive(Debug)] 7 | pub enum NorgParseError { 8 | Stage1(Vec>), 9 | Stage2(Vec>), 10 | Stage3(Vec>), 11 | Stage4(Vec>), 12 | Meta(Simple), 13 | } 14 | 15 | impl From>> for NorgParseError { 16 | fn from(error: Vec>) -> Self { 17 | NorgParseError::Stage1(error) 18 | } 19 | } 20 | 21 | impl From>> for NorgParseError { 22 | fn from(error: Vec>) -> Self { 23 | NorgParseError::Stage2(error) 24 | } 25 | } 26 | 27 | impl From>> for NorgParseError { 28 | fn from(error: Vec>) -> Self { 29 | NorgParseError::Stage3(error) 30 | } 31 | } 32 | 33 | impl From>> for NorgParseError { 34 | fn from(error: Vec>) -> Self { 35 | NorgParseError::Stage4(error) 36 | } 37 | } 38 | 39 | impl From> for NorgParseError { 40 | fn from(error: Simple) -> Self { 41 | NorgParseError::Meta(error) 42 | } 43 | } 44 | -------------------------------------------------------------------------------- /src/lib.rs: -------------------------------------------------------------------------------- 1 | use chumsky::Parser as _; 2 | use error::NorgParseError; 3 | 4 | pub use crate::stage_1::stage_1; 5 | pub use crate::stage_2::stage_2; 6 | use crate::stage_4::stage_4; 7 | 8 | pub use crate::stage_2::ParagraphSegmentToken; 9 | pub use crate::stage_3::*; 10 | pub use crate::stage_4::NorgAST; 11 | 12 | mod error; 13 | pub mod metadata; 14 | mod stage_1; 15 | mod stage_2; 16 | mod stage_3; 17 | mod stage_4; 18 | 19 | /// Parses the given input string through multiple stages to produce a flattened abstract syntax tree (AST). 20 | /// 21 | /// # Arguments 22 | /// 23 | /// * `input` - A string slice that holds the input to be parsed. 24 | /// 25 | /// # Returns 26 | /// 27 | /// * `Ok(Vec)` if parsing is successful. 28 | /// * `Err(NorgParseError)` if any stage of parsing fails. 29 | pub fn parse(input: &str) -> Result, NorgParseError> { 30 | Ok(stage_3().parse(stage_2().parse(stage_1().parse(input)?)?)?) 
31 | } 32 | 33 | pub fn parse_tree(input: &str) -> Result, NorgParseError> { 34 | Ok(stage_4( 35 | stage_3().parse(stage_2().parse(stage_1().parse(input)?)?)?, 36 | )) 37 | } 38 | 39 | #[cfg(test)] 40 | mod tests { 41 | use insta::assert_yaml_snapshot; 42 | use itertools::Itertools; 43 | use proptest::{prop_oneof, proptest}; 44 | 45 | use crate::{parse, parse_tree}; 46 | 47 | const TAG_NAME_REGEX: &str = r"[\w_\-\.\d]+"; 48 | const TAG_PARAMETER_REGEX: &str = r"[^\s]+"; 49 | const TAG_MULTI_PARAMETER_REGEX: &str = r"[^\n\r]+"; 50 | 51 | const PARAGRAPH_REGEX: &str = r"[^[:punct:]\s][^\n\r]*"; 52 | 53 | #[test] 54 | fn headings() { 55 | let examples: Vec<_> = [ 56 | "* Heading", 57 | "********* Heading", 58 | " 59 | * Heading 60 | content. 61 | ", 62 | " 63 | ******* Heading 64 | ", 65 | " 66 | * Heading 67 | * Another heading 68 | ", 69 | " 70 | * Heading 71 | ** Subheading 72 | * Back to regular heading 73 | ", 74 | " 75 | * Heading 76 | sneaky content. 77 | ** Subheading 78 | more sneaky content inside. 
79 | * Back to regular heading 80 | ", 81 | ] 82 | .into_iter() 83 | .map(|example| example.to_string() + "\n") 84 | .map(|str| parse(&str)) 85 | .try_collect() 86 | .unwrap(); 87 | 88 | assert_yaml_snapshot!(examples); 89 | } 90 | 91 | #[test] 92 | fn headings_tree() { 93 | let headings_tree_examples: Vec<_> = [ 94 | " 95 | * Heading 96 | ** Another heading 97 | ", 98 | " 99 | * Heading 100 | ** Subheading 101 | content 102 | * Back to regular heading 103 | ", 104 | ] 105 | .into_iter() 106 | .map(|example| example.to_string() + "\n") 107 | .map(|str| parse_tree(&str)) 108 | .try_collect() 109 | .unwrap(); 110 | assert_yaml_snapshot!(headings_tree_examples); 111 | } 112 | 113 | #[test] 114 | fn delimiting_mods_tree() { 115 | let examples: Vec<_> = [ 116 | "* One 117 | content 118 | --- 119 | dedented", 120 | "* One 121 | ** Two 122 | === 123 | none", 124 | "** Two 125 | two 126 | ___ 127 | two", 128 | "- list 129 | ___ 130 | no list", 131 | "* One 132 | one 133 | ** Two 134 | two 135 | *** Three 136 | three 137 | --- 138 | two 139 | --- 140 | one 141 | --- 142 | none", 143 | ] 144 | .into_iter() 145 | .map(|example| example.to_string() + "\n") 146 | .map(|str| parse_tree(&str)) 147 | .try_collect() 148 | .unwrap(); 149 | assert_yaml_snapshot!(examples); 150 | } 151 | 152 | #[test] 153 | fn lists_tree() { 154 | let examples: Vec<_> = [ 155 | "- base", 156 | "- one 157 | -- two", 158 | "- one 159 | -- two 160 | with content 161 | -- two (2) 162 | --- three 163 | - one", 164 | "-- two 165 | - one", 166 | ] 167 | .into_iter() 168 | .map(|example| example.to_string() + "\n") 169 | .map(|str| parse_tree(&str)) 170 | .try_collect() 171 | .unwrap(); 172 | assert_yaml_snapshot!(examples); 173 | } 174 | 175 | #[test] 176 | fn lists() { 177 | let examples: Vec<_> = [ 178 | "- Test list", 179 | "---- Test list", 180 | " 181 | - Test list 182 | - Test list 183 | -- Test list 184 | -- Test list 185 | - Test list 186 | --- Test list 187 | ", 188 | "---not list", 189 | // "- - a 
list item", 190 | "--> not a list", 191 | ] 192 | .into_iter() 193 | .map(|example| example.to_string() + "\n") 194 | .map(|str| parse(&str)) 195 | .try_collect() 196 | .unwrap(); 197 | 198 | assert_yaml_snapshot!(examples); 199 | } 200 | 201 | #[test] 202 | fn modifier_extensions() { 203 | let examples: Vec<_> = [ 204 | "- ( ) undone", 205 | "* (x) done", 206 | "- (=) hold", 207 | "* (_) canceled", 208 | "- (-) pending", 209 | "* (!) urgent", 210 | "- (+) recurring", 211 | "~ (+ Friday) recurring with date", 212 | "** ( |# Low|< Feb 1) undone, low, & before Feb", 213 | "** (# Two Words|x| |!|+|_|+ 5th|=|-|< Feb 1|> 2025|@ Jan 1 2025) All of them" 214 | ].into_iter() 215 | .map(|example| example.to_string() + "\n") 216 | .map(|str| parse(&str)) 217 | .try_collect() 218 | .unwrap(); 219 | 220 | assert_yaml_snapshot!(examples); 221 | } 222 | 223 | #[test] 224 | fn lists_regressions() { 225 | [ 226 | "- - a list item", 227 | "---- - a list item", 228 | "---- > a list item", 229 | ] 230 | .into_iter() 231 | .map(|example| example.to_string() + "\n") 232 | .for_each(|str| { 233 | parse(&str).unwrap_err(); 234 | }); 235 | } 236 | 237 | #[test] 238 | fn ordered_lists() { 239 | let examples: Vec<_> = [ 240 | "~ Test list", 241 | "~~~~ Test list", 242 | " 243 | ~ Test list 244 | ~ Test list 245 | ~~ Test list 246 | ~~ Test list 247 | ~ Test list 248 | ~~~ Test list 249 | ", 250 | "~~~not list", 251 | "~~> not a list", 252 | ] 253 | .into_iter() 254 | .map(|example| example.to_string() + "\n") 255 | .map(|str| parse(&str)) 256 | .try_collect() 257 | .unwrap(); 258 | 259 | assert_yaml_snapshot!(examples); 260 | } 261 | 262 | #[test] 263 | fn ordered_lists_regressions() { 264 | [ 265 | "~ ~ a list item", 266 | "~~~~ - a list item", 267 | "~~~~ > a list item", 268 | ] 269 | .into_iter() 270 | .map(|example| example.to_string() + "\n") 271 | .for_each(|str| { 272 | parse(&str).unwrap_err(); 273 | }); 274 | } 275 | 276 | #[test] 277 | fn quotes() { 278 | let examples: Vec<_> = [ 
279 | "> Test quote", 280 | ">>>> Test quote", 281 | " 282 | > Test quote 283 | > Test quote 284 | >> Test quote 285 | >> Test quote 286 | > Test quote 287 | >>> Test quote 288 | ", 289 | ">>>not quote", 290 | // "> > a quote item", 291 | ">>- not a quote", 292 | ] 293 | .into_iter() 294 | .map(|example| example.to_string() + "\n") 295 | .map(|str| parse(&str)) 296 | .try_collect() 297 | .unwrap(); 298 | 299 | assert_yaml_snapshot!(examples); 300 | } 301 | 302 | #[test] 303 | fn quotes_regressions() { 304 | [ 305 | "> > a list item", 306 | ">>>> - a list item", 307 | ">>>> ~ a list item", 308 | ] 309 | .into_iter() 310 | .map(|example| example.to_string() + "\n") 311 | .for_each(|str| { 312 | parse(&str).unwrap_err(); 313 | }); 314 | } 315 | 316 | #[test] 317 | fn definitions() { 318 | let examples: Vec<_> = [ 319 | "$ Term 320 | Definition", 321 | "$$ Term 322 | Long definition 323 | $$", 324 | ] 325 | .into_iter() 326 | .map(|example| example.to_string() + "\n") 327 | .map(|str| parse(&str)) 328 | .try_collect() 329 | .unwrap(); 330 | 331 | assert_yaml_snapshot!(examples); 332 | } 333 | 334 | #[test] 335 | fn definitions_regressions() { 336 | [ 337 | "$ Term Definition", 338 | "$$ Term 339 | Long definition $$", 340 | "$$ Term 341 | Long definition 342 | $$text", 343 | "$$ Term 344 | Long definition 345 | $$ text", 346 | ] 347 | .into_iter() 348 | .map(|example| example.to_string() + "\n") 349 | .for_each(|str| { 350 | parse(&str).unwrap_err(); 351 | }); 352 | } 353 | 354 | #[test] 355 | fn footnotes() { 356 | let examples: Vec<_> = [ 357 | "^ Title 358 | Content", 359 | "^^ Title 360 | Long content 361 | ^^", 362 | ] 363 | .into_iter() 364 | .map(|example| example.to_string() + "\n") 365 | .map(|str| parse(&str)) 366 | .try_collect() 367 | .unwrap(); 368 | 369 | assert_yaml_snapshot!(examples); 370 | } 371 | 372 | #[test] 373 | fn footnotes_regressions() { 374 | [ 375 | "^ Term Definition", 376 | "^^ Term 377 | Long definition ^^", 378 | "^^ Term 379 | Long 
definition 380 | ^^text", 381 | "^^ Term 382 | Long definition 383 | ^^ text", 384 | ] 385 | .into_iter() 386 | .map(|example| example.to_string() + "\n") 387 | .for_each(|str| { 388 | parse(&str).unwrap_err(); 389 | }); 390 | } 391 | 392 | #[test] 393 | fn tables() { 394 | let examples: Vec<_> = [ 395 | ": A1 396 | Cell content", 397 | ":: A1 398 | Long cell content. 399 | ::", 400 | ] 401 | .into_iter() 402 | .map(|example| example.to_string() + "\n") 403 | .map(|str| parse(&str)) 404 | .try_collect() 405 | .unwrap(); 406 | 407 | assert_yaml_snapshot!(examples); 408 | } 409 | 410 | #[test] 411 | fn tables_regressions() { 412 | [ 413 | ": Term Definition", 414 | ":: Term 415 | Long definition ::", 416 | ":: Term 417 | Long definition 418 | ::text", 419 | ":: Term 420 | Long definition 421 | :: text", 422 | ] 423 | .into_iter() 424 | .map(|example| example.to_string() + "\n") 425 | .for_each(|str| { 426 | parse(&str).unwrap_err(); 427 | }); 428 | } 429 | 430 | #[test] 431 | fn infirm_tags() { 432 | let examples: Vec<_> = [ 433 | ".tag", 434 | ".tag-name_with-complexchars", 435 | ".tag-name_ parameter", 436 | ".tag-name_ one\\ large\\ parameter", 437 | ".tag-name_ one\\ large\\ parameter &^@! third parameter", 438 | ".tag.name.image https://github.com/super-special/repo.git?text=hello&other_text=bye", 439 | ] 440 | .into_iter() 441 | .map(|example| example.to_string() + "\n") 442 | .map(|str| parse(&str)) 443 | .try_collect() 444 | .unwrap(); 445 | 446 | assert_yaml_snapshot!(examples); 447 | } 448 | 449 | proptest! { 450 | #[test] 451 | fn infirm_tags_proptests(tag_name in TAG_NAME_REGEX, parameter in TAG_PARAMETER_REGEX, multi_parameter in TAG_MULTI_PARAMETER_REGEX) { 452 | let tag = format!(".{} {} {}\n", tag_name, parameter, multi_parameter); 453 | 454 | // TODO: Ensure that the number of parameters parsed is correct? 
455 | parse(&tag).unwrap(); 456 | } 457 | } 458 | 459 | #[test] 460 | fn carryover_tags() { 461 | let examples: Vec<_> = [ 462 | "+tag 463 | paragraph", 464 | "+tag-name_with-complexchars 465 | paragraph", 466 | "+tag-name_ parameter 467 | paragraph", 468 | "+tag-name_ one\\ large\\ parameter 469 | paragraph", 470 | "+tag-name_ one\\ large\\ parameter &^@! third parameter 471 | paragraph", 472 | "+tag.name.image https://github.com/super-special/repo.git?text=hello&other_text=bye 473 | paragraph", 474 | "#tag 475 | paragraph", 476 | "#tag-name_with-complexchars 477 | paragraph", 478 | "#tag-name_ parameter 479 | paragraph", 480 | "#tag-name_ one\\ large\\ parameter 481 | paragraph", 482 | "#tag-name_ one\\ large\\ parameter &^@! third parameter 483 | paragraph", 484 | "#tag.name.image https://github.com/super-special/repo.git?text=hello&other_text=bye 485 | paragraph", 486 | ] 487 | .into_iter() 488 | .map(|example| example.to_string() + "\n") 489 | .map(|str| parse(&str)) 490 | .try_collect() 491 | .unwrap(); 492 | 493 | assert_yaml_snapshot!(examples); 494 | } 495 | 496 | #[test] 497 | fn carryover_tags_tree() { 498 | let examples: Vec<_> = [ 499 | " 500 | #id 123 501 | * tree 502 | ** nested 503 | ", 504 | " 505 | * tree 506 | #id there 507 | ** nested 508 | --- 509 | part of tree 510 | ", 511 | " 512 | #name main 513 | -- two 514 | ---- four 515 | #id 3 516 | --- three 517 | ", 518 | " 519 | #comment 520 | multi-line 521 | comments 522 | --- 523 | out 524 | ", 525 | " 526 | #id 123 527 | #comment 528 | comment with id 529 | ", 530 | ] 531 | .into_iter() 532 | .map(|example| example.to_string() + "\n") 533 | .map(|str| parse_tree(&str)) 534 | .try_collect() 535 | .unwrap(); 536 | assert_yaml_snapshot!(examples); 537 | } 538 | 539 | proptest! 
{ 540 | #[test] 541 | fn carryover_tags_proptests(tag_name in TAG_NAME_REGEX, parameter in TAG_PARAMETER_REGEX, multi_parameter in TAG_MULTI_PARAMETER_REGEX) { 542 | let content = format!("#{} {} {}\nhello world!", tag_name, parameter, multi_parameter); 543 | 544 | parse(&content).unwrap(); 545 | } 546 | } 547 | 548 | #[test] 549 | fn ranged_verbatim_tags() { 550 | let examples: Vec<_> = [ 551 | r#"@code 552 | print("Hello world!") 553 | @end"#, 554 | r#"@code.some-text.here lua\ language second-parameter 555 | print("Hello world!") 556 | @end"#, 557 | r#"@some-complex_tag_ first-parameter #&*(&$!) third-parameter 558 | 559 | function hello() 560 | print("Hello World") 561 | end 562 | 563 | hello() 564 | @end"#, 565 | ] 566 | .into_iter() 567 | .map(|example| example.to_string() + "\n") 568 | .map(|str| parse(&str)) 569 | .try_collect() 570 | .unwrap(); 571 | 572 | assert_yaml_snapshot!(examples); 573 | } 574 | 575 | proptest! { 576 | #[test] 577 | // NOTE: `.*` may at some point generate an `@end` purely by chance. There is a basic 578 | // check against this, but this should probably be done as a filter in proptest. 579 | fn ranged_verbatim_tags_proptests(tag_name in TAG_NAME_REGEX, parameter in TAG_PARAMETER_REGEX, multi_parameter in TAG_MULTI_PARAMETER_REGEX, content in ".*") { 580 | if content.contains("@end") { 581 | return Ok(()); 582 | } 583 | 584 | let content = format!("@{} {} {}\n{}\n@end", tag_name, parameter, multi_parameter, content); 585 | 586 | parse(&content).unwrap(); 587 | } 588 | } 589 | 590 | #[test] 591 | fn ranged_tags() { 592 | let examples: Vec<_> = [ 593 | r#"|example 594 | Hello world! 595 | |end"#, 596 | r#"|example.some-text.here one\ parameter second-parameter 597 | #carryover 598 | text within 599 | |end"#, 600 | r#"|some-complex_tag_ first-parameter #&*(&$!) third-parameter 601 | this is some text within 602 | |end"#, 603 | r#"|example 604 | * Hello world! 605 | |end"#, 606 | r#"|example 607 | |example 608 | * Hello world! 
609 | |end 610 | |end"#, 611 | r#"=example 612 | Hello world! 613 | =end"#, 614 | r#"=example.some-text.here one\ parameter second-parameter 615 | #carryover 616 | text within 617 | =end"#, 618 | r#"=some-complex_tag_ first-parameter #&*(&$!) third-parameter 619 | this is some text within 620 | =end"#, 621 | r#"=example 622 | * Hello world! 623 | =end"#, 624 | r#"=example 625 | =example 626 | * Hello world! 627 | =end 628 | =end"#, 629 | ] 630 | .into_iter() 631 | .map(|example| example.to_string() + "\n") 632 | .map(|str| parse(&str)) 633 | .try_collect() 634 | .unwrap(); 635 | 636 | assert_yaml_snapshot!(examples); 637 | } 638 | 639 | proptest! { 640 | #[test] 641 | // NOTE: `.*` may at some point generate an `@end` purely by chance. There is a basic 642 | // check against this, but this should probably be done as a filter in proptest. 643 | fn ranged_tags_proptests(tag_type in prop_oneof!["@", "|"], tag_name in TAG_NAME_REGEX, parameter in TAG_PARAMETER_REGEX, multi_parameter in TAG_MULTI_PARAMETER_REGEX, content in PARAGRAPH_REGEX) { 644 | if content.contains(format!("{}end", tag_type).as_str()) { 645 | return Ok(()); 646 | } 647 | 648 | let content = format!("{tag_type}{tag_name} {parameter} {multi_parameter}\n{content}\n{tag_type}end"); 649 | 650 | parse(&content).unwrap(); 651 | } 652 | } 653 | 654 | #[test] 655 | fn paragraphs() { 656 | let examples: Vec<_> = [ 657 | "hello, world!", 658 | "*hello, world!*", 659 | "*hello, 660 | world!*", 661 | "two 662 | 663 | paragraphs", 664 | "paragraph 665 | here 666 | 667 | another paragraph 668 | here.", 669 | ] 670 | .into_iter() 671 | .map(|example| example.to_string() + "\n") 672 | .map(|str| parse(&str)) 673 | .try_collect() 674 | .unwrap(); 675 | 676 | assert_yaml_snapshot!(examples); 677 | } 678 | 679 | proptest! 
{ 680 | #[test] 681 | fn paragraphs_proptests(paragraph_content in PARAGRAPH_REGEX) { 682 | parse(¶graph_content).unwrap(); 683 | } 684 | } 685 | 686 | #[test] 687 | fn modifiers() { 688 | let examples: Vec<_> = [ 689 | "this *is* a test", 690 | "hello, *world*!", 691 | "*hello, world!*", 692 | "*hello*, world!", 693 | "*/hello/*, world!", 694 | "*hi!* how are you?", 695 | "this *is a test", 696 | "this *is/ a test", 697 | "this *is*/ a test", 698 | "this */is/*/ a test", 699 | ] 700 | .into_iter() 701 | .map(|example| example.to_string() + "\n") 702 | .map(|str| parse(&str)) 703 | .try_collect() 704 | .unwrap(); 705 | 706 | assert_yaml_snapshot!(examples); 707 | } 708 | 709 | #[test] 710 | fn links() { 711 | let examples: Vec<_> = [ 712 | "{https://github.com/nvim-neorg/neorg}", 713 | "{$ hello!}", 714 | "{/ a-path.txt}", 715 | "{********* hello!}", 716 | "{:/some/file:*** a -path-.txt}", 717 | "[anchor]", 718 | "[anchor][description]", 719 | "{* hello}[description]", 720 | "[description]{* hello}", 721 | "This is a !", 722 | "<*linkable with markup*> here!", 723 | "{:another_file:}", 724 | ] 725 | .into_iter() 726 | .map(|example| example.to_string() + "\n") 727 | .map(|str| parse(&str)) 728 | .try_collect() 729 | .unwrap(); 730 | 731 | assert_yaml_snapshot!(examples); 732 | } 733 | 734 | #[test] 735 | fn inline_verbatim() { 736 | let examples: Vec<_> = [ 737 | "some text `inline verbatim`", 738 | "`verbatim at start`", 739 | "{/ some_link.txt}[with `inline verbatim` in anchor]", 740 | "`*markup* /inside/ /-verbatim-/`", 741 | ] 742 | .into_iter() 743 | .map(|example| example.to_string() + "\n") 744 | .map(|str| parse(&str)) 745 | .try_collect() 746 | .unwrap(); 747 | 748 | assert_yaml_snapshot!(examples); 749 | } 750 | } 751 | -------------------------------------------------------------------------------- /src/metadata/mod.rs: -------------------------------------------------------------------------------- 1 | use chumsky::Parser; 2 | pub use stage_1::NorgMeta; 
3 | 4 | use crate::error::NorgParseError; 5 | 6 | pub mod stage_1; 7 | 8 | /// Parses the given input string to produce an AST for the metadata 9 | pub fn parse_metadata(input: &str) -> Result { 10 | // don't ask me why this fixes it. I don't even care 11 | let processed = input.replace("\n]", "\n ]"); 12 | let processed = format!("{{\n{}\n}}\n", processed.trim()); 13 | Ok(stage_1::meta_parser().parse(processed)?) 14 | } 15 | 16 | #[cfg(test)] 17 | mod tests { 18 | use insta::assert_yaml_snapshot; 19 | use itertools::Itertools; 20 | 21 | use crate::metadata::parse_metadata; 22 | 23 | #[test] 24 | fn common_metadata() { 25 | let examples: Vec<_> = [ 26 | " 27 | title: Sunday November 17, 2024 28 | description: We Cooked 29 | authors: benlubas 30 | categories: journal 31 | created: 2024-11-18 32 | updated: 2024-11-18T17:58:21-0500 33 | version: 1.1.1 34 | ", 35 | " 36 | title: Neorg Extras 37 | description: Extra lua code to configure Neorg 38 | authors: benlubas 39 | categories: [ 40 | neorg 41 | nvim 42 | config 43 | ] 44 | tangle: { 45 | languages: { 46 | lua: ~/github/.dotfiles/nvim/lua/benlubas/neorg/extras.lua 47 | } 48 | delimiter: heading 49 | } 50 | created: 2024-05-03T13:36:42-0500 51 | updated: 2024-10-27T11:12:32-0500 52 | version: 1.1.1 53 | ", 54 | ] 55 | .into_iter() 56 | .map(|example| example.to_string() + "\n") 57 | .map(|str| parse_metadata(&str)) 58 | .try_collect() 59 | .unwrap(); 60 | 61 | assert_yaml_snapshot!(examples); 62 | } 63 | 64 | #[test] 65 | fn arrays() { 66 | let examples: Vec<_> = [ 67 | "empty_arr: [] 68 | arr: [ 69 | 70 | ]", 71 | " 72 | categories: [ 73 | one 74 | two 75 | 45 76 | ]", 77 | " 78 | arr: [ 79 | arrays can contain everything 80 | 5 81 | -5 82 | 6.02e27 83 | nil 84 | { 85 | x: y 86 | a: [ 87 | b 88 | ] 89 | } 90 | [] 91 | [ 92 | hi 93 | hi 94 | ] 95 | ]", 96 | "arr:[]\na2:[\n]\na3:[\nhi\n]\nx: y", 97 | ] 98 | .into_iter() 99 | .map(|example| example.to_string() + "\n") 100 | .map(|str| parse_metadata(&str)) 101 | 
.try_collect() 102 | .unwrap(); 103 | 104 | assert_yaml_snapshot!(examples); 105 | } 106 | 107 | #[test] 108 | fn keys_and_values() { 109 | let examples: Vec<_> = [ 110 | "key: value", 111 | "x:y", 112 | "x :y", 113 | "x:5", 114 | "x:-4", 115 | "str:-4b", 116 | "nil:nil", 117 | "nil:", 118 | "still_nil: 119 | x: y", 120 | " 121 | key: value with : in it 122 | key_2: value with: in it 123 | ", 124 | "keys: { 125 | in: 126 | objects: [] 127 | }" 128 | ] 129 | .into_iter() 130 | .map(|example| example.to_string() + "\n") 131 | .map(|str| parse_metadata(&str)) 132 | .try_collect() 133 | .unwrap(); 134 | 135 | assert_yaml_snapshot!(examples); 136 | } 137 | } 138 | -------------------------------------------------------------------------------- /src/metadata/snapshots/rust_norg__metadata__tests__arrays.snap: -------------------------------------------------------------------------------- 1 | --- 2 | source: src/metadata/mod.rs 3 | expression: examples 4 | --- 5 | - Object: 6 | arr: 7 | Array: [] 8 | empty_arr: 9 | Array: [] 10 | - Object: 11 | categories: 12 | Array: 13 | - Str: one 14 | - Str: two 15 | - Num: 45 16 | - Object: 17 | arr: 18 | Array: 19 | - Str: arrays can contain everything 20 | - Num: 5 21 | - Num: -5 22 | - Num: 6020000000000000000000000000 23 | - Nil 24 | - Object: 25 | a: 26 | Array: 27 | - Str: b 28 | x: 29 | Str: y 30 | - Array: [] 31 | - Array: 32 | - Str: hi 33 | - Str: hi 34 | - Object: 35 | a2: 36 | Array: [] 37 | a3: 38 | Array: 39 | - Str: hi 40 | arr: 41 | Array: [] 42 | x: 43 | Str: y 44 | -------------------------------------------------------------------------------- /src/metadata/snapshots/rust_norg__metadata__tests__common_metadata.snap: -------------------------------------------------------------------------------- 1 | --- 2 | source: src/metadata/mod.rs 3 | expression: examples 4 | --- 5 | - Object: 6 | authors: 7 | Str: benlubas 8 | categories: 9 | Str: journal 10 | created: 11 | Str: 2024-11-18 12 | description: 13 | Str: We Cooked 
14 | title: 15 | Str: "Sunday November 17, 2024" 16 | updated: 17 | Str: "2024-11-18T17:58:21-0500" 18 | version: 19 | Str: 1.1.1 20 | - Object: 21 | authors: 22 | Str: benlubas 23 | categories: 24 | Array: 25 | - Str: neorg 26 | - Str: nvim 27 | - Str: config 28 | created: 29 | Str: "2024-05-03T13:36:42-0500" 30 | description: 31 | Str: Extra lua code to configure Neorg 32 | tangle: 33 | Object: 34 | delimiter: 35 | Str: heading 36 | languages: 37 | Object: 38 | lua: 39 | Str: ~/github/.dotfiles/nvim/lua/benlubas/neorg/extras.lua 40 | title: 41 | Str: Neorg Extras 42 | updated: 43 | Str: "2024-10-27T11:12:32-0500" 44 | version: 45 | Str: 1.1.1 46 | -------------------------------------------------------------------------------- /src/metadata/snapshots/rust_norg__metadata__tests__keys.snap: -------------------------------------------------------------------------------- 1 | --- 2 | source: src/metadata/mod.rs 3 | expression: examples 4 | --- 5 | - Object: 6 | key: 7 | Str: value 8 | - Object: 9 | x: 10 | Str: y 11 | - Object: 12 | x: 13 | Num: 5 14 | - Object: 15 | x: 16 | Num: -4 17 | - Object: 18 | str: 19 | Str: "-4b" 20 | - Object: 21 | nil: Nil 22 | - Object: 23 | nil: Nil 24 | - Object: 25 | still_nil: Nil 26 | x: 27 | Str: y 28 | - Object: 29 | key: 30 | Str: "value with : in it" 31 | key_2: 32 | Str: "value with: in it" 33 | - Object: 34 | keys: 35 | Object: 36 | in: Nil 37 | objects: 38 | Array: [] 39 | -------------------------------------------------------------------------------- /src/metadata/snapshots/rust_norg__metadata__tests__keys_and_values.snap: -------------------------------------------------------------------------------- 1 | --- 2 | source: src/metadata/mod.rs 3 | expression: examples 4 | --- 5 | - Object: 6 | key: 7 | Str: value 8 | - Object: 9 | x: 10 | Str: y 11 | - Object: 12 | x: 13 | Str: y 14 | - Object: 15 | x: 16 | Num: 5 17 | - Object: 18 | x: 19 | Num: -4 20 | - Object: 21 | str: 22 | Str: "-4b" 23 | - Object: 24 | nil: Nil 25 | - 
Object: 26 | nil: Nil 27 | - Object: 28 | still_nil: Nil 29 | x: 30 | Str: y 31 | - Object: 32 | key: 33 | Str: "value with : in it" 34 | key_2: 35 | Str: "value with: in it" 36 | - Object: 37 | keys: 38 | Object: 39 | in: Nil 40 | objects: 41 | Array: [] 42 |
--------------------------------------------------------------------------------
/src/metadata/stage_1.rs:
--------------------------------------------------------------------------------
use chumsky::prelude::*;
use serde::Serialize;
use std::collections::BTreeMap;
use text::TextParser;

/// A parsed metadata value.
#[derive(Clone, Debug, Serialize)]
pub enum NorgMeta {
    /// Placeholder produced by error recovery when a delimited region
    /// (`{…}` or `[…]`) could not be parsed.
    Invalid,
    /// The literal `nil`, or a key with no value after the colon.
    Nil,
    /// The literals `true` / `false`.
    Bool(bool),
    /// Any other non-empty text, with surrounding whitespace trimmed.
    Str(String),
    /// NOTE(review): not constructed anywhere in this parser; presumably
    /// reserved for keys without a value. Confirm against other callers.
    EmptyKey(String),
    /// A numeric value that occupies the rest of its line.
    Num(f64),
    /// A `[ … ]` list of newline-separated values.
    Array(Vec<NorgMeta>),
    /// A `{ … }` collection of `key: value` properties, ordered by key.
    Object(BTreeMap<String, NorgMeta>),
}

/// Characters that may not appear in a key, and that the `escape` parser
/// accepts after a backslash.
const SPECIAL: &str = "{}[]:\n";

/// Builds the parser for a single metadata value followed by end of input.
///
/// A value is tried, in order, as a number, an empty array, an array, an
/// object, and finally a plain string. Malformed `{…}` / `[…]` regions are
/// recovered as [`NorgMeta::Invalid`] so that parsing can continue and report
/// further errors.
pub fn meta_parser() -> impl Parser<char, NorgMeta, Error = Simple<char>> {
    recursive(|value| {
        let frac = just('.').chain(text::digits(10));

        let exp = just('e')
            .or(just('E'))
            .chain(just('+').or(just('-')).or_not())
            .chain::<char, _, _>(text::digits(10));

        // Optional leading spaces, optional sign, integer part, optional
        // fraction and exponent. The number must be immediately followed by a
        // newline (checked with `rewind`, so the newline is not consumed);
        // otherwise the text falls through to `string` (e.g. `-4b`).
        let number = just(' ')
            .repeated()
            .ignore_then(just('-').or_not())
            .chain::<char, _, _>(text::int(10))
            .chain::<char, _, _>(frac.or_not().flatten())
            .chain::<char, _, _>(exp.or_not().flatten())
            .then_ignore(just('\n').rewind())
            .collect::<String>()
            .from_str()
            .unwrapped()
            .labelled("number");

        // Backslash escapes: the special characters themselves, the usual
        // control-character shorthands, and `\uXXXX` with four hex digits.
        let escape = just('\\').ignore_then(
            just('\\')
                .or(just('/'))
                .or(one_of(SPECIAL))
                .or(just('b').to('\x08'))
                .or(just('f').to('\x0C'))
                .or(just('n').to('\n'))
                .or(just('r').to('\r'))
                .or(just('t').to('\t'))
                .or(just('u').ignore_then(
                    filter(|c: &char| c.is_ascii_hexdigit())
                        .repeated()
                        .exactly(4)
                        .collect::<String>()
                        .validate(|digits, span, emit| {
                            // Exactly four hex digits always fit in a `u32`,
                            // so `from_str_radix` cannot fail here.
                            char::from_u32(u32::from_str_radix(&digits, 16).unwrap())
                                .unwrap_or_else(|| {
                                    emit(Simple::custom(span, "invalid unicode character"));
                                    '\u{FFFD}' // unicode replacement character
                                })
                        }),
                )),
        );

        // NOTE(review): `none_of` below also accepts a backslash, and it is
        // tried before `escape`, so the escape branch looks unreachable in
        // practice. Confirm whether that ordering is intended.
        let string = none_of("{}[]\n")
            .or(escape.clone())
            .repeated()
            .at_least(1)
            .try_map(|x, span| {
                let binding = x.iter().collect::<String>();
                let s = binding.trim();
                if s.is_empty() {
                    Err(Simple::custom(
                        span,
                        format!("strings can't be all whitespace, got {x:?}"),
                    ))
                } else {
                    Ok(s.to_string())
                }
            })
            // A few reserved words become typed values; everything else stays text.
            .map(|s| match &s[..] {
                "true" => NorgMeta::Bool(true),
                "false" => NorgMeta::Bool(false),
                "nil" => NorgMeta::Nil,
                _ => NorgMeta::Str(s),
            });

        // One or more non-special characters, then `:` and any trailing
        // spaces/tabs. The key text itself is trimmed.
        let key = none_of(SPECIAL)
            .repeated()
            .at_least(1)
            .then_ignore(just(':').then(one_of(" \t").repeated()))
            .collect::<String>()
            .map(|s| s.trim().to_string())
            .labelled("key");

        let array = value
            .clone()
            .separated_by(just('\n'))
            .allow_trailing()
            .padded()
            .delimited_by(just('[').padded(), just(']').ignored())
            .map(NorgMeta::Array)
            .labelled("array");

        // `[]` or `[` + whitespace + `]`: handled separately from `array`.
        let empty_array = empty()
            .padded()
            .delimited_by(just('[').padded(), just(']'))
            .to(NorgMeta::Array(vec![]));

        // `key: value`; a missing value yields `Nil`.
        let property = key
            .then_ignore(one_of(" \t").repeated())
            .then(value.or(empty().to(NorgMeta::Nil)))
            .then_ignore(just('\n').or_not())
            .labelled("property");

        let object = property
            .clone()
            .then_ignore(just('\n').or_not())
            .repeated()
            .padded()
            .collect()
            .delimited_by(just('{').padded(), just('}').ignored())
            .map(NorgMeta::Object)
            .labelled("object");

        choice((
            number.map(NorgMeta::Num),
            empty_array,
            array,
            object,
            string,
        ))
        .recover_with(nested_delimiters('{', '}', [('[', ']')], |_| {
            NorgMeta::Invalid
        }))
        .recover_with(nested_delimiters('[', ']', [('{', '}')], |_| {
            NorgMeta::Invalid
        }))
        .recover_with(skip_then_retry_until(['}', ']']))
    })
    .then_ignore(end().padded().recover_with(skip_then_retry_until([])))
}

--------------------------------------------------------------------------------
/src/snapshots/rust_norg__tests__carryover_tags.snap:
--------------------------------------------------------------------------------
1 | --- 2 | source: src/lib.rs 3 | expression: examples 4 | --- 5 | - - CarryoverTag: 6 | tag_type: Attribute 7 | name: 8 | - tag 9 | parameters: [] 10 | next_object: 11 | Paragraph: 12 | - Token: 13 | Text: paragraph 14 | - - CarryoverTag: 15 | tag_type: Attribute 16 | name: 17 | - tag-name_with-complexchars 18 | parameters: [] 19 | next_object: 20 | Paragraph: 21 | - Token: 22 | Text: paragraph 23 | - - CarryoverTag: 24 | tag_type: Attribute 25 | name: 26 | - tag-name_ 27 | parameters: 28 | - parameter 29 | next_object: 30 | Paragraph: 31 | - Token: 32 | Text: paragraph 33 | - - CarryoverTag: 34 | tag_type: Attribute 35 | name: 36 | - tag-name_ 37 | parameters: 38 | - "one\\ large\\ parameter" 39 | next_object: 40 | Paragraph: 41 | - Token: 42 | Text: paragraph 43 | - - CarryoverTag: 44 | tag_type: Attribute 45 | name: 46 | - tag-name_ 47 | parameters: 48 | - "one\\ large\\ parameter" 49 | - "&^@!" 
50 | - third 51 | - parameter 52 | next_object: 53 | Paragraph: 54 | - Token: 55 | Text: paragraph 56 | - - CarryoverTag: 57 | tag_type: Attribute 58 | name: 59 | - tag 60 | - name 61 | - image 62 | parameters: 63 | - "https://github.com/super-special/repo.git?text=hello&other_text=bye" 64 | next_object: 65 | Paragraph: 66 | - Token: 67 | Text: paragraph 68 | - - CarryoverTag: 69 | tag_type: Macro 70 | name: 71 | - tag 72 | parameters: [] 73 | next_object: 74 | Paragraph: 75 | - Token: 76 | Text: paragraph 77 | - - CarryoverTag: 78 | tag_type: Macro 79 | name: 80 | - tag-name_with-complexchars 81 | parameters: [] 82 | next_object: 83 | Paragraph: 84 | - Token: 85 | Text: paragraph 86 | - - CarryoverTag: 87 | tag_type: Macro 88 | name: 89 | - tag-name_ 90 | parameters: 91 | - parameter 92 | next_object: 93 | Paragraph: 94 | - Token: 95 | Text: paragraph 96 | - - CarryoverTag: 97 | tag_type: Macro 98 | name: 99 | - tag-name_ 100 | parameters: 101 | - "one\\ large\\ parameter" 102 | next_object: 103 | Paragraph: 104 | - Token: 105 | Text: paragraph 106 | - - CarryoverTag: 107 | tag_type: Macro 108 | name: 109 | - tag-name_ 110 | parameters: 111 | - "one\\ large\\ parameter" 112 | - "&^@!" 
113 | - third 114 | - parameter 115 | next_object: 116 | Paragraph: 117 | - Token: 118 | Text: paragraph 119 | - - CarryoverTag: 120 | tag_type: Macro 121 | name: 122 | - tag 123 | - name 124 | - image 125 | parameters: 126 | - "https://github.com/super-special/repo.git?text=hello&other_text=bye" 127 | next_object: 128 | Paragraph: 129 | - Token: 130 | Text: paragraph 131 | -------------------------------------------------------------------------------- /src/snapshots/rust_norg__tests__carryover_tags_tree.snap: -------------------------------------------------------------------------------- 1 | --- 2 | source: src/lib.rs 3 | expression: examples 4 | --- 5 | - - CarryoverTag: 6 | tag_type: Macro 7 | name: 8 | - id 9 | parameters: 10 | - "123" 11 | next_object: 12 | Heading: 13 | level: 1 14 | title: 15 | - Token: 16 | Text: tree 17 | extensions: [] 18 | content: 19 | - Heading: 20 | level: 2 21 | title: 22 | - Token: 23 | Text: nested 24 | extensions: [] 25 | content: [] 26 | - - Heading: 27 | level: 1 28 | title: 29 | - Token: 30 | Text: tree 31 | extensions: [] 32 | content: 33 | - CarryoverTag: 34 | tag_type: Macro 35 | name: 36 | - id 37 | parameters: 38 | - there 39 | next_object: 40 | Heading: 41 | level: 2 42 | title: 43 | - Token: 44 | Text: nested 45 | extensions: [] 46 | content: [] 47 | - Paragraph: 48 | - Token: 49 | Text: part 50 | - Token: Whitespace 51 | - Token: 52 | Text: of 53 | - Token: Whitespace 54 | - Token: 55 | Text: tree 56 | - - CarryoverTag: 57 | tag_type: Macro 58 | name: 59 | - name 60 | parameters: 61 | - main 62 | next_object: 63 | NestableDetachedModifier: 64 | modifier_type: UnorderedList 65 | level: 2 66 | extensions: [] 67 | text: 68 | Paragraph: 69 | - Token: 70 | Text: two 71 | content: 72 | - NestableDetachedModifier: 73 | modifier_type: UnorderedList 74 | level: 4 75 | extensions: [] 76 | text: 77 | Paragraph: 78 | - Token: 79 | Text: four 80 | content: [] 81 | - CarryoverTag: 82 | tag_type: Macro 83 | name: 84 | - id 85 | 
parameters: 86 | - "3" 87 | next_object: 88 | NestableDetachedModifier: 89 | modifier_type: UnorderedList 90 | level: 3 91 | extensions: [] 92 | text: 93 | Paragraph: 94 | - Token: 95 | Text: three 96 | content: [] 97 | - - CarryoverTag: 98 | tag_type: Macro 99 | name: 100 | - comment 101 | parameters: [] 102 | next_object: 103 | Paragraph: 104 | - Token: 105 | Text: multi 106 | - Token: 107 | Special: "-" 108 | - Token: 109 | Text: line 110 | - Token: Whitespace 111 | - Token: 112 | Text: comments 113 | - DelimitingModifier: Weak 114 | - Paragraph: 115 | - Token: 116 | Text: out 117 | - - CarryoverTag: 118 | tag_type: Macro 119 | name: 120 | - id 121 | parameters: 122 | - "123" 123 | next_object: 124 | CarryoverTag: 125 | tag_type: Macro 126 | name: 127 | - comment 128 | parameters: [] 129 | next_object: 130 | Paragraph: 131 | - Token: 132 | Text: comment 133 | - Token: Whitespace 134 | - Token: 135 | Text: with 136 | - Token: Whitespace 137 | - Token: 138 | Text: id 139 | -------------------------------------------------------------------------------- /src/snapshots/rust_norg__tests__definitions.snap: -------------------------------------------------------------------------------- 1 | --- 2 | source: src/lib.rs 3 | expression: examples 4 | --- 5 | - - RangeableDetachedModifier: 6 | modifier_type: Definition 7 | title: 8 | - Token: 9 | Text: Term 10 | extensions: [] 11 | content: 12 | - Paragraph: 13 | - Token: 14 | Text: Definition 15 | - - RangeableDetachedModifier: 16 | modifier_type: Definition 17 | title: 18 | - Token: 19 | Text: Term 20 | extensions: [] 21 | content: 22 | - Paragraph: 23 | - Token: 24 | Text: Long 25 | - Token: Whitespace 26 | - Token: 27 | Text: definition 28 | -------------------------------------------------------------------------------- /src/snapshots/rust_norg__tests__delimiting_mods_tree.snap: -------------------------------------------------------------------------------- 1 | --- 2 | source: src/lib.rs 3 | expression: examples 4 | 
--- 5 | - - Heading: 6 | level: 1 7 | title: 8 | - Token: 9 | Text: One 10 | extensions: [] 11 | content: 12 | - Paragraph: 13 | - Token: 14 | Text: content 15 | - Paragraph: 16 | - Token: 17 | Text: dedented 18 | - - Heading: 19 | level: 1 20 | title: 21 | - Token: 22 | Text: One 23 | extensions: [] 24 | content: 25 | - Heading: 26 | level: 2 27 | title: 28 | - Token: 29 | Text: Two 30 | extensions: [] 31 | content: [] 32 | - Paragraph: 33 | - Token: 34 | Text: none 35 | - - Heading: 36 | level: 2 37 | title: 38 | - Token: 39 | Text: Two 40 | extensions: [] 41 | content: 42 | - Paragraph: 43 | - Token: 44 | Text: two 45 | - DelimitingModifier: HorizontalRule 46 | - Paragraph: 47 | - Token: 48 | Text: two 49 | - - NestableDetachedModifier: 50 | modifier_type: UnorderedList 51 | level: 1 52 | extensions: [] 53 | text: 54 | Paragraph: 55 | - Token: 56 | Text: list 57 | content: [] 58 | - DelimitingModifier: HorizontalRule 59 | - Paragraph: 60 | - Token: 61 | Text: "no" 62 | - Token: Whitespace 63 | - Token: 64 | Text: list 65 | - - Heading: 66 | level: 1 67 | title: 68 | - Token: 69 | Text: One 70 | extensions: [] 71 | content: 72 | - Paragraph: 73 | - Token: 74 | Text: one 75 | - Heading: 76 | level: 2 77 | title: 78 | - Token: 79 | Text: Two 80 | extensions: [] 81 | content: 82 | - Paragraph: 83 | - Token: 84 | Text: two 85 | - Heading: 86 | level: 3 87 | title: 88 | - Token: 89 | Text: Three 90 | extensions: [] 91 | content: 92 | - Paragraph: 93 | - Token: 94 | Text: three 95 | - Paragraph: 96 | - Token: 97 | Text: two 98 | - Paragraph: 99 | - Token: 100 | Text: one 101 | - Paragraph: 102 | - Token: 103 | Text: none 104 | -------------------------------------------------------------------------------- /src/snapshots/rust_norg__tests__footnotes.snap: -------------------------------------------------------------------------------- 1 | --- 2 | source: src/lib.rs 3 | expression: examples 4 | --- 5 | - - RangeableDetachedModifier: 6 | modifier_type: Footnote 7 | title: 
8 | - Token: 9 | Text: Title 10 | extensions: [] 11 | content: 12 | - Paragraph: 13 | - Token: 14 | Text: Content 15 | - - RangeableDetachedModifier: 16 | modifier_type: Footnote 17 | title: 18 | - Token: 19 | Text: Title 20 | extensions: [] 21 | content: 22 | - Paragraph: 23 | - Token: 24 | Text: Long 25 | - Token: Whitespace 26 | - Token: 27 | Text: content 28 | -------------------------------------------------------------------------------- /src/snapshots/rust_norg__tests__headings.snap: -------------------------------------------------------------------------------- 1 | --- 2 | source: src/lib.rs 3 | expression: examples 4 | --- 5 | - - Heading: 6 | level: 1 7 | title: 8 | - Token: 9 | Text: Heading 10 | extensions: [] 11 | - - Heading: 12 | level: 9 13 | title: 14 | - Token: 15 | Text: Heading 16 | extensions: [] 17 | - - Heading: 18 | level: 1 19 | title: 20 | - Token: 21 | Text: Heading 22 | extensions: [] 23 | - Paragraph: 24 | - Token: 25 | Text: content 26 | - Token: 27 | Special: "." 
28 | - - Heading: 29 | level: 7 30 | title: 31 | - Token: 32 | Text: Heading 33 | extensions: [] 34 | - - Heading: 35 | level: 1 36 | title: 37 | - Token: 38 | Text: Heading 39 | extensions: [] 40 | - Heading: 41 | level: 1 42 | title: 43 | - Token: 44 | Text: Another 45 | - Token: Whitespace 46 | - Token: 47 | Text: heading 48 | extensions: [] 49 | - - Heading: 50 | level: 1 51 | title: 52 | - Token: 53 | Text: Heading 54 | extensions: [] 55 | - Heading: 56 | level: 2 57 | title: 58 | - Token: 59 | Text: Subheading 60 | extensions: [] 61 | - Heading: 62 | level: 1 63 | title: 64 | - Token: 65 | Text: Back 66 | - Token: Whitespace 67 | - Token: 68 | Text: to 69 | - Token: Whitespace 70 | - Token: 71 | Text: regular 72 | - Token: Whitespace 73 | - Token: 74 | Text: heading 75 | extensions: [] 76 | - - Heading: 77 | level: 1 78 | title: 79 | - Token: 80 | Text: Heading 81 | extensions: [] 82 | - Paragraph: 83 | - Token: 84 | Text: sneaky 85 | - Token: Whitespace 86 | - Token: 87 | Text: content 88 | - Token: 89 | Special: "." 90 | - Heading: 91 | level: 2 92 | title: 93 | - Token: 94 | Text: Subheading 95 | extensions: [] 96 | - Paragraph: 97 | - Token: 98 | Text: more 99 | - Token: Whitespace 100 | - Token: 101 | Text: sneaky 102 | - Token: Whitespace 103 | - Token: 104 | Text: content 105 | - Token: Whitespace 106 | - Token: 107 | Text: inside 108 | - Token: 109 | Special: "." 
110 | - Heading: 111 | level: 1 112 | title: 113 | - Token: 114 | Text: Back 115 | - Token: Whitespace 116 | - Token: 117 | Text: to 118 | - Token: Whitespace 119 | - Token: 120 | Text: regular 121 | - Token: Whitespace 122 | - Token: 123 | Text: heading 124 | extensions: [] 125 | -------------------------------------------------------------------------------- /src/snapshots/rust_norg__tests__headings_tree.snap: -------------------------------------------------------------------------------- 1 | --- 2 | source: src/lib.rs 3 | expression: headings_tree_examples 4 | --- 5 | - - Heading: 6 | level: 1 7 | title: 8 | - Token: 9 | Text: Heading 10 | extensions: [] 11 | content: 12 | - Heading: 13 | level: 2 14 | title: 15 | - Token: 16 | Text: Another 17 | - Token: Whitespace 18 | - Token: 19 | Text: heading 20 | extensions: [] 21 | content: [] 22 | - - Heading: 23 | level: 1 24 | title: 25 | - Token: 26 | Text: Heading 27 | extensions: [] 28 | content: 29 | - Heading: 30 | level: 2 31 | title: 32 | - Token: 33 | Text: Subheading 34 | extensions: [] 35 | content: 36 | - Paragraph: 37 | - Token: 38 | Text: content 39 | - Heading: 40 | level: 1 41 | title: 42 | - Token: 43 | Text: Back 44 | - Token: Whitespace 45 | - Token: 46 | Text: to 47 | - Token: Whitespace 48 | - Token: 49 | Text: regular 50 | - Token: Whitespace 51 | - Token: 52 | Text: heading 53 | extensions: [] 54 | content: [] 55 | -------------------------------------------------------------------------------- /src/snapshots/rust_norg__tests__infirm_tags.snap: -------------------------------------------------------------------------------- 1 | --- 2 | source: src/lib.rs 3 | expression: examples 4 | --- 5 | - - InfirmTag: 6 | name: 7 | - tag 8 | parameters: [] 9 | - - InfirmTag: 10 | name: 11 | - tag-name_with-complexchars 12 | parameters: [] 13 | - - InfirmTag: 14 | name: 15 | - tag-name_ 16 | parameters: 17 | - parameter 18 | - - InfirmTag: 19 | name: 20 | - tag-name_ 21 | parameters: 22 | - "one\\ large\\ 
parameter" 23 | - - InfirmTag: 24 | name: 25 | - tag-name_ 26 | parameters: 27 | - "one\\ large\\ parameter" 28 | - "&^@!" 29 | - third 30 | - parameter 31 | - - InfirmTag: 32 | name: 33 | - tag 34 | - name 35 | - image 36 | parameters: 37 | - "https://github.com/super-special/repo.git?text=hello&other_text=bye" 38 | -------------------------------------------------------------------------------- /src/snapshots/rust_norg__tests__inline_verbatim.snap: -------------------------------------------------------------------------------- 1 | --- 2 | source: src/lib.rs 3 | assertion_line: 588 4 | expression: examples 5 | --- 6 | - - Paragraph: 7 | - Token: 8 | Text: some 9 | - Token: Whitespace 10 | - Token: 11 | Text: text 12 | - Token: Whitespace 13 | - InlineVerbatim: 14 | - Text: inline 15 | - Whitespace 16 | - Text: verbatim 17 | - - Paragraph: 18 | - InlineVerbatim: 19 | - Text: verbatim 20 | - Whitespace 21 | - Text: at 22 | - Whitespace 23 | - Text: start 24 | - - Paragraph: 25 | - Link: 26 | filepath: ~ 27 | targets: 28 | - Path: some_link.txt 29 | description: 30 | - Token: 31 | Text: with 32 | - Token: Whitespace 33 | - InlineVerbatim: 34 | - Text: inline 35 | - Whitespace 36 | - Text: verbatim 37 | - Token: Whitespace 38 | - Token: 39 | Text: in 40 | - Token: Whitespace 41 | - Token: 42 | Text: anchor 43 | - - Paragraph: 44 | - InlineVerbatim: 45 | - Special: "*" 46 | - Text: markup 47 | - Special: "*" 48 | - Whitespace 49 | - Special: / 50 | - Text: inside 51 | - Special: / 52 | - Whitespace 53 | - Special: / 54 | - Special: "-" 55 | - Text: verbatim 56 | - Special: "-" 57 | - Special: / 58 | -------------------------------------------------------------------------------- /src/snapshots/rust_norg__tests__links.snap: -------------------------------------------------------------------------------- 1 | --- 2 | source: src/lib.rs 3 | expression: examples 4 | --- 5 | - - Paragraph: 6 | - Link: 7 | filepath: ~ 8 | targets: 9 | - Url: 
"https://github.com/nvim-neorg/neorg" 10 | description: ~ 11 | - - Paragraph: 12 | - Link: 13 | filepath: ~ 14 | targets: 15 | - Definition: 16 | - Token: 17 | Text: hello 18 | - Token: 19 | Special: "!" 20 | description: ~ 21 | - - Paragraph: 22 | - Link: 23 | filepath: ~ 24 | targets: 25 | - Path: a-path.txt 26 | description: ~ 27 | - - Paragraph: 28 | - Link: 29 | filepath: ~ 30 | targets: 31 | - Heading: 32 | level: 9 33 | title: 34 | - Token: 35 | Text: hello 36 | - Token: 37 | Special: "!" 38 | description: ~ 39 | - - Paragraph: 40 | - Link: 41 | filepath: /some/file 42 | targets: 43 | - Heading: 44 | level: 3 45 | title: 46 | - Token: 47 | Text: a 48 | - Token: Whitespace 49 | - AttachedModifier: 50 | modifier_type: "-" 51 | content: 52 | - Token: 53 | Text: path 54 | - Token: 55 | Special: "." 56 | - Token: 57 | Text: txt 58 | description: ~ 59 | - - Paragraph: 60 | - Anchor: 61 | content: 62 | - Token: 63 | Text: anchor 64 | description: ~ 65 | - - Paragraph: 66 | - Anchor: 67 | content: 68 | - Token: 69 | Text: anchor 70 | description: 71 | - Token: 72 | Text: description 73 | - - Paragraph: 74 | - Link: 75 | filepath: ~ 76 | targets: 77 | - Heading: 78 | level: 1 79 | title: 80 | - Token: 81 | Text: hello 82 | description: 83 | - Token: 84 | Text: description 85 | - - Paragraph: 86 | - AnchorDefinition: 87 | content: 88 | - Token: 89 | Text: description 90 | target: 91 | Link: 92 | filepath: ~ 93 | targets: 94 | - Heading: 95 | level: 1 96 | title: 97 | - Token: 98 | Text: hello 99 | description: ~ 100 | - - Paragraph: 101 | - Token: 102 | Text: This 103 | - Token: Whitespace 104 | - Token: 105 | Text: is 106 | - Token: Whitespace 107 | - Token: 108 | Text: a 109 | - Token: Whitespace 110 | - InlineLinkTarget: 111 | - Token: 112 | Text: link 113 | - Token: 114 | Special: "!" 
115 | - - Paragraph: 116 | - InlineLinkTarget: 117 | - AttachedModifier: 118 | modifier_type: "*" 119 | content: 120 | - Token: 121 | Text: linkable 122 | - Token: Whitespace 123 | - Token: 124 | Text: with 125 | - Token: Whitespace 126 | - Token: 127 | Text: markup 128 | - Token: Whitespace 129 | - Token: 130 | Text: here 131 | - Token: 132 | Special: "!" 133 | - - Paragraph: 134 | - Link: 135 | filepath: another_file 136 | targets: [] 137 | description: ~ 138 | -------------------------------------------------------------------------------- /src/snapshots/rust_norg__tests__lists.snap: -------------------------------------------------------------------------------- 1 | --- 2 | source: src/lib.rs 3 | expression: examples 4 | --- 5 | - - NestableDetachedModifier: 6 | modifier_type: UnorderedList 7 | level: 1 8 | extensions: [] 9 | content: 10 | Paragraph: 11 | - Token: 12 | Text: Test 13 | - Token: Whitespace 14 | - Token: 15 | Text: list 16 | - - NestableDetachedModifier: 17 | modifier_type: UnorderedList 18 | level: 4 19 | extensions: [] 20 | content: 21 | Paragraph: 22 | - Token: 23 | Text: Test 24 | - Token: Whitespace 25 | - Token: 26 | Text: list 27 | - - NestableDetachedModifier: 28 | modifier_type: UnorderedList 29 | level: 1 30 | extensions: [] 31 | content: 32 | Paragraph: 33 | - Token: 34 | Text: Test 35 | - Token: Whitespace 36 | - Token: 37 | Text: list 38 | - NestableDetachedModifier: 39 | modifier_type: UnorderedList 40 | level: 1 41 | extensions: [] 42 | content: 43 | Paragraph: 44 | - Token: 45 | Text: Test 46 | - Token: Whitespace 47 | - Token: 48 | Text: list 49 | - NestableDetachedModifier: 50 | modifier_type: UnorderedList 51 | level: 2 52 | extensions: [] 53 | content: 54 | Paragraph: 55 | - Token: 56 | Text: Test 57 | - Token: Whitespace 58 | - Token: 59 | Text: list 60 | - NestableDetachedModifier: 61 | modifier_type: UnorderedList 62 | level: 2 63 | extensions: [] 64 | content: 65 | Paragraph: 66 | - Token: 67 | Text: Test 68 | - Token: 
Whitespace 69 | - Token: 70 | Text: list 71 | - NestableDetachedModifier: 72 | modifier_type: UnorderedList 73 | level: 1 74 | extensions: [] 75 | content: 76 | Paragraph: 77 | - Token: 78 | Text: Test 79 | - Token: Whitespace 80 | - Token: 81 | Text: list 82 | - NestableDetachedModifier: 83 | modifier_type: UnorderedList 84 | level: 3 85 | extensions: [] 86 | content: 87 | Paragraph: 88 | - Token: 89 | Text: Test 90 | - Token: Whitespace 91 | - Token: 92 | Text: list 93 | - - Paragraph: 94 | - Token: 95 | Special: "-" 96 | - Token: 97 | Special: "-" 98 | - Token: 99 | Special: "-" 100 | - Token: 101 | Text: not 102 | - Token: Whitespace 103 | - Token: 104 | Text: list 105 | - - Paragraph: 106 | - Token: 107 | Special: "-" 108 | - Token: 109 | Special: "-" 110 | - Token: 111 | Special: ">" 112 | - Token: Whitespace 113 | - Token: 114 | Text: not 115 | - Token: Whitespace 116 | - Token: 117 | Text: a 118 | - Token: Whitespace 119 | - Token: 120 | Text: list 121 | -------------------------------------------------------------------------------- /src/snapshots/rust_norg__tests__lists_tree.snap: -------------------------------------------------------------------------------- 1 | --- 2 | source: src/lib.rs 3 | expression: examples 4 | --- 5 | - - NestableDetachedModifier: 6 | modifier_type: UnorderedList 7 | level: 1 8 | extensions: [] 9 | text: 10 | Paragraph: 11 | - Token: 12 | Text: base 13 | content: [] 14 | - - NestableDetachedModifier: 15 | modifier_type: UnorderedList 16 | level: 1 17 | extensions: [] 18 | text: 19 | Paragraph: 20 | - Token: 21 | Text: one 22 | content: 23 | - NestableDetachedModifier: 24 | modifier_type: UnorderedList 25 | level: 2 26 | extensions: [] 27 | text: 28 | Paragraph: 29 | - Token: 30 | Text: two 31 | content: [] 32 | - - NestableDetachedModifier: 33 | modifier_type: UnorderedList 34 | level: 1 35 | extensions: [] 36 | text: 37 | Paragraph: 38 | - Token: 39 | Text: one 40 | content: 41 | - NestableDetachedModifier: 42 | modifier_type: 
UnorderedList 43 | level: 2 44 | extensions: [] 45 | text: 46 | Paragraph: 47 | - Token: 48 | Text: two 49 | - Token: Whitespace 50 | - Token: 51 | Text: with 52 | - Token: Whitespace 53 | - Token: 54 | Text: content 55 | content: [] 56 | - NestableDetachedModifier: 57 | modifier_type: UnorderedList 58 | level: 2 59 | extensions: [] 60 | text: 61 | Paragraph: 62 | - Token: 63 | Text: two 64 | - Token: Whitespace 65 | - Token: 66 | Special: ( 67 | - Token: 68 | Text: "2" 69 | - Token: 70 | Special: ) 71 | content: 72 | - NestableDetachedModifier: 73 | modifier_type: UnorderedList 74 | level: 3 75 | extensions: [] 76 | text: 77 | Paragraph: 78 | - Token: 79 | Text: three 80 | content: [] 81 | - NestableDetachedModifier: 82 | modifier_type: UnorderedList 83 | level: 1 84 | extensions: [] 85 | text: 86 | Paragraph: 87 | - Token: 88 | Text: one 89 | content: [] 90 | - - NestableDetachedModifier: 91 | modifier_type: UnorderedList 92 | level: 2 93 | extensions: [] 94 | text: 95 | Paragraph: 96 | - Token: 97 | Text: two 98 | content: [] 99 | - NestableDetachedModifier: 100 | modifier_type: UnorderedList 101 | level: 1 102 | extensions: [] 103 | text: 104 | Paragraph: 105 | - Token: 106 | Text: one 107 | content: [] 108 | -------------------------------------------------------------------------------- /src/snapshots/rust_norg__tests__modifier_extensions.snap: -------------------------------------------------------------------------------- 1 | --- 2 | source: src/lib.rs 3 | expression: examples 4 | --- 5 | - - NestableDetachedModifier: 6 | modifier_type: UnorderedList 7 | level: 1 8 | extensions: 9 | - Todo: Undone 10 | content: 11 | Paragraph: 12 | - Token: 13 | Text: undone 14 | - - Heading: 15 | level: 1 16 | title: 17 | - Token: Whitespace 18 | - Token: 19 | Text: done 20 | extensions: 21 | - Todo: Done 22 | - - NestableDetachedModifier: 23 | modifier_type: UnorderedList 24 | level: 1 25 | extensions: 26 | - Todo: Paused 27 | content: 28 | Paragraph: 29 | - Token: 30 | 
Text: hold 31 | - - Heading: 32 | level: 1 33 | title: 34 | - Token: Whitespace 35 | - Token: 36 | Text: canceled 37 | extensions: 38 | - Todo: Canceled 39 | - - NestableDetachedModifier: 40 | modifier_type: UnorderedList 41 | level: 1 42 | extensions: 43 | - Todo: Pending 44 | content: 45 | Paragraph: 46 | - Token: 47 | Text: pending 48 | - - Heading: 49 | level: 1 50 | title: 51 | - Token: Whitespace 52 | - Token: 53 | Text: urgent 54 | extensions: 55 | - Todo: Urgent 56 | - - NestableDetachedModifier: 57 | modifier_type: UnorderedList 58 | level: 1 59 | extensions: 60 | - Todo: 61 | Recurring: ~ 62 | content: 63 | Paragraph: 64 | - Token: 65 | Text: recurring 66 | - - NestableDetachedModifier: 67 | modifier_type: OrderedList 68 | level: 1 69 | extensions: 70 | - Todo: 71 | Recurring: Friday 72 | content: 73 | Paragraph: 74 | - Token: 75 | Text: recurring 76 | - Token: Whitespace 77 | - Token: 78 | Text: with 79 | - Token: Whitespace 80 | - Token: 81 | Text: date 82 | - - Heading: 83 | level: 2 84 | title: 85 | - Token: Whitespace 86 | - Token: 87 | Text: undone 88 | - Token: 89 | Special: "," 90 | - Token: Whitespace 91 | - Token: 92 | Text: low 93 | - Token: 94 | Special: "," 95 | - Token: Whitespace 96 | - Token: 97 | Text: "&" 98 | - Token: Whitespace 99 | - Token: 100 | Text: before 101 | - Token: Whitespace 102 | - Token: 103 | Text: Feb 104 | extensions: 105 | - Todo: Undone 106 | - Priority: Low 107 | - DueDate: Feb 1 108 | - - Heading: 109 | level: 2 110 | title: 111 | - Token: Whitespace 112 | - Token: 113 | Text: All 114 | - Token: Whitespace 115 | - Token: 116 | Text: of 117 | - Token: Whitespace 118 | - Token: 119 | Text: them 120 | extensions: 121 | - Priority: Two Words 122 | - Todo: Done 123 | - Todo: Undone 124 | - Todo: Urgent 125 | - Todo: 126 | Recurring: ~ 127 | - Todo: Canceled 128 | - Todo: 129 | Recurring: 5th 130 | - Todo: Paused 131 | - Todo: Pending 132 | - DueDate: Feb 1 133 | - StartDate: "2025" 134 | - Timestamp: Jan 1 2025 135 | 
-------------------------------------------------------------------------------- /src/snapshots/rust_norg__tests__modifiers.snap: -------------------------------------------------------------------------------- 1 | --- 2 | source: src/lib.rs 3 | expression: examples 4 | --- 5 | - - Paragraph: 6 | - Token: 7 | Text: this 8 | - Token: Whitespace 9 | - AttachedModifier: 10 | modifier_type: "*" 11 | content: 12 | - Token: 13 | Text: is 14 | - Token: Whitespace 15 | - Token: 16 | Text: a 17 | - Token: Whitespace 18 | - Token: 19 | Text: test 20 | - - Paragraph: 21 | - Token: 22 | Text: hello 23 | - Token: 24 | Special: "," 25 | - Token: Whitespace 26 | - AttachedModifier: 27 | modifier_type: "*" 28 | content: 29 | - Token: 30 | Text: world 31 | - Token: 32 | Special: "!" 33 | - - Paragraph: 34 | - AttachedModifier: 35 | modifier_type: "*" 36 | content: 37 | - Token: 38 | Text: hello 39 | - Token: 40 | Special: "," 41 | - Token: Whitespace 42 | - Token: 43 | Text: world 44 | - Token: 45 | Special: "!" 46 | - - Paragraph: 47 | - AttachedModifier: 48 | modifier_type: "*" 49 | content: 50 | - Token: 51 | Text: hello 52 | - Token: 53 | Special: "," 54 | - Token: Whitespace 55 | - Token: 56 | Text: world 57 | - Token: 58 | Special: "!" 59 | - - Paragraph: 60 | - AttachedModifier: 61 | modifier_type: "*" 62 | content: 63 | - AttachedModifier: 64 | modifier_type: / 65 | content: 66 | - Token: 67 | Text: hello 68 | - Token: 69 | Special: "," 70 | - Token: Whitespace 71 | - Token: 72 | Text: world 73 | - Token: 74 | Special: "!" 75 | - - Paragraph: 76 | - AttachedModifier: 77 | modifier_type: "*" 78 | content: 79 | - Token: 80 | Text: hi 81 | - Token: 82 | Special: "!" 83 | - Token: Whitespace 84 | - Token: 85 | Text: how 86 | - Token: Whitespace 87 | - Token: 88 | Text: are 89 | - Token: Whitespace 90 | - Token: 91 | Text: you? 
92 | - - Paragraph: 93 | - Token: 94 | Text: this 95 | - Token: Whitespace 96 | - Token: 97 | Special: "*" 98 | - Token: 99 | Text: is 100 | - Token: Whitespace 101 | - Token: 102 | Text: a 103 | - Token: Whitespace 104 | - Token: 105 | Text: test 106 | - - Paragraph: 107 | - Token: 108 | Text: this 109 | - Token: Whitespace 110 | - Token: 111 | Special: "*" 112 | - Token: 113 | Text: is 114 | - Token: 115 | Special: / 116 | - Token: Whitespace 117 | - Token: 118 | Text: a 119 | - Token: Whitespace 120 | - Token: 121 | Text: test 122 | - - Paragraph: 123 | - Token: 124 | Text: this 125 | - Token: Whitespace 126 | - AttachedModifier: 127 | modifier_type: "*" 128 | content: 129 | - Token: 130 | Text: is 131 | - Token: 132 | Special: / 133 | - Token: Whitespace 134 | - Token: 135 | Text: a 136 | - Token: Whitespace 137 | - Token: 138 | Text: test 139 | - - Paragraph: 140 | - Token: 141 | Text: this 142 | - Token: Whitespace 143 | - AttachedModifier: 144 | modifier_type: "*" 145 | content: 146 | - AttachedModifier: 147 | modifier_type: / 148 | content: 149 | - Token: 150 | Text: is 151 | - Token: 152 | Special: / 153 | - Token: Whitespace 154 | - Token: 155 | Text: a 156 | - Token: Whitespace 157 | - Token: 158 | Text: test 159 | -------------------------------------------------------------------------------- /src/snapshots/rust_norg__tests__ordered_lists.snap: -------------------------------------------------------------------------------- 1 | --- 2 | source: src/lib.rs 3 | expression: examples 4 | --- 5 | - - NestableDetachedModifier: 6 | modifier_type: OrderedList 7 | level: 1 8 | extensions: [] 9 | content: 10 | Paragraph: 11 | - Token: 12 | Text: Test 13 | - Token: Whitespace 14 | - Token: 15 | Text: list 16 | - - NestableDetachedModifier: 17 | modifier_type: OrderedList 18 | level: 4 19 | extensions: [] 20 | content: 21 | Paragraph: 22 | - Token: 23 | Text: Test 24 | - Token: Whitespace 25 | - Token: 26 | Text: list 27 | - - NestableDetachedModifier: 28 | 
modifier_type: OrderedList 29 | level: 1 30 | extensions: [] 31 | content: 32 | Paragraph: 33 | - Token: 34 | Text: Test 35 | - Token: Whitespace 36 | - Token: 37 | Text: list 38 | - NestableDetachedModifier: 39 | modifier_type: OrderedList 40 | level: 1 41 | extensions: [] 42 | content: 43 | Paragraph: 44 | - Token: 45 | Text: Test 46 | - Token: Whitespace 47 | - Token: 48 | Text: list 49 | - NestableDetachedModifier: 50 | modifier_type: OrderedList 51 | level: 2 52 | extensions: [] 53 | content: 54 | Paragraph: 55 | - Token: 56 | Text: Test 57 | - Token: Whitespace 58 | - Token: 59 | Text: list 60 | - NestableDetachedModifier: 61 | modifier_type: OrderedList 62 | level: 2 63 | extensions: [] 64 | content: 65 | Paragraph: 66 | - Token: 67 | Text: Test 68 | - Token: Whitespace 69 | - Token: 70 | Text: list 71 | - NestableDetachedModifier: 72 | modifier_type: OrderedList 73 | level: 1 74 | extensions: [] 75 | content: 76 | Paragraph: 77 | - Token: 78 | Text: Test 79 | - Token: Whitespace 80 | - Token: 81 | Text: list 82 | - NestableDetachedModifier: 83 | modifier_type: OrderedList 84 | level: 3 85 | extensions: [] 86 | content: 87 | Paragraph: 88 | - Token: 89 | Text: Test 90 | - Token: Whitespace 91 | - Token: 92 | Text: list 93 | - - Paragraph: 94 | - Token: 95 | Special: "~" 96 | - Token: 97 | Special: "~" 98 | - Token: 99 | Special: "~" 100 | - Token: 101 | Text: not 102 | - Token: Whitespace 103 | - Token: 104 | Text: list 105 | - - Paragraph: 106 | - Token: 107 | Special: "~" 108 | - Token: 109 | Special: "~" 110 | - Token: 111 | Special: ">" 112 | - Token: Whitespace 113 | - Token: 114 | Text: not 115 | - Token: Whitespace 116 | - Token: 117 | Text: a 118 | - Token: Whitespace 119 | - Token: 120 | Text: list 121 | -------------------------------------------------------------------------------- /src/snapshots/rust_norg__tests__paragraphs.snap: -------------------------------------------------------------------------------- 1 | --- 2 | source: src/lib.rs 3 | 
expression: examples 4 | --- 5 | - - Paragraph: 6 | - Token: 7 | Text: hello 8 | - Token: 9 | Special: "," 10 | - Token: Whitespace 11 | - Token: 12 | Text: world 13 | - Token: 14 | Special: "!" 15 | - - Paragraph: 16 | - AttachedModifier: 17 | modifier_type: "*" 18 | content: 19 | - Token: 20 | Text: hello 21 | - Token: 22 | Special: "," 23 | - Token: Whitespace 24 | - Token: 25 | Text: world 26 | - Token: 27 | Special: "!" 28 | - - Paragraph: 29 | - AttachedModifier: 30 | modifier_type: "*" 31 | content: 32 | - Token: 33 | Text: hello 34 | - Token: 35 | Special: "," 36 | - Token: Whitespace 37 | - Token: 38 | Text: world 39 | - Token: 40 | Special: "!" 41 | - - Paragraph: 42 | - Token: 43 | Text: two 44 | - Paragraph: 45 | - Token: 46 | Text: paragraphs 47 | - - Paragraph: 48 | - Token: 49 | Text: paragraph 50 | - Token: Whitespace 51 | - Token: 52 | Text: here 53 | - Paragraph: 54 | - Token: 55 | Text: another 56 | - Token: Whitespace 57 | - Token: 58 | Text: paragraph 59 | - Token: Whitespace 60 | - Token: 61 | Text: here 62 | - Token: 63 | Special: "." 
64 | -------------------------------------------------------------------------------- /src/snapshots/rust_norg__tests__quotes.snap: -------------------------------------------------------------------------------- 1 | --- 2 | source: src/lib.rs 3 | expression: examples 4 | --- 5 | - - NestableDetachedModifier: 6 | modifier_type: Quote 7 | level: 1 8 | extensions: [] 9 | content: 10 | Paragraph: 11 | - Token: 12 | Text: Test 13 | - Token: Whitespace 14 | - Token: 15 | Text: quote 16 | - - NestableDetachedModifier: 17 | modifier_type: Quote 18 | level: 4 19 | extensions: [] 20 | content: 21 | Paragraph: 22 | - Token: 23 | Text: Test 24 | - Token: Whitespace 25 | - Token: 26 | Text: quote 27 | - - NestableDetachedModifier: 28 | modifier_type: Quote 29 | level: 1 30 | extensions: [] 31 | content: 32 | Paragraph: 33 | - Token: 34 | Text: Test 35 | - Token: Whitespace 36 | - Token: 37 | Text: quote 38 | - NestableDetachedModifier: 39 | modifier_type: Quote 40 | level: 1 41 | extensions: [] 42 | content: 43 | Paragraph: 44 | - Token: 45 | Text: Test 46 | - Token: Whitespace 47 | - Token: 48 | Text: quote 49 | - NestableDetachedModifier: 50 | modifier_type: Quote 51 | level: 2 52 | extensions: [] 53 | content: 54 | Paragraph: 55 | - Token: 56 | Text: Test 57 | - Token: Whitespace 58 | - Token: 59 | Text: quote 60 | - NestableDetachedModifier: 61 | modifier_type: Quote 62 | level: 2 63 | extensions: [] 64 | content: 65 | Paragraph: 66 | - Token: 67 | Text: Test 68 | - Token: Whitespace 69 | - Token: 70 | Text: quote 71 | - NestableDetachedModifier: 72 | modifier_type: Quote 73 | level: 1 74 | extensions: [] 75 | content: 76 | Paragraph: 77 | - Token: 78 | Text: Test 79 | - Token: Whitespace 80 | - Token: 81 | Text: quote 82 | - NestableDetachedModifier: 83 | modifier_type: Quote 84 | level: 3 85 | extensions: [] 86 | content: 87 | Paragraph: 88 | - Token: 89 | Text: Test 90 | - Token: Whitespace 91 | - Token: 92 | Text: quote 93 | - - Paragraph: 94 | - Token: 95 | Special: 
">" 96 | - Token: 97 | Special: ">" 98 | - Token: 99 | Special: ">" 100 | - Token: 101 | Text: not 102 | - Token: Whitespace 103 | - Token: 104 | Text: quote 105 | - - Paragraph: 106 | - Token: 107 | Special: ">" 108 | - Token: 109 | Special: ">" 110 | - Token: 111 | Special: "-" 112 | - Token: Whitespace 113 | - Token: 114 | Text: not 115 | - Token: Whitespace 116 | - Token: 117 | Text: a 118 | - Token: Whitespace 119 | - Token: 120 | Text: quote 121 | -------------------------------------------------------------------------------- /src/snapshots/rust_norg__tests__ranged_tags.snap: -------------------------------------------------------------------------------- 1 | --- 2 | source: src/lib.rs 3 | expression: examples 4 | --- 5 | - - RangedTag: 6 | name: 7 | - example 8 | parameters: [] 9 | content: 10 | - Paragraph: 11 | - Token: 12 | Text: Hello 13 | - Token: Whitespace 14 | - Token: 15 | Text: world 16 | - Token: 17 | Special: "!" 18 | - - RangedTag: 19 | name: 20 | - example 21 | - some-text 22 | - here 23 | parameters: 24 | - "one\\ parameter" 25 | - second-parameter 26 | content: 27 | - CarryoverTag: 28 | tag_type: Macro 29 | name: 30 | - carryover 31 | parameters: [] 32 | next_object: 33 | Paragraph: 34 | - Token: 35 | Text: text 36 | - Token: Whitespace 37 | - Token: 38 | Text: within 39 | - - RangedTag: 40 | name: 41 | - some-complex_tag_ 42 | parameters: 43 | - first-parameter 44 | - "#&*(&$!)" 45 | - third-parameter 46 | content: 47 | - Paragraph: 48 | - Token: 49 | Text: this 50 | - Token: Whitespace 51 | - Token: 52 | Text: is 53 | - Token: Whitespace 54 | - Token: 55 | Text: some 56 | - Token: Whitespace 57 | - Token: 58 | Text: text 59 | - Token: Whitespace 60 | - Token: 61 | Text: within 62 | - - RangedTag: 63 | name: 64 | - example 65 | parameters: [] 66 | content: 67 | - Heading: 68 | level: 1 69 | title: 70 | - Token: 71 | Text: Hello 72 | - Token: Whitespace 73 | - Token: 74 | Text: world 75 | - Token: 76 | Special: "!" 
77 | extensions: [] 78 | - - RangedTag: 79 | name: 80 | - example 81 | parameters: [] 82 | content: 83 | - RangedTag: 84 | name: 85 | - example 86 | parameters: [] 87 | content: 88 | - Heading: 89 | level: 1 90 | title: 91 | - Token: 92 | Text: Hello 93 | - Token: Whitespace 94 | - Token: 95 | Text: world 96 | - Token: 97 | Special: "!" 98 | extensions: [] 99 | - - RangedTag: 100 | name: 101 | - example 102 | parameters: [] 103 | content: 104 | - Paragraph: 105 | - Token: 106 | Text: Hello 107 | - Token: Whitespace 108 | - Token: 109 | Text: world 110 | - Token: 111 | Special: "!" 112 | - - RangedTag: 113 | name: 114 | - example 115 | - some-text 116 | - here 117 | parameters: 118 | - "one\\ parameter" 119 | - second-parameter 120 | content: 121 | - CarryoverTag: 122 | tag_type: Macro 123 | name: 124 | - carryover 125 | parameters: [] 126 | next_object: 127 | Paragraph: 128 | - Token: 129 | Text: text 130 | - Token: Whitespace 131 | - Token: 132 | Text: within 133 | - - RangedTag: 134 | name: 135 | - some-complex_tag_ 136 | parameters: 137 | - first-parameter 138 | - "#&*(&$!)" 139 | - third-parameter 140 | content: 141 | - Paragraph: 142 | - Token: 143 | Text: this 144 | - Token: Whitespace 145 | - Token: 146 | Text: is 147 | - Token: Whitespace 148 | - Token: 149 | Text: some 150 | - Token: Whitespace 151 | - Token: 152 | Text: text 153 | - Token: Whitespace 154 | - Token: 155 | Text: within 156 | - - RangedTag: 157 | name: 158 | - example 159 | parameters: [] 160 | content: 161 | - Heading: 162 | level: 1 163 | title: 164 | - Token: 165 | Text: Hello 166 | - Token: Whitespace 167 | - Token: 168 | Text: world 169 | - Token: 170 | Special: "!" 
171 | extensions: [] 172 | - - RangedTag: 173 | name: 174 | - example 175 | parameters: [] 176 | content: 177 | - RangedTag: 178 | name: 179 | - example 180 | parameters: [] 181 | content: 182 | - Heading: 183 | level: 1 184 | title: 185 | - Token: 186 | Text: Hello 187 | - Token: Whitespace 188 | - Token: 189 | Text: world 190 | - Token: 191 | Special: "!" 192 | extensions: [] 193 | -------------------------------------------------------------------------------- /src/snapshots/rust_norg__tests__ranged_verbatim_tags.snap: -------------------------------------------------------------------------------- 1 | --- 2 | source: src/lib.rs 3 | expression: examples 4 | --- 5 | - - VerbatimRangedTag: 6 | name: 7 | - code 8 | parameters: [] 9 | content: "print(\"Hello world!\")\n" 10 | - - VerbatimRangedTag: 11 | name: 12 | - code 13 | - some-text 14 | - here 15 | parameters: 16 | - "lua\\ language" 17 | - second-parameter 18 | content: "print(\"Hello world!\")\n" 19 | - - VerbatimRangedTag: 20 | name: 21 | - some-complex_tag_ 22 | parameters: 23 | - first-parameter 24 | - "#&*(&$!)" 25 | - third-parameter 26 | content: "function hello()\n print(\"Hello World\")\nend\n\nhello()\n" 27 | -------------------------------------------------------------------------------- /src/snapshots/rust_norg__tests__tables.snap: -------------------------------------------------------------------------------- 1 | --- 2 | source: src/lib.rs 3 | expression: examples 4 | --- 5 | - - RangeableDetachedModifier: 6 | modifier_type: Table 7 | title: 8 | - Token: 9 | Text: A1 10 | extensions: [] 11 | content: 12 | - Paragraph: 13 | - Token: 14 | Text: Cell 15 | - Token: Whitespace 16 | - Token: 17 | Text: content 18 | - - RangeableDetachedModifier: 19 | modifier_type: Table 20 | title: 21 | - Token: 22 | Text: A1 23 | extensions: [] 24 | content: 25 | - Paragraph: 26 | - Token: 27 | Text: Long 28 | - Token: Whitespace 29 | - Token: 30 | Text: cell 31 | - Token: Whitespace 32 | - Token: 33 | Text: content 
34 | - Token: 35 | Special: "." 36 | -------------------------------------------------------------------------------- /src/snapshots/rust_norg__tests__verbatim_tags.snap: -------------------------------------------------------------------------------- 1 | --- 2 | source: src/main.rs 3 | expression: examples 4 | --- 5 | - - RangedTag: 6 | name: 7 | - example 8 | parameters: [] 9 | content: 10 | - Paragraph: 11 | - Text: Hello 12 | - Whitespace 13 | - Text: world 14 | - Special: "!" 15 | - - RangedTag: 16 | name: 17 | - example 18 | - some-text 19 | - here 20 | parameters: 21 | - one parameter 22 | - second-parameter 23 | content: 24 | - CarryoverTag: 25 | tag_type: Macro 26 | name: 27 | - carryover 28 | parameters: [] 29 | next_object: 30 | Paragraph: 31 | - Text: text 32 | - Whitespace 33 | - Text: within 34 | - - RangedTag: 35 | name: 36 | - some-complex_tag_ 37 | parameters: 38 | - first-parameter 39 | - "#&*(&$!)" 40 | - third-parameter 41 | content: 42 | - Paragraph: 43 | - Text: this 44 | - Whitespace 45 | - Text: is 46 | - Whitespace 47 | - Text: some 48 | - Whitespace 49 | - Text: text 50 | - Whitespace 51 | - Text: within 52 | - - RangedTag: 53 | name: 54 | - example 55 | parameters: [] 56 | content: 57 | - Heading: 58 | level: 1 59 | title: 60 | - Text: Hello 61 | - Whitespace 62 | - Text: world 63 | - Special: "!" 64 | extensions: [] 65 | - - RangedTag: 66 | name: 67 | - example 68 | parameters: [] 69 | content: 70 | - RangedTag: 71 | name: 72 | - example 73 | parameters: [] 74 | content: 75 | - Heading: 76 | level: 1 77 | title: 78 | - Text: Hello 79 | - Whitespace 80 | - Text: world 81 | - Special: "!" 82 | extensions: [] 83 | - - RangedTag: 84 | name: 85 | - example 86 | parameters: [] 87 | content: 88 | - Paragraph: 89 | - Text: Hello 90 | - Whitespace 91 | - Text: world 92 | - Special: "!" 
93 | - - RangedTag: 94 | name: 95 | - example 96 | - some-text 97 | - here 98 | parameters: 99 | - one parameter 100 | - second-parameter 101 | content: 102 | - CarryoverTag: 103 | tag_type: Macro 104 | name: 105 | - carryover 106 | parameters: [] 107 | next_object: 108 | Paragraph: 109 | - Text: text 110 | - Whitespace 111 | - Text: within 112 | - - RangedTag: 113 | name: 114 | - some-complex_tag_ 115 | parameters: 116 | - first-parameter 117 | - "#&*(&$!)" 118 | - third-parameter 119 | content: 120 | - Paragraph: 121 | - Text: this 122 | - Whitespace 123 | - Text: is 124 | - Whitespace 125 | - Text: some 126 | - Whitespace 127 | - Text: text 128 | - Whitespace 129 | - Text: within 130 | - - RangedTag: 131 | name: 132 | - example 133 | parameters: [] 134 | content: 135 | - Heading: 136 | level: 1 137 | title: 138 | - Text: Hello 139 | - Whitespace 140 | - Text: world 141 | - Special: "!" 142 | extensions: [] 143 | - - RangedTag: 144 | name: 145 | - example 146 | parameters: [] 147 | content: 148 | - RangedTag: 149 | name: 150 | - example 151 | parameters: [] 152 | content: 153 | - Heading: 154 | level: 1 155 | title: 156 | - Text: Hello 157 | - Whitespace 158 | - Text: world 159 | - Special: "!" 160 | extensions: [] 161 | -------------------------------------------------------------------------------- /src/stage_1.rs: -------------------------------------------------------------------------------- 1 | //! This file contains the initial lexing stage, which breaks up characters into distinct tokens. 2 | 3 | use std::fmt::Write as _; 4 | 5 | use chumsky::prelude::*; 6 | use chumsky::{ 7 | text::{keyword, Character}, 8 | Parser, 9 | }; 10 | use serde::Serialize; 11 | use unicode_categories::UnicodeCategories; 12 | 13 | /// Describes an individual part of the document. 
14 | #[derive(Debug, Clone, PartialEq, Eq, Hash, Serialize)] 15 | pub enum NorgToken { 16 | Whitespace(u16), 17 | SingleNewline, 18 | Newlines(u16), 19 | Regular(char), 20 | Special(char), 21 | Escape(char), 22 | End(char), 23 | Eof, 24 | } 25 | 26 | impl std::fmt::Display for NorgToken { 27 | fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result { 28 | match self { 29 | Self::End(c) => write!(f, "{}end", c), 30 | Self::Eof => f.write_char('\0'), 31 | Self::Escape(c) => write!(f, "\\{}", c), 32 | Self::Newlines(count) => f.write_str(&"\n".repeat(*count as usize)), 33 | Self::Regular(c) | Self::Special(c) => f.write_char(*c), 34 | Self::SingleNewline => f.write_char('\n'), 35 | Self::Whitespace(count) => f.write_str(&" ".repeat(*count as usize)), 36 | } 37 | } 38 | } 39 | 40 | impl From for String { 41 | fn from(value: NorgToken) -> Self { 42 | value.to_string() 43 | } 44 | } 45 | 46 | /// A list of characters which are considered "special", i.e. for parsing of attached modifiers. 47 | const SPECIAL_CHARS: &str = "*-~/_!%^,\"'`$:@|=.#+<>()[]{}\\"; 48 | 49 | /// Parses a `.norg` document and breaks it up into tokens. 50 | pub fn stage_1() -> impl Parser, Error = chumsky::error::Simple> { 51 | let ws = filter(|c: &char| c.is_inline_whitespace() || c.is_separator_space()) 52 | .repeated() 53 | .at_least(1) 54 | .map(|content| NorgToken::Whitespace(content.len() as u16)); 55 | 56 | // Fallback parser for any non-special character. 
57 | let character = any().map(NorgToken::Regular); 58 | 59 | let parse_newline = filter(|c: &char| { 60 | *c == '\n' || *c == '\r' || c.is_separator_line() || c.is_separator_paragraph() 61 | }); 62 | 63 | let newline = parse_newline 64 | .to(NorgToken::SingleNewline); 65 | 66 | let newlines = parse_newline 67 | .repeated() 68 | .at_least(2) 69 | .map(|content| NorgToken::Newlines(content.len() as u16)); 70 | 71 | let special = one_of(SPECIAL_CHARS).map(NorgToken::Special); 72 | 73 | let escape = just('\\').ignore_then(any()).map(NorgToken::Escape); 74 | 75 | let tag_end = one_of(SPECIAL_CHARS) 76 | .then_ignore(keyword("end")) 77 | .then_ignore(choice((one_of("\n\r").rewind().map(|_| ()), end()))) 78 | .map(NorgToken::End); 79 | 80 | choice((tag_end, escape, special, newlines, newline, ws, character)) 81 | .repeated() 82 | .chain(end().to(NorgToken::Eof)) 83 | } 84 | -------------------------------------------------------------------------------- /src/stage_2.rs: -------------------------------------------------------------------------------- 1 | //! Converts a set of Norg tokens into a set of blocks. 
2 | 3 | use std::fmt::Write as _; 4 | 5 | use chumsky::Parser; 6 | use itertools::Itertools; 7 | use serde::Serialize; 8 | 9 | use crate::stage_1::NorgToken; 10 | use chumsky::prelude::*; 11 | 12 | #[derive(Debug, Clone, PartialEq, Eq, Hash, Serialize)] 13 | pub enum ParagraphSegmentToken { 14 | Text(String), 15 | Whitespace, 16 | Special(char), 17 | Escape(char), 18 | } 19 | 20 | impl std::fmt::Display for ParagraphSegmentToken { 21 | fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result { 22 | match self { 23 | Self::Escape(c) => write!(f, "\\{}", c), 24 | Self::Text(str) => f.write_str(str), 25 | Self::Special(c) => f.write_char(*c), 26 | Self::Whitespace => f.write_char(' '), 27 | } 28 | } 29 | } 30 | 31 | impl From for String { 32 | fn from(value: ParagraphSegmentToken) -> Self { 33 | value.to_string() 34 | } 35 | } 36 | 37 | pub type ParagraphTokenList = Vec; 38 | 39 | fn tokens_to_paragraph_segment(tokens: Vec) -> ParagraphTokenList { 40 | tokens 41 | .into_iter() 42 | .peekable() 43 | .batching(|it| match it.next() { 44 | Some(NorgToken::SingleNewline) | Some(NorgToken::Whitespace(_)) => { 45 | Some(ParagraphSegmentToken::Whitespace) 46 | } 47 | Some(NorgToken::Special(c)) => Some(ParagraphSegmentToken::Special(c)), 48 | Some(NorgToken::Escape(c)) => Some(ParagraphSegmentToken::Escape(c)), 49 | Some(NorgToken::Regular(c)) => { 50 | let mut result: String = it 51 | .peeking_take_while(|token| matches!(token, NorgToken::Regular(_))) 52 | .map_into::() 53 | .collect(); 54 | 55 | result.insert(0, c); 56 | 57 | Some(ParagraphSegmentToken::Text(result)) 58 | } 59 | None => None, 60 | _x => { 61 | unreachable!(); 62 | } 63 | }) 64 | .collect() 65 | } 66 | 67 | /// Represents various Norg blocks parsed from tokens. 68 | #[derive(Debug, Clone, PartialEq, Eq, Hash, Serialize)] 69 | pub enum NorgBlock { 70 | /// A segment of a paragraph consisting of Norg tokens. 71 | ParagraphSegment(ParagraphTokenList), 72 | /// End of a paragraph segment. 
73 | ParagraphSegmentEnd(ParagraphTokenList), 74 | /// A heading with a specified level, title, and optional extension section. 75 | Heading { 76 | level: u16, 77 | title: ParagraphTokenList, 78 | extension_section: ParagraphTokenList, 79 | }, 80 | /// A nestable detached modifier with a type, level, and optional extension section. 81 | NestableDetachedModifier { 82 | modifier_type: char, 83 | level: u16, 84 | extension_section: ParagraphTokenList, 85 | }, 86 | /// A rangeable detached modifier with an indication if it is ranged, type, title, and optional extension section. 87 | RangeableDetachedModifier { 88 | ranged: bool, 89 | modifier_type: char, 90 | title: ParagraphTokenList, 91 | extension_section: ParagraphTokenList, 92 | }, 93 | /// Closing tag for a rangeable detached modifier. 94 | RangeableDetachedModifierClose(char), 95 | /// A ranged tag with a type, name, and optional parameters. 96 | RangedTag { 97 | tag_type: char, 98 | name: ParagraphTokenList, 99 | parameters: Option>, 100 | }, 101 | /// End of a ranged tag. 102 | RangedTagEnd(char), 103 | /// A verbatim ranged tag with a name, optional parameters, and content. 104 | VerbatimRangedTag { 105 | name: ParagraphTokenList, 106 | parameters: Option>, 107 | content: Vec, 108 | }, 109 | /// An infirm tag with a name and optional parameters. 110 | InfirmTag { 111 | name: ParagraphTokenList, 112 | parameters: Option>, 113 | }, 114 | /// A carryover tag with a type, name, and optional parameters. 115 | CarryoverTag { 116 | tag_type: char, 117 | name: ParagraphTokenList, 118 | parameters: Option>, 119 | }, 120 | /// A delimiting modifier, defined by a single char `-` (weak), `=` (string), or `_` (horizontal rule) 121 | DelimitingModifier(char), 122 | } 123 | 124 | /// Defines the parser for stage 2 of the Norg parsing process, which converts tokens into blocks. 
125 | /// 126 | /// # Returns 127 | /// 128 | /// * A parser that processes `NorgToken`s into a vector of `NorgBlock`s, which properly define 129 | /// paragraph boundaries. 130 | pub fn stage_2() -> impl Parser, Error = chumsky::error::Simple> 131 | { 132 | use NorgToken::*; 133 | 134 | let whitespace = select! { Whitespace(_) => () }; 135 | 136 | let newlines_or_eof = select! { 137 | s @ SingleNewline => s, 138 | n @ Newlines(..) => n, 139 | e @ Eof => e, 140 | }; 141 | 142 | let newlines_whitespace = select! { 143 | Newlines(_) => (), 144 | SingleNewline => (), 145 | Whitespace(_) => (), 146 | }; 147 | 148 | let newlines_whitespace_or_eof = select! { 149 | Newlines(_) => (), 150 | SingleNewline => (), 151 | Whitespace(_) => (), 152 | Eof => (), 153 | }; 154 | 155 | let paragraph_segment = newlines_or_eof.not().repeated().at_least(1); 156 | 157 | let extension_section = select! { 158 | SingleNewline => (), 159 | Newlines(_) => (), 160 | Eof => (), 161 | Special(')') => (), 162 | } 163 | .not() 164 | .repeated() 165 | .at_least(1) 166 | .delimited_by(just(Special('(')), just(Special(')'))); 167 | 168 | let parameters = newlines_whitespace_or_eof 169 | .not() 170 | .repeated() 171 | .at_least(1) 172 | .separated_by(whitespace.repeated().at_least(1)); 173 | 174 | let heading = select! { 175 | Special('*') => (), 176 | } 177 | .ignored() 178 | .repeated() 179 | .at_least(1) 180 | .map(|chars| chars.len() as u16) 181 | .then_ignore(whitespace.repeated().at_least(1)) 182 | .then(extension_section.clone().or_not()) 183 | .then(paragraph_segment) 184 | .then_ignore(newlines_or_eof) 185 | .map(|((level, extension_section), title)| NorgBlock::Heading { 186 | level, 187 | title: tokens_to_paragraph_segment(title), 188 | extension_section: extension_section 189 | .map(tokens_to_paragraph_segment) 190 | .unwrap_or_default(), 191 | }) 192 | .labelled("heading"); 193 | 194 | let nestable_detached_modifier = select! 
{ 195 | Special(c) if c == '-' || c == '~' || c == '>' => c, 196 | } 197 | .repeated() 198 | .at_least(1) 199 | .try_map(|chars, span| { 200 | if chars.iter().all_equal() { 201 | Ok((chars[0], chars.len() as u16)) 202 | } else { 203 | // Get the type of element that the user tried to create. 204 | let modifier_type = match chars[0] { 205 | '-' => "unordered list", 206 | '~' => "ordered list", 207 | '>' => "quote", 208 | _ => unreachable!(), 209 | }; 210 | Err(Simple::custom( 211 | span, 212 | format!(" 213 | Expected a sequence of '{}' characters when creating {}. 214 | Norg does not permit mixing of modifiers, e.g. `-~>`. Keep all your modifiers the same, e.g. `---`. 215 | ", chars[0], modifier_type), 216 | )) 217 | } 218 | }) 219 | .then_ignore(whitespace.repeated().at_least(1)) 220 | .then(extension_section.clone().or_not()) 221 | .map( 222 | |((modifier_type, level), extension_section)| NorgBlock::NestableDetachedModifier { 223 | modifier_type, 224 | level, 225 | extension_section: extension_section.map(tokens_to_paragraph_segment).unwrap_or_default(), 226 | }, 227 | ) 228 | .labelled("nestabled_detached_modifier"); 229 | 230 | let rangeable_mod = |c: char| { 231 | select! { Special(x) if x == c => x } 232 | .repeated() 233 | .at_least(1) 234 | .at_most(2) 235 | .map(|chars| (chars[0], chars.len() == 2)) 236 | .then_ignore(whitespace.repeated().at_least(1)) 237 | .then(extension_section.clone().or_not()) 238 | .then(paragraph_segment) 239 | .then_ignore(newlines_or_eof) 240 | .map(|(((modifier_type, ranged), extension_section), title)| { 241 | NorgBlock::RangeableDetachedModifier { 242 | modifier_type, 243 | ranged, 244 | title: tokens_to_paragraph_segment(title), 245 | extension_section: extension_section 246 | .map(tokens_to_paragraph_segment) 247 | .unwrap_or_default(), 248 | } 249 | }) 250 | .labelled("rangeable_detached_modifier") 251 | }; 252 | 253 | let rangeable_mod_closer = |c: char| { 254 | select! 
{ Special(x) if x == c => x } 255 | .repeated() 256 | .exactly(2) 257 | .ignored() 258 | .then_ignore(newlines_or_eof) 259 | .map(move |_| NorgBlock::RangeableDetachedModifierClose(c)) 260 | .labelled("rangeable_detached_modifier_closed") 261 | }; 262 | 263 | let verbatim_ranged_tag = |c: char| { 264 | let parse_char = select! { Special(x) if x == c => x }; 265 | let tag_end = select! { 266 | End(x) if x == c => x, 267 | }; 268 | 269 | let tag_parameters = select! { 270 | Newlines(_) => (), 271 | SingleNewline => (), 272 | Whitespace(_) => (), 273 | Eof => (), 274 | End(x) if x == c => () 275 | } 276 | .not() 277 | .repeated() 278 | .at_least(1) 279 | .separated_by(whitespace.repeated().at_least(1)); 280 | 281 | parse_char 282 | .ignore_then(newlines_whitespace_or_eof.not().repeated().at_least(1)) 283 | .then( 284 | whitespace 285 | .repeated() 286 | .at_least(1) 287 | .ignore_then(tag_parameters) 288 | .or_not(), 289 | ) 290 | .then_ignore(just(SingleNewline).or_not()) 291 | .then_ignore(filter(|c| matches!(c, Newlines(_))).or_not()) 292 | .then(tag_end.not().repeated().or_not()) 293 | .then_ignore(tag_end) 294 | .map( 295 | |((name, parameters), content)| NorgBlock::VerbatimRangedTag { 296 | name: tokens_to_paragraph_segment(name), 297 | parameters: parameters.map(|tokens| { 298 | tokens 299 | .into_iter() 300 | .map(tokens_to_paragraph_segment) 301 | .collect() 302 | }), 303 | content: content.unwrap_or(vec![]), 304 | }, 305 | ) 306 | }; 307 | 308 | let ranged_tag = |c: char| { 309 | let parse_char = select! { Special(x) if x == c => x }; 310 | 311 | parse_char 312 | .ignore_then(newlines_whitespace_or_eof.not().repeated().at_least(1)) 313 | .then( 314 | whitespace 315 | .repeated() 316 | .at_least(1) 317 | .ignore_then(parameters) 318 | .or_not(), 319 | ) 320 | .then_ignore(select! 
{ 321 | SingleNewline => (), 322 | Newlines(_) => (), 323 | }) 324 | .map(move |(name, parameters)| NorgBlock::RangedTag { 325 | tag_type: c, 326 | name: tokens_to_paragraph_segment(name), 327 | parameters: parameters.map(|tokens| { 328 | tokens 329 | .into_iter() 330 | .map(tokens_to_paragraph_segment) 331 | .collect() 332 | }), 333 | }) 334 | }; 335 | 336 | let infirm_tag = { 337 | select! { Special('.') => '.' } 338 | .ignore_then(newlines_whitespace_or_eof.not().repeated().at_least(1)) 339 | .then( 340 | whitespace 341 | .repeated() 342 | .at_least(1) 343 | .ignore_then(parameters) 344 | .or_not(), 345 | ) 346 | .then_ignore(select! { 347 | SingleNewline => (), 348 | Newlines(_) => (), 349 | }) 350 | .map(|(name, parameters)| NorgBlock::InfirmTag { 351 | name: tokens_to_paragraph_segment(name), 352 | parameters: parameters.map(|tokens| { 353 | tokens 354 | .into_iter() 355 | .map(tokens_to_paragraph_segment) 356 | .collect() 357 | }), 358 | }) 359 | }; 360 | 361 | let carryover_tags = { 362 | select! { 363 | Special('+') => '+', 364 | Special('#') => '#', 365 | } 366 | .then(newlines_whitespace_or_eof.not().repeated().at_least(1)) 367 | .then( 368 | whitespace 369 | .repeated() 370 | .at_least(1) 371 | .ignore_then(parameters) 372 | .or_not(), 373 | ) 374 | .then_ignore(select! { 375 | Newlines(_) => (), 376 | SingleNewline => (), 377 | }) 378 | .map(|((tag_type, name), parameters)| NorgBlock::CarryoverTag { 379 | tag_type, 380 | name: tokens_to_paragraph_segment(name), 381 | parameters: parameters.map(|tokens| { 382 | tokens 383 | .into_iter() 384 | .map(tokens_to_paragraph_segment) 385 | .collect() 386 | }), 387 | }) 388 | }; 389 | 390 | let tag_end = select! { 391 | NorgToken::End(c) => NorgBlock::RangedTagEnd(c), 392 | }; 393 | 394 | let delimiting_mod = select! 
{ 395 | NorgToken::Special(c @ ('-' | '=' | '_')) => c, 396 | } 397 | .repeated() 398 | .at_least(2) 399 | .then_ignore(newlines_or_eof) 400 | .map(|chars| NorgBlock::DelimitingModifier(chars[0])); 401 | 402 | choice(( 403 | heading, 404 | nestable_detached_modifier, 405 | delimiting_mod, 406 | rangeable_mod('$'), 407 | rangeable_mod_closer('$'), 408 | rangeable_mod('^'), 409 | rangeable_mod_closer('^'), 410 | rangeable_mod(':'), 411 | rangeable_mod_closer(':'), 412 | verbatim_ranged_tag('@'), 413 | ranged_tag('|'), 414 | ranged_tag('='), 415 | infirm_tag, 416 | carryover_tags, 417 | tag_end, 418 | paragraph_segment 419 | .then(newlines_or_eof.repeated().at_least(1).rewind()) 420 | .map(|(content, trailing)| match trailing.last().unwrap() { 421 | NorgToken::Eof => { 422 | NorgBlock::ParagraphSegmentEnd(tokens_to_paragraph_segment(content)) 423 | } 424 | NorgToken::Newlines(_) => { 425 | NorgBlock::ParagraphSegmentEnd(tokens_to_paragraph_segment(content)) 426 | } 427 | NorgToken::SingleNewline => NorgBlock::ParagraphSegment( 428 | tokens_to_paragraph_segment(content.into_iter().chain(trailing).collect()), 429 | ), 430 | _ => unreachable!(), 431 | }) 432 | .labelled("paragraph_segment"), 433 | )) 434 | .padded_by(newlines_whitespace.repeated()) 435 | .repeated() 436 | .then_ignore(just(Eof)) 437 | } 438 | -------------------------------------------------------------------------------- /src/stage_3.rs: -------------------------------------------------------------------------------- 1 | use std::fmt::Write; 2 | 3 | use chumsky::prelude::*; 4 | use itertools::Itertools; 5 | use serde::Serialize; 6 | use textwrap::dedent; 7 | 8 | use crate::stage_2::{NorgBlock, ParagraphSegmentToken, ParagraphTokenList}; 9 | 10 | #[derive(Clone, Hash, Debug, PartialEq, Eq, Serialize)] 11 | pub enum NestableDetachedModifier { 12 | Quote, 13 | UnorderedList, 14 | OrderedList, 15 | } 16 | 17 | impl std::fmt::Display for NestableDetachedModifier { 18 | fn fmt(&self, f: &mut 
std::fmt::Formatter<'_>) -> std::fmt::Result {
        match self {
            Self::Quote => f.write_char('>'),
            Self::UnorderedList => f.write_char('-'),
            Self::OrderedList => f.write_char('~'),
        }
    }
}

/// The kinds of rangeable detached modifier, each identified by its marker character.
#[derive(Clone, Hash, Debug, PartialEq, Eq, Serialize)]
pub enum RangeableDetachedModifier {
    /// `$`
    Definition,
    /// `^`
    Footnote,
    /// `:`
    Table,
}

impl std::fmt::Display for RangeableDetachedModifier {
    fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result {
        match self {
            Self::Definition => f.write_char('$'),
            Self::Footnote => f.write_char('^'),
            Self::Table => f.write_char(':'),
        }
    }
}

#[derive(Clone, Hash, Debug, PartialEq, Eq, Serialize)]
pub enum TodoStatus {
    /// ` `
    Undone,
    /// `x`
    Done,
    /// `?`
    NeedsClarification,
    /// `=`
    Paused,
    /// `!`
    Urgent,
    /// `+` or `+ 4th may`
    Recurring(Option<String>),
    /// `-`
    Pending,
    /// `_`
    Canceled,
}

#[derive(Clone, Hash, Debug, PartialEq, Eq, Serialize)]
pub enum DetachedModifierExtension {
    /// todo item status:
    /// `- ( ) undone`
    /// `- (x) done`
    /// `- (?) needs clarification`
    /// `- (=) paused/on hold`
    /// `- (!) urgent`
    /// `- (+) recurring`
    /// `- (+ 15th May) recurring with a time stamp`
    /// `- (-) in progress/pending`
    /// `- (_) put down/canceled`
    Todo(TodoStatus),

    /// Priority, `#` and then any text
    /// `- (# A) Priority A`
    Priority(String),

    /// Time stamp extension:
    /// `- (@ <timestamp>) list item text`
    Timestamp(String),

    /// Time stamp for the due date/deadline for this item
    /// `- (< 1 Jan 2025) Do something`
    DueDate(String),

    /// Time stamp for the start time of the item:
    /// `- (> 2 Jan 2025) Start something`
    StartDate(String),
}

#[derive(Clone, Hash, Debug, PartialEq, Eq, Serialize)]
pub enum CarryoverTag {
    Attribute, // `+`
    Macro,     // `#`
}

#[derive(PartialEq, Serialize)]
pub enum RangedTag {
    Macro,
    Standard,
}

/// First paragraph pass: recognises links, anchors, inline verbatim and inline link targets, and
/// marks every position where an attached modifier (`*`, `/`, `_`, `-`) could open.
///
/// Everything that is not recognised is passed through as `ParagraphSegment::Token`.
fn paragraph_parser_opener_candidates_and_links() -> impl Parser<
    ParagraphSegmentToken,
    Vec<ParagraphSegment>,
    Error = chumsky::error::Simple<ParagraphSegmentToken>,
> {
    let token = any().map(ParagraphSegment::Token);
    let modifier = select! {
        ParagraphSegmentToken::Special(c @ ('*' | '/' | '_' | '-')) => c,
    };

    let whitespace_or_special = select! {
        w @ ParagraphSegmentToken::Whitespace => w,
        s @ ParagraphSegmentToken::Special(_) => s,
    };

    // An opener is preceded by whitespace or a special character and followed by anything but
    // whitespace.
    let opening_modifier_candidate = whitespace_or_special
        .then(modifier.repeated().at_least(1))
        .then(just(ParagraphSegmentToken::Whitespace).not())
        .map(|((left, modifiers), right)| {
            ParagraphSegment::AttachedModifierOpener((Some(left), modifiers, right))
        });

    // Same as above, but for an opener at the very start of the input (nothing on its left).
    let left_empty_opening_modifier = modifier
        .repeated()
        .at_least(1)
        .then(just(ParagraphSegmentToken::Whitespace).not())
        .map(|(modifiers, right)| {
            ParagraphSegment::AttachedModifierOpener((None, modifiers, right))
        });

    let inline_verbatim = just(ParagraphSegmentToken::Special('`'))
        .ignore_then(
            just(ParagraphSegmentToken::Special('`'))
                .not()
                .repeated()
                .at_least(1),
        )
        .then_ignore(just(ParagraphSegmentToken::Special('`')))
        .map(ParagraphSegment::InlineVerbatim);

    let anchor = just(ParagraphSegmentToken::Special('['))
        .ignore_then(
            just(ParagraphSegmentToken::Special(']'))
                .not()
                .repeated()
                .at_least(1),
        )
        .then_ignore(just(ParagraphSegmentToken::Special(']')));

    // `?` is not a special character for the lexer, so the wiki marker arrives as the text run
    // "?" rather than as `Special('?')`. Accept both spellings.
    let wiki_marker = select! {
        ParagraphSegmentToken::Special('?') => (),
        ParagraphSegmentToken::Text(text) if text == "?" => (),
    };

    let link = just(ParagraphSegmentToken::Special('{'))
        .ignore_then(
            // Optional `:path/to/file:` prefix.
            just(ParagraphSegmentToken::Special(':'))
                .ignore_then(
                    just(ParagraphSegmentToken::Special(':'))
                        .not()
                        .repeated()
                        .at_least(1),
                )
                .then_ignore(just(ParagraphSegmentToken::Special(':')))
                .or_not(),
        )
        .then(
            // Optional target kind marker, which must be followed by whitespace.
            choice((
                just(ParagraphSegmentToken::Special('*'))
                    .repeated()
                    .at_least(1)
                    .map(|tokens| "*".repeat(tokens.len())),
                just(ParagraphSegmentToken::Special('$')).to("$".to_string()),
                just(ParagraphSegmentToken::Special('^')).to("^".to_string()),
                just(ParagraphSegmentToken::Special('/')).to("/".to_string()),
                just(ParagraphSegmentToken::Special('=')).to("=".to_string()),
                wiki_marker.to("?".to_string()),
                just(ParagraphSegmentToken::Special('@')).to("@".to_string()),
            ))
            .then_ignore(
                just(ParagraphSegmentToken::Whitespace)
                    .repeated()
                    .at_least(1),
            )
            .or_not(),
        )
        .then(
            just(ParagraphSegmentToken::Special('}'))
                .not()
                .repeated()
                .at_least(1)
                .or_not(),
        )
        .then_ignore(just(ParagraphSegmentToken::Special('}')))
        .then(anchor.clone().or_not())
        .map(
            |(((filepath, modifiers), content), description)| ParagraphSegment::Link {
                filepath: filepath
                    .map(|content| content.into_iter().map_into::<String>().collect()),
                description: description.map(|content| parse_paragraph(content).unwrap()),
                targets: if let Some(content) = content {
                    vec![if let Some(modifiers) = modifiers {
                        match modifiers.as_str() {
                            "$" => LinkTarget::Definition(parse_paragraph(content).unwrap()),
                            "^" => LinkTarget::Footnote(parse_paragraph(content).unwrap()),
                            "?" => LinkTarget::Wiki(parse_paragraph(content).unwrap()),
                            "=" => LinkTarget::Extendable(parse_paragraph(content).unwrap()),
                            "/" => {
                                LinkTarget::Path(content.into_iter().map_into::<String>().collect())
                            }
                            "@" => LinkTarget::Timestamp(
                                content.into_iter().map_into::<String>().collect(),
                            ),

                            // Only other possibility is a heading.
                            str => LinkTarget::Heading {
                                level: str.len() as u16,
                                title: parse_paragraph(content).unwrap(),
                            },
                        }
                    } else {
                        LinkTarget::Url(content.into_iter().map_into::<String>().collect())
                    }]
                } else {
                    vec![]
                },
            },
        );

    let inline_linkable = just(ParagraphSegmentToken::Special('<'))
        .ignore_then(
            just(ParagraphSegmentToken::Special('>'))
                .not()
                .repeated()
                .at_least(1),
        )
        .then_ignore(just(ParagraphSegmentToken::Special('>')))
        .map(|content| ParagraphSegment::InlineLinkTarget(parse_paragraph(content).unwrap()));

    left_empty_opening_modifier.or_not().chain(
        choice((
            link.clone(),
            anchor
                .clone()
                .then(link)
                .map(|(content, link)| ParagraphSegment::AnchorDefinition {
                    content: parse_paragraph(content).unwrap(),
                    target: Box::new(link),
                }),
            inline_verbatim,
            anchor
                .clone()
                .then(anchor.clone().or_not())
                .map(|(content, description)| ParagraphSegment::Anchor {
                    content: parse_paragraph(content).unwrap(),
                    description: description.map(|content| parse_paragraph(content).unwrap()),
                }),
            inline_linkable,
            opening_modifier_candidate,
            token,
        ))
        .repeated()
        .at_least(1),
    )
}

/// Demotes an opener candidate that directly follows another opener candidate to
/// `AttachedModifierOpenerFail`. All other segments are passed through untouched.
fn dedup_opener_candidates(input: Vec<ParagraphSegment>) -> Vec<ParagraphSegment> {
    use ParagraphSegment::*;

    input
        .into_iter()
        // `coalesce` is only used to look at adjacent pairs; nothing is ever merged, hence the
        // unconditional `Err`.
        .coalesce(|prev, next| {
            let next = match next {
                AttachedModifierOpener(data) if matches!(prev, AttachedModifierOpener(_)) => {
                    AttachedModifierOpenerFail(data)
                }
                other => other,
            };

            Err((prev, next))
        })
        .collect()
}

/// Second paragraph pass: marks every position where an attached modifier could close, i.e. a
/// run of modifier characters preceded by anything but whitespace and followed by whitespace, a
/// special character, or the end of input.
fn paragraph_parser_closer_candidates(
) -> impl Parser<ParagraphSegment, Vec<ParagraphSegment>, Error = chumsky::error::Simple<ParagraphSegment>>
{
    use ParagraphSegment::*;

    let token = any();
    let modifier = select! {
        Token(ParagraphSegmentToken::Special(c @ ('*' | '/' | '_' | '-'))) => c,
    };

    let whitespace_or_special = select! {
        w @ Token(ParagraphSegmentToken::Whitespace) => w,
        s @ Token(ParagraphSegmentToken::Special(_)) => s,
    };

    let closing_modifier_candidate = just(Token(ParagraphSegmentToken::Whitespace))
        .not()
        .then(modifier.repeated().at_least(1))
        .then(whitespace_or_special)
        .map(|((left, modifiers), right)| {
            ParagraphSegment::AttachedModifierCloserCandidate((
                Box::new(left),
                modifiers,
                Some(Box::new(right)),
            ))
        });

    // TODO(vhyrro): This is not optimal, as it causes a second parse of a potentially long string
    // of nodes. Ideally, the `end()` check should be done directly in a single parse.
    let closing_modifier_candidate_with_eof = just(Token(ParagraphSegmentToken::Whitespace))
        .not()
        .then(modifier.repeated().at_least(1))
        .then_ignore(end())
        .map(|(left, modifiers)| {
            ParagraphSegment::AttachedModifierCloserCandidate((Box::new(left), modifiers, None))
        });

    choice((
        closing_modifier_candidate,
        closing_modifier_candidate_with_eof,
        token,
    ))
    .repeated()
    .at_least(1)
}

/// Flattens the grouped candidate segments produced by the parsing passes back into a flat
/// sequence:
///
/// * opener candidates become their left token, one `AttachedModifierCandidate` per modifier
///   character, and their right token;
/// * closer candidates become their left segment, one `AttachedModifierCloser` per modifier
///   character, and their right segment;
/// * failed openers and leftover `AttachedModifierCloser`s are turned back into plain tokens;
/// * everything else is kept as is.
fn unravel_candidates(input: Vec<ParagraphSegment>) -> Vec<ParagraphSegment> {
    use ParagraphSegment::*;

    input
        .into_iter()
        .fold(Vec::new(), |mut acc: Vec<ParagraphSegment>, segment| {
            match segment {
                t @ Token(_) => acc.push(t),
                AttachedModifierOpener((left, modifiers, right)) => {
                    if let Some(left) = left {
                        acc.push(Token(left));
                    }
                    acc.extend(modifiers.into_iter().map(|modifier_type| {
                        AttachedModifierCandidate {
                            modifier_type,
                            content: Vec::default(),
                            closer: None,
                        }
                    }));
                    acc.push(Token(right));
                }
                AttachedModifierCloserCandidate((left, modifiers, right)) => {
                    acc.push(*left);
                    acc.extend(modifiers.into_iter().map(AttachedModifierCloser));
                    if let Some(right) = right {
                        acc.push(*right);
                    }
                }
                AttachedModifierCloser(c) => acc.push(Token(ParagraphSegmentToken::Special(c))),
                AttachedModifierOpenerFail((left, modifiers, right)) => {
                    if let Some(left) = left {
                        acc.push(Token(left));
                    }
                    acc.extend(
                        modifiers
                            .into_iter()
                            .map(|c| Token(ParagraphSegmentToken::Special(c))),
                    );
                    acc.push(Token(right));
                }
                others => acc.push(others),
            };

            acc
        })
}

/// Third paragraph pass: pairs every opener candidate with a closer of the same modifier
/// character and rolls the segments in between up into an `AttachedModifier` (recursively, so
/// modifiers may nest). Segments that do not take part in a pair are passed through.
fn paragraph_rollup_candidates(
) -> impl Parser<ParagraphSegment, Vec<ParagraphSegment>, Error = chumsky::error::Simple<ParagraphSegment>>
{
    let candidate = select! { ParagraphSegment::AttachedModifierCloser(c) => c, };

    let attached_modifier = recursive(|attached_modifier| {
        select! {
            ParagraphSegment::AttachedModifierCandidate { modifier_type, .. } => modifier_type,
        }
        .then(attached_modifier.or(candidate.not()).repeated().at_least(1))
        .then(candidate)
        .try_map(|((modifier_type, content), closer), span| {
            if modifier_type == closer {
                Ok(ParagraphSegment::AttachedModifier {
                    modifier_type,
                    content,
                })
            } else {
                Err(Simple::custom(
                    span,
                    "differing opening and closing modifiers found",
                ))
            }
        })
    });

    choice((attached_modifier, any())).repeated().at_least(1)
}

/// Turns every opener candidate that never found its closer back into the plain special
/// character it came from, followed by whatever content and closer it carried.
fn eliminate_invalid_candidates(input: Vec<ParagraphSegment>) -> Vec<ParagraphSegment> {
    input
        .into_iter()
        .fold(Vec::new(), |mut acc: Vec<ParagraphSegment>, segment| {
            match segment {
                ParagraphSegment::AttachedModifierCandidate {
                    modifier_type,
                    content,
                    closer,
                } => {
                    acc.push(ParagraphSegment::Token(ParagraphSegmentToken::Special(
                        modifier_type,
                    )));
                    acc.extend(content);

                    if let Some(closer) = closer {
                        acc.push(*closer);
                    }
                }
                _ => acc.push(segment),
            };

            acc
        })
}

/// What a link points at.
#[derive(Clone, Hash, Debug, PartialEq, Eq, Serialize)]
pub enum LinkTarget {
    Heading {
        level: u16,
        title: Vec<ParagraphSegment>,
    },
    Footnote(Vec<ParagraphSegment>),
    Definition(Vec<ParagraphSegment>),
    Generic(Vec<ParagraphSegment>),
    Wiki(Vec<ParagraphSegment>),
    Extendable(Vec<ParagraphSegment>),
    Path(String),
    Url(String),
    Timestamp(String),
}

/// A piece of a parsed paragraph.
///
/// The `AttachedModifierOpener`, `AttachedModifierOpenerFail`, `AttachedModifierCloserCandidate`,
/// `AttachedModifierCloser` and `AttachedModifierCandidate` variants are intermediate states
/// used between the parsing passes; `parse_paragraph` resolves them into `AttachedModifier`s
/// or plain tokens.
#[derive(Debug, Clone, PartialEq, Serialize, Hash, Eq)]
pub enum ParagraphSegment {
    Token(ParagraphSegmentToken),
    AttachedModifierOpener(
        (
            Option<ParagraphSegmentToken>,
            Vec<char>,
            ParagraphSegmentToken,
        ),
    ),
    AttachedModifierOpenerFail(
        (
            Option<ParagraphSegmentToken>,
            Vec<char>,
            ParagraphSegmentToken,
        ),
    ),
    AttachedModifierCloserCandidate(
        (
            Box<ParagraphSegment>,
            Vec<char>,
            Option<Box<ParagraphSegment>>,
        ),
    ),
    AttachedModifierCloser(char),
    AttachedModifierCandidate {
        modifier_type: char,
        content: Vec<Self>,
        closer: Option<Box<Self>>,
    },
    AttachedModifier {
        modifier_type: char,
        content: Vec<Self>,
    },
    Link {
        filepath: Option<String>,
        targets: Vec<LinkTarget>,
        description: Option<Vec<Self>>,
    },
    AnchorDefinition {
        content: Vec<Self>,
        target: Box<Self>,
    },
    Anchor {
        content: Vec<Self>,
        description: Option<Vec<Self>>,
    },
    InlineLinkTarget(Vec<Self>),
    InlineVerbatim(Vec<ParagraphSegmentToken>),
}

/// Parses the tokens of one paragraph into segments (text tokens, attached modifiers, links,
/// anchors, inline verbatim, inline link targets).
///
/// # Errors
///
/// Returns the errors of the first pass, e.g. when `input` is empty.
///
/// # Panics
///
/// Panics if one of the later passes fails. Both end in a catch-all `any()` alternative and are
/// only ever fed the non-empty output of the previous pass, so this is not expected to happen.
fn parse_paragraph(
    input: Vec<ParagraphSegmentToken>,
) -> Result<Vec<ParagraphSegment>, Vec<Simple<ParagraphSegmentToken>>> {
    let with_openers = unravel_candidates(dedup_opener_candidates(
        paragraph_parser_opener_candidates_and_links().parse(input)?,
    ));

    let with_closers = unravel_candidates(
        paragraph_parser_closer_candidates()
            .parse(with_openers)
            .expect("closer pass accepts any non-empty segment list"),
    );

    let rolled_up = unravel_candidates(
        paragraph_rollup_candidates()
            .parse(with_closers)
            .expect("roll-up pass accepts any non-empty segment list"),
    );

    Ok(eliminate_invalid_candidates(rolled_up))
}

/// The flat (not yet nested by heading or list level) syntax tree produced by stage 3.
#[derive(Clone, Debug, PartialEq, Hash, Eq, Serialize)]
pub enum NorgASTFlat {
    Paragraph(Vec<ParagraphSegment>),
    NestableDetachedModifier {
        modifier_type: NestableDetachedModifier,
        level: u16,
        extensions: Vec<DetachedModifierExtension>,
        content: Box<Self>,
    },
    RangeableDetachedModifier {
        modifier_type: RangeableDetachedModifier,
        title: Vec<ParagraphSegment>,
        extensions: Vec<DetachedModifierExtension>,
        content: Vec<Self>,
    },
    Heading {
        level: u16,
        title: Vec<ParagraphSegment>,
        extensions: Vec<DetachedModifierExtension>,
    },
    CarryoverTag {
        tag_type: CarryoverTag,
        name: Vec<String>,
        parameters: Vec<String>,
        next_object: Box<Self>,
    },
    VerbatimRangedTag {
        name: Vec<String>,
        parameters: Vec<String>,
        content: String,
    },
    RangedTag {
        name: Vec<String>,
        parameters: Vec<String>,
        content: Vec<Self>,
    },
    InfirmTag {
        name: Vec<String>,
        parameters: Vec<String>,
    },
    DelimitingModifier(DelimitingModifier),
}

#[derive(Clone, Hash, Debug, PartialEq, Eq, Serialize)]
pub enum DelimitingModifier {
    Weak,
    Strong,
    HorizontalRule,
}

/// Parses the contents of an extension section (the part between the parentheses, e.g. `x` or
/// `# A|< 1 Jan 2025`) into a list of extensions.
///
/// Extensions are separated by `|`. Each one starts with a marker token, optionally followed by
/// whitespace and free-form metadata running up to the next `|`.
fn detached_modifier_extensions() -> impl Parser<
    ParagraphSegmentToken,
    Vec<DetachedModifierExtension>,
    Error = chumsky::error::Simple<ParagraphSegmentToken>,
> {
    use ParagraphSegmentToken::*;

    let detached_modifier_extension_tokens = select! {
        c @ Special('@' | '#' | '<' | '>' | '+' | '=' | '_' | '-' | '!') => c,
        Whitespace => Whitespace,
        Text(c) if c == "x" || c == "?" => Text(c),
    };

    let detached_modifier_extension = detached_modifier_extension_tokens
        .then(
            just(Whitespace)
                .ignore_then(select!(Special('|') => Special('|')).not().repeated())
                .or_not()
                // No metadata at all is represented as the empty string.
                .map(|tokens| {
                    tokens
                        .map(|tokens| tokens.into_iter().map_into::<String>().collect::<String>())
                        .unwrap_or_default()
                }),
        )
        .map(|(spec, metadata)| match spec {
            Special('@') => DetachedModifierExtension::Timestamp(metadata),
            Special('#') => DetachedModifierExtension::Priority(metadata),
            Special('<') => DetachedModifierExtension::DueDate(metadata),
            Special('>') => DetachedModifierExtension::StartDate(metadata),
            Special('+') => {
                DetachedModifierExtension::Todo(TodoStatus::Recurring(if metadata.is_empty() {
                    None
                } else {
                    Some(metadata)
                }))
            }
            Special('=') => DetachedModifierExtension::Todo(TodoStatus::Paused),
            Special('_') => DetachedModifierExtension::Todo(TodoStatus::Canceled),
            Special('-') => DetachedModifierExtension::Todo(TodoStatus::Pending),
            Special('!') => DetachedModifierExtension::Todo(TodoStatus::Urgent),
            Whitespace => DetachedModifierExtension::Todo(TodoStatus::Undone),
            Text(str) if str == "x" => DetachedModifierExtension::Todo(TodoStatus::Done),
            Text(str) if str == "?" => {
                DetachedModifierExtension::Todo(TodoStatus::NeedsClarification)
            }
            _ => unreachable!("the token selector above admits no other markers"),
        });

    detached_modifier_extension
        .separated_by(just(Special('|')))
        .at_least(1)
}

pub fn stage_3(
) -> impl Parser<NorgBlock, Vec<NorgASTFlat>, Error = chumsky::error::Simple<NorgBlock>> {
    recursive(|stage_3| {
        let paragraph_segment = select! {
            NorgBlock::ParagraphSegment(content) => content,
        };

        let paragraph_segment_end = select!
{ 629 | NorgBlock::ParagraphSegmentEnd(content) => content, 630 | }; 631 | 632 | let paragraph = choice(( 633 | paragraph_segment 634 | .repeated() 635 | .at_least(1) 636 | .flatten() 637 | .chain(paragraph_segment_end.or_not()), 638 | paragraph_segment_end, 639 | )) 640 | .map(|mut tokens| { 641 | // Trim trailing whitespace (both user-induced but also induced by us when 642 | // converting single newlines to whitespace). 643 | if let Some(ParagraphSegmentToken::Whitespace) = tokens.last() { 644 | tokens.pop(); 645 | } 646 | 647 | NorgASTFlat::Paragraph(parse_paragraph(tokens).unwrap()) 648 | }); 649 | 650 | let nestable_detached_modifier = select! { 651 | NorgBlock::NestableDetachedModifier { modifier_type: '-', level, extension_section } => (NestableDetachedModifier::UnorderedList, level, extension_section), 652 | NorgBlock::NestableDetachedModifier { modifier_type: '~', level, extension_section } => (NestableDetachedModifier::OrderedList, level, extension_section), 653 | NorgBlock::NestableDetachedModifier { modifier_type: '>', level, extension_section } => (NestableDetachedModifier::Quote, level, extension_section), 654 | }.then(paragraph).map(|((modifier_type, level, extension_section), paragraph)| NorgASTFlat::NestableDetachedModifier { 655 | modifier_type, 656 | level, 657 | extensions: detached_modifier_extensions().parse(extension_section).unwrap_or_default(), 658 | content: Box::new(paragraph), 659 | }); 660 | 661 | let nonranged_detached_modifier = select! 
{ 662 | NorgBlock::RangeableDetachedModifier { modifier_type: '$', ranged: false, title, extension_section } => (RangeableDetachedModifier::Definition, title, extension_section), 663 | NorgBlock::RangeableDetachedModifier { modifier_type: '^', ranged: false, title, extension_section} => (RangeableDetachedModifier::Footnote, title, extension_section), 664 | NorgBlock::RangeableDetachedModifier { modifier_type: ':', ranged: false, title, extension_section } => (RangeableDetachedModifier::Table, title, extension_section), 665 | }.then(paragraph).map(|((modifier_type, title, extension_section), paragraph)| NorgASTFlat::RangeableDetachedModifier { 666 | modifier_type, 667 | title: parse_paragraph(title).unwrap(), 668 | extensions: detached_modifier_extensions().parse(extension_section).unwrap_or_default(), 669 | content: vec![paragraph], 670 | }); 671 | 672 | let ranged_detached_modifier = select! { 673 | NorgBlock::RangeableDetachedModifier { modifier_type: '$', ranged: true, title, extension_section } => ('$', RangeableDetachedModifier::Definition, title, extension_section), 674 | NorgBlock::RangeableDetachedModifier { modifier_type: '^', ranged: true, title, extension_section } => ('^', RangeableDetachedModifier::Footnote, title, extension_section), 675 | NorgBlock::RangeableDetachedModifier { modifier_type: ':', ranged: true, title, extension_section } => (':', RangeableDetachedModifier::Table, title, extension_section), 676 | } 677 | .then(stage_3.clone().repeated()) 678 | .then(select! 
{ NorgBlock::RangeableDetachedModifierClose(c) => c }) 679 | .try_map(|(((opening_ch, modifier_type, title, extension_section), content), closing_ch), span| 680 | if opening_ch == closing_ch { 681 | Ok(NorgASTFlat::RangeableDetachedModifier { 682 | modifier_type, 683 | title: parse_paragraph(title).unwrap(), 684 | extensions: detached_modifier_extensions().parse(extension_section).unwrap_or_default(), 685 | content, 686 | }) 687 | } else { 688 | Err(Simple::custom(span, format!("Expected '{0}{0}' to close modifier, found '{1}{1}' instead.", opening_ch, closing_ch))) 689 | }); 690 | 691 | let heading = select! { 692 | NorgBlock::Heading { level, title, extension_section } => (level, title, extension_section), 693 | } 694 | .try_map(move |(level, title, extension_section), _span| Ok(NorgASTFlat::Heading { 695 | level, 696 | title: parse_paragraph(title).unwrap(), 697 | extensions: detached_modifier_extensions().parse(extension_section).unwrap_or_default(), 698 | })); 699 | 700 | let stringify_tokens_and_split = move |tokens: ParagraphTokenList| -> Vec { 701 | tokens.into_iter().map_into::().collect::().split('.').map_into().collect() 702 | }; 703 | 704 | let carryover_tag = select! { 705 | NorgBlock::CarryoverTag { tag_type: '+', name, parameters } => (CarryoverTag::Attribute, name, parameters), 706 | NorgBlock::CarryoverTag { tag_type: '#', name, parameters } => (CarryoverTag::Macro, name, parameters), 707 | }.then(stage_3.clone()).map(move |((tag_type, name, parameters), next_object)| { 708 | NorgASTFlat::CarryoverTag { 709 | tag_type, 710 | name: stringify_tokens_and_split(name), 711 | parameters: parameters.unwrap_or_default().into_iter().map(|parameter| parameter.into_iter().map_into::().collect()).collect(), 712 | next_object: Box::new(next_object), 713 | } 714 | }); 715 | 716 | let verbatim_ranged_tag = select! 
{ 717 | NorgBlock::VerbatimRangedTag { name, parameters, content } => { 718 | NorgASTFlat::VerbatimRangedTag { 719 | name: stringify_tokens_and_split(name), 720 | parameters: parameters.unwrap_or_default().into_iter().map(|parameter| parameter.into_iter().map_into::().collect()).collect(), 721 | content: dedent(content.into_iter().map_into::().collect::().as_str()), 722 | } 723 | }, 724 | }; 725 | 726 | let ranged_tag = select! { 727 | NorgBlock::RangedTag { tag_type: '=', name, parameters } => (RangedTag::Macro, stringify_tokens_and_split(name), parameters.unwrap_or_default().into_iter().map(|parameter| parameter.into_iter().map_into::().collect()).collect()), 728 | NorgBlock::RangedTag { tag_type: '|', name, parameters } => (RangedTag::Standard, stringify_tokens_and_split(name), parameters.unwrap_or_default().into_iter().map(|parameter| parameter.into_iter().map_into::().collect()).collect()) 729 | }.then(stage_3.repeated()).then(select! { 730 | NorgBlock::RangedTagEnd('=') => RangedTag::Macro, 731 | NorgBlock::RangedTagEnd('|') => RangedTag::Standard, 732 | }).try_map(|(((tag_type, name, parameters), content), closing_tag_type), span| if tag_type == closing_tag_type { 733 | Ok(NorgASTFlat::RangedTag { name, parameters, content }) 734 | } else { 735 | Err(Simple::custom(span, "Invalid closing modifier for ranged tag.")) // TODO: Improve errors 736 | }); 737 | 738 | let infirm_tag = select! { 739 | NorgBlock::InfirmTag { name, parameters, } => NorgASTFlat::InfirmTag { name: stringify_tokens_and_split(name), parameters: parameters.unwrap_or_default().into_iter().map(|parameter| parameter.into_iter().map_into::().collect()).collect() }, 740 | }; 741 | 742 | let delimiting_mod = select! 
{ 743 | NorgBlock::DelimitingModifier('-') => NorgASTFlat::DelimitingModifier(DelimitingModifier::Weak), 744 | NorgBlock::DelimitingModifier('=') => NorgASTFlat::DelimitingModifier(DelimitingModifier::Strong), 745 | NorgBlock::DelimitingModifier('_') => NorgASTFlat::DelimitingModifier(DelimitingModifier::HorizontalRule), 746 | }; 747 | 748 | choice(( 749 | carryover_tag, 750 | verbatim_ranged_tag, 751 | ranged_tag, 752 | infirm_tag, 753 | delimiting_mod, 754 | heading, 755 | nestable_detached_modifier, 756 | nonranged_detached_modifier, 757 | ranged_detached_modifier, 758 | paragraph, 759 | )) 760 | }).repeated().at_least(1) 761 | } 762 | -------------------------------------------------------------------------------- /src/stage_4.rs: -------------------------------------------------------------------------------- 1 | use serde::Serialize; 2 | 3 | use crate::{ 4 | stage_3::{DelimitingModifier, NorgASTFlat, ParagraphSegment}, 5 | CarryoverTag, DetachedModifierExtension, NestableDetachedModifier, RangeableDetachedModifier, 6 | }; 7 | 8 | #[derive(Debug, PartialEq, Eq, Clone, Hash, Serialize)] 9 | pub enum NorgAST { 10 | Paragraph(Vec), 11 | NestableDetachedModifier { 12 | modifier_type: NestableDetachedModifier, 13 | level: u16, 14 | extensions: Vec, 15 | text: Box, 16 | content: Vec, 17 | }, 18 | RangeableDetachedModifier { 19 | modifier_type: RangeableDetachedModifier, 20 | title: Vec, 21 | extensions: Vec, 22 | content: Vec, 23 | }, 24 | Heading { 25 | level: u16, 26 | title: Vec, 27 | extensions: Vec, 28 | content: Vec, 29 | }, 30 | CarryoverTag { 31 | tag_type: CarryoverTag, 32 | name: Vec, 33 | parameters: Vec, 34 | next_object: Box, 35 | }, 36 | VerbatimRangedTag { 37 | name: Vec, 38 | parameters: Vec, 39 | content: String, 40 | }, 41 | RangedTag { 42 | name: Vec, 43 | parameters: Vec, 44 | content: Vec, 45 | }, 46 | InfirmTag { 47 | name: Vec, 48 | parameters: Vec, 49 | }, 50 | DelimitingModifier(DelimitingModifier), 51 | } 52 | 53 | fn convert(flat: 
NorgASTFlat) -> NorgAST { 54 | match flat { 55 | NorgASTFlat::Paragraph(tokens) => NorgAST::Paragraph(tokens), 56 | NorgASTFlat::RangeableDetachedModifier { 57 | modifier_type, 58 | title, 59 | extensions, 60 | content, 61 | } => NorgAST::RangeableDetachedModifier { 62 | modifier_type, 63 | title, 64 | extensions, 65 | content, 66 | }, 67 | NorgASTFlat::VerbatimRangedTag { 68 | name, 69 | parameters, 70 | content, 71 | } => NorgAST::VerbatimRangedTag { 72 | name, 73 | parameters, 74 | content, 75 | }, 76 | NorgASTFlat::RangedTag { 77 | name, 78 | parameters, 79 | content, 80 | } => NorgAST::RangedTag { 81 | name, 82 | parameters, 83 | content, 84 | }, 85 | NorgASTFlat::InfirmTag { name, parameters } => NorgAST::InfirmTag { name, parameters }, 86 | NorgASTFlat::DelimitingModifier(t) => NorgAST::DelimitingModifier(t), 87 | NorgASTFlat::NestableDetachedModifier { 88 | modifier_type, 89 | level, 90 | extensions, 91 | content, 92 | } => NorgAST::NestableDetachedModifier { 93 | modifier_type, 94 | level, 95 | extensions, 96 | text: content, 97 | content: vec![], 98 | }, 99 | NorgASTFlat::Heading { 100 | level, 101 | title, 102 | extensions, 103 | } => NorgAST::Heading { 104 | level, 105 | title, 106 | extensions, 107 | content: vec![], 108 | }, 109 | NorgASTFlat::CarryoverTag { 110 | tag_type, 111 | name, 112 | parameters, 113 | next_object, 114 | } => NorgAST::CarryoverTag { 115 | tag_type, 116 | name, 117 | parameters, 118 | next_object: Box::new(convert(*next_object.clone())), 119 | }, 120 | } 121 | } 122 | 123 | fn consume_heading_content(start_level: &u16, flat: &[NorgASTFlat], i: &mut usize) -> Vec { 124 | let mut heading_level = *start_level as i16; 125 | let mut content = vec![]; 126 | let mut seen = false; 127 | for j in (*i + 1)..flat.len() { 128 | match &flat[j] { 129 | NorgASTFlat::Heading { level, .. } => { 130 | if level <= start_level { 131 | // stop. 
132 | content = stage_4(flat[(*i + 1)..j].to_vec()); 133 | *i = j - 1; 134 | seen = true; 135 | break; 136 | } else { 137 | heading_level = *level as i16; 138 | } 139 | } 140 | NorgASTFlat::DelimitingModifier(DelimitingModifier::Weak) => { 141 | heading_level -= 1; 142 | if heading_level < *start_level as i16 { 143 | content = stage_4(flat[(*i + 1)..j].to_vec()); 144 | *i = j; 145 | seen = true; 146 | break; 147 | } 148 | } 149 | NorgASTFlat::DelimitingModifier(DelimitingModifier::Strong) => { 150 | content = stage_4(flat[(*i + 1)..j].to_vec()); 151 | *i = j; 152 | seen = true; 153 | break; 154 | } 155 | NorgASTFlat::CarryoverTag { next_object, .. } 156 | if matches!(**next_object, NorgASTFlat::Heading { .. }) => 157 | { 158 | if let NorgASTFlat::Heading { level, .. } = **next_object { 159 | if level <= *start_level { 160 | // stop. 161 | content = stage_4(flat[(*i + 1)..j].to_vec()); 162 | *i = j - 1; 163 | seen = true; 164 | break; 165 | } else { 166 | heading_level = level as i16; 167 | } 168 | } else { 169 | unreachable!() 170 | } 171 | } 172 | _ => {} 173 | } 174 | } 175 | if !seen { 176 | content = stage_4(flat[*i + 1..].to_vec()); 177 | *i = flat.len(); 178 | } 179 | content 180 | } 181 | 182 | /// Loop over the given flat tree from the given index `i` until a non-NestableDetachedModifier is 183 | /// found, OR until a NestableDetachedModifier with level <= the given start_level. 184 | /// 185 | /// In English: finds all the stuff that should be in the `content` field of the 186 | /// NorgAST::NestableDetachedModifier, and returns it 187 | /// 188 | /// **Mutates** i to be the index in `flat` that we stopped consuming values at. 189 | fn consume_nestable_detached_mod_content( 190 | start_level: &u16, 191 | flat: &[NorgASTFlat], 192 | i: &mut usize, 193 | ) -> Vec { 194 | let mut content = vec![]; 195 | for j in (*i + 1)..flat.len() { 196 | match &flat[j] { 197 | NorgASTFlat::NestableDetachedModifier { level, .. 
} => { 198 | if level <= start_level { 199 | content = stage_4(flat[(*i + 1)..j].to_vec()); 200 | *i = j - 1; 201 | break; 202 | } else if j == flat.len() - 1 { 203 | content = stage_4(flat[(*i + 1)..].to_vec()); 204 | *i = j + 1; 205 | break; 206 | } 207 | } 208 | NorgASTFlat::CarryoverTag { next_object, .. } 209 | if matches!(**next_object, NorgASTFlat::NestableDetachedModifier { .. }) => 210 | { 211 | if let NorgASTFlat::NestableDetachedModifier { level, .. } = **next_object { 212 | if level <= *start_level { 213 | content = stage_4(flat[(*i + 1)..j].to_vec()); 214 | *i = j - 1; 215 | break; 216 | } else if j == flat.len() - 1 { 217 | content = stage_4(flat[(*i + 1)..].to_vec()); 218 | *i = j + 1; 219 | break; 220 | } 221 | } else { 222 | unreachable!() 223 | } 224 | } 225 | _ => { 226 | content = stage_4(flat[(*i + 1)..j].to_vec()); 227 | *i = j - 1; 228 | // stop immediately if we see something that's not a NestableDetachedModifier 229 | // of lesser level 230 | break; 231 | } 232 | } 233 | } 234 | 235 | content 236 | } 237 | 238 | pub fn stage_4(flat: Vec) -> Vec { 239 | let mut ast = vec![]; 240 | let mut i = 0; 241 | while i < flat.len() { 242 | let item = &flat[i]; 243 | match item { 244 | NorgASTFlat::Heading { 245 | level: start_level, 246 | title, 247 | extensions, 248 | } => { 249 | let content = consume_heading_content(start_level, &flat, &mut i); 250 | 251 | ast.push(NorgAST::Heading { 252 | level: *start_level, 253 | title: title.to_vec(), 254 | extensions: extensions.to_vec(), 255 | content, 256 | }) 257 | } 258 | NorgASTFlat::CarryoverTag { 259 | tag_type, 260 | name, 261 | parameters, 262 | next_object, 263 | } => { 264 | match *next_object.clone() { 265 | NorgASTFlat::Heading { 266 | level, 267 | title, 268 | extensions, 269 | } => { 270 | let content = consume_heading_content(&level, &flat, &mut i); 271 | ast.push(NorgAST::CarryoverTag { 272 | tag_type: tag_type.clone(), 273 | name: name.to_vec(), 274 | parameters: parameters.to_vec(), 275 | 
next_object: Box::new(NorgAST::Heading { 276 | level, 277 | title, 278 | extensions, 279 | content, 280 | }), 281 | }) 282 | } 283 | NorgASTFlat::NestableDetachedModifier { 284 | modifier_type, 285 | level, 286 | extensions, 287 | content, 288 | } => { 289 | let new_content = 290 | consume_nestable_detached_mod_content(&level, &flat, &mut i); 291 | ast.push(NorgAST::CarryoverTag { 292 | tag_type: tag_type.clone(), 293 | name: name.to_vec(), 294 | parameters: parameters.to_vec(), 295 | next_object: Box::new(NorgAST::NestableDetachedModifier { 296 | modifier_type, 297 | level, 298 | extensions, 299 | text: content, 300 | content: new_content, 301 | }), 302 | }) 303 | } 304 | _ => { 305 | ast.push(convert(item.clone())) 306 | } 307 | } 308 | } 309 | NorgASTFlat::NestableDetachedModifier { 310 | level: start_level, 311 | modifier_type, 312 | extensions, 313 | content: text, 314 | } => { 315 | let content = consume_nestable_detached_mod_content(start_level, &flat, &mut i); 316 | 317 | ast.push(NorgAST::NestableDetachedModifier { 318 | modifier_type: modifier_type.clone(), 319 | level: *start_level, 320 | extensions: extensions.to_vec(), 321 | text: text.clone(), 322 | content, 323 | }); 324 | } 325 | _ => { 326 | ast.push(convert(item.clone())); 327 | } 328 | } 329 | 330 | i += 1; 331 | } 332 | 333 | ast 334 | } 335 | --------------------------------------------------------------------------------